Author Topic: Non-Standard gBuf Ideas  (Read 4726 times)

0 Members and 1 Guest are viewing this topic.

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Non-Standard gBuf Ideas
« on: July 16, 2013, 02:02:25 pm »
I have had this idea since my first attempt at an OS, but I ran into a few problems. Basically, I wanted to store the graph buffer in columns because I thought it would be very useful for drawing tiles and updating the LCD. Then I started thinking about line drawing, circle drawing, and anything that would cross a byte boundary and I realised that some routines would take a major hit to speed.

I am working on a project and I am still trying to decide if it would be beneficial to organise the screen in this manner. Here is an example of a tile drawing routine using the current buffer setup:
Code: [Select]
drawtile:
;Copy an 8x8 tile onto a row-major buffer (12 bytes per pixel row).
;Inputs:
;     DE points to the 8 bytes of tile data
;     BC = (y,x), with X on [0,11] and Y on [0,7]
;     (DrawBufPtr) = base address of the buffer
;Destroys A, BC, DE, HL
;
;Destination offset = 96*y + x  (8 pixel rows * 12 bytes = 96 bytes per tile row)
     ld a,b
     add a,a            ;2*y
     add a,b            ;3*y
     add a,a            ;6*y
     add a,a            ;12*y
     add a,a            ;24*y, at most 168 so it still fits in 8 bits
     ld l,a
     ld h,0
     ld b,h             ;BC = x
     add hl,hl          ;48*y
     add hl,hl          ;96*y
     add hl,bc          ;+x
     ld bc,(DrawBufPtr)
     add hl,bc          ;HL = first destination byte

     ld a,8             ;rows left to copy
     ld bc,12           ;bytes per buffer row
drawtile_row:
       ex de,hl         ;HL = tile, DE = buffer
       ldi              ;copy one byte; HL and DE advance, BC drops to 11
       ex de,hl         ;HL = buffer+1, DE = tile+1
       add hl,bc        ;+11 more = same column, next pixel row
       inc c            ;restore BC to 12
       dec a
       jr nz,drawtile_row
     ret
And here is how it looks the other way:
Code: [Select]
drawtile:
;Copy an 8x8 tile onto a column-major buffer (64 bytes per 8-pixel-wide column).
;Inputs:
;     DE points to the 8 bytes of tile data
;     BC = (y,x)
;     (DrawBufPtr) = base address of the buffer
;Destroys A, BC, DE, HL
;
;Destination offset = 64*x + 8*y
;64*x is formed as a 16-bit value by shifting x right twice, catching the
;two bits that fall out in the top of L.
     ld l,0
     ld a,c
     or a               ;clear carry so the first rra shifts in a 0
     rra
     rr l
     rra
     rr l
     ld h,a             ;HL = 64*x
;y*8 only touches bits 3-5, so it cannot carry out of L
     ld a,b
     add a,a
     add a,a
     add a,a
     add a,l
     ld l,a
     ld bc,(DrawBufPtr)
     add hl,bc          ;HL = destination
     ex de,hl           ;HL = tile data, DE = destination
     ld bc,8
     ldir               ;the 8 rows of a tile are contiguous in this layout
     ret
The former is 565 t-states and 33 bytes; the latter is 281 t-states and 28 bytes. There are ways to optimise both routines for speed.
Spoiler For Optimised:
Taking the first routine and replacing the sprite drawing code:
Code: [Select]
     push hl \ pop ix   ;IX = HL, the destination address
;IX points to where it gets drawn
;DE is the sprite layer
;Unrolled copy of the 8 sprite bytes. Consecutive pixel rows are 12 bytes apart
;in the buffer, so the row offsets are baked into the (ix+n) displacements.
;The number in each comment is the t-state cost of that instruction.
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix),a    ;19
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix+12),a ;19
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix+24),a ;19
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix+36),a ;19
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix+48),a ;19
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix+60),a ;19
     ld a,(de)    ; 7
     inc de       ; 6
     ld (ix+72),a ;19
     ld a,(de)    ; 7
;no inc de after the last byte, so DE is left pointing at the final sprite byte
     ld (ix+84),a ;19
     ret
Now it is 388 t-states (saving 187 cycles), but the cost is 25 bytes. The latter has a similar optimisation (unrolling). It saves only 45 t-states at the cost of 11 bytes by replacing the LD BC,8 \ LDIR with 8 LDI instructions.
If you have ever written your own LCD updating routine, you probably already realized just how straightforward this would make the routine (and if we had no LCD delay, it would amount to basically 12 iterations of ld b,64 \ otir). We typically don't need to optimise for speed with such an LCD update because most of the time, the code is waiting for the LCD to respond before moving to the next byte. However, if you are doing something like grayscale or interleaving another routine with the LCD update (like drawing a tilemap at the same time), this gives you even more time to do more complicated things with the LCD, putting your 'waste cycles' to more use.

Sprite Drawing
The reason for why some drawing will be so easy is that each 8 columns of pixels is 64 bytes which is much nicer to work with than a row of pixels being 12 bytes. We also see a huge boost in performance when moving down or up a pixel because that only requires an increment or decrement of a pointer, instead of adding 12 each time. However, now we get the same problem when moving left or right across byte boundaries. This means that sprite routines could take a hit, but let's see how far we can remedy this.

This will be a very simple routine to XOR an 8x8 sprite to the gbuf:
Code: [Select]
PutSprite8x8:
;XOR an 8x8 sprite onto the column-major buffer at 9340h.
;Note: No clipping.
;Inputs:
;     BC = (x,y)
;     IX points to the sprite
;     1871 worst-case
;The unaligned path patches the two jr offsets at smc_jump1/smc_jump2, so this
;routine has to execute from writable memory.
     ld a,b
     and $F8
     ld h,0
     rla \ rl h
     rla \ rl h
     rla \ rl h
     ld l,a          ;HL = (x and F8h)*8 = 64*(x/8), the offset of the byte column
     ld a,b
     ld b,0
     add hl,bc       ;+y
     ld bc,9340h
     add hl,bc
;HL points to the first byte to draw at
     and 7           ;A = x mod 8 = how far the sprite is shifted into the byte
     jr nz,crossedbound
;Byte-aligned case: XOR the 8 sprite bytes straight down one column
       push ix \ pop de
       ld b,8
         ld a,(de)
         xor (hl)
         ld (hl),a
         inc hl
         inc de
         djnz $-5    ;back to the ld a,(de)
       ret
crossedbound:
;A = shift (1..7). The sprite straddles two byte columns.
     ld b,a
     dec a
     ld (smc_jump1),a   ;jr offset = shift-1: skips that many of the 7 rlca below,
     ld (smc_jump2),a   ;leaving 8-shift left rotations = a right rotation by shift
     ld a,1
       rrca
       djnz $-1
     dec a              ;A = 0FFh >> shift: the bits that stay in the left column
     ld e,a
     ld c,8
;E is the mask
;IX points to the sprite
;HL points to where to draw
drawloop1:
;left column: rotated sprite row, low part kept by the mask
     ld a,(ix)
     .db 18h      ;start of jr *
smc_jump1:
     .db 0        ;jr displacement, patched above
       rlca
       rlca
       rlca
       rlca
       rlca
       rlca
       rlca
     and e
     xor (hl)
     ld (hl),a
     inc ix
     inc hl
     dec c
     jr nz,drawloop1
     ld c,56
     add hl,bc    ;B is 0 here; HL already advanced 8, so +56 lands 64 bytes on = next column
     ld a,e
     cpl
     ld e,a       ;inverted mask keeps the bits that wrapped around into the right column
     ld c,8
drawloop2:
;right column: IX has already advanced 8, so (ix-8) walks the same sprite rows again
     ld a,(ix-8)
     .db 18h      ;start of jr *
smc_jump2:
     .db 0        ;jr displacement, patched above
       rlca
       rlca
       rlca
       rlca
       rlca
       rlca
       rlca
     and e
     xor (hl)
     ld (hl),a
     inc ix
     inc hl
     dec c
     jr nz,drawloop2
     ret
That actually turns out to be pretty fast, so if you need to draw sprites, this is still a viable buffer setup.

LCD Updating
As promised, the routine to update the LCD is fairly straightforward:
Code: [Select]
;Copy the 768-byte column-major buffer at 9340h to the LCD.
;lcddelay() polls the LCD command port until bit 7 reads 0. It destroys A.
#define     lcddelay()  in a,(10h) \ rlca \ jr c,$-3
     ld a,05h           ;select the increment mode
     out (10h),a
     lcddelay()
     ld a,80h           ;row pointer to the top
     out (10h),a
     ld hl,9340h        ;HL walks the whole buffer
     lcddelay()
     ld a,20h           ;first column command
lcd_col:
     out (10h),a        ;select the column held in A
     push af            ;lcddelay() trashes A, keep the column number on the stack
     ld bc,4011h        ;B = 64 bytes per column, C = LCD data port
lcd_row:
     lcddelay()
     outi               ;send (HL) to port C, HL+1, B-1
     jr nz,lcd_row
     lcddelay()
     pop af
     inc a
     cp 2Ch             ;20h..2Bh are the 12 columns
     jr nz,lcd_col
     ret

Note that if you are only ever doing fullscreen updates (or at least full columns) and you are always using the same increment mode, you can leave the first part of that code in a setup portion of your code:
Code: [Select]
     .org 9D93h
     .db $BB,$6D
Start:
     ld a,5        ;set the increment mode, only needs to be done once
     out (16),a
     lcddelay()
     ld a,80h    ;set the row pointer, only needs to be done once, since the LCD update routine leaves it where it started.
     out (16),a
Main:

<code>

UpdateLCD:
;Copy the 768-byte column-major buffer at 9340h to the LCD, one 64-byte column
;at a time. Assumes the increment mode and row pointer were set up beforehand.
;Destroys A, BC, HL.
;
;The busy flag is polled before the first command as well: the caller may have
;written to the LCD just before getting here (the setup code ends with an
;un-waited row command), and a command sent while the driver is busy is lost.
     lcddelay()
     ld hl,9340h
     ld a,20h           ;first column command
col:
     out (16),a         ;select the column held in A
     push af            ;lcddelay() trashes A, keep the column number on the stack
     ld bc,4011h        ;B = 64 bytes per column, C = LCD data port (11h)
row:
     lcddelay()
     outi               ;send (HL) to the data port, HL+1, B-1
     jr nz,row
     lcddelay()
     pop af
     inc a
     cp 2Ch             ;20h..2Bh are the 12 columns
     jr nz,col
     ret

Pixel Plotting
Code: [Select]
GetPixelLoc:
;Locate a pixel in a column-major buffer (64 bytes per 8-pixel-wide column).
;Inputs:
;     BC =(x,y)
;     DE is the buffer on which to draw
;Outputs:
;     Returns HL pointing to the byte where the pixel gets plotted
;     Returns A as a mask
;     NC returned if out of bounds, else C if in bounds
;     When in bounds, B is 0 and C is still y
     ld a,c \ cp 64 \ ret nc     ;y must be 0..63
     ld a,b \ cp 96 \ ret nc     ;x must be 0..95
     and $F8
     ld h,0
     rla \ rl h
     rla \ rl h
     rla \ rl h
     ld l,a                      ;HL = 64*(x/8)
     ld a,b
     ld b,0
     add hl,bc                   ;+y
     add hl,de                   ;+buffer base
;HL points to the first byte to draw at
     and 7
     ld b,a
     inc b                       ;rotate (x mod 8)+1 times: 01h -> 80h, 40h, ... 01h
     ld a,1
getpixelloc_mask:
     rrca
     djnz getpixelloc_mask
     scf                         ;report "in bounds"
     ret
Now to set the pixel, use or (hl)  \ ld (hl),a or use xor to invert, and to erase, cpl \ and (hl) \ ld (hl),a.

Final Analysis
It turns out that most drawing is faster and that my original fears were just based on me being too accustomed to one way of doing things. Line drawing, circle drawing, and rectangle drawing are all faster (lines and circles just because it is faster to locate a pixel, rectangles because it just works fantastically). Sprites, tiles, and LCD updating work out great. However, there is one area that does in fact take a hit, and that is scrolling the screen. Shifting up and down is still relatively easy, but shifting left and right will be slower and more complicated. Shifting up or down is just shifting the whole buffer 1 byte instead of 12, which is the same speed. Here is shifting right:
Code: [Select]
     ld hl,9340h     ;gbuf
     ld de,64
     ld c,e
loop:
     or a
     ld b,12
       rr (hl)
       push af \ add hl,de \ pop af
       djnz $-5
       dec h \ dec h \ dec h
       inc l
       dec c
       jr nz,loop
     ret
That is now half the speed of what it is for the current gbuf setup. We can cut out 9828 t-states if interrupts are off, though, but that is still a huge hit to speed.

Aside from that, I like the idea of organising the buffer this way.

EDIT: Modified a few routines to be smaller, no speed change, though.
EDIT2: Added a link to the rectangle routines below.

Offline chickendude

  • LV8 Addict (Next: 1000)
  • ********
  • Posts: 817
  • Rating: +90/-1
  • Pro-Riot Squad
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #1 on: July 17, 2013, 06:23:45 am »
If you disabled interrupts you could use ex af,af'. Also, it's pretty common practice to unroll the shifting routines, which would save a little time. Also, i wonder if using ix would actually be faster?

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #2 on: July 17, 2013, 09:12:14 am »
Yeah, in my app the shadow registers are free game (the interrupt routine preserves them). As for using IX, it depends on where it is used, because something like ld a,(ix+n) is 19 t-states, versus 7 for ld a,(hl).

Offline utz

  • LV4 Regular (Next: 200)
  • ****
  • Posts: 161
  • Rating: +28/-0
    • View Profile
    • official hp - music, demos, and more
Re: Non-Standard gBuf Ideas
« Reply #3 on: July 17, 2013, 09:56:26 am »
Wow Xeda, this looks pretty awesome. I haven't looked into LCD updating at all so far, so I'm very happy to see this bit of fairly understandable code ;)

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #4 on: July 17, 2013, 03:02:46 pm »
Cool, I hope that helps! It turns out that people like Iambian and tr1p1ea have been using this technique for years! Here are a few more routines that could probably still be optimised:
Code: [Select]
;RectangleErase can be optimised to be much faster if you make it its own routine using cpl \ and (hl) for logic. Currently, it uses RectangleOR \ RectangleXOR
;Included Routines
;======================
;Inputs:
;     B = Height
;     C = Width
;     D = X (leftmost)
;     E = Y (upper)
;RectangleBoxEraseFill
;  Draws a box with a black border, interior is erased.
;  Returns B-2, C-2, D+1, E+1
;RectArrowXOR
;  Draws an arrow pointing right with XOR logic
;  Returns BC,DE unchanged
;RectangleMenu
;  Draws a box with rounded edges
;  Returns D+1,C-2, B and E unchanged
;RectangleOR
;  Draws a black rectangle
;  Returns BC,DE unchanged
;RectangleXOR
;  Inverts the region of the screen
;  Returns BC,DE unchanged
;RectangleErase
;  Draws a white rectangle
;  Returns BC,DE unchanged

RectangleBoxEraseFill:
;Black-bordered box with a cleared interior.
;Inputs:  B = height, C = width, D = x, E = y
;Outputs: B-2, C-2, D+1, E+1 (the interior rectangle)
;The whole region is filled first, then the rectangle one pixel in from every
;edge is inverted, which turns the interior white again.
     call RectangleOR
     inc d \ inc e
     dec b \ dec b
     dec c \ dec c
     jp RectangleXOR
RectArrowXOR:
;XOR a right-pointing arrow made of 1-pixel-wide columns.
;Inputs:  B = height, C = width, D = x, E = y
;Outputs: BC, DE unchanged
;Each column is drawn one pixel further right and one pixel lower than the last
;and is two pixels shorter, until the height runs out.
     push de
     push bc
     ld a,b
     cp c
     jr c,arrow_sized
       ld b,c
arrow_sized:
;B is now the smaller of height and width
     ld c,1             ;every slice is one pixel wide
arrow_slice:
     call RectangleXOR
     inc d \ inc e
     djnz arrow_shrink  ;first -1; stop here if the height reaches 0
arrow_exit:
     pop bc
     pop de
     ret
arrow_shrink:
     djnz arrow_slice   ;second -1; draw again while height is still nonzero
     jr arrow_exit
RectangleMenu:
;Box outline with the four corner pixels left clear ("rounded").
;Inputs:  B = height, C = width, D = x, E = y
;Outputs: D+1, C-2; B and E unchanged
;After clearing, two overlapping inversions are applied: one a pixel shorter at
;top and bottom, one a pixel narrower at left and right. Where both overlap the
;pixels flip twice and end up clear, leaving only the border set.
     call RectangleErase
     push bc
     push de
     inc e
     dec b
     dec b
     call RectangleXOR
     pop de
     pop bc
     inc d
     dec c
     dec c
     jr RectangleXOR
RectangleErase:
;Draws a white rectangle: the region is first set with RectangleOR, then
;execution falls through into RectangleXOR, which inverts it back to clear.
;Inputs/outputs as for RectangleXOR.
     call RectangleOR
RectangleXOR:
;Inverts a rectangular region of the buffer.
;Inputs:
;     DE is (x,y)
;     BC is (h,w)
;Outputs:
;     BC, DE unchanged (restored at EndRect)
     push de
     push bc
     call RectMain
     jr nz,xorrect_multi
;Everything lies in one byte column: C is the combined mask, B is the height
xorrect_single:
       ld a,c
       xor (hl)     ;logic
       ld (hl),a
       inc hl
       djnz xorrect_single
       jp EndRect
xorrect_multi:
     ld d,a        ;number of columns before last col
     ld e,b
     ld a,(rect_FirstByte)
     ld c,a        ;mask for the leftmost column
xorrectcol:
     ld b,e        ;height
     push hl
xorrectloop:
     ld a,(hl)
     xor c          ;logic
     ld (hl),a
     inc hl
     djnz xorrectloop
     pop hl
     ld c,64
     add hl,bc     ;B is 0 after the djnz, so this steps HL to the next byte column
     ld c,-1       ;columns between the first and last are drawn in full
     dec d
     jp m,EndRect  ;label spelled exactly as defined, for case-sensitive assemblers
     jr nz,xorrectcol
     ld bc,(rect_LastByte)   ;C = mask for the final column; B is reloaded at xorrectcol
     jp xorrectcol
RectangleOR:
;Draws a black (all pixels set) rectangle.
;Inputs:
;     DE is (x,y)
;     BC is (h,w)
;Outputs:
;     BC, DE unchanged (restored at EndRect)
     push de
     push bc
     call RectMain
     jr nz,orrect_multi
;Everything lies in one byte column: C is the combined mask, B is the height
orrect_single:
       ld a,c
       or (hl)     ;logic
       ld (hl),a
       inc hl
       djnz orrect_single
       jp EndRect
orrect_multi:
     ld d,a        ;number of columns before last col
     ld e,b
     ld a,(rect_FirstByte)
     ld c,a        ;mask for the leftmost column
orrectcol:
     ld b,e        ;height
     push hl
orrectloop:
     ld a,(hl)
     or c          ;logic
     ld (hl),a
     inc hl
     djnz orrectloop
     pop hl
     ld c,64
     add hl,bc     ;B is 0 after the djnz, so this steps HL to the next byte column
     ld c,-1       ;columns between the first and last are drawn in full
     dec d
     jp m,EndRect  ;label spelled exactly as defined, for case-sensitive assemblers
     jr nz,orrectcol
     ld bc,(rect_LastByte)   ;C = mask for the final column; B is reloaded at orrectcol
     jp orrectcol
EndRect:
;Common exit for RectangleXOR and RectangleOR: restores the BC and DE that
;those routines pushed on entry, then returns to their caller.
     pop bc
     pop de
     ret
RectMain:
;Shared setup for the rectangle routines: finds the first byte to touch and
;builds the masks for the left and right edges.
;Inputs:
;     DE is (x,y)
;     BC is (h,w)
;     (DrawBufPtr) is the base of the column-major buffer
;Outputs:
;     returns z if it is all in one byte column; then C is the mask, B is the height, A is 0
;     returns nz otherwise; then A = (x+w)/8 - x/8, the number of byte-column
;       boundaries the rectangle crosses, and BC is (h,w)
;     HL points to the start byte
;     (rect_FirstByte) = mask for the leftmost byte column
;     (rect_LastByte)  = mask for the rightmost byte column
;NOTE(review): the final "ld de,(rect_FirstByte)" takes rect_LastByte from D, so this
;     relies on rect_LastByte being stored at rect_FirstByte+1 - confirm the RAM layout.
     ld a,d
     and %11111000
     ld h,0
     rla \ rl h
     rla \ rl h
     rla \ rl h
     ld l,a           ;HL = 64*(x/8)
     ld a,d
     ld d,0
     add hl,de        ;+y
;HL points to the byte it will start on (offset only, buffer base added later)
     push hl
     ld d,a           ;D = x again
     push bc
     call ComputeByte ;A = 0FFh >> (x and 7); B is destroyed, C is still w
     ld (rect_FirstByte),a
     ex (sp),hl       ;HL = (h,w), offset parked on the stack
     ld a,d
     cpl
     and 7
     inc a            ;A = 8-(x and 7): pixels available in the first byte
     ld b,a
     ld a,l
     sub b            ;low 3 bits of A now equal (x+w) and 7
     ex (sp),hl       ;HL = offset, (h,w) back on the stack
     call ComputeByte
     cpl              ;A = the top ((x+w) and 7) bits set
     ld (rect_LastByte),a
;last and first byte are computed
     ld a,d
     and %11111000
     ld e,a           ;E = x rounded down to a byte column
     ld a,d
     add a,c
     and %11111000    ;A = x+w rounded down to a byte column

;unwind the two stack entries: HL gets the offset, BC gets (h,w); A is untouched
     pop hl
     ex (sp),hl
     ld bc,(DrawBufPtr)
     add hl,bc
     pop bc

     sub e
     rrca
     rrca
     rrca
     and %00011111    ;A = number of byte-column boundaries crossed
     ret nz
;single column: keep only the bits allowed by both edge masks
     ld de,(rect_FirstByte)
     ld a,d \ and e
     ld c,a
     xor a            ;return z
     ret
ComputeByte:
;Build an edge mask from a bit position.
;Inputs:  A (only the low 3 bits are used)
;Outputs: A = 0FFh shifted right by (A and 7); B = 0
     and 7
     ld b,a
     ld a,80h
     jr z,computebyte_mask      ;Z still comes from the and; position 0 needs no rotation
computebyte_rot:
       rrca
       djnz computebyte_rot
computebyte_mask:
;A holds the single bit at the requested position; doubling it and subtracting
;one sets that bit and every bit below it (80h doubles to 0, giving 0FFh)
     add a,a
     dec a
     ret
I also made some text display routines, but they are pretty complicated, so I will have to organise them before posting.

Offline chickendude

  • LV8 Addict (Next: 1000)
  • ********
  • Posts: 817
  • Rating: +90/-1
  • Pro-Riot Squad
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #5 on: July 18, 2013, 12:44:32 pm »
Yeah, in my app the shadow registers are free game (the interrupt routine preserves them). As for using IX, it depends on where it is used, because something like ld a,(ix+n) is 19 t-states, versus 7 for ld a,(hl).
I was talking more for the rr (hl) bit, something like:
rr (ix-128) \ rr (ix-64) \ rr (ix) \ rr (ix+64) \ inc ixh

That way you can remove the push/pops.

rr (hl) is 15 t-states vs 23 for rr (ix), and 21 t-states for the push/pop (plus you save a little on the add hl,de's you'd need to unroll with rr (hl)).
« Last Edit: July 18, 2013, 12:52:19 pm by chickendude »

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #6 on: July 18, 2013, 12:59:33 pm »
That is an excellent idea! So the code would look like:
Code: [Select]
     ld ix,gBuf+128
     ld b,64
shiftrightloop:
     srl (ix-128) \ rr (ix-64) \ rr (ix) \ rr (ix+64) \ inc ixh  ;100
     rr (ix-128) \ rr (ix-64) \ rr (ix) \ rr (ix+64) \ inc ixh   ;100
     rr (ix-128) \ rr (ix-64) \ rr (ix) \ rr (ix+64)  ;92
     dec ixh \ dec ixh \ inc ix ; 26
     djnz shiftrightloop
     ret
That is 21210 t-states, which is much nicer!

Offline thepenguin77

  • z80 Assembly Master
  • LV10 31337 u53r (Next: 2000)
  • **********
  • Posts: 1594
  • Rating: +823/-5
  • The game in my avatar is bit.ly/p0zPWu
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #7 on: July 18, 2013, 05:01:20 pm »
Well, if we're going to keep talking about what's possible. What about this lcd update routine: (This would be the very core of the routine)

Code: [Select]
;Core of a timer-driven LCD copy: every timer interrupt sends one buffer byte.
;The shadow registers hold the copy state: A' = 3, HL' = buffer pointer,
;B' = bytes left in the current column, C' = port used by outi.
;nextColumn (not shown in this sketch) handles the end of a column.
;The number in each comment is the t-state cost.
interrupt:                      ;19
        ex      af, af'         ;4
        exx                     ;4
        out     ($31), a        ;11 a = 3
        outi                    ;16
        jr      z, nextColumn   ;7
        exx                     ;4
        ex      af, af'         ;4
        ei                      ;4
        ret                     ;10
                                ;83 total

I don't feel like actually making the rest of the routine, but this will allow you to update the LCD in around 65,000 t-states in 15MHz mode. (I think it normally takes around 130,000). Also, with a few tweaks, you could make this into a grayscale routine. But I think the coolest part about this thing is that it runs in the background. Essentially you start the lcd update and your code continues to run while this thing interrupts in (rather frequently) and writes another byte to the screen.

A quick calculation shows that this could probably update grayscale using only 30% of the processor time when it usually takes 50%.
« Last Edit: July 21, 2013, 04:08:44 pm by thepenguin77 »
zStart v1.3.013 9-20-2013 
All of my utilities
TI-Connect Help
You can build a statue out of either 1'x1' blocks or 12'x12' blocks. The 1'x1' blocks will take a lot longer, but the final product is worth it.
       -Runer112

Offline tr1p1ea

  • LV7 Elite (Next: 700)
  • *******
  • Posts: 647
  • Rating: +110/-0
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #8 on: July 18, 2013, 05:33:36 pm »
Ahh good old buffer changeup :). It's always good to change things up, since as you have realised ... you can get some good improvements out of it! :).

Great set of routines btw :).
"My world is Black & White. But if I blink fast enough, I see it in Grayscale."


Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #9 on: July 19, 2013, 10:34:09 pm »
@thepenguin77: I haven't used timers before, but I think I understand your code and that is clever! nextCol would basically adjust the LCD column by incrementing it until it reached 2C at which point it would reset the pointer (dec h \ dec h \ dec h) and the column to 20h ?

@tr1p1ea: Thanks! I have these text display commands, but they don't properly clip on the edges of the window :/
Code: [Select]
;RAM required:
;fmt_leftmost   1 byte , define the left edge of where text can be drawn
;fmt_rightmost  1 byte , define the right edge of where text can be drawn
;fmt_lower      1 byte , define the lower edge of where text can be drawn
;fmt_upper      1 byte , define the upper edge of where text can be drawn
;bit wordwrap,(iy+textflags)    ;define your own location and whatnot
;textcol        1 byte
;textrow        1 byte
;fontptr        2 bytes points to the nibble packed 4x6 fontset
;DrawBufPtr     2 bytes, points to the buffer where text gets drawn
;lFont_record is defined by the ti83plus.inc. 6 bytes are used
;
;Sample Fontset included
;Included Routines, Input:
;  IGPutSFmt     null-terminated string, directly following the call
;  GPutSFmt      HL points to the null terminated string
;  GPutCFmt      A is the character to display (all formatting applied)
;  GPutC         B is the character to display (some formatting applied)
;
;All text is written to the buffer with OR logic.
;These routines are modified for vertically aligned buffers, not the way the OS does it.


IGPutSFmt:
;Display a string stored inline, directly after the call instruction.
;Inputs:
;     The null-terminated string immediately follows the call.
;Example:
;     call IGPutSFmt
;     .db "Hello World!",0
;The return address on the stack is the address of the string. GPutSFmt leaves
;HL just past the terminating 0, which is where execution has to resume.
     pop hl
     call GPutSFmt
     jp (hl)
GPutSFmt:
;Display a zero-terminated string, one character at a time, through GPutCFmt.
;Inputs:
;     HL points to the zero-terminated string to display
;The text is confined to the rectangle described by:
;     (fmt_leftmost)
;     (fmt_rightmost)
;     (fmt_lower)
;     (fmt_upper)
;     bit wordwrap,(iy+textflags)
;Outputs:
;     HL points just past the terminating zero
     ld a,(hl)
     inc hl
     or a
     ret z              ;terminator reached
     push de \ push hl  ;the character routine does not preserve these
     call GPutCFmt
     pop hl \ pop de
     jr GPutSFmt
GPutCFmt:
;Draw one character, then apply word wrap if it is enabled.
;Input:
;     A is the char to display
     ld b,a
     call GPutC
;the rest only runs when wordwrap is set
     bit wordwrap,(iy+textflags)
     ret z
     ld de,(fmt_rightmost)   ;E = right edge, D = the byte stored after it
     ld hl,(textcol)         ;L = column, H = the byte stored after it
     ld a,e
     sub l
     ret nc                  ;column has not passed the right edge
     ld a,d
     sub h
     ret c
;wrap: back to the left edge, six pixels down
     ld a,(fmt_leftmost)
     ld l,a
     ld a,h
     add a,6
     ld h,a
     ld (textcol),hl
     ret
GPutC:
;Draw one character from the nibble-packed 4x6 font at the text cursor and
;advance the cursor by 4 pixels. Character $D6 moves to the start of the next
;line instead of drawing. Falls through into ORSprite8xY to do the drawing.
;Inputs:
;     (fontptr)
;     (textcol)
;     (textrow)
;     B is the char to display
     ld a,$D6
     cp b
     jr nz,charreg
nextline:
       ld a,(fmt_leftmost)
       ld (textcol),a
       ld a,(textrow)
       add a,6
       ld (textrow),a
       ret
charreg:
;HL = fontptr + 3*char (3 bytes per glyph)
     ld hl,(fontptr)
     ld c,b
     ld b,0
     add hl,bc
     add hl,bc
     add hl,bc
;Unpack the 3 glyph bytes into 6 sprite rows at lFont_record. Each nibble
;becomes one row, placed in the top 4 bits of the byte; high nibble first.
;inc e (not inc de) assumes the 6 bytes do not straddle a 256-byte boundary.
     ld de,lFont_record
     ld b,3
       ld a,(hl)
       and $F0
       ld (de),a
       inc e
       ld a,(hl)
       rrca \ rrca
       rrca \ rrca
       and $F0
       ld (de),a
       inc e
       inc hl
       djnz $-15        ;back to the first ld a,(hl)
     ld ix,lFont_record
     ld de,0406h        ;E = glyph height; D is not used by the drawing code below
     ld bc,(textcol)    ;C = column, B = row
     ld hl,fmt_lower
     ld a,b
     cp (hl) \ ret nc   ;cursor is at or below the lower edge: draw nothing
;e is height
;clip the height when the glyph would extend past the lower edge
     add a,e
     sub (hl)
     jr c,$+6           ;row+6 is above the edge: keep the full height
       neg
       add a,e
       ld e,a           ;E = 6 - overshoot

     ld a,c
     dec l              ;HL now addresses the byte before fmt_lower (fmt_rightmost)
     cp (hl) \ ret nc   ;cursor is at or past the right edge: draw nothing


     ld a,c
     add a,4
     ld (textcol),a     ;advance the cursor one glyph width
;swap into the (x,y) order ORSprite8xY expects
     ld a,b
     ld b,c
     ld c,a
ORSprite8xY:
;OR an 8-pixel-wide sprite of variable height onto the column-major buffer.
;Note: No clipping.
;Inputs:
;     BC = (x,y)
;     IX points to the sprite
;     E is the height
;     (DrawBufPtr) is the base of the buffer
     ld a,b
     and $F8
     ld h,0
     rla \ rl h
     rla \ rl h
     rla \ rl h
     ld l,a             ;HL = 64*(x/8)
     ld a,b
     ld b,0
     add hl,bc          ;+y
     ld bc,(DrawBufPtr)
     add hl,bc
;HL points to the first byte to draw at
     and 7              ;A = x mod 8
     jr nz,crossedbound
;Byte-aligned case. This must use OR like the unaligned path below; an XOR
;here made aligned characters invert the background instead of overlaying it.
       ld b,e
       push ix \ pop de
         ld a,(de)
         or (hl)
         ld (hl),a
         inc hl
         inc de
         djnz $-5       ;back to the ld a,(de)
       ret
crossedbound:
;A = shift (1..7). The sprite straddles two byte columns.
     ld b,a
     ld d,a             ;D keeps the shift count for the row loops
     ld a,1
       rrca
       djnz $-1
     dec a              ;A = 0FFh >> shift: the bits that stay in the left column
     ld c,e
     push bc            ;B = 0, C = height, saved for the second pass
     ld e,a
;E is the mask
;IX points to the sprite
;HL points to where to draw
drawloop1:
;left column, top to bottom
     ld a,(ix)
     ld b,d \ rrca \ djnz $-1
     and e
     or (hl)
     ld (hl),a
     inc ix
     inc hl
     dec c
     jr nz,drawloop1
     ld c,64
     add hl,bc          ;B is 0: HL moves to the same rows of the next byte column
     ld a,e
     cpl
     ld e,a             ;inverted mask keeps the bits that wrapped into the right column
     pop bc
drawloop2:
;right column, walked bottom to top so IX and HL can simply count back down
     dec ix
     dec hl
     ld a,(ix)
     ld b,d \ rrca \ djnz $-1
     and e
     or (hl)
     ld (hl),a
     dec c
     jr nz,drawloop2
     ret
;===============================================================
FontSet:
;===============================================================
;Nibble-packed 4x6 font: 3 bytes per glyph, 256 glyphs, 768 bytes.
;Each nibble is one pixel row; the high nibble of the first byte is the top row.
;A glyph is found at FontSet + 3*character code.
;00~7F
.db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$8C,$EC,$80,$00,$00,$00,$00,$00,$00
.db $24,$44,$20,$0A,$4A,$00,$00,$EA,$E0,$00,$4E,$40,$00,$04,$00,$00,$E4,$44,$E4,$2C,$00,$EC,$EC,$C0

.db $74,$4C,$40,$62,$A2,$00,$C2,$4E,$00,$02,$48,$E0,$69,$96,$00,$AC,$88,$00,$E4,$40,$00,$68,$60,$E0
.db $2E,$4E,$80,$C2,$C0,$E0,$06,$00,$00,$E8,$C8,$E0,$42,$F2,$40,$AD,$DD,$A0,$4E,$44,$40,$44,$4E,$40

.db $00,$00,$00,$44,$40,$40,$AA,$A0,$00,$00,$00,$00,$00,$00,$00,$A2,$48,$A0,$4A,$4A,$50,$88,$80,$00
.db $24,$44,$20,$84,$44,$80,$00,$40,$00,$04,$E4,$00,$00,$44,$80,$00,$E0,$00,$00,$00,$80,$22,$48,$80

;FontNumbers
.db $4A,$AA,$40,$4C,$44,$E0,$C2,$48,$E0,$C2,$42,$C0,$AA,$E2,$20,$E8,$C2,$C0,$68,$EA,$E0,$E2,$44,$40,$EA,$EA,$E0,$EA,$E2,$20

;3Ah~3Fh
.db $04,$04,$00,$04,$04,$80,$24,$84,$20,$0E,$0E,$00,$84,$24,$80,$C2,$40,$40

.db $00,$00,$00,$4A,$EA,$A0,$CA,$CA,$C0,$68,$88,$60,$CA,$AA,$C0,$E8,$C8,$E0,$E8,$C8,$80,$68,$AA,$60
.db $AA,$EA,$A0,$E4,$44,$E0,$62,$2A,$40,$AA,$CA,$A0,$88,$88,$E0,$AE,$AA,$A0,$CA,$AA,$A0,$EA,$AA,$E0
.db $CA,$C8,$80,$EA,$AE,$60,$CA,$CA,$A0,$68,$42,$C0,$E4,$44,$40,$AA,$AA,$E0,$AA,$AA,$40,$AA,$AE,$A0
.db $AA,$4A,$A0,$AA,$44,$40,$E2,$48,$E0,$4A,$EA,$40,$88,$42,$20,$C4,$44,$C0,$4A,$00,$00,$00,$00,$E0
.db $84,$00,$00,$06,$AA,$60,$88,$CA,$C0,$06,$88,$60,$22,$6A,$60,$04,$AC,$60,$48,$C8,$80,$06,$A6,$2C
.db $88,$CA,$A0,$40,$44,$40,$20,$22,$A4,$8A,$CA,$A0,$88,$88,$40,$0A,$EA,$A0,$0C,$AA,$A0,$04,$AA,$40
.db $0C,$AC,$80,$06,$A6,$22,$0A,$C8,$80,$0C,$84,$C0,$4E,$44,$20,$0A,$AA,$E0,$0A,$AA,$40,$0A,$AE,$A0
.db $0A,$44,$A0,$0A,$A6,$24,$0E,$24,$E0,$64,$84,$60,$44,$44,$40,$C4,$24,$C0,$05,$A0,$00,$E0,$E0,$E0

;FontNumbers2
.db $04,$AA,$A4,$04,$C4,$4E,$0C,$24,$8E,$0C,$24,$2C,$0A,$AE,$22,$0E,$8C,$2C,$06,$8E,$AE,$0E,$24,$44
.db $0E,$AE,$AE,$0E,$AE,$22

;Accented A
.db $24,$AE,$A0,$84,$AE,$A0,$00,$00,$00,$A4,$AE,$A0

;Accented a
.db $24,$06,$A5,$42,$06,$A5,$4A,$06,$A5,$A0,$6A,$60

;Accented E
.db $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00

;Accented e
.db $48,$4A,$C6,$42,$4A,$C6,$4A,$4A,$C6,$A0,$4A,$C6

;Accented I
.db $24,$0E,$4E,$84,$0E,$4E,$4A,$0E,$4E,$A0,$E4,$E0

;Accented i
.db $24,$04,$44,$84,$04,$44,$4A,$04,$44,$A0,$44,$40

;Bombs... er, accented O
.db $24,$69,$96,$84,$69,$96,$4A,$69,$96,$A0,$69,$96

;Lowercase bombs
.db $24,$06,$96,$84,$06,$96,$4A,$06,$96,$A0,$06,$96

;Accented U
.db $24,$AA,$A6,$84,$AA,$A6,$4A,$AA,$A6,$A0,$AA,$A6

;Accented u
.db $24,$0A,$A6,$84,$0A,$A6,$4A,$0A,$A6,$A0,$0A,$A6

;Accented C,c,N,n
.db $4A,$8A,$48,$06,$88,$6C,$5A,$0C,$AA,$5A,$0C,$AA

;Other Punctuation
.db $24,$00,$00,$84,$00,$00,$A0,$00,$00,$40,$48,$60

;Upside-Down Exclamation Point Identical to lowercase i
;Change to something else?
.db $00,$00,$00

;Greek
.db $05,$AA,$50,$25,$65,$A0,$05,$A2,$20,$00,$4A,$E0,$34,$27,$96,$68,$E8,$60

;[
.db $64,$44,$60

;Greek (continued)
.db $84,$25,$90,$0A,$AD,$80,$0F,$55,$90,$25,$56,$48,$F4,$24,$F0
.db $07,$55,$40,$07,$A2,$10
;Idunno howta do these
.db $4E,$AE,$40,$69,$99,$69

;CB~CF (five glyphs)
.db $E0,$A4,$A0,$E0,$A6,$24,$52,$50,$00,$00,$00,$A0,$26,$E6,$20

;D0~D5
.db $44,$40,$00,$22,$48,$80,$00,$60,$00,$C4,$8C,$00,$EA,$E0,$00,$E4,$2C,$00

;D6
.db $00,$00,$00

;D7~DF
.db $40,$44,$20,$04,$CA,$C8,$8A,$4A,$20,$E9,$AE,$A8,$69,$E8,$60,$00,$44,$60,$9D,$FB,$90,$A5,$55,$A0,$4E,$FE,$40

;Overwrite Cursor
.db $FF,$FF,$FF,$FB,$1B,$BF,$FB,$51,$5F,$FF,$95,$9F

;Insert Cursor
.db $00,$00,$0F,$4E,$EE,$0F,$4A,$EA,$0F,$06,$A6,$0F

;E8~ED
.db $00,$84,$20,$00,$C6,$20,$00,$E6,$20,$00,$8C,$E0,$25,$D5,$20,$4A,$AA,$40
;EE
;NOTE(review): this row originally held only 7 glyphs for the 8 codes E8~EF,
;which made the table 765 bytes and put every later glyph one code too low
;(BlockEater at $F6-$F9 instead of $F7-$FA, $FF reading past the end).
;A blank placeholder is inserted here to restore the documented positions.
;$EE is presumed to be the missing one because the last two glyphs of E8~F0
;match the up/down arrow pair at $1E/$1F - confirm and supply the real glyph.
.db $00,$00,$00
;EF
.db $4E,$44,$40

;F0~F4
.db $44,$4E,$40,$5A,$5A,$5A,$27,$A6,$3E,$4E,$44,$00,$69,$A9,$A0

;male/female
.db $73,$5E,$AE,$EA,$E4,$E4

;BlockEater Down $F7
.db $6F,$96,$90
;BlockEater Left        $F8
.db $6F,$16,$90
;BlockEater Right       $F9
.db $6F,$86,$90
;BlockEater Up          $FA
.db $69,$96,$90

;FB~FE
.db $09,$AC,$E0,$08,$53,$70,$EC,$A1,$00,$73,$58,$00

;FF
.db $A5,$A5,$A5
Those use a nibble-packed 4 wide by 6 tall font (3 bytes per char for a total of 768 bytes).

Offline thepenguin77

  • z80 Assembly Master
  • LV10 31337 u53r (Next: 2000)
  • **********
  • Posts: 1594
  • Rating: +823/-5
  • The game in my avatar is bit.ly/p0zPWu
    • View Profile
Re: Non-Standard gBuf Ideas
« Reply #10 on: July 21, 2013, 04:08:18 pm »
Ok, I finished it:
Code: [Select]
;DWAIT: spin until bit 7 of port $10 reads 0 (or a sets the sign flag from it,
;jp m loops back to the in). Destroys A and the flags.
#define DWAIT in a, ($10) \ or a \ jp m, $-3

copySetup:
;One-time setup for the interrupt-driven LCD copy. Call once before interruptCopy.
;Leaves interrupts disabled. Destroys A, BC, DE, HL.

;IM 2 vector table: 257 bytes of $98 at $9900-$9A00, so whatever vector is
;fetched, the CPU ends up at $9898
        di
        ld hl, $9900
        ld (hl), $98
        ld de, $9901
        ld bc, $0100
        ldir

;$9898: jp interrupt
        ld hl, interrupt
        ld ($9899), hl
        ld a, $C3
        ld ($9898), a

        ld a, $99
        ld i, a
        im 2

        xor a
        out ($03), a    ;don't use halt

        ld a, $A0       ;64 t-state timer
        out ($30), a
        ret

; hl is the buffer to use
; Starts a background copy of the 768-byte column-major buffer to the LCD.
; Returns right away; the timer interrupt then sends one byte at a time.
; The copy state (A = 3, BC, E, HL) is handed over to the shadow registers by
; the ex af,af' / exx at the end, so the caller gets back whatever was in the
; shadow set and its own AF, BC, DE, HL are gone.
; The first byte of the routine is patched to ret ($C9) while a copy is in
; progress and back to nop when it finishes, so the routine must run from RAM.
interruptCopy:
        nop
        ld a, $C9
        ld (interruptCopy), a ;safeguard against running twice
        ld e, $20       ;first column command

; Also entered from the interrupt handler (shadow registers active) after the
; 64th byte of a column.
nextColumn:
        DWAIT
        ld a, e
        out ($10), a
        ; $20..$2B are the 12 on-screen columns. The old limit of $2E streamed
        ; two extra off-screen columns, reading 128 bytes past the buffer.
        cp $2C
        jr nz, notDoneYet
        xor a
        ld (interruptCopy), a ;re-arm: restore the nop
        ex af, af'
        exx
        ret ;returns with interrupts disabled
notDoneYet:
        inc e
        DWAIT
        ld a, $80       ;row pointer back to the top for this column
        out ($10), a
        ld bc, 64*256+$11 ;B = 64 bytes per column, C = LCD data port
        ld a, 3
        out ($31), a ;this is 3*64 = 192 t-states per write
        out ($32), a ; you can refine this, but 192 is probably good
        ex af, af'
        exx
        ei
        ret

; Timer interrupt handler: sends one buffer byte to the LCD per interrupt.
; The copy state lives in the shadow registers: A' = 3, HL' = buffer pointer,
; B' = bytes left in this column, C' = port used by outi.
; Trailing numbers are t-states.
interrupt:                      ;19 + 10
        ex af, af'              ;4   switch to the copy state
        exx                     ;4
        out ($31), a            ;11  a = 3
        outi                    ;16  send (hl); hl+1, b-1
        jr z, nextColumn        ;7   column finished
        exx                     ;4   back to the interrupted program's registers
        ex af, af'              ;4
        ei                      ;4
        ret                     ;10
                                ;93 total

First of all, this only works in 15 MHz. It is actually worse in 6 MHz mode than the regular methods. To use this, call copySetup once, and then call interruptCopy whenever you need to update the screen. It won't let you run it again if it's still updating the screen (this would be bad) so that's good. The only downside is that you can't use interrupts (including getCSC) and you can't use HALT.
« Last Edit: July 21, 2013, 04:10:19 pm by thepenguin77 »
zStart v1.3.013 9-20-2013 
All of my utilities
TI-Connect Help
You can build a statue out of either 1'x1' blocks or 12'x12' blocks. The 1'x1' blocks will take a lot longer, but the final product is worth it.
       -Runer112