0 Members and 1 Guest are viewing this topic.
Huh - I had added a "While getkey(0) : Dispgraph : End" loop specifically to prevent that problem. Are you using Wabbitemu? Because, for me, Wabbitemu has a tendency to speed things up and make controlling things harder. Compared to Quigbo's, mine is more convoluted to allow editing.
As requested from IRC, a piece of ASM from my own 2D Cellular Automata program. This one handles the buffer, generates the needed LUTs and performs the stuff needed for each "generation." I know there are better ways to handle generating the rules, but I was lazy. Back in 2006, and even now in 2010.
Code: [Select]
;=============================================================================
; 2D cellular automaton core.
; Z80 assembly, TASM/SPASM-style syntax: "\" separates statements on a line,
; a trailing "b" marks a binary literal, "$" is the current address.
;
; Screen buffers are 64 rows x 12 bytes = 768 bytes.  Two of them are used
; (savesscreen / appbackupscreen) and swapped every generation.
;
; Fixed tables built by begincellular:
;   $8B00-$8BFF  per-nibble bit counts: entry[n] = popcount(n & $0F)
;                in the low nibble, popcount(n >> 4) in the high nibble
;   $8000-$80FF  first rule LUT  (result is ORed in  -> see firstiter etc.)
;   $8100-$81FF  second rule LUT (ANDed with the centre byte)
;
; Symbols defined elsewhere: appbackupscreen, savesscreen, statVars, rules,
; halted, iterations, buf1, buf2, temp1, done, showgraph, _getkbd.
;=============================================================================

;-----------------------------------------------------------------------------
; begincellular2 - entry point.  Saves the 12 bytes on either side of both
; screen buffers, runs the automaton, then puts those bytes back.
;-----------------------------------------------------------------------------
begincellular2:
        call    preservedata        ;preserving both sides of both tables so buffer under/overflow can occur
        call    begincellular
        call    restoredata         ;loading back data stuffs.
        ret

;-----------------------------------------------------------------------------
; preservedata - copy the four 12-byte guard rows (before/after each buffer)
; to 48 consecutive bytes starting at statVars.  DE keeps advancing across
; the four LDIRs.  Clobbers HL, DE, BC.
;-----------------------------------------------------------------------------
preservedata:
        ld      hl,appbackupscreen-12
        ld      de,statVars
        ld      bc,12
        ldir
        ld      hl,appbackupscreen+768
        ld      bc,12
        ldir
        ld      hl,savesscreen-12
        ld      bc,12
        ldir
        ld      hl,savesscreen+768
        ld      bc,12
        ldir
        ret

;-----------------------------------------------------------------------------
; restoredata - inverse of preservedata: HL walks the 48 saved bytes at
; statVars, DE is re-pointed at each guard row.  Clobbers HL, DE, BC.
;-----------------------------------------------------------------------------
restoredata:
        ld      de,appbackupscreen-12
        ld      hl,statVars
        ld      bc,12
        ldir
        ld      de,appbackupscreen+768
        ld      bc,12
        ldir
        ld      de,savesscreen-12
        ld      bc,12
        ldir
        ld      de,savesscreen+768
        ld      bc,12
        ldir
        ret

;-----------------------------------------------------------------------------
; begincellular - build the three lookup tables, then run the key/step loop
; (begincellular2a) until key code $37 is read.
; In:  (rules) = pointer to two consecutive rule lists, each stored as
;      <count byte> followed by <count> neighbour-count bytes.  The loops are
;      post-tested (dec c / jr nz), so a count of 0 is processed as 256.
;-----------------------------------------------------------------------------
begincellular:
        ld      a,0
        out     ($20),a             ;NOTE(review): presumably selects the slow CPU clock - confirm against port docs
        ld      hl,$8B00            ;embedded in StatVars memory location. Invalidate stat vars else sys will crash.
        ld      de,0                ;D stays 0 so "adc a,d" just adds the carry
;# of bits in each byte, grouped by nibbles
cenloop00:
        xor     a                   ;table 1
        ld      b,4
        rrc     l                   ;carry = next bit of the index (L is the index)
        adc     a,d
        djnz    $-3                 ;back to "rrc l"
        ld      e,a                 ;E = bits set in low nibble
        xor     a
        ld      b,4
        rrc     l                   ;second 4 rotations restore L
        adc     a,d
        djnz    $-3
        rrca
        rrca
        rrca
        rrca                        ;high-nibble count moved to high nibble
        or      e
        ld      (hl),a
        inc     l
        jr      nz,cenloop00
;ReturnToLife and KeepAlive tables in LUT format
        ld      IX,$8000            ;initialize LUT position
        ld      b,$FF               ;255 loops. Fencepost error eliminated, since 1/2 of data isn't used.
maxloopa:
        ld      hl,(rules)          ; load ruleset
        ld      c,(hl)              ; first byte is count
        inc     hl                  ; next byte
        ld      (IX+0),$00          ; clear off byte
minloop01:
        ld      a,IXl               ; get current position on LUT
        and     $0F                 ; get lower nibble
        cp      (hl)                ; compare with ruleset (bits set = perform this action) for lower half
        jr      nz,$+6              ; if no match, do not change bits
        set     2,(IX+0)            ; mark low nibble as matching
        ld      a,IXl               ; get current position on LUT
        rlca \ rlca \ rlca \ rlca   ;shift
        and     $0F                 ;strip MSN from byte
        cp      (hl)                ;compare that with data in HL (the rule byte)
        jr      nz,$+6              ;if not equal, skip; if equal, set bit 6 for rule
        set     6,(IX+0)            ;...
        inc     hl                  ;next rule byte
        dec     c                   ;decrement counter for number of rule bytes
        jr      nz,minloop01        ;go back to start of routine to check next rule
        inc     IXh                 ;go to next aligned set for rules
        ld      c,(hl)              ;load next set of rules. byte counter
        inc     hl                  ;next byte
        ld      (IX+0),$FF          ;load LUT with this value
minloop02:                          ;AND loop
        ld      a,IXl               ;get LUT LSB
        and     $0F                 ;strip MSN
        cp      (hl)                ;compare with rule bit
        jr      z,$+8               ;do not reset if they match. Else, reset bit.
        res     2,(IX+0)            ;...
        jr      $+6
        res     0,(IX+0)            ;used for flag if a match has been taken at some point
        ld      a,IXl               ;get MSN of LUT position
        rlca \ rlca \ rlca \ rlca   ;shift to LSB
        and     $0F                 ;strip MSN (now LSB) of LUT position
        cp      (hl)                ;compare with rule byte
        jr      z,$+8               ;do not reset if they match. Else, reset bit
        res     6,(IX+0)            ;...
        jr      $+6
        res     4,(IX+0)            ;used for flag if a match has been taken at some point
        inc     hl                  ;get next rule byte
        dec     c                   ;decrement rule byte counter
        jr      nz,minloop02        ;loop until there are no more rule bytes left in counter
        bit     0,(IX+0)            ;testing series... for the AND loop
        jr      nz,$+10             ;bit 0 still set = no rule matched the low nibble
        set     0,(IX+0)            ;
        set     2,(IX+0)            ;a match happened: force the marker bit back on
        bit     4,(IX+0)            ;
        jr      nz,$+10             ;same test for the high nibble
        set     4,(IX+0)            ;
        set     6,(IX+0)            ;
        dec     IXh                 ;move cursor back to first LUT
        inc     IXl                 ;next byte in LUT
        djnz    maxloopa            ;go back to main set. Reset pointer to rule table for another run-through.
;Tacked-on table converter code
; Expands marker bits 2 and 6 of every entry in $8000-$81FF into full-nibble
; masks ($0F / $F0).  BC=2 gives B=0 (256 passes of djnz) and C=2 outer passes.
        ld      bc,2
        ld      hl,$8000
fixloopa:
        ld      a,(hl)
; 76543210
;%01000100
        bit     2,a
        jr      z,$+6
        or      00001111b
        jr      $+4
        and     11110000b
        bit     6,a
        jr      z,$+6
        or      11110000b
        jr      $+4
        and     00001111b
        ld      (hl),a
        inc     hl
        djnz    fixloopa
        dec     c
        jr      nz,fixloopa
;End LUT generator
        xor     a
        ld      (halted),a
; Main loop: run one generation unless halted, draw, then poll the keyboard.
begincellular2a:
        ld      a,(halted)
        or      a
        jr      nz,$+5              ;halted: skip the automatic step
        call    workscreen
        call    showgraph
        call    _getkbd
        cp      $37
        ret     z                   ;key code $37 leaves the loop
        cp      $36                 ;2nd. Step over iteration
        jr      nz,$+7
        call    workscreen
        jr      begincellular2a
        cp      $38                 ;DEL. Toggle halt mode
        jr      nz,begincellular2delskip
        call    _getkbd
        or      a
        jr      nz,$-4              ;wait until the key has been released.
        ld      a,(halted)
        inc     a
        and     00000001b
        ld      (halted),a
        jr      begincellular2a
begincellular2delskip:
        jp      begincellular2a

;-----------------------------------------------------------------------------
; workscreen - compute one generation.
; Picks source/destination buffers from bit 0 of (iterations), copies the
; wrap-around rows into the 12 bytes before/after the source buffer, runs
; workonthisrow for all 64 rows, then increments the 4-byte counter at
; (iterations) and compares it with the 4 bytes that follow it in memory.
; Sets (done)=1 when they are equal.
; Preserves IY.  Clobbers everything else including IX and the shadow set.
; Executes DI and never EI, so it returns with interrupts disabled.
;-----------------------------------------------------------------------------
workscreen:
        push    IY                  ;save this register, 'tis prone to being destroyed in this routine.
        di                          ;prevent interrupts. Will be using shadow registers for spare registers
        ld      a,(iterations)
        ld      hl,savesscreen
        ld      de,appbackupscreen
        bit     0,a
        jr      nz,$+3              ;odd generation: skip the one-byte "ex de,hl" below
        ex      de,hl
        ld      (buf1),hl
        ld      (buf2),de
;
        ld      hl,(buf1)           ;buffer copy outside of range. Previous routine fixes it later.
        ld      e,l
        ld      d,h
        ld      bc,-12
        add     hl,bc
        ex      de,hl               ;DE = buf1-12, HL = buf1
        ld      bc,768-12
        add     hl,bc               ;HL = last row of buf1
        ld      bc,12
        ldir                        ;last row -> row above the buffer
        ld      hl,(buf1)
        ld      e,l
        ld      d,h
        ex      de,hl
        ld      bc,768
        add     hl,bc
        ex      de,hl               ;DE = buf1+768, HL = buf1
        ld      bc,12
        ldir                        ;first row -> row below the buffer
;
        exx                         ;saving these values for very quick reference
        ld      hl,$8100            ;HL' = rule LUT page
        exx
        ld      hl,$8B00            ;HL  = bit-count table page
;
; Clock initializing for speed count:
;       ld a,0
;       out ($30),a
;       ld a,$41
;       out ($30),a                 ;Timer/33
;       ld a,0
;       out ($31),a                 ;Set mode: Count to zero & stop
;       ld a,$FF
;       out ($32),a                 ;Begin counting from $FF
;Clock init done
;
        ld      ix,(buf1)           ;IX=first buffer, IY=second buffer. HL buf dropped altogether.
        ld      iy,(buf2)
        ld      b,64
        push    bc
        call    workonthisrow
        pop     bc
        djnz    $-5                 ;back to "push bc"
        pop     iy
;
;Fetch value off the clock
;       ld hl,$0505
;       ld (currow),hl
;       in a,($32)
;       ld l,a
;       ld h,0
;       bcall(_DispHL)
;       ld a,0
;       out ($30),a                 ;disable timer
;Stop clock section
;
        di
        ld      (temp1),SP
        ld      SP,iterations       ;use SP as a fast pointer into the variables
        pop     hl \ pop de \ ld bc,$0001           ;|
        add     hl,bc \ dec bc \ ex de,hl           ; >Increment 4-byte counter and use set vectors to compare
        adc     hl,bc \ push hl \ push de           ;|
        pop     hl \ pop ix \ pop bc \ pop de       ;HL:IX = counter (low:high), BC:DE = the 4 bytes after it
                                    ;NOTE(review): assumes the limit (setloops in the old code below)
                                    ;is stored directly after iterations - confirm.
        ld      SP,(temp1)          ;FIX: back on the real stack *before* any push.  The old code did
                                    ;"push hl \ pop ix" here with SP still in the variable area, which
                                    ;overwrote the word at iterations+6 and discarded the counter's
                                    ;high word, so the compare below never was a true 32-bit compare.
        or      a \ sbc hl,bc       ;low words equal?
        ret     nz
        push    ix \ pop hl         ;HL = high word of counter
        or      a \ sbc hl,de       ;high words equal?
        ret     nz
; Old straightforward version, kept for reference:
;       ld hl,(iterations)
;       inc hl
;       ld (iterations),hl
;       ld a,l
;       or h
;       jr nz,$+9
;       ld hl,(iterations+2)
;       inc hl
;       ld (iterations+2),hl        ;if first HL=0, then boundary has crossed and need to increment next set of HL 2 bytes above it
;       ld hl,(iterations)
;       ld de,(setloops)
;       ld a,e \ xor l \ ret nz
;       ld a,d \ xor h \ ret nz
;       ld hl,(iterations+2)
;       ld de,(setloops+2)
;       ld a,e \ xor l \ ret nz
;       ld a,d \ xor h \ ret nz
;if passed this point, then this means that (iterations) and (setloops) in all four bytes match. Tells program to end iterations now by setting (done) to 1.
        ld      a,1                 ;setting to 1. Calling routine cutoff has been commented out
        ld      (done),a            ;so that this has no effect
        ret

;===========================================================================================
;Main Work algorithm found here. Not as big of bloat as before, but it's still fast......
;===========================================================================================
;-----------------------------------------------------------------------------
; workonthisrow - process one 12-byte row: wrap-aware left byte, 10 interior
; bytes, wrap-aware right byte.  IX and IY advance by 12.
;-----------------------------------------------------------------------------
workonthisrow:
        call    leftiter
        call    centeriter
        call    fourthiter
        ld      a,10
Workonthisrowsub:
        push    af
        call    firstiter
        call    centeriter
        call    fourthiter
        pop     af
        dec     a
        jr      nz,Workonthisrowsub
        call    firstiter
        call    centeriter
        call    rightiter
        ret
;External setup:
;HL = LUT for bit comparison
;HL'= LUT for result testing
; Two LUTs are indexed by HL by incrementing and decrementing H (256 byte wide tables)
;IX = pointer to buffer 1 (reading)
;IY = pointer to buffer 2 (writing)
;
;Internal setup:
;D= row above
;E= row below
;C= current position
;B= temporary variable
;B'=center byte storage
;
;Registers used so far:
; AF, BC, DE, HL, AF', BC', HL', IX, IY
;
;Free registers:
; DE'
;
; Each byte is handled in four passes, one per bit position inside a nibble
; (both nibbles at once).  A pass masks the three neighbour rows, looks the
; per-nibble bit counts up via HL, sums them, then in the shadow set computes
;   ((centre AND LUT[$81xx]) OR LUT[$80xx]) AND <bit mask of this pass>.

;-----------------------------------------------------------------------------
; firstiter - pass for bits 7 and 3 of an interior byte.  Loads D/E/C from
; the rows above/below/current; the left neighbour bit is rotated in from the
; previous byte.  Leaves the centre byte in B' and the partial result in C'.
;-----------------------------------------------------------------------------
firstiter:
        ld      d,(ix-12)
        ld      e,(ix+12)
        ld      c,(ix+00)
        ld      a,(ix-13)
        rrca                        ;carry = bit 0 of the byte up-left
        ld      a,d
        rra
        and     11101110b
        ld      l,a
        ld      b,(hl)              ;B = counts for the row above
        ld      a,(ix+11)
        rrca
        ld      a,e
        rra
        and     11101110b
        ld      l,a
        ld      a,(hl)              ;A = counts for the row below
        ex      af,af'
        ld      a,(ix-01)
        rrca
        ld      a,c
        rra
        and     10101010b           ;same row: left and right neighbours only
        ld      l,a
        ld      a,c
        ex      af,af'              ;A = below counts, A' = centre byte
        add     a,(hl)
        add     a,b                 ;A = neighbour count per nibble
        exx
        ld      l,a
        ex      af,af'
        ld      b,a                 ;B' = centre byte
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     10001000b
        ld      c,a                 ;C' = result so far
        exx
        ret

;-----------------------------------------------------------------------------
; centeriter - passes for bits 6/2 and 5/1.  Needs D, E, C and B', C' as left
; by firstiter/leftiter; no neighbouring bytes are read.
;-----------------------------------------------------------------------------
centeriter:
        ld      a,d
        and     11101110b
        ld      l,a
        ld      b,(hl)
        ld      a,e
        and     11101110b
        ld      l,a
        ld      a,(hl)
        ex      af,af'
        ld      a,c
        and     10101010b
        ld      l,a
        ex      af,af'
        add     a,(hl)
        add     a,b
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     01000100b
        or      c
        ld      c,a
        exx
        ld      a,d
        and     01110111b
        ld      l,a
        ld      b,(hl)
        ld      a,e
        and     01110111b
        ld      l,a
        ld      a,(hl)
        ex      af,af'
        ld      a,c
        and     01010101b
        ld      l,a
        ex      af,af'
        add     a,(hl)
        add     a,b
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     00100010b
        or      c
        ld      c,a
        exx
        ret

;-----------------------------------------------------------------------------
; fourthiter - pass for bits 4 and 0 of an interior byte; the right neighbour
; bit is rotated in from the next byte.  Stores the finished byte at (IY) and
; advances IX and IY.  Overwrites D and E with table values.
;-----------------------------------------------------------------------------
fourthiter:
        ld      a,(ix-11)
        rlca                        ;carry = bit 7 of the byte up-right
        ld      a,d
        rla
        and     01110111b
        ld      l,a
        ld      d,(hl)
        ld      a,(ix+13)
        rlca
        ld      a,e
        rla
        and     01110111b
        ld      l,a
        ld      e,(hl)
        ld      a,(ix+01)
        rlca
        ld      a,c
        rla
        and     01010101b
        ld      l,a
        ld      a,(hl)
        add     a,e
        add     a,d
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     00010001b
        or      c
        ld      (iy+0),a
        exx
        inc     ix
        inc     iy
        ret

;=============== side of screen routines
;-----------------------------------------------------------------------------
; leftiter - firstiter for the first byte of a row: the "left" neighbours are
; taken from the last byte of the same three rows (ix-1, ix+11, ix+23).
;-----------------------------------------------------------------------------
leftiter:
        ld      d,(ix-12)
        ld      e,(ix+12)
        ld      c,(ix+00)
        ld      a,(ix-01)
        rrca
        ld      a,d
        rra
        and     11101110b
        ld      l,a
        ld      b,(hl)
        ld      a,(ix+23)
        rrca
        ld      a,e
        rra
        and     11101110b
        ld      l,a
        ld      a,(hl)
        ex      af,af'
        ld      a,(ix+11)
        rrca
        ld      a,c
        rra
        and     10101010b
        ld      l,a
        ld      a,c
        ex      af,af'
        add     a,(hl)
        add     a,b
        exx
        ld      l,a
        ex      af,af'
        ld      b,a
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     10001000b
        ld      c,a
        exx
        ret

;-----------------------------------------------------------------------------
; rightiter - fourthiter for the last byte of a row: the "right" neighbours
; are taken from the first byte of the same three rows (ix-23, ix-11, ix+1).
;-----------------------------------------------------------------------------
rightiter:
        ld      a,(ix-23)
        rlca
        ld      a,d
        rla
        and     01110111b
        ld      l,a
        ld      d,(hl)
        ld      a,(ix+01)
        rlca
        ld      a,e
        rla
        and     01110111b
        ld      l,a
        ld      e,(hl)
        ld      a,(ix-11)
        rlca
        ld      a,c
        rla
        and     01010101b
        ld      l,a
        ld      a,(hl)
        add     a,e
        add     a,d
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     00010001b
        or      c
        ld      (iy+0),a
        exx
        inc     ix
        inc     iy
        ret
;=============================================================================
; 2D cellular automaton core.
; Z80 assembly, TASM/SPASM-style syntax: "\" separates statements on a line,
; a trailing "b" marks a binary literal, "$" is the current address.
;
; Screen buffers are 64 rows x 12 bytes = 768 bytes.  Two of them are used
; (savesscreen / appbackupscreen) and swapped every generation.
;
; Fixed tables built by begincellular:
;   $8B00-$8BFF  per-nibble bit counts: entry[n] = popcount(n & $0F)
;                in the low nibble, popcount(n >> 4) in the high nibble
;   $8000-$80FF  first rule LUT  (result is ORed in  -> see firstiter etc.)
;   $8100-$81FF  second rule LUT (ANDed with the centre byte)
;
; Symbols defined elsewhere: appbackupscreen, savesscreen, statVars, rules,
; halted, iterations, buf1, buf2, temp1, done, showgraph, _getkbd.
;=============================================================================

;-----------------------------------------------------------------------------
; begincellular2 - entry point.  Saves the 12 bytes on either side of both
; screen buffers, runs the automaton, then puts those bytes back.
;-----------------------------------------------------------------------------
begincellular2:
        call    preservedata        ;preserving both sides of both tables so buffer under/overflow can occur
        call    begincellular
        call    restoredata         ;loading back data stuffs.
        ret

;-----------------------------------------------------------------------------
; preservedata - copy the four 12-byte guard rows (before/after each buffer)
; to 48 consecutive bytes starting at statVars.  DE keeps advancing across
; the four LDIRs.  Clobbers HL, DE, BC.
;-----------------------------------------------------------------------------
preservedata:
        ld      hl,appbackupscreen-12
        ld      de,statVars
        ld      bc,12
        ldir
        ld      hl,appbackupscreen+768
        ld      bc,12
        ldir
        ld      hl,savesscreen-12
        ld      bc,12
        ldir
        ld      hl,savesscreen+768
        ld      bc,12
        ldir
        ret

;-----------------------------------------------------------------------------
; restoredata - inverse of preservedata: HL walks the 48 saved bytes at
; statVars, DE is re-pointed at each guard row.  Clobbers HL, DE, BC.
;-----------------------------------------------------------------------------
restoredata:
        ld      de,appbackupscreen-12
        ld      hl,statVars
        ld      bc,12
        ldir
        ld      de,appbackupscreen+768
        ld      bc,12
        ldir
        ld      de,savesscreen-12
        ld      bc,12
        ldir
        ld      de,savesscreen+768
        ld      bc,12
        ldir
        ret

;-----------------------------------------------------------------------------
; begincellular - build the three lookup tables, then run the key/step loop
; (begincellular2a) until key code $37 is read.
; In:  (rules) = pointer to two consecutive rule lists, each stored as
;      <count byte> followed by <count> neighbour-count bytes.  The loops are
;      post-tested (dec c / jr nz), so a count of 0 is processed as 256.
;-----------------------------------------------------------------------------
begincellular:
        ld      a,0
        out     ($20),a             ;NOTE(review): presumably selects the slow CPU clock - confirm against port docs
        ld      hl,$8B00            ;embedded in StatVars memory location. Invalidate stat vars else sys will crash.
        ld      de,0                ;D stays 0 so "adc a,d" just adds the carry
;# of bits in each byte, grouped by nibbles
cenloop00:
        xor     a                   ;table 1
        ld      b,4
        rrc     l                   ;carry = next bit of the index (L is the index)
        adc     a,d
        djnz    $-3                 ;back to "rrc l"
        ld      e,a                 ;E = bits set in low nibble
        xor     a
        ld      b,4
        rrc     l                   ;second 4 rotations restore L
        adc     a,d
        djnz    $-3
        rrca
        rrca
        rrca
        rrca                        ;high-nibble count moved to high nibble
        or      e
        ld      (hl),a
        inc     l
        jr      nz,cenloop00
;ReturnToLife and KeepAlive tables in LUT format
        ld      IX,$8000            ;initialize LUT position
        ld      b,$FF               ;255 loops. Fencepost error eliminated, since 1/2 of data isn't used.
maxloopa:
        ld      hl,(rules)          ; load ruleset
        ld      c,(hl)              ; first byte is count
        inc     hl                  ; next byte
        ld      (IX+0),$00          ; clear off byte
minloop01:
        ld      a,IXl               ; get current position on LUT
        and     $0F                 ; get lower nibble
        cp      (hl)                ; compare with ruleset (bits set = perform this action) for lower half
        jr      nz,$+6              ; if no match, do not change bits
        set     2,(IX+0)            ; mark low nibble as matching
        ld      a,IXl               ; get current position on LUT
        rlca \ rlca \ rlca \ rlca   ;shift
        and     $0F                 ;strip MSN from byte
        cp      (hl)                ;compare that with data in HL (the rule byte)
        jr      nz,$+6              ;if not equal, skip; if equal, set bit 6 for rule
        set     6,(IX+0)            ;...
        inc     hl                  ;next rule byte
        dec     c                   ;decrement counter for number of rule bytes
        jr      nz,minloop01        ;go back to start of routine to check next rule
        inc     IXh                 ;go to next aligned set for rules
        ld      c,(hl)              ;load next set of rules. byte counter
        inc     hl                  ;next byte
        ld      (IX+0),$FF          ;load LUT with this value
minloop02:                          ;AND loop
        ld      a,IXl               ;get LUT LSB
        and     $0F                 ;strip MSN
        cp      (hl)                ;compare with rule bit
        jr      z,$+8               ;do not reset if they match. Else, reset bit.
        res     2,(IX+0)            ;...
        jr      $+6
        res     0,(IX+0)            ;used for flag if a match has been taken at some point
        ld      a,IXl               ;get MSN of LUT position
        rlca \ rlca \ rlca \ rlca   ;shift to LSB
        and     $0F                 ;strip MSN (now LSB) of LUT position
        cp      (hl)                ;compare with rule byte
        jr      z,$+8               ;do not reset if they match. Else, reset bit
        res     6,(IX+0)            ;...
        jr      $+6
        res     4,(IX+0)            ;used for flag if a match has been taken at some point
        inc     hl                  ;get next rule byte
        dec     c                   ;decrement rule byte counter
        jr      nz,minloop02        ;loop until there are no more rule bytes left in counter
        bit     0,(IX+0)            ;testing series... for the AND loop
        jr      nz,$+10             ;bit 0 still set = no rule matched the low nibble
        set     0,(IX+0)            ;
        set     2,(IX+0)            ;a match happened: force the marker bit back on
        bit     4,(IX+0)            ;
        jr      nz,$+10             ;same test for the high nibble
        set     4,(IX+0)            ;
        set     6,(IX+0)            ;
        dec     IXh                 ;move cursor back to first LUT
        inc     IXl                 ;next byte in LUT
        djnz    maxloopa            ;go back to main set. Reset pointer to rule table for another run-through.
;Tacked-on table converter code
; Expands marker bits 2 and 6 of every entry in $8000-$81FF into full-nibble
; masks ($0F / $F0).  BC=2 gives B=0 (256 passes of djnz) and C=2 outer passes.
        ld      bc,2
        ld      hl,$8000
fixloopa:
        ld      a,(hl)
; 76543210
;%01000100
        bit     2,a
        jr      z,$+6
        or      00001111b
        jr      $+4
        and     11110000b
        bit     6,a
        jr      z,$+6
        or      11110000b
        jr      $+4
        and     00001111b
        ld      (hl),a
        inc     hl
        djnz    fixloopa
        dec     c
        jr      nz,fixloopa
;End LUT generator
        xor     a
        ld      (halted),a
; Main loop: run one generation unless halted, draw, then poll the keyboard.
begincellular2a:
        ld      a,(halted)
        or      a
        jr      nz,$+5              ;halted: skip the automatic step
        call    workscreen
        call    showgraph
        call    _getkbd
        cp      $37
        ret     z                   ;key code $37 leaves the loop
        cp      $36                 ;2nd. Step over iteration
        jr      nz,$+7
        call    workscreen
        jr      begincellular2a
        cp      $38                 ;DEL. Toggle halt mode
        jr      nz,begincellular2delskip
        call    _getkbd
        or      a
        jr      nz,$-4              ;wait until the key has been released.
        ld      a,(halted)
        inc     a
        and     00000001b
        ld      (halted),a
        jr      begincellular2a
begincellular2delskip:
        jp      begincellular2a

;-----------------------------------------------------------------------------
; workscreen - compute one generation.
; Picks source/destination buffers from bit 0 of (iterations), copies the
; wrap-around rows into the 12 bytes before/after the source buffer, runs
; workonthisrow for all 64 rows, then increments the 4-byte counter at
; (iterations) and compares it with the 4 bytes that follow it in memory.
; Sets (done)=1 when they are equal.
; Preserves IY.  Clobbers everything else including IX and the shadow set.
; Executes DI and never EI, so it returns with interrupts disabled.
;-----------------------------------------------------------------------------
workscreen:
        push    IY                  ;save this register, 'tis prone to being destroyed in this routine.
        di                          ;prevent interrupts. Will be using shadow registers for spare registers
        ld      a,(iterations)
        ld      hl,savesscreen
        ld      de,appbackupscreen
        bit     0,a
        jr      nz,$+3              ;odd generation: skip the one-byte "ex de,hl" below
        ex      de,hl
        ld      (buf1),hl
        ld      (buf2),de
;
        ld      hl,(buf1)           ;buffer copy outside of range. Previous routine fixes it later.
        ld      e,l
        ld      d,h
        ld      bc,-12
        add     hl,bc
        ex      de,hl               ;DE = buf1-12, HL = buf1
        ld      bc,768-12
        add     hl,bc               ;HL = last row of buf1
        ld      bc,12
        ldir                        ;last row -> row above the buffer
        ld      hl,(buf1)
        ld      e,l
        ld      d,h
        ex      de,hl
        ld      bc,768
        add     hl,bc
        ex      de,hl               ;DE = buf1+768, HL = buf1
        ld      bc,12
        ldir                        ;first row -> row below the buffer
;
        exx                         ;saving these values for very quick reference
        ld      hl,$8100            ;HL' = rule LUT page
        exx
        ld      hl,$8B00            ;HL  = bit-count table page
;
; Clock initializing for speed count:
;       ld a,0
;       out ($30),a
;       ld a,$41
;       out ($30),a                 ;Timer/33
;       ld a,0
;       out ($31),a                 ;Set mode: Count to zero & stop
;       ld a,$FF
;       out ($32),a                 ;Begin counting from $FF
;Clock init done
;
        ld      ix,(buf1)           ;IX=first buffer, IY=second buffer. HL buf dropped altogether.
        ld      iy,(buf2)
        ld      b,64
        push    bc
        call    workonthisrow
        pop     bc
        djnz    $-5                 ;back to "push bc"
        pop     iy
;
;Fetch value off the clock
;       ld hl,$0505
;       ld (currow),hl
;       in a,($32)
;       ld l,a
;       ld h,0
;       bcall(_DispHL)
;       ld a,0
;       out ($30),a                 ;disable timer
;Stop clock section
;
        di
        ld      (temp1),SP
        ld      SP,iterations       ;use SP as a fast pointer into the variables
        pop     hl \ pop de \ ld bc,$0001           ;|
        add     hl,bc \ dec bc \ ex de,hl           ; >Increment 4-byte counter and use set vectors to compare
        adc     hl,bc \ push hl \ push de           ;|
        pop     hl \ pop ix \ pop bc \ pop de       ;HL:IX = counter (low:high), BC:DE = the 4 bytes after it
                                    ;NOTE(review): assumes the limit (setloops in the old code below)
                                    ;is stored directly after iterations - confirm.
        ld      SP,(temp1)          ;FIX: back on the real stack *before* any push.  The old code did
                                    ;"push hl \ pop ix" here with SP still in the variable area, which
                                    ;overwrote the word at iterations+6 and discarded the counter's
                                    ;high word, so the compare below never was a true 32-bit compare.
        or      a \ sbc hl,bc       ;low words equal?
        ret     nz
        push    ix \ pop hl         ;HL = high word of counter
        or      a \ sbc hl,de       ;high words equal?
        ret     nz
; Old straightforward version, kept for reference:
;       ld hl,(iterations)
;       inc hl
;       ld (iterations),hl
;       ld a,l
;       or h
;       jr nz,$+9
;       ld hl,(iterations+2)
;       inc hl
;       ld (iterations+2),hl        ;if first HL=0, then boundary has crossed and need to increment next set of HL 2 bytes above it
;       ld hl,(iterations)
;       ld de,(setloops)
;       ld a,e \ xor l \ ret nz
;       ld a,d \ xor h \ ret nz
;       ld hl,(iterations+2)
;       ld de,(setloops+2)
;       ld a,e \ xor l \ ret nz
;       ld a,d \ xor h \ ret nz
;if passed this point, then this means that (iterations) and (setloops) in all four bytes match. Tells program to end iterations now by setting (done) to 1.
        ld      a,1                 ;setting to 1. Calling routine cutoff has been commented out
        ld      (done),a            ;so that this has no effect
        ret

;===========================================================================================
;Main Work algorithm found here. Not as big of bloat as before, but it's still fast......
;===========================================================================================
;-----------------------------------------------------------------------------
; workonthisrow - process one 12-byte row: wrap-aware left byte, 10 interior
; bytes, wrap-aware right byte.  IX and IY advance by 12.
;-----------------------------------------------------------------------------
workonthisrow:
        call    leftiter
        call    centeriter
        call    fourthiter
        ld      a,10
Workonthisrowsub:
        push    af
        call    firstiter
        call    centeriter
        call    fourthiter
        pop     af
        dec     a
        jr      nz,Workonthisrowsub
        call    firstiter
        call    centeriter
        call    rightiter
        ret
;External setup:
;HL = LUT for bit comparison
;HL'= LUT for result testing
; Two LUTs are indexed by HL by incrementing and decrementing H (256 byte wide tables)
;IX = pointer to buffer 1 (reading)
;IY = pointer to buffer 2 (writing)
;
;Internal setup:
;D= row above
;E= row below
;C= current position
;B= temporary variable
;B'=center byte storage
;
;Registers used so far:
; AF, BC, DE, HL, AF', BC', HL', IX, IY
;
;Free registers:
; DE'
;
; Each byte is handled in four passes, one per bit position inside a nibble
; (both nibbles at once).  A pass masks the three neighbour rows, looks the
; per-nibble bit counts up via HL, sums them, then in the shadow set computes
;   ((centre AND LUT[$81xx]) OR LUT[$80xx]) AND <bit mask of this pass>.

;-----------------------------------------------------------------------------
; firstiter - pass for bits 7 and 3 of an interior byte.  Loads D/E/C from
; the rows above/below/current; the left neighbour bit is rotated in from the
; previous byte.  Leaves the centre byte in B' and the partial result in C'.
;-----------------------------------------------------------------------------
firstiter:
        ld      d,(ix-12)
        ld      e,(ix+12)
        ld      c,(ix+00)
        ld      a,(ix-13)
        rrca                        ;carry = bit 0 of the byte up-left
        ld      a,d
        rra
        and     11101110b
        ld      l,a
        ld      b,(hl)              ;B = counts for the row above
        ld      a,(ix+11)
        rrca
        ld      a,e
        rra
        and     11101110b
        ld      l,a
        ld      a,(hl)              ;A = counts for the row below
        ex      af,af'
        ld      a,(ix-01)
        rrca
        ld      a,c
        rra
        and     10101010b           ;same row: left and right neighbours only
        ld      l,a
        ld      a,c
        ex      af,af'              ;A = below counts, A' = centre byte
        add     a,(hl)
        add     a,b                 ;A = neighbour count per nibble
        exx
        ld      l,a
        ex      af,af'
        ld      b,a                 ;B' = centre byte
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     10001000b
        ld      c,a                 ;C' = result so far
        exx
        ret

;-----------------------------------------------------------------------------
; centeriter - passes for bits 6/2 and 5/1.  Needs D, E, C and B', C' as left
; by firstiter/leftiter; no neighbouring bytes are read.
;-----------------------------------------------------------------------------
centeriter:
        ld      a,d
        and     11101110b
        ld      l,a
        ld      b,(hl)
        ld      a,e
        and     11101110b
        ld      l,a
        ld      a,(hl)
        ex      af,af'
        ld      a,c
        and     10101010b
        ld      l,a
        ex      af,af'
        add     a,(hl)
        add     a,b
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     01000100b
        or      c
        ld      c,a
        exx
        ld      a,d
        and     01110111b
        ld      l,a
        ld      b,(hl)
        ld      a,e
        and     01110111b
        ld      l,a
        ld      a,(hl)
        ex      af,af'
        ld      a,c
        and     01010101b
        ld      l,a
        ex      af,af'
        add     a,(hl)
        add     a,b
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     00100010b
        or      c
        ld      c,a
        exx
        ret

;-----------------------------------------------------------------------------
; fourthiter - pass for bits 4 and 0 of an interior byte; the right neighbour
; bit is rotated in from the next byte.  Stores the finished byte at (IY) and
; advances IX and IY.  Overwrites D and E with table values.
;-----------------------------------------------------------------------------
fourthiter:
        ld      a,(ix-11)
        rlca                        ;carry = bit 7 of the byte up-right
        ld      a,d
        rla
        and     01110111b
        ld      l,a
        ld      d,(hl)
        ld      a,(ix+13)
        rlca
        ld      a,e
        rla
        and     01110111b
        ld      l,a
        ld      e,(hl)
        ld      a,(ix+01)
        rlca
        ld      a,c
        rla
        and     01010101b
        ld      l,a
        ld      a,(hl)
        add     a,e
        add     a,d
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     00010001b
        or      c
        ld      (iy+0),a
        exx
        inc     ix
        inc     iy
        ret

;=============== side of screen routines
;-----------------------------------------------------------------------------
; leftiter - firstiter for the first byte of a row: the "left" neighbours are
; taken from the last byte of the same three rows (ix-1, ix+11, ix+23).
;-----------------------------------------------------------------------------
leftiter:
        ld      d,(ix-12)
        ld      e,(ix+12)
        ld      c,(ix+00)
        ld      a,(ix-01)
        rrca
        ld      a,d
        rra
        and     11101110b
        ld      l,a
        ld      b,(hl)
        ld      a,(ix+23)
        rrca
        ld      a,e
        rra
        and     11101110b
        ld      l,a
        ld      a,(hl)
        ex      af,af'
        ld      a,(ix+11)
        rrca
        ld      a,c
        rra
        and     10101010b
        ld      l,a
        ld      a,c
        ex      af,af'
        add     a,(hl)
        add     a,b
        exx
        ld      l,a
        ex      af,af'
        ld      b,a
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     10001000b
        ld      c,a
        exx
        ret

;-----------------------------------------------------------------------------
; rightiter - fourthiter for the last byte of a row: the "right" neighbours
; are taken from the first byte of the same three rows (ix-23, ix-11, ix+1).
;-----------------------------------------------------------------------------
rightiter:
        ld      a,(ix-23)
        rlca
        ld      a,d
        rla
        and     01110111b
        ld      l,a
        ld      d,(hl)
        ld      a,(ix+01)
        rlca
        ld      a,e
        rla
        and     01110111b
        ld      l,a
        ld      e,(hl)
        ld      a,(ix-11)
        rlca
        ld      a,c
        rla
        and     01010101b
        ld      l,a
        ld      a,(hl)
        add     a,e
        add     a,d
        exx
        ld      l,a
        ld      a,b
        and     (hl) \ dec h
        or      (hl) \ inc h
        and     00010001b
        or      c
        ld      (iy+0),a
        exx
        inc     ix
        inc     iy
        ret
I wish I had the extra RAM pages so that I could use that, Iambian. As it is: goodbye Graph3, hello cellular automata.