; (Forum page residue, not part of the code: "0 Members and 1 Guest are viewing this topic.")
;-----------------------------------------------------------------------
; mul16 -- 16 x 16 -> 32-bit unsigned multiply, built from four 8x8 MLTs.
;
; Mode:   expects Z80 mode
; In:     HL, BC = the two 16-bit factors
; Out:    DE = upper 16 bits of the product
;         BC = lower 16 bits of the product
; Clobb:  A, HL, flags
; Stack:  one word (the low partial product is parked there)
; Cost:   53 or 55 t-states, 29 bytes (original author's figures)
;
; With HL = H:L and BC = B:C the product is
;     H*B * 2^16  +  (B*L + H*C) * 2^8  +  C*L
;-----------------------------------------------------------------------
mul16:
        ld      d,c
        ld      e,l
        mlt     de                      ; DE = C*L, the low partial product
        push    de                      ; park it until the cross terms are summed
        ld      d,h                     ; DE = H:B, operands of the high product
        ld      e,b
        ld      h,e                     ; HL = B:L  } operands of the two cross terms; swapped here,
        ld      b,d                     ; BC = H:C  } while D/E hold copies, so A is not needed as a temp
        mlt     de                      ; DE = H*B  (weight 2^16)
        mlt     hl                      ; HL = B*L  (weight 2^8)
        mlt     bc                      ; BC = H*C  (weight 2^8)
        add     hl,bc                   ; HL = sum of cross terms, CF = bit 16 of that sum
        jr      nc,mul16_mid_ok
        inc     d                       ; that carry weighs 2^24 = bit 0 of D, so INC D (INC DE was a bug)
mul16_mid_ok:
        pop     bc                      ; BC = C*L
        ld      a,b                     ; fold the cross sum in at byte offset 1:
        add     a,l
        ld      b,a                     ;   B += low byte of cross sum
        ld      a,e
        adc     a,h
        ld      e,a                     ;   E += high byte of cross sum + carry
        ret     nc
        inc     d                       ; propagate the last carry into the top byte
        ret
;-----------------------------------------------------------------------
; Mult16 -- HLDE = HL * DE  (16 x 16 -> 32-bit unsigned, four 8x8 MLTs)
;
; In:     HL, DE = the two 16-bit factors
; Out:    HL = upper 16 bits of the product
;         DE = lower 16 bits of the product
; Clobb:  A, BC, flags
; Stack:  one word (the high partial product is parked there)
; Cost:   52 cc, 28 bytes (original author's figures)
;
; NOTE(review): the carry handling relies on 16-bit ADD/ADC HL, so this
; presumably has to run in Z80 (16-bit) mode -- confirm before using in ADL mode.
;
; With HL = H:L and DE = D:E the product is
;     H*D * 2^16  +  (D*L + H*E) * 2^8  +  L*E
;-----------------------------------------------------------------------
Mult16:
        ld      b,h
        ld      c,d
        mlt     bc                      ; BC = H*D, the high partial product
        push    bc                      ; park it; it is added last
        ld      b,l                     ; BC = L:E, operands of the low product
        ld      c,e
        ld      l,c                     ; HL = H:E  } operands of the
        ld      e,b                     ; DE = D:L  } two cross terms
        mlt     bc                      ; BC = L*E
        mlt     de                      ; DE = D*L
        mlt     hl                      ; HL = H*E
        xor     a                       ; A = 0
        add     hl,de                   ; HL = cross sum (low 16 bits), CF = its bit 16
        ld      e,h                     ; E = bits 8..15 of the cross sum
        ld      h,l
        ld      l,a                     ; HL = (low byte of cross sum) << 8
        adc     a,a                     ; A = CF from the ADD above (the LDs leave flags alone)
        ld      d,a                     ; DE = cross sum >> 8  (bits 8..16)
        add     hl,bc                   ; HL = low word of the product, CF = carry into the high word
        ex      de,hl                   ; DE = low word (final); HL = cross sum >> 8
        pop     bc                      ; BC = H*D
        adc     hl,bc                   ; HL = high word of the product
        ret
;-----------------------------------------------------------------------
; Mult32 -- (IX+8) = (IX+0) * (IX+4)
;
; 32 x 32 -> 64-bit unsigned multiply on memory operands, computed one
; result byte at a time from sixteen 8x8 MLT partial products.
;
; In:     IX -> operand/result area:
;           IX+0..IX+3   first factor,  least significant byte first
;           IX+4..IX+7   second factor, least significant byte first
; Out:    IX+8..IX+15    64-bit product, least significant byte first
; Clobb:  A, BC, DE, HL, flags (IX is preserved)
; Cost:   178 bytes, 275 clock cycles (original author's figures)
;
; NOTE(review): carries are taken from 16-bit ADD HL,DE and the final store
; is LD (IX+14),HL, so this presumably has to run in Z80 (16-bit) mode --
; confirm before using in ADL mode.
;
; Register usage
;   AHL = running column sum. After each column, L is stored as the next
;         result byte and the sum is shifted right one byte (L<-H, H<-A).
;   DE  = next partial product to be added
;   B   = cached (IX+0), later (IX+7)
;   C   = cached (IX+4), later (IX+3)
;
; "Prod ij" = byte i of the first factor times byte j of the second.
; Where A is known to be 0, ADC A,A is used instead of ADC A,0 to capture
; the carry in one byte rather than two.
;-----------------------------------------------------------------------
Mult32:
        xor     a

; ---- result byte 0 ---------------------------------------------------
        ld      b,(ix+0)
        ld      h,b                     ; Prod 00
        ld      c,(ix+4)
        ld      l,c
        mlt     hl
        ld      (ix+8),l
        ld      l,h                     ; shift the sum right one byte
        ld      h,a                     ;   (A = 0)

; ---- result byte 1 ---------------------------------------------------
        ld      d,b                     ; Prod 01
        ld      e,(ix+5)
        mlt     de
        add     hl,de                   ; no carry possible: $00FF + $FE01 = $FF00 at most
        ld      d,(ix+1)                ; Prod 10
        ld      e,c
        mlt     de
        add     hl,de
        adc     a,a                     ; A = carry (A was 0)
        ld      (ix+9),l
        ld      l,h
        ld      h,a
        xor     a

; ---- result byte 2 ---------------------------------------------------
        ld      d,b                     ; Prod 02
        ld      e,(ix+6)
        mlt     de
        add     hl,de                   ; no carry possible: $01FD + $FE01 = $FFFE at most
        ld      d,(ix+2)                ; Prod 20
        ld      e,c
        mlt     de
        add     hl,de
        adc     a,a
        ld      d,(ix+1)                ; Prod 11
        ld      e,(ix+5)
        mlt     de
        add     hl,de
        adc     a,0
        ld      (ix+10),l
        ld      l,h
        ld      h,a
        xor     a

; ---- result byte 3 ---------------------------------------------------
        ld      d,b                     ; Prod 03 (last use of cached (ix+0) in B)
        ld      b,(ix+7)                ; B now caches (ix+7)
        ld      e,b
        mlt     de
        add     hl,de
        adc     a,a
        ld      de,(ix+3)               ; Prod 30: one word load gives E = (ix+3), D = (ix+4); saves a byte
        ld      c,e                     ; C now caches (ix+3)
        mlt     de
        add     hl,de
        adc     a,0
        ld      d,(ix+1)                ; Prod 12
        ld      e,(ix+6)
        mlt     de
        add     hl,de
        adc     a,0
        ld      d,(ix+2)                ; Prod 21
        ld      e,(ix+5)
        mlt     de
        add     hl,de
        adc     a,0
        ld      (ix+11),l
        ld      l,h
        ld      h,a
        xor     a

; ---- result byte 4 ---------------------------------------------------
        ld      d,c                     ; Prod 31
        ld      e,(ix+5)
        mlt     de
        add     hl,de
        adc     a,a
        ld      d,(ix+1)                ; Prod 13
        ld      e,b
        mlt     de
        add     hl,de
        adc     a,0
        ld      d,(ix+2)                ; Prod 22
        ld      e,(ix+6)
        mlt     de
        add     hl,de
        adc     a,0
        ld      (ix+12),l
        ld      l,h
        ld      h,a
        xor     a

; ---- result byte 5 ---------------------------------------------------
        ld      d,(ix+2)                ; Prod 23
        ld      e,b
        mlt     de
        add     hl,de
        adc     a,a
        ld      d,c                     ; Prod 32
        ld      e,(ix+6)
        mlt     de
        add     hl,de
        adc     a,0
        ld      (ix+13),l
        ld      l,h
        ld      h,a

; ---- result bytes 6 and 7 --------------------------------------------
        mlt     bc                      ; Prod 33: B = (ix+7), C = (ix+3)
        add     hl,bc                   ; no carry possible: the full product fits in 64 bits
        ld      (ix+14),hl
        ret