Show Posts

This section allows you to view all posts made by this member. Note that you can only see posts made in areas you currently have access to.

Messages - jacobly

Pages: 1 ... 8 9 [10] 11 12 ... 14

136

ASM / Re: 24 bit multiplication

« on: December 11, 2011, 02:06:55 pm »

Code: [Select]

// Multiply a times b
// Shift-and-add, consuming a from its most significant bit downwards.
// temp accumulates the product and is returned; a is shifted out entirely.
temp = 0
repeat for each bit in a
	temp <<= 1
	if (high bit of a set)
		temp += b
	a <<= 1
return temp

if a and b are 2 bytes, temp is 4 bytes, and you loop 16 times.

Spoiler For code:

Code: [Select]

// Sqrt a
// Digit-by-digit square root, two bits of a per iteration; returns b = floor(sqrt(a)).
// temp is the running remainder, held 6 bits to the left of its natural position
// so that the trial value can be built from b with a whole-byte shift.
temp = high byte of a
a <<= 8
b = 0
repeat for every 2 bits in a
	// trial value, i.e. ((b << 2) + 1) << 6; parenthesised because + binds
	// tighter than << in C-like notation
	test = (b << 8) + 0x40
	b <<= 1
	if (temp >= test)
		temp -= test
		set low bit of b
	// make room for the next pair of bits before bringing them in
	temp <<= 2
	temp += high 2 bits of a
	a <<= 2
return b

If a is 4 bytes, then b and temp are 2 bytes, and you loop 16 times.

Spoiler For code:

137

The Axe Parser Project / Re: Assembly Programmers - Help Axe Optimize!

« on: December 11, 2011, 07:34:47 am »

Lol... I'm already optimizing

p_ArcTan: same size, save 19 or 10 (avg 14.5) cycles

Original

Code: [Select]

; p_ArcTan (original version quoted in the post).
; On entry hl = y and the stack holds the return address on top with x beneath it.
; Computes hl = 64y/(x +- y) and adjusts the result when x is negative.
p_ArcTan:
	.db __ArcTanEnd-1-$	;length byte: number of code bytes that follow
	ex	de,hl		;de = y
	pop	hl		;hl = return address
	ex	(sp),hl		;hl = x
	push	hl		;keep x on the stack for the sign test after the divide
	ld	a,h		;\
	xor	d		; |Get parity: carry = sign(x) xor sign(y)
	rla			;/
	jr	c,__ArcTanSS	;\ signs differ: go straight to the subtract
	add	hl,de		; | signs equal: x + 2y, so that the
	add	hl,de		; | subtract below leaves x + y
__ArcTanSS:			; |
	or	a		; |hl = x +- y (or a clears carry for sbc)
	sbc	hl,de		;/
	ex	de,hl		;de = x +- y, hl = y
	ld	b,6		;\
__ArcTan64:			; |
	add	hl,hl		; |hl = 64y
	djnz	__ArcTan64	;/
	call	$3F00+sub_SDiv	;hl = 64y/(x +- y)
	pop	af		;\ a = high byte of the saved x
	rla			; |Right side (x >= 0), fine
	ret	nc		;/
	sbc	a,a		;\ carry is set here, so a = $FF
	sub	h		; |Reverse sign extend (h = $FF - h)
	ld	h,a		;/
	ld	a,l		;\
	add	a,128		; |Add or sub 128
	ld	l,a		;/
	ret
__ArcTanEnd:

Optimized

Code: [Select]

; p_ArcTan (optimized version proposed in the post).
; Same interface as the original: hl = y, stack = return address over x.
; The rla / jr c / double-add sequence is replaced by a sign-flag jump taken
; directly on the result of the xor.
p_ArcTan:
	.db __ArcTanEnd-1-$	;length byte: number of code bytes that follow
	ex	de,hl		;de = y
	pop	hl		;hl = return address
	ex	(sp),hl		;hl = x
	push	hl		;keep x on the stack for the sign test after the divide
	ld	a,h		;\
	xor	d		;/ Get parity: S = sign(x) xor sign(y), carry cleared
	jp	m,__ArcTanSS-p_ArcTan-1	;signs differ; operand is an offset from the first code byte (presumably relocated by Axe -- confirm)
	add	hl,de		;\ signs equal: hl = x + y
	jr	__ArcTanDS	; |
__ArcTanSS:			; |hl = x +- y
	sbc	hl,de		; | signs differ: hl = x - y (carry still clear from xor d)
__ArcTanDS:			;/
	ex	de,hl		;de = x +- y, hl = y
	ld	b,6		;\
__ArcTan64:			; |
	add	hl,hl		; |hl = 64y
	djnz	__ArcTan64	;/
	call	$3F00+sub_SDiv	;hl = 64y/(x +- y)
	pop	af		;\ a = high byte of the saved x
	rla			; |Right side (x >= 0), fine
	ret	nc		;/
	sbc	a,a		;\ carry is set here, so a = $FF
	sub	h		; |Reverse sign extend (h = $FF - h)
	ld	h,a		;/
	ld	a,l		;\
	add	a,128		; |Add or sub 128
	ld	l,a		;/
	ret
__ArcTanEnd:

I'm curious as to why you multiplied by 64 before dividing. It would seem that if the times 64 was after the division, the result would generally be the same, but there would be less of a chance of overflow. It's possible though that it doesn't matter.
Edit: Oh yeah... accuracy. Your way is more accurate, nvm.

p_DrawBmp: saves 3 to 4 bytes, and (8 ± 0 or 4) × (visible height) - 8 cycles

Original

Code: [Select]

; p_DrawBmp (original) -- excerpts only; each "; ..." marks code omitted from the post.
p_DrawBmp:
	; ...
__DrawBmpGoodSize:
	ld	b,a			;B = plot_height
	push	bc			;****** BEGIN BUFFER CALCULATIONS ******
	; ...
__DrawBmpLeftLoop:
	inc	c			;\ Z if C = 0 (C itself is unchanged)
	dec	c			;/
	jr	z,__DrawBmpSkipMain
	dec	c
	; ...
__DrawBmpOnLeft:			;A = X + 8
	inc	c			;\ Z if C = 0 (C itself is unchanged)
	dec	c			;/
	ld	d,(hl)			;the loads and inc hl below leave Z intact
	inc	hl
	ld	e,c			;E = 0 if z
	jr	z,__DrawBmpSt
	; ...
__DrawBmpStSkip:
	ld	a,e
	pop	de			;D = X
	ld	e,c
	pop	bc
	ld	c,e			;C = bytes
	; ...
__DrawBmpColWall:
	inc	c			;\ Z if C = 0 (C itself is unchanged)
	dec	c			;/
	jr	z,__DrawBmpSkipMain
	dec	c			;Z if C is now 0
	ld	a,d
	jr	nz,__DrawBmpColLeft	;tests the Z flag from the dec c above
	cp	88
	ld	d,(hl)
	inc	hl
	jr	nc,__DrawBmpSkipMain	;tests the carry from cp 88 (taken when A >= 88)
	ld	e,c			;C = 0 on this path, so E = 0
	jr	__DrawBmpSt
	; ...

Optimized

Code: [Select]

; p_DrawBmp (optimized) -- excerpts only; each "; ..." marks code omitted from the post.
; Idea: keep C one higher than in the original (C = bytes+1) so that a single
; dec c both tests for "no bytes left" and performs the decrement, replacing the
; original inc c / dec c / jr z / dec c sequences.
p_DrawBmp:
	; ... c = bytes + 1 is required for the rest of the optimizations
__DrawBmpGoodSize:
	ld	b,a			;B = plot_height
	inc	c			;C = bytes+1
	push	bc			;****** BEGIN BUFFER CALCULATIONS ******
	; ... undo inc c above, affect z flag the same as before, c is still one more than before
__DrawBmpLeftLoop:
	dec	c
	jr	z,__DrawBmpSkipMain
	; NOTE(review): the original left Z reflecting its second dec c at this point.
	; If the omitted code that follows tests Z, it needs the same ld a,c / dec a
	; treatment as __DrawBmpColWall below -- confirm.
	; ... since c is one more than before, check e = c - 1 for 0, instead of c
__DrawBmpOnLeft:			;A = X + 8
	ld	d,(hl)
	inc	hl
	ld	e,c
	dec	e			;E = 0 and z (if bytes = 0)
	jr	z,__DrawBmpSt
	; ... this stores one more than before to e, but all code paths lead to
	; either pop de, ld e,(hl), or ld e,c before e is ever used.
__DrawBmpStSkip:
	ld	a,e
	pop	de			;D = X
	ld	e,c
	pop	bc
	ld	c,e			;C = bytes+1
	; ... same as above
__DrawBmpColWall:
	dec	c			;Z if no bytes are left
	jr	z,__DrawBmpSkipMain
	; The original took the Z flag for the jr nz below from its second dec c
	; (Z when the remaining count reached 0). The dec c above cannot supply it:
	; Z is always clear once jr z falls through, which would make the cp 88 path
	; unreachable. Recompute the same flag in A, which ld a,d overwrites anyway.
	; This costs 2 bytes compared with the version as posted.
	ld	a,c
	dec	a			;Z if C-1 = 0, exactly the original's flag
	ld	a,d
	jr	nz,__DrawBmpColLeft
	cp	88
	ld	d,(hl)
	inc	hl
	jr	nc,__DrawBmpSkipMain
	; On this path the original had C = 0, so its ld e,c set E = 0 before
	; jumping to __DrawBmpSt. Here C = 1, so dec e is needed to get the same E = 0.
	ld	e,c
	dec	e
	jr	__DrawBmpSt
	; ...

Sorry for bumping some of these so soon, but I wanted to change them to work with the new version.

p_88Mul: same size, saves 1 or 6 (avg 3.5) cycles

Original

Code: [Select]

; p_88Mul (original version quoted in the post).
; Multiplies hl by de via sub_MulFull: both operands are first made
; non-negative, and the result is negated at the end if the sign test says so.
p_88Mul:
	.db __88MulEnd-1-$	;length byte: number of code bytes that follow
	ld	a,h
	xor	d
	push	af		;save a = h xor d (bit 7 set if the signs differ)
	bit	7,h
	jr	z,$+8		;hl >= 0: skip the 6-byte negate below
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	bit	7,d
	jr	z,$+8		;de >= 0: skip the 6-byte negate below
	xor	a		;\
	sub	e		; |
	ld	e,a		; |de = -de
	sbc	a,a		; |
	sub	d		; |
	ld	d,a		;/
	call	$3F00+sub_MulFull
	ld	l,h		;\ result = a:h (presumably the middle 16 bits of the
	ld	h,a		;/ full product -- confirm against sub_MulFull)
	pop	af		;a = saved h xor d
	xor	h
	ret	p		;bit 7 of (saved sign xor result high byte) clear: done
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	ret
__88MulEnd:

Optimized

Code: [Select]

; p_88Mul (optimized version proposed in the post).
; Identical to the original except for the sign test of hl: instead of
; bit 7,h / jr z, a second xor d recovers h in a and sets the sign flag,
; which a jp p then tests.
p_88Mul:
	.db __88MulEnd-1-$	;length byte: number of code bytes that follow
	ld	a,h
	xor	d
	push	af		;save a = h xor d (bit 7 set if the signs differ)
	xor	d ; a = h again, S = sign of hl
	jp	p,$+9-p_88Mul-1	;hl >= 0: skip the 6-byte negate (3-byte jp + 6 = $+9); operand is an offset from the first code byte (presumably relocated by Axe -- confirm)
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	bit	7,d
	jr	z,$+8		;de >= 0: skip the 6-byte negate below
	xor	a		;\
	sub	e		; |
	ld	e,a		; |de = -de
	sbc	a,a		; |
	sub	d		; |
	ld	d,a		;/
	call	$3F00+sub_MulFull
	ld	l,h		;\ result = a:h (presumably the middle 16 bits of the
	ld	h,a		;/ full product -- confirm against sub_MulFull)
	pop	af		;a = saved h xor d
	xor	h
	ret	p		;bit 7 of (saved sign xor result high byte) clear: done
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	ret
__88MulEnd:

p_SDiv: same size, saves 1 or 6 (avg 3.5) cycles

Original

Code: [Select]

; p_SDiv (original version quoted in the post).
; Signed wrapper around sub_Div: both hl and de are made non-negative,
; sub_Div is called, and hl is negated afterwards if the operand signs differed.
p_SDiv:
	.db __SDivEnd-1-$	;length byte: number of code bytes that follow
	ld	a,h
	xor	d
	push	af		;save the flags of h xor d (S set if the signs differ)
	bit	7,h
	jr	z,$+8		;hl >= 0: skip the 6-byte negate below
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	bit	7,d
	jr	z,$+8		;de >= 0: skip the 6-byte negate below
	xor	a		;\
	sub	e		; |
	ld	e,a		; |de = -de
	sbc	a,a		; |
	sub	d		; |
	ld	d,a		;/
	call	$3F00+sub_Div
	pop	af		;restore the flags saved after xor d
	ret	p		;signs were equal: result stays as is
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	ret
__SDivEnd:

Optimized

Code: [Select]

; p_SDiv (optimized version proposed in the post).
; Identical to the original except for the sign test of hl: a second xor d
; recovers h in a and sets the sign flag, replacing bit 7,h / jr z with jp p.
; The flags pushed for the final sign fix-up are still those of the first xor d.
p_SDiv:
	.db __SDivEnd-1-$	;length byte: number of code bytes that follow
	ld	a,h
	xor	d
	push	af		;save the flags of h xor d (S set if the signs differ)
	xor	d ; a = h again, S = sign of hl
	jp	p,$+9-p_SDiv-1	;hl >= 0: skip the 6-byte negate (3-byte jp + 6 = $+9); operand is an offset from the first code byte (presumably relocated by Axe -- confirm)
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	bit	7,d
	jr	z,$+8		;de >= 0: skip the 6-byte negate below
	xor	a		;\
	sub	e		; |
	ld	e,a		; |de = -de
	sbc	a,a		; |
	sub	d		; |
	ld	d,a		;/
	call	$3F00+sub_Div
	pop	af		;restore the flags saved after the first xor d
	ret	p		;signs were equal: result stays as is
	xor	a		;\
	sub	l		; |
	ld	l,a		; |hl = -hl
	sbc	a,a		; |
	sub	h		; |
	ld	h,a		;/
	ret
__SDivEnd:

p_Reciprocal: same size, saves 31 cycles
Let me know if you want this one explained.

Original

Code: [Select]

; p_Reciprocal (original version quoted in the post).
; Takes the absolute value of hl, runs part of sub_Mod on it with preset
; registers, and negates the result if the input was negative.
p_Reciprocal:
	.db __ReciprocalEnd-1-$	;length byte: number of code bytes that follow
	xor	a		;a = 0
	bit	7,h		;Z if the input is non-negative
	push	af		;save that Z flag (and a = 0) for the final sign fix-up
	jr	z,$+8		;non-negative: skip the next 6 bytes, landing on ex de,hl
	sub	l		;\
	ld	l,a		; |
	sbc	a,a		; |hl = -hl
	sub	h		; |
	ld	h,a		;/
	xor	a		;a = 0 again
	ex	de,hl		;de = absolute value of the input
	ld	bc,$1000
	ld	hl,1
	ld b,b \ .db 7 \ call $3F00+sub_Mod	;presumably Axe's notation for entering sub_Mod 7 bytes past its start -- confirm
	ld	h,a		;\ result is taken from a:c
	ld	l,c		;/
	pop	af		;a = 0, Z if the input was non-negative
	ret	z
	sub	l		;\
	ld	l,a		; |
	sbc	a,a		; |hl = -hl
	sub	h		; |
	ld	h,a		;/
	ret
__ReciprocalEnd:

Optimized

Code: [Select]

; p_Reciprocal (optimized version proposed in the post).
; Differs from the original only in the state handed to sub_Mod:
; bc = $1001, hl = 2 and entry offset 13, instead of $1000, 1 and 7.
; NOTE(review): presumably this pre-computes the start of the divide loop so
; that sub_Mod can be entered later -- confirm against sub_Mod's code.
p_Reciprocal:
	.db __ReciprocalEnd-1-$	;length byte: number of code bytes that follow
	xor	a		;a = 0
	bit	7,h		;Z if the input is non-negative
	push	af		;save that Z flag (and a = 0) for the final sign fix-up
	jr	z,$+8		;non-negative: skip the next 6 bytes, landing on ex de,hl
	sub	l		;\
	ld	l,a		; |
	sbc	a,a		; |hl = -hl
	sub	h		; |
	ld	h,a		;/
	xor	a		;a = 0 again
	ex	de,hl		;de = absolute value of the input
	ld	bc,$1001
	ld	hl,2
	ld b,b \ .db 13 \ call $3F00+sub_Mod	;presumably Axe's notation for entering sub_Mod 13 bytes past its start -- confirm
	ld	h,a		;\ result is taken from a:c
	ld	l,c		;/
	pop	af		;a = 0, Z if the input was non-negative
	ret	z
	sub	l		;\
	ld	l,a		; |
	sbc	a,a		; |hl = -hl
	sub	h		; |
	ld	h,a		;/
	ret
__ReciprocalEnd:

138

The Axe Parser Project / Re: Bug Reports

« on: December 11, 2011, 03:11:59 am »

Although the new getKeyʳ is more useful, it can also now be very confusing. For example:

Code: [Select]

:Repeat getKeyʳ=64
:End

Could cause an infinite loop that may be impossible to get out of, if lowercase is enabled.

The reason for this is that if you type a lowercase letter, it is stored at $8446, but if you type any other key, it does not reset the value at $8446. (Edit: This causes all of the other keycodes to change to different values every time a lowercase letter is typed.) On the plus side, its value does seem to always be reset at the beginning of the program.

Some fixes for this are to only read $8446 if a >= $fc, reset $8446 after it is read, or to change checks for any key that is not a lowercase letter to getKeyʳ^256=key code.
Edit: And in the last case, it should probably be documented somewhere, since it is not obvious just from playing around with getKeyʳ.

139

Axe / Re: Routines

« on: December 11, 2011, 02:31:21 am »

Due to multiple requests, I wrote an axe clock library, LIBCLOCK. See CLOCKTST for example code.

Format of the list below: Axe code, followed by what the function does on calculators with a clock (and, in parentheses, what it does on calculators without a clock).

Main functions:
ClkOf() Turns the clock off (does nothing).
ClkOn() Turns the clock on (does nothing).
IsClk() Returns 1 if the clock is on, 0 if off (returns 0).
°A:GetDT() Gets the current date and time. Sets 6 consecutive variables, or 6 consecutive words, starting at the passed in address. In this example, A = year, B = month, C = day, D = hour, E = minute, F = second (returns midnight of January 1, 1997). Do not pass in °r₁.
SetDT(year,month,day,hour,minute,second) Sets the current date and time (does nothing relatively slowly).
DOfWk(year,month,day) Returns the day of the week of the specified date, 1 = Sunday, ..., 7 = Saturday.

Low level functions:
°A:GetRT() Gets the current raw time. Sets 2 consecutive variables, or 2 consecutive words, starting at the passed in address. In this example, AB = seconds since January 1, 1997 (AB = 0).
°A:SetRT() Sets the current raw time. Uses 2 consecutive variables, or 2 consecutive words, starting at the passed in address (does nothing).

Bonus functions:
Mul21(r₁,r₂,r₃) Multiplies r₁r₂ by r₃ and stores the result in r₁r₂.
Div21(r₁,r₂,r₃) Divides r₁r₂ by r₃ and stores the result in r₁r₂, and the remainder in r₄. Edit: I think r₃ must be < 256.

140

The Axe Parser Project / Re: Features Wishlist

« on: December 10, 2011, 10:32:09 pm »

#-(#^A)... optimized!

Edit: Wow... ninja'd with a better response

141

The Axe Parser Project / Re: Axe Parser

« on: December 10, 2011, 09:59:25 pm »

Quick question: do ...'s nest? They obviously can't normally (start and end are indistinguishable) but with preprocessor conditionals they theoretically could. Not that I need it yet, but it would be a good thing to know.

142

The Axe Parser Project / Re: Axe Parser

« on: December 10, 2011, 08:58:51 pm »

Quote from: epic7 on December 10, 2011, 08:43:21 pm

Awesome!! NEW AXE

I was the first person to download the new version

And I was the second!

Quote from: epic7 on December 10, 2011, 08:43:21 pm

Now...
Is absorbing appvars like
[pic1]->pic1
but for appvars?

I believe so.

Quote from: epic7 on December 10, 2011, 08:43:21 pm

And what are preprocessor conditionals?

Instead of commenting out code like this:
... .Code ...
You can do something like this:
...If condition.Code ...
And it will only comment out the code if the condition is true (condition must be a constant).

Quote from: epic7 on December 10, 2011, 08:43:21 pm

Also, what does Single argument for loops can now take any expression as an argument mean?

For(A) .loop A times
used to not work (because it wasn't implemented)... now it is!

Quote from: epic7 on December 10, 2011, 08:43:21 pm

Also, yet another question, what is
"Now able to use Return in a single argument for loop."

This used to be an error (because it wouldn't work):
For(10)
If A+1→A=B
Return
End
End
Now, Quigibo has found some amazing way to make it work, so it is allowed.

btw, most of this info is probably in the command list

Edit:

Quote from: epic7 on December 10, 2011, 08:43:21 pm

Edit:
Yet another yet another question is:
What does select() do? I don't get what the commands list is saying

Select(EXP1,EXP2) finds the value of EXP1, stores it into some secret place, then it evaluates EXP2, and lastly, it looks in that secret place and returns the original value of EXP1.
For example:
Select(A,B→A)→B swaps A and B.
A is stored to a secret place, B is stored to A, then the value of that secret place (the original value of A) is stored to B.

143

The Axe Parser Project / Re: Assembly Programmers - Help Axe Optimize!

« on: December 10, 2011, 07:43:58 pm »

I had a sneaking suspicion that you would find some reason to call p_Mod

For the first peephole optimization, since you already had

Code: [Select]

	; Peephole entry: a .db count followed by the code to match, then a
	; .db count followed by the replacement. Here the ld a,h / or l is dropped.
	; NOTE(review): sbc hl,hl / ld a,h / or l encodes to 4 bytes (sbc hl,rr is
	; ED-prefixed), yet the count is 3 -- confirm the count's meaning.
	.db 3
	sbc	hl,hl
	ld	a,h
	or	l
	.db 2
	sbc	hl,hl

I figured that you already checked that the value of a is not used.

The second one was an optimization for 32-bit subtraction (it was supposed to be p_LtLeXX followed by dec hl). However, the following should work because it has no differing side effects and should be more common. (btw, I think Runer may have made a similar suggestion)
; Peephole entry: match inc hl / dec hl (2 bytes) and replace it with nothing.
; The pair restores hl, and 16-bit inc/dec do not touch the flags.
.db 2
inc hl
dec hl
.db 0 ;<- don't know if an empty (0-byte) replacement works

144

ASM / Re: 24 bit multiplication

« on: December 10, 2011, 06:13:47 pm »

Quote from: cerzus69 on December 10, 2011, 08:26:06 am

Hey, jacobly, I tested both of your multiplication routines and it doesn't seem like they're doing the same thing... I've tried both of them in my program but they have different results.

They both have different inputs.

The first is hla * cde, and the second is hlc * bde.
You probably want to add some code to the beginning of each so that the input works better for what you are doing.
For example:
ld a,c \ ld c,b
at the beginning of the first routine causes them to have the same input (hlc and bde).

Edit: Here's some pseudo code that might help.

Code: [Select]

// Multiply a times b
// Shift-and-add, consuming a from its most significant bit downwards.
// temp accumulates the product and is returned; a is shifted out entirely.
temp = 0
repeat for each bit in a
	temp <<= 1
	if (high bit of a set)
		temp += b
	a <<= 1
return temp

// Divide a by b
// Shift-and-subtract (restoring) division. The bits of a are shifted out of
// its top into temp while the quotient bits are shifted into its bottom, so
// a ends up holding the quotient and temp the remainder.
temp = 0
repeat for each bit in a
	temp <<= 1
	temp += high bit of a
	a <<= 1
	if (temp >= b)
		temp -= b
		set low bit of a
return a

// Sqrt a
// Digit-by-digit square root, two bits of a per iteration; returns b = floor(sqrt(a)).
// temp is the running remainder, b the root built so far.
temp = 0
b = 0
repeat for every 2 bits in a
	// bring the next pair of bits into the remainder; as in the divide
	// routine, temp must be shifted first to make room for them
	temp <<= 2
	temp += high 2 bits of a
	a <<= 2
	// trial value; parenthesised because + binds tighter than << in C-like notation
	test = (b << 2) + 1
	b <<= 1
	if (temp >= test)
		temp -= test
		set low bit of b
return b
// Sqrt a, sometimes better with multiple-of-a-byte registers
// Same digit-by-digit square root, but temp (the running remainder) is held
// 6 bits to the left of its natural position so that the trial value can be
// built from b with a whole-byte shift. Returns b = floor(sqrt(a)).
temp = high byte of a
a <<= 8
b = 0
repeat for every 2 bits in a
	// trial value, i.e. ((b << 2) + 1) << 6; parenthesised because + binds
	// tighter than << in C-like notation
	test = (b << 8) + 0x40
	b <<= 1
	if (temp >= test)
		temp -= test
		set low bit of b
	// make room for the next pair of bits before bringing them in
	temp <<= 2
	temp += high 2 bits of a
	a <<= 2
return b

The tricky part is figuring out how many bits are in each variable and allocating the z80 registers accordingly.

145

The Axe Parser Project / Re: Assembly Programmers - Help Axe Optimize!

« on: December 10, 2011, 05:11:59 pm »

For p_SortD, affecting de doesn't matter because if you follow the code path, the next occurrence of de is when it is loaded from hl, so its contents don't matter.
As for p_Mod, ac is the division result, which is never needed. You can also notice that in the original routine, the new bits shifted into ac are never read.

Edit: fixed grammar

Also, some peephole ops I would find useful.

Code: [Select]

	; Proposed peephole entry: a .db count followed by the code to match, then a
	; .db count followed by the replacement. Here the ld a,h / or l is dropped.
	; NOTE(review): sbc hl,de / ld a,h / or l encodes to 4 bytes (sbc hl,rr is
	; ED-prefixed), yet the count is 3 -- confirm the count's meaning.
	.db 3
	sbc	hl,de
	ld	a,h
	or	l
	.db 2
	sbc	hl,de

Code: [Select]

	; Proposed peephole entry: the 8-byte sequence below is replaced by its
	; first 6 bytes, i.e. the trailing inc hl / dec hl pair is removed.
	; The pair restores hl, and 16-bit inc/dec do not touch the flags.
	.db	8
	ld	de,$0000
	add	hl,de
	sbc	hl,hl
	inc	hl
	dec	hl
	.db	6
	ld	de,$0000
	add	hl,de
	sbc	hl,hl

146

ASM / Re: 24 bit multiplication

« on: December 10, 2011, 02:45:24 am »

Actually, my routine only uses about 3 virtual registers. The only reason it uses so many z80 registers is because the virtual registers are so big. In fact, the first routine uses the same number of bits as yours would (don't forget that A and B in your routine would each be 48 bits wide). However, your routine does have the advantage of using similarly sized registers (and fewer iterations), so it probably would be more useful on other processors.

147

The Axe Parser Project / Re: Bug Reports

« on: December 09, 2011, 06:50:55 pm »

A*^B seems to be compiling to
ld hl,A
ld de,B
call sub_Mul
ld h,c
ld l,a
Which always returns zero because it calls sub_Mul instead of sub_MulFull.
This happens with and without peephole opts.

148

Humour and Jokes / Re: If there was tech support in botswana...

« on: December 09, 2011, 03:26:18 pm »

Use keyboard shortcuts for *everything*

Windows games work better on my computer in Wine than in an emulator running Windows XP.

149

Humour and Jokes / Re: If there was tech support in botswana...

« on: December 09, 2011, 02:45:48 pm »

Buy them a new computer.

I can't think of a problem to put in the second line.

150

Axe / Re: Exiting For-loop in a search-subroutine

« on: December 09, 2011, 02:42:29 pm »

Almost. CPIR returns an address, whereas this routine returns an index. Also, this routine is much easier to extend to arrays of words or objects (but I'm not saying that access to CPIR through an axe command wouldn't be cool).

Pages: 1 ... 8 9 [10] 11 12 ... 14