Author Topic: Shifting 64-bit numbers (68k)  (Read 2256 times)

0 Members and 1 Guest are viewing this topic.

Offline christop

  • LV3 Member (Next: 100)
  • ***
  • Posts: 87
  • Rating: +20/-0
    • View Profile
Shifting 64-bit numbers (68k)
« on: February 27, 2011, 10:15:50 pm »
Ok, so I know most people here use Z80, but here is a set of 68k routines for shifting unsigned 64-bit numbers left and right. I wrote these for a floating-point emulator that I'm writing, but they're useful for other stuff too.

Code: [Select]
| Shift amounts below SHIFT_THRESH are handled by the one-bit-at-a-time
| loops in sr64_reg and sl64_reg; amounts at or above it take the
| multi-bit shift/mask paths.
.equ SHIFT_THRESH, 5 | XXX the exact optimal value needs to be figured out

| unsigned long long sr64(unsigned long long, unsigned);
|
| C-callable entry point: loads the stack arguments into registers and
| falls through into sr64_reg.
|  (4,%sp)  = upper 32 bits of the number
|  (8,%sp)  = lower 32 bits of the number
|  (12,%sp) = shift amount (read as a 16-bit word)
sr64:
        move.l  (4,%sp),%d0
        move.l  (8,%sp),%d1
        move.w  (12,%sp),%d2

| shift right a 64-bit number (logical shift: zeros enter from the top)
| input:
|  %d0:%d1 = 64-bit number (%d0 is upper 32 bits)
|  %d2.w = shift amount (unsigned)
| output:
|  %d0:%d1, shifted (0 when the shift amount is >= 64)
| clobbers:
|  %d2 (%d3 is used as scratch on one path but is saved and restored)
sr64_reg:
        cmp     #SHIFT_THRESH,%d2
        blo     8f              | small shift: use the one-bit loop below
        cmp     #32,%d2
        bhs     5f

        | SHIFT_THRESH <= shift amount < 32
        | Rotate %d0 so the bits that must move down into %d1 end up in
        | the upper end of %d0, then merge them in using a pair of masks.
        ror.l   %d2,%d0
        lsr.l   %d2,%d1         | 00..xx (upper bits cleared)

        move.l  %d3,-(%sp)      | %d3 is needed as a second scratch register

        | compute masks
        moveq   #-1,%d3
        lsr.l   %d2,%d3         | 00..11 (lower bits)
        move.l  %d3,%d2         | shift amount is no longer needed
        not.l   %d2             | 11..00 (upper bits)

        and.l   %d0,%d2         | only upper bits from %d0
        or.l    %d2,%d1         | put upper bits from %d0 into %d1
        and.l   %d3,%d0         | clear upper bits in %d0

        move.l  (%sp)+,%d3
        rts

        | shift amount is >= 32
5:
        cmp     #64,%d2
        bhs     6f
        | 32 <= shift amount < 64: the lower half of the result is the
        | old upper half shifted by (amount - 32); the upper half is 0
        sub     #32,%d2
        move.l  %d0,%d1
        lsr.l   %d2,%d1
        moveq   #0,%d0
        rts

        | shift amount is >= 64: every bit is shifted out
        | (this routine carries its own copy of the zeroing code so that
        | the "6f" above resolves to a label inside sr64_reg itself)
6:
        moveq   #0,%d0
        move.l  %d0,%d1
        rts

        | shift amount < threshold
7:
        | shift right one bit: the bit leaving %d0 is carried through
        | the X flag into the top of %d1
        lsr.l   #1,%d0
        roxr.l  #1,%d1
8:
        | loop entry point: dbra decrements %d2.w and branches back until
        | it reaches -1, so a shift amount of 0 performs no shift at all
        dbra    %d2,7b
        rts

| unsigned long long sl64(unsigned long long, unsigned);
|
| Entry point for C callers: fetch the arguments from the stack and
| drop straight into sl64_reg.
sl64:
        move.l  (4,%sp),%d0     | upper 32 bits
        move.l  (8,%sp),%d1     | lower 32 bits
        move.w  (12,%sp),%d2    | shift amount

| shift left a 64-bit number
| input:
|  %d0:%d1 = 64-bit number (%d0 is upper 32 bits)
|  %d2.w = shift amount (unsigned)
| output:
|  %d0:%d1, shifted
sl64_reg:
        cmp.w   #SHIFT_THRESH,%d2
        blo     8f              | tiny shift: go one bit at a time
        cmp.w   #32,%d2
        bhs     5f

        | SHIFT_THRESH <= shift amount < 32
        move.l  %d3,-(%sp)      | borrow %d3 for the mask

        moveq   #-1,%d3
        lsl.l   %d2,%d3         | 11..00 (bits that stay in %d1)

        rol.l   %d2,%d1         | bits leaving %d1 wrap into its low end
        lsl.l   %d2,%d0         | xx..00 (room for the wrapped bits)

        move.l  %d1,%d2         | shift amount is dead from here on
        and.l   %d3,%d1         | %d1 = lower half, wrapped bits cleared
        eor.l   %d1,%d2         | %d2 = just the wrapped bits (00..xx)
        or.l    %d2,%d0         | merge them into the upper half

        move.l  (%sp)+,%d3
        rts

        | shift amount is >= 32
5:
        cmp.w   #64,%d2
        bhs     6f
        | 32 <= shift amount < 64: the upper half is the old lower half
        | shifted by (amount - 32), the lower half becomes 0
        move.l  %d1,%d0
        sub.w   #32,%d2
        lsl.l   %d2,%d0
        moveq   #0,%d1
        rts

        | shift amount is >= 64: nothing survives
6:
        moveq   #0,%d0
        move.l  %d0,%d1
        rts

        | shift amount < threshold
7:
        | one-bit step: the top bit of %d1 travels through X into %d0
        lsl.l   #1,%d1
        roxl.l  #1,%d0
8:
        | entered here first, so a shift amount of 0 does no shifting
        dbra    %d2,7b
        rts

These routines can be used from assembly (using the _reg versions) or from C (using the non-_reg versions).

Also, if most shifts in your program are at or above the threshold (SHIFT_THRESH), you can remove the 2 instructions at the beginning (the cmp/blo pair that tests SHIFT_THRESH) and the 4 instructions at the bottom (the one-bit loop at labels 7 and 8, through the final rts) of both routines (in the _reg versions, that is). That would save a bit of time (for most cases) and some space.

Without the threshold sections, these routines seem to be smaller (and probably faster) than the assembly code generated by TIGCC for shifting 64-bit numbers ("long long" type). My code does check for shifts of 64 or more (and returns 0 for them), whereas TIGCC doesn't (shifts greater than or equal to the width of the type produce undefined results in C anyway, but I wanted defined behavior in my code).

I'll double-check the sizes and timings in mine relative to the generated code and then bring this to the TIGCC maintainers' attention if mine is better. Smaller and faster code is always a good thing, right? :D
« Last Edit: February 27, 2011, 10:19:36 pm by christop »
Christopher Williams