Author Topic: Shunting-Yard Algorithm  (Read 7362 times)

0 Members and 1 Guest are viewing this topic.

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Shunting-Yard Algorithm
« on: September 27, 2017, 08:37:33 pm »
So I have been really wanting to implement the Shunting-Yard algorithm for a few years now and I think I've got it in assembly. I keep finding bugs, but in case other people are interested, here is my code. It assumes that the operator stack and output (combined) won't exceed 768 bytes.

Things that would be useful:
  • Converting numbers to an intermediate format.
  • Handling multi-byte tokens and named vars and functions.
This way the code can be pseudo-compiled when it is converted to RPN, then an RPN calculator can parse the code faster or prep it for actual compiling.
Code: [Select]
#define bcall(x) rst 28h \ .dw x
saveSScreen = 86ECh
scrap=saveSScreen
outhead=8000h
stackhead = 8002h
.db $BB,$6D
.org $9D95
    ld hl,test
    ld bc,test_end-test
    call shuntingyard
    call rpn
    ld hl,scrap
    bcall(450Ah)
    bcall(452Eh)
    ret
shuntingyard:
    ld de,scrap
    ld (outhead),de
    ld d,(scrap/256)+3
    ld (stackhead),de
_:
    ld a,(hl)
    call +_
    cpi
    jp pe,-_
    ld hl,scrap+768
    ld de,(stackhead)
    or a
    sbc hl,de
    ld b,h
    ld c,l
    ld hl,(outhead)
    ex de,hl
    jr z,$+3
    ldir
    dec de
    xor a
    ld (de),a
    ret
_:
    cp '.'
    jp z,num_dec
    cp 30h
    jr c,+_
    cp 3Ah
    jp c,num
_:
    cp '('
    jp nz,+_
    ex de,hl
    ld hl,(stackhead)
    dec hl
    ld (hl),','
    dec hl
    ld (hl),a
    ld (stackhead),hl
    ex de,hl
    ret
_:
    cp ')'
    jp nz,checkunops
    push hl
    push bc
    ld hl,scrap+768
    ld de,(stackhead)
    sbc hl,de
    jp z,ERR_Unmatched_lparens
    ld b,h
    ld c,l
    ex de,hl
    ld de,(outhead)
;BC is the size of the stack. Use this in case there is a missing ')' so we don't read garbage.
;basically search for the matching '(' while piping out the stack to the output.
outerloop:
    ld a,(hl)
    cp '('
    jr z,parens_found
    ld a,','
_:
    cp (hl)
    ldi
    jp po,ERR_Unmatched_lparens
    jr z,outerloop
    jp -_
parens_found:
    inc hl
    inc hl
    ld (outhead),de
    ld (stackhead),hl
    pop bc
    pop hl
    ret
checkunops:
checkbinops:
;; if the token is an operator, then:
;; while there is an operator at the top of the operator stack with
;; greater than or equal to precedence and the operator is left associative:
;; pop operators from the operator stack, onto the output queue.
;; push the read operator onto the operator stack.
;;
;;
    push bc
    ex de,hl
    call getprecedence
    ld a,c
    pop bc
    ex de,hl
    jp c,search_function
    ;now C is the precedence, with lower bit = 1 if left-associative
    push bc
    push hl
    ld de,(stackhead)
    ld hl,scrap+768
    sbc hl,de
    ld b,h
    ld c,l
    ld hl,(outhead)
    ex de,hl
    jr z,pushop
    ;a is the precedence against which to compare
_:
    push hl
    push bc
    push af
    ld a,(hl)
    call getprecedence
    jr c,+_
    pop hl
    ld a,h      ;incoming
    cp c
    jr nz,$+4
    rra \ nop

    pop bc
    pop hl
   
 ;======================================================
    jr nc,pushop
.echo "The following code only works until we have to add >1 byte tokens."
 ldi
 ldi
    jp pe,-_
    jp $+6
_:
    pop af
    pop bc
    pop hl
pushop:
    ld (outhead),de
    pop de
    dec hl
    ld (hl),','
    dec hl
    ld a,(de)
    ld (stackhead),hl
    ld (hl),a
    ex de,hl
    pop bc
    ret
search_function:
    jp ERR_Func_Not_Found
getprecedence:
    ld hl,binops
    ld b,(binops_end-binops)/2
_:
    cp (hl)
    inc hl
    ld c,(hl)
    ret z
    inc hl
    djnz -_
    scf
    ret
binops:
    .db 4,  $01
    .db '=',$50
    .db '|',$60
    .db '&',$70
    .db '-',$81     ;right associative is odd
    .db '+',$80     ;left associative is even
    .db '/',$83     ;right associative
    .db '*',$82     ;left associative
    .db '^',$85     ;right associative
binops_end:
num:
    ld de,(outhead)
_:
    ldi
    jp po,+_
    ld a,(hl)
    cp '.'
    jr z,num_dec+4
    cp 30h
    jr c,+_
    cp 3Ah
    jr c,-_
_:
    ld a,','
    ld (de),a
    inc de
    ld (outhead),de
    dec hl
    inc bc
    ret
num_dec:
    ld de,(outhead)
_:
    ldi
    jp po,+_
    ld a,(hl)
    cp 30h
    jr c,+_
    cp 3Ah
    jr c,-_
_:
    cp '.'
    jp z,ERR_Syntax_00
    ld a,','
    ld (de),a
    inc de
    ld (outhead),de
    dec hl
    inc bc
    ret
ERR_Syntax_00:      ;Too many decimal points.
ERR_Func_Not_Found:
ERR_Unmatched_lparens:
    ret
rpn:
    ret
test:
;    .db "(3.1415926535)"
.db "(3.142+6/2-7)*3^6*3"
test_end:

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #1 on: November 05, 2017, 09:15:04 pm »
I combined the shunting yard code with my latest function lookup code, and now it is recognizing named functions as well!

Here is the code that is run in the attached gif:
Code: [Select]
#include "grammer3.inc"
#ifndef scrap
#define scrap 8000h
#endif
 
#define bcall(x) rst 28h \ .dw x
saveSScreen = 86ECh
_PutS = 450Ah
_NewLine = 452Eh

sybuf           = saveSScreen
outhead         = scrap
stackhead       = scrap+2
input_save      = scrap+4
input_savesize  = scrap+6
hash            = scrap+8
spsave          = scrap+10

.db $BB,$6D
.org $9D95

;    ld hl,testinput
;    ld bc,testinput_end-testinput
;    call hashlookup     ;returns c if success

    ld hl,test
    bcall(_PutS)
    bcall(_NewLine)
    ld hl,s_RightArrow
    bcall(_PutS)
    bcall(_NewLine)
    ld hl,test
    ld bc,test_end-test
    ld (spsave),sp
    call shuntingyard
    call rpn
    ld hl,sybuf
    bcall(_PutS)
    bcall(_NewLine)
    ret
shuntingyard:
    ld de,sybuf
    ld (outhead),de
    ld d,(sybuf/256)+3
    ld (stackhead),de
_:
    ld a,(hl)
    call +_
    cpi
    jp pe,-_
    ld hl,sybuf+768
    ld de,(stackhead)
    or a
    sbc hl,de
    ld b,h
    ld c,l
    ld hl,(outhead)
    ex de,hl
    jr z,$+3
    ldir
    dec de
    xor a
    ld (de),a
    ret
_:
    cp '.'
    jp z,num_dec
    cp 30h
    jr c,+_
    cp 3Ah
    jp c,num
_:
    cp '('
    jp nz,+_
    ex de,hl
    ld hl,(stackhead)
    dec hl
    ld (hl),','
    dec hl
    ld (hl),a
    ld (stackhead),hl
    ex de,hl
    ret
_:
    cp ')'
    jp nz,checkunops
    push hl
    push bc
    ld hl,sybuf+768
    ld de,(stackhead)
    sbc hl,de
    jp z,ERR_Unmatched_lparens
    ld b,h
    ld c,l
    ex de,hl
    ld de,(outhead)
;BC is the size of the stack. Use this in case there is a missing ')' so we don't read garbage.
;basically search for the matching '(' while piping out the stack to the output.
_:
    ld a,(hl)
    cp '('
    jr z,+_
    ldi
    jp pe,-_
    jp ERR_Unmatched_lparens
_:
    dec de
    ld a,(de)
    inc de
    cp ','
    jr z,+_
    ld a,','
    ld (de),a
    inc de
_:
.echo "Potential bug here."     ;Should be fine unless an external routine randomly jumps partway into the shunting yard code :P
    inc hl
    inc hl
    ld (outhead),de
    ld (stackhead),hl
    pop bc
    pop hl
    ret
checkunops:
checkbinops:
;; if the token is an operator, then:
;;  while there is an operator at the top of the operator stack with
;;   greater than or equal to precedence and the operator is left associative:
;;    pop operators from the operator stack, onto the output queue.
;;  push the read operator onto the operator stack.
;;
;;
    push bc
    ex de,hl
    call getprecedence
    ld a,c
    pop bc
    ex de,hl
    jp c,search_function
    ;now C is the precedence, with lower bit = 1 if left-associative
    push bc
    push hl
    ld de,(stackhead)
    ld hl,sybuf+768
    sbc hl,de
    ld b,h
    ld c,l
    ld hl,(outhead)
    ex de,hl
    jr z,pushop
    ;a is the precedence against which to compare
_:
    push hl
    push bc
    push af
    ld a,(hl)
    call getprecedence
    jr c,+_
    pop hl
    ld a,h      ;incoming
    cp c
    jr nz,$+4
    rra \ nop

    pop bc
    pop hl
   
 ;======================================================
    jr nc,pushop
.echo "The following code only works until we have to add >1 byte tokens."
 ldi
 ldi
    jp pe,-_
    jp $+6
_:
    pop af
    pop bc
    pop hl
pushop:
    ld (outhead),de
    pop de
    dec hl
    ld (hl),','
    dec hl
    ld a,(de)
    ld (stackhead),hl
    ld (hl),a
    ex de,hl
    pop bc
    ret
search_function:
    push bc
    call hashlookup
    ld a,c
    pop bc
    jp nc,ERR_Func_Not_Found
    push hl
    ld de,(stackhead)
    dec de
    ex de,hl
    ld (hl),','
    ex de,hl
    inc a
    dec de
_:
    ldd
    dec a
    jr nz,-_
    pop hl
    inc de
    ld (stackhead),de
    inc bc
    ret
getprecedence:
    ld hl,binops
    ld b,(binops_end-binops)/2
_:
    cp (hl)
    inc hl
    ld c,(hl)
    ret z
    inc hl
    djnz -_
    scf
    ret
binops:
    .db 4,  $01
    .db '=',$50
    .db '|',$60
    .db '&',$70
    .db '-',$81     ;right associative is odd
    .db '+',$80     ;left associative is even
    .db '/',$83     ;right associative
    .db '*',$82     ;left associative
    .db '^',$85     ;right associative
binops_end:
num:
    ld de,(outhead)
_:
    ldi
    jp po,+_
    ld a,(hl)
    cp '.'
    jr z,num_dec+4
    cp 30h
    jr c,+_
    cp 3Ah
    jr c,-_
_:
    ld a,','
    ld (de),a
    inc de
    ld (outhead),de
    dec hl
    inc bc
    ret
num_dec:
    ld de,(outhead)
_:
    ldi
    jp po,+_
    ld a,(hl)
    cp 30h
    jr c,+_
    cp 3Ah
    jr c,-_
_:
    cp '.'
    jp z,ERR_Syntax_00
    ld a,','
    ld (de),a
    inc de
    ld (outhead),de
    dec hl
    inc bc
    ret



hashlookup:
;HL points to the input
;BC is the max size
;return nc if failed to match, c if success
    ld (input_save),hl
    ld (input_savesize),bc
    ld a,b
    or c
    jr z,match_null
    call computehash
    ld hl,(input_savesize)
    xor a
    sbc hl,bc
    jr z,match_fail
    ld b,h
    ld c,l
    ld d,a
    ex de,hl
    add hl,hl
    ld (hash),hl


    ld de,hashlut_builtin
    add hl,de
    ld e,(hl)
    inc hl
    ld d,(hl)

    ld hl,(input_save)
;BC is the input size
;HL points to the input string
;DE points to the comparison
    ld a,(de)
    cp c
    jr nz,match_fail
    inc de
    ld b,c

_:
    ld a,(de)
    inc de
    cp (hl)
    jr nz,match_fail
    inc hl
    djnz -_
    scf
    ret
match_null:
    ld de,t_null+1
match_fail:
    ld hl,(input_save)
    ld bc,(input_savesize)
    or a
    ret
computehash:
    ld e,0
_:
    ld a,(hl)
    sub 48
    ret c
    cp 10
    jr c,$+5
    sub 7
    ret c
    cp 68
    ret nc
    ld d,a
    add a,a
    add a,d
    xor e
    ld e,a
    cpi
    jp pe,-_
    ret
   
   
hashlut_builtin:
.dw t_null              ;00
.dw t_null              ;01
.dw t_null              ;02
.dw t_null              ;03
.dw t_null              ;04
.dw t_null              ;05
.dw t_null              ;06
.dw t_null              ;07
.dw t_null              ;08
.dw t_null              ;09
.dw t_null              ;0a
.dw t_cosh              ;0b
.dw t_null              ;0c
.dw t_null              ;0d
.dw t_null              ;0e
.dw t_null              ;0f
.dw t_null              ;10
.dw t_null              ;11
.dw t_atan              ;12
.dw t_null              ;13
.dw t_sinh              ;14
.dw t_null              ;15
.dw t_null              ;16
.dw t_null              ;17
.dw t_null              ;18
.dw t_null              ;19
.dw t_null              ;1a
.dw t_null              ;1b
.dw t_sqrt              ;1c
.dw t_null              ;1d
.dw t_null              ;1e
.dw t_max               ;1f
.dw t_null              ;20
.dw t_null              ;21
.dw t_ClrDraw           ;22
.dw t_null              ;23
.dw t_null              ;24
.dw t_null              ;25
.dw t_null              ;26
.dw t_null              ;27
.dw t_null              ;28
.dw t_Ellipse           ;29
.dw t_null              ;2a
.dw t_null              ;2b
.dw t_null              ;2c
.dw t_null              ;2d
.dw t_null              ;2e
.dw t_null              ;2f
.dw t_null              ;30
.dw t_null              ;31
.dw t_null              ;32
.dw t_null              ;33
.dw t_null              ;34
.dw t_null              ;35
.dw t_null              ;36
.dw t_null              ;37
.dw t_null              ;38
.dw t_null              ;39
.dw t_ln                ;3a
.dw t_null              ;3b
.dw t_null              ;3c
.dw t_pi                ;3d
.dw t_null              ;3e
.dw t_null              ;3f
.dw t_null              ;40
.dw t_null              ;41
.dw t_null              ;42
.dw t_null              ;43
.dw t_null              ;44
.dw t_null              ;45
.dw t_null              ;46
.dw t_null              ;47
.dw t_null              ;48
.dw t_null              ;49
.dw t_null              ;4a
.dw t_null              ;4b
.dw t_null              ;4c
.dw t_null              ;4d
.dw t_null              ;4e
.dw t_null              ;4f
.dw t_null              ;50
.dw t_null              ;51
.dw t_null              ;52
.dw t_null              ;53
.dw t_null              ;54
.dw t_null              ;55
.dw t_null              ;56
.dw t_null              ;57
.dw t_null              ;58
.dw t_null              ;59
.dw t_null              ;5a
.dw t_null              ;5b
.dw t_null              ;5c
.dw t_null              ;5d
.dw t_null              ;5e
.dw t_null              ;5f
.dw t_null              ;60
.dw t_null              ;61
.dw t_null              ;62
.dw t_null              ;63
.dw t_null              ;64
.dw t_null              ;65
.dw t_null              ;66
.dw t_null              ;67
.dw t_null              ;68
.dw t_randint           ;69
.dw t_asinh             ;6a
.dw t_null              ;6b
.dw t_tan               ;6c
.dw t_null              ;6d
.dw t_null              ;6e
.dw t_null              ;6f
.dw t_null              ;70
.dw t_null              ;71
.dw t_null              ;72
.dw t_null              ;73
.dw t_null              ;74
.dw t_acosh             ;75
.dw t_null              ;76
.dw t_null              ;77
.dw t_null              ;78
.dw t_null              ;79
.dw t_null              ;7a
.dw t_null              ;7b
.dw t_null              ;7c
.dw t_null              ;7d
.dw t_null              ;7e
.dw t_null              ;7f
.dw t_null              ;80
.dw t_atanh             ;81
.dw t_null              ;82
.dw t_null              ;83
.dw t_null              ;84
.dw t_null              ;85
.dw t_Line              ;86
.dw t_sin               ;87
.dw t_null              ;88
.dw t_null              ;89
.dw t_e                 ;8a
.dw t_null              ;8b
.dw t_null              ;8c
.dw t_mod               ;8d
.dw t_null              ;8e
.dw t_null              ;8f
.dw t_null              ;90
.dw t_min               ;91
.dw t_Circle            ;92
.dw t_gcd               ;93
.dw t_null              ;94
.dw t_null              ;95
.dw t_null              ;96
.dw t_null              ;97
.dw t_cos               ;98
.dw t_null              ;99
.dw t_null              ;9a
.dw t_null              ;9b
.dw t_null              ;9c
.dw t_null              ;9d
.dw t_null              ;9e
.dw t_null              ;9f
.dw t_null              ;a0
.dw t_log2              ;a1
.dw t_null              ;a2
.dw t_Tilemap           ;a3
.dw t_log10             ;a4
.dw t_null              ;a5
.dw t_null              ;a6
.dw t_null              ;a7
.dw t_null              ;a8
.dw t_Text              ;a9
.dw t_null              ;aa
.dw t_null              ;ab
.dw t_null              ;ac
.dw t_null              ;ad
.dw t_Disp              ;ae
.dw t_null              ;af
.dw t_null              ;b0
.dw t_null              ;b1
.dw t_null              ;b2
.dw t_null              ;b3
.dw t_null              ;b4
.dw t_null              ;b5
.dw t_null              ;b6
.dw t_null              ;b7
.dw t_DispBuf           ;b8
.dw t_lcm               ;b9
.dw t_setseed           ;ba
.dw t_null              ;bb
.dw t_null              ;bc
.dw t_null              ;bd
.dw t_null              ;be
.dw t_null              ;bf
.dw t_pow10             ;c0
.dw t_null              ;c1
.dw t_null              ;c2
.dw t_null              ;c3
.dw t_null              ;c4
.dw t_pow2              ;c5
.dw t_null              ;c6
.dw t_null              ;c7
.dw t_null              ;c8
.dw t_null              ;c9
.dw t_null              ;ca
.dw t_null              ;cb
.dw t_null              ;cc
.dw t_null              ;cd
.dw t_null              ;ce
.dw t_null              ;cf
.dw t_null              ;d0
.dw t_null              ;d1
.dw t_null              ;d2
.dw t_null              ;d3
.dw t_null              ;d4
.dw t_null              ;d5
.dw t_null              ;d6
.dw t_null              ;d7
.dw t_null              ;d8
.dw t_null              ;d9
.dw t_null              ;da
.dw t_null              ;db
.dw t_null              ;dc
.dw t_Shiftbuf          ;dd
.dw t_randseed          ;de
.dw t_null              ;df
.dw t_null              ;e0
.dw t_null              ;e1
.dw t_exp               ;e2
.dw t_Output            ;e3
.dw t_null              ;e4
.dw t_Sprite            ;e5
.dw t_acos              ;e6
.dw t_null              ;e7
.dw t_Rect              ;e8
.dw t_null              ;e9
.dw t_null              ;ea
.dw t_null              ;eb
.dw t_null              ;ec
.dw t_rand              ;ed
.dw t_null              ;ee
.dw t_null              ;ef
.dw t_null              ;f0
.dw t_null              ;f1
.dw t_null              ;f2
.dw t_null              ;f3
.dw t_null              ;f4
.dw t_null              ;f5
.dw t_null              ;f6
.dw t_null              ;f7
.dw t_null              ;f8
.dw t_asin              ;f9
.dw t_null              ;fa
.dw t_null              ;fb
.dw t_null              ;fc
.dw t_null              ;fd
.dw t_null              ;fe
.dw t_tanh              ;ff
t_null:
.db 0
t_cosh:
.db 4,"cosh"
#include "commands\cosh.z80"
t_atan:
.db 4,"atan"
#include "commands\atan.z80"
t_sinh:
.db 4,"sinh"
#include "commands\sinh.z80"
t_sqrt:
.db 4,"sqrt"
#include "commands\sqrt.z80"
t_max:
.db 3,"max"
#include "commands\max.z80"
t_ClrDraw:
.db 7,"ClrDraw"
#include "commands\ClrDraw.z80"
t_Ellipse:
.db 7,"Ellipse"
#include "commands\Ellipse.z80"
t_ln:
.db 2,"ln"
#include "commands\ln.z80"
t_pi:
.db 2,"pi"
#include "commands\pi.z80"
t_randint:
.db 7,"randint"
#include "commands\randint.z80"
t_asinh:
.db 5,"asinh"
#include "commands\asinh.z80"
t_tan:
.db 3,"tan"
#include "commands\tan.z80"
t_acosh:
.db 5,"acosh"
#include "commands\acosh.z80"
t_atanh:
.db 5,"atanh"
#include "commands\atanh.z80"
t_Line:
.db 4,"Line"
#include "commands\Line.z80"
t_sin:
.db 3,"sin"
#include "commands\sin.z80"
t_e:
.db 1,"e"
#include "commands\e.z80"
t_mod:
.db 3,"mod"
#include "commands\mod.z80"
t_min:
.db 3,"min"
#include "commands\min.z80"
t_Circle:
.db 6,"Circle"
#include "commands\Circle.z80"
t_gcd:
.db 3,"gcd"
#include "commands\gcd.z80"
t_cos:
.db 3,"cos"
#include "commands\cos.z80"
t_log2:
.db 4,"log2"
#include "commands\log2.z80"
t_Tilemap:
.db 7,"Tilemap"
#include "commands\Tilemap.z80"
t_log10:
.db 5,"log10"
#include "commands\log10.z80"
t_Text:
.db 4,"Text"
#include "commands\Text.z80"
t_Disp:
.db 4,"Disp"
#include "commands\Disp.z80"
t_DispBuf:
.db 7,"DispBuf"
#include "commands\DispBuf.z80"
t_lcm:
.db 3,"lcm"
#include "commands\lcm.z80"
t_setseed:
.db 7,"setseed"
#include "commands\setseed.z80"
t_pow10:
.db 5,"pow10"
#include "commands\pow10.z80"
t_pow2:
.db 4,"pow2"
#include "commands\pow2.z80"
t_Shiftbuf:
.db 8,"Shiftbuf"
#include "commands\Shiftbuf.z80"
t_randseed:
.db 8,"randseed"
#include "commands\randseed.z80"
t_exp:
.db 3,"exp"
#include "commands\exp.z80"
t_Output:
.db 6,"Output"
#include "commands\Output.z80"
t_Sprite:
.db 6,"Sprite"
#include "commands\Sprite.z80"
t_acos:
.db 4,"acos"
#include "commands\acos.z80"
t_Rect:
.db 4,"Rect"
#include "commands\Rect.z80"
t_rand:
.db 4,"rand"
#include "commands\rand.z80"
t_asin:
.db 4,"asin"
#include "commands\asin.z80"
t_tanh:
.db 4,"tanh"
#include "commands\tanh.z80"

poparg:
    ret
tostr:
    ret


ERR_Syntax_00:      ;Too many decimal points.
    ld hl,s_Syntax_00
    jr error
ERR_Func_Not_Found:
    ld hl,s_Func_Not_Found
    jr error
ERR_Unmatched_lparens:
    ld hl,s_Unmatched_lparens
error:
    ld sp,(spsave)
    push hl
    ld hl,s_Err
    bcall(_PutS)
    pop hl
    bcall(_PutS)
    bcall(_NewLine)
    ret
rpn:
    ret
s_Err:
    .db "Err:",0
s_Unmatched_lparens:
    .db "Missing '('",0
s_Syntax_00:
    .db "Too Many '.'",0
s_Func_Not_Found:
    .db "FuncNotFound",0
s_RightArrow:
    .db $3D,$3D,$05,0
test:
;    .db "(3.1415926535)"
.db "asinh(3.142+6/2-7)*3^(6*3)"
test_end: .db 0

.echo $-$9D95," bytes"

It includes error messages for a few typical errors. Unfortunately it is only converting plaintext to plaintext, so there is no compiling or 'tokenizing' or actual evaluation going on-- it's just converting from infix to postfix.

Offline TIfanx1999

  • ಠ_ಠ ( ͡° ͜ʖ ͡°)
  • CoT Emeritus
  • LV13 Extreme Addict (Next: 9001)
  • *
  • Posts: 6173
  • Rating: +191/-9
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #2 on: November 06, 2017, 10:13:43 pm »
Very nice! :D

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #3 on: November 27, 2017, 03:32:48 pm »
Now I have it 'tokenizing' as it is parsing the ASCII string! So numbers are converted to a machine-friendly format, function names are converted to one-byte 'tokens', and order of operations is preserved. I also made a routine that converts tokens to an ASCII string, in order to display the result of the shunting yard algorithm.

I whipped up an RPN parser to test some code, and it worked! Keep in mind that the RPN parser I made for this is not at all how I want to ideally implement it. Instead manually comparing the bytes, I need to look it up in the lookup table already available. I want it to be fast so I am going to have to modify the LUT to point directly to the code, but that is okay. Bleggh, this is tedious work :P

There is an example in the screenshots where I draw a sprite located at address 0x0000. I don't have the sprite type implemented yet, or even variables for that matter, so it's just a constant. In the attached files, I changed the example to have '06/3' to kind of show that the input won't always be able to be reconstructed from the output. During the tokenizing, '06' is converted to 0x0006, and during the token->ascii output, 0x0006 is converted to just '6'.

Offline TIfanx1999

  • ಠ_ಠ ( ͡° ͜ʖ ͡°)
  • CoT Emeritus
  • LV13 Extreme Addict (Next: 9001)
  • *
  • Posts: 6173
  • Rating: +191/-9
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #4 on: November 29, 2017, 06:05:20 pm »
Cool stuff!

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #5 on: November 30, 2017, 02:11:34 am »
Today I wrote a Python version of the "compiler." Currently it only recognizes a handful of binary operations, and the function 'sin(', but it does recognize signed and unsigned integers of sizes 8,16,32, and 64, as well as single precision floats. The variable type 'single' gets mapped to 'float' and 'char' gets mapped to 'uint8' and 'int' is mapped to 'int32'.

I've got it doing some complicated stuff like basic math optimizations (ex. 3*3 is just changed to 9) and variable mapping, neither of which the Z80 version do. As well, the Python version actually handles multiple lines of source code. In the example, 'code.txt' we have the three-line code:
Code: [Select]
var float x0=0
var uint8 y0=0
sin(3.14159*2*x0+y0)^9
This compiles to 44 bytes if you include the metadata about variable names, and 35 bytes for the code+header+static variables.

The source for the Python program is included in the attached file, but for those who don't want to go through the trouble of downloading, here is the current source for 'compyler.py'. It's ugly, for now.
Code: [Select]
from math import sin
t_plus      = 0xD1
t_minus     = 0xD2
t_mul       = 0xD3
t_div       = 0xD4
t_pow       = 0xD5
t_and       = 0xD6
t_xor       = 0xD7
t_or        = 0xD8
t_eq        = 0xD9
t_neq       = 0xDA
t_gt        = 0xDB
t_lt        = 0xDC
t_ge        = 0xDD
t_le        = 0xDE
t_sto       = 0xDF
binops=[
        ["~",0x01,t_sto],
        ["=",0x50,t_eq],
        ["|",0x60,t_or],
        ["&",0x70,t_and],
        ["-",0x81,t_minus],
        ["+",0x80,t_plus],
        ["/",0x83,t_div],
        ["*",0x82,t_mul],
        ["^",0x85,t_pow],
        ["",0x00,0]
       ]
varlist=[]
def getbinopsinfo(s):
    n=0
    m=len(binops)-1
    while (n<m) and s!=binops[n][0]:
        n+=1
    return binops[n]
def isvar(s):
    for i in varlist:
        if s==i[0]:
            return True
    return False
def shuntingyard(inp):
    size = len(inp)
    opstack=""
    out=""
    k=0
    inp+=" "
    while k<size:
        n=k
        while ("0"<inp[k]<="9"):
            out+=inp[k]
            k+=1
        if inp[k]==".":
            out+=inp[k]
            k+=1
            while ("0"<inp[k]<="9"):
                out+=inp[k]
                k+=1
            if inp[k]==".":
                return "Err: Too Many Decimals."
        if n!=k:
            out+=","
        else:
#Check Variables
            while ("a"<=inp[k]<="z") or ("A"<=inp[k]<="Z") or ("0"<=inp[k]<="9") or inp[k]=="_":
                k+=1
            if n==k or inp[k]=="(":
                k=n
            else:
                if isvar(inp[n:k])==True:
                    out+=inp[n:k]+","
                else:
                    return "Err: Var Not Found  :  "+inp[n:k]
        if inp[k]=="(":
            opstack="(,"+opstack
            k+=1
        if inp[k]==")":
            m=len(opstack)           
            if m==0:
                return "Err: Unmatched ')'."
            n=0
            opstack+=" "
            while opstack[n]!="(" and n<m:
                n+=1
            if n==m:
                return "Err: Unmatched ')'."
            out+=opstack[0:n+2]
            opstack=opstack[n+2:-1]
            k+=1
        n=getbinopsinfo(inp[k])
        if n[0]!="":
            p=n[1]
            m=0
            j=len(opstack)
            opstack+=" "
            while (p<=getbinopsinfo(opstack[m])[1]) and m<j:
                m+=1
                if m<j:
                    if opstack[m]==',':
                        m+=1
                if m<j:
                    if getbinopsinfo(opstack[m])[0]=='':
                        j=m
            out+=opstack[0:m]
            opstack=n[0]+","+opstack[m:-1]
            k+=1
        if ("A"<inp[k]<="Z") or ("a"<inp[k]<="z"):
            n=k
            while (("A"<inp[k]<="Z") or ("a"<inp[k]<="z") or ("0"<inp[k]<="1") or (inp[k]=="_")) and k<size:
                k+=1
            if inp[k]=="(":
                k+=1
                opstack=inp[n:k]+","+opstack
            else:
                k=n
    out+=opstack
    out=out.strip(",")
    return out.split(",")
def int2hex(n):
    n=int(n)
    if n==0:
        return "0"
    s=""
    hexstr="0123456789ABCDEF"
    while n!=0:
        s=hexstr[n%16]+s
        n>>=4
    return s
def littleendianify(s):
    out=""
    n=len(s)
    while n>0:
        out+=s[n-2:n]
        n-=2
    return out
def floatsingleify(s):
    x=eval(s)
    if x==0:
        return "00000000"
    s=0
    if x<0:
        s=1
        x=-x
    exp=128
    mant=x
    while x>=2:
        exp+=1
        x/=2
    while x<1:
        exp-=1
        x*=2
    x*=(1<<24)
    x=int(x)
    x+=(x&1)
    x>>=1
    if x>=(1<<24):
        exp+=1
        x>>=1
    if exp<0:
        return "00000000"
    if exp>255:
        return "00004000"
    if s==0:
        x-=(1<<23)
    s=int2hex(x)
    s="0"*(6-len(s))+s
    s=littleendianify(s)
    t=int2hex(exp)
    return s+"0"*(2-len(t))+t
def hextok(s):
    if ("0"<=s[0]<="9") or s[0]=="." or s[0]=="-":
        s=s.split(".")
        if len(s)==1:
            s=int(s[0])
            sign=0
            if s<0:
                sign=1
                if s>-(1<<7):
                    s+=256
                if s>-(1<<15):
                    s+=(1<<16)
                if s>-(1<<31):
                    s+=(1<<32)
                if s>-(1<<63):
                    s+=(1<<64)
            s=int2hex(s)
            if len(s)%2:
                s="0"+s
            s=littleendianify(s)
            if (len(s)==2) and sign==0:
                return "00"+s       #uint8, a.k.a 'char', default for 8-bit integer
            if (len(s)==2) and sign==1:
                return "01"+s       #int8
            if (len(s)==4) and sign==0:
                return "02"+s       #uint16, default for 16-bit integer
            if (len(s)==4) and sign==1:
                return "03"+s       #int16
            if len(s)<=8:
                s+="0"*(8-len(s))
                if "0"<s[6]<"8":
                    return "04"+s       #uint32 is the default if the int can't fit into a signed 32-bit int
                return "05"+s       #int32, default for 32-bit integer
            if len(s)==16:
                s+="0"*(16-len(s))
                if "0"<s[14]<"8":
                    return "06"+s       #uint64 is the default if the int can't fit into a signed 64-bit int
                return "07"+s       #int64, default for 64-bit integer
        else:
            s=floatsingleify(s[0]+"."+s[1])
            return "08"+s           #single prec. float, default float type.
    if isvar(s)==True:
        k=0
        while varlist[k][0]!=s:
            k+=1
        s=int2hex(k)
        s="0"*(4-len(s))+s
        return "1B"+littleendianify(s)
    if s=="sin(":
        return "63"
    if getbinopsinfo(s)[0]!="":
        s=int2hex(getbinopsinfo(s)[2])
        return "0"*(2-len(s))+s
    return s
def hextokenize(inp):
    n=len(inp)
    out=[]
    for i in range(n):
        if inp[i]!="(":
            out+=[hextok(inp[i])]   
    return out
def isnum(s):
    for i in s:
        if ((i<"0") or (i>"9")) and (i!=".") and (i!='-'):
            return False
    return True
def optimizer(exp):
    n=len(exp)
    k=0
    while k<n:
        f=False
        if isnum(exp[k-1])==True:
            if exp[k]=="sin(":
                f=True
                s=str(sin(eval(exp[k-1])))
            if f==True:
                f=False
                if k==1:
                    exp=[s]+exp[2:]
                else:
                    exp=exp[0:k-1]+[s]+exp[k+1:]
                k-=1
                n-=1
            if isnum(exp[k-2])==True:
                if exp[k]=="+":
                    f=True
                    s=str(eval(exp[k-2])+eval(exp[k-1]))
                if exp[k]=="-":
                    f=True
                    s=str(eval(exp[k-2])-eval(exp[k-1]))
                if exp[k]=="*":
                    f=True
                    s=str(eval(exp[k-2])*eval(exp[k-1]))
                if exp[k]=="/":
                    f=True
                    s=str(eval(exp[k-2])/eval(exp[k-1]))
                if exp[k]=="^":
                    f=True
                    s=str(eval(exp[k-2])**eval(exp[k-1]))
                if f==True:
                    if k==2:
                        exp=[s]+exp[3:]
                    else:
                        exp=exp[0:k-2]+[s]+exp[k+1:]
                    k-=2
                    n-=2
        k+=1
    return exp
def compileline(exp, varlist=varlist,opt=True):
    if exp=="":
        return exp
    if exp[0:4]=="var ":
        exp=exp[4:].split(" ")
        if len(exp)!=2:
            return "Err: Syntax"
        i=exp[1].split("=")
        s=exp[0]
        if s=="int":
            s="int32"
        if s=="char":
            s="uint8"
        if s=="single":
            s="float"
        if s=="floatext" or s=="float" or s=="int8" or s=="int16" or s=="int32" or s=="int64" or s=="uint8" or s=="uint16" or s=="uint32" or s=="uint64":
            if len(i)==1:
                i+=["0"]
            if len(i)>2:
                return "Err: Syntax"
            varlist+=[[i[0],s,i[1]]]
            return ""


    exp=shuntingyard(exp)
    if exp[0:4] == "Err:":
        return exp
    if opt==True:
        exp=optimizer(exp)
        if exp[0:4] == "Err:":
            return exp
    exp=hextokenize(exp)
    s=""
    for i in exp:
        s+=i
    return s
def tokifyvar(s,x):
    if s=="uint8":
        x=int(x)%256
        s=int2hex(x)
        return "00"+"0"*(2-len(s))+s
    if s=="int8":
        x=int(x)%256
        s=int2hex(x)
        return "01"+"0"*(2-len(s))+s
    if s=="uint16":
        x=int(x)%(1<<16)
        s=int2hex(x)
        return "02"+littleendianify("0"*(4-len(s))+s)
    if s=="int16":
        x=int(x)%(1<<16)
        s=int2hex(x)
        return "03"+littleendianify("0"*(4-len(s))+s)
    if s=="uint32":
        x=int(x)%(1<<32)
        s=int2hex(x)
        return "04"+littleendianify("0"*(8-len(s))+s)
    if s=="int32":
        x=int(x)%(1<<32)
        s=int2hex(x)
        return "05"+littleendianify("0"*(8-len(s))+s)
    if s=="uint64":
        x=int(x)%(1<<64)
        s=int2hex(x)
        return "06"+littleendianify("0"*(16-len(s))+s)
    if s=="int64":
        x=int(x)%(1<<64)
        s=int2hex(x)
        return "07"+littleendianify("0"*(16-len(s))+s)
    if s=="float":
        return "08"+floatsingleify(x)
    return "Err: Unknown Type"
def int2hexle(x,n=0):
    s=int2hex(x)
    if len(s)%2==1:
        s="0"+s
    s=littleendianify(s)
    if n==0:
        return s
    return s+("0"*(n-len(s)))
def compile(code,varlist=varlist,opt=True):
    code=code.split("\n")
    out=''
    k=0
    for i in code:
        k+=1
        s=compileline(i)
        if s[0:4]=="Err:":
            return "Line "+str(k)+"  "+s
        out+=s
#'out' is the code section
#now we need to create the var section, preloaded with default assignment
#then we need to create the var names LUT
    vars=[]
    names=[]
    for i in varlist:
        names+=[i[0]]
        vars+=[tokifyvar(i[1],i[2])]
        if vars[-1][0:4]=="Err:":
            return vars[-1][0:4]
    varslut=int2hexle(len(vars),2)
    offset=2*len(vars)
    for i in vars:
        varslut+=int2hexle(offset,4)
        offset+=int(len(i)/2)
    nameslut=int2hexle(len(names),2)
    offset=2*len(names)
    for i in names:
        nameslut+=int2hexle(offset,4)
        offset+=len(i)+1
    for i in names:
        for j in i:
            nameslut+=int2hexle(ord(j),2)
    for i in vars:
        varslut+=i
    s=int2hexle(int(len(out)/2),4)
    s+=int2hexle(int(len(varslut)/2),4)
    s+=int2hexle(int(len(nameslut)/2),4)
    return [s,out,varslut,nameslut]
f=open("code.txt","r+")
code=f.read()
f.close()
print(code)
cmpld=compile(code)
print(len(cmpld[0]+cmpld[1]+cmpld[2]+cmpld[3])>>1,"bytes :",cmpld)
print(varlist)
# varlist=[['x0'],['y0']]
# exp="sin(3.14159*2*x0+y0)^9"
# print(exp)
# exp=compileline(exp)
# print(int(len(exp)/2),"bytes : ",exp)

Offline PT_

  • LV2 Member (Next: 40)
  • **
  • Posts: 24
  • Rating: +7/-0
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #6 on: December 01, 2017, 05:03:18 pm »
Just take a look at the assembly version of ICE :P

Offline Xeda112358

  • they/them
  • Moderator
  • LV12 Extreme Poster (Next: 5000)
  • ************
  • Posts: 4704
  • Rating: +719/-6
  • Calc-u-lator, do doo doo do do do.
    • View Profile
Re: Shunting-Yard Algorithm
« Reply #7 on: August 02, 2018, 09:46:38 pm »
Update today!
I have successfully run the following code:
Code: [Select]
uint16 varA = 0
Disp "print(uint8(99)+uint32(1))"
Disp uint8(99)+uint32(1)
Pause
Disp "Hello World!"
Lbl loop
varA+uint8(1) -> varA
Disp varA
Pause
GotoIf(varA!=uint16(3),loop)
Mostly I was testing variable storage, mixed-type arithmetic, and conditional goto.

EDIT:
As an update, I'm just working on type conversions which is used in most mixed-type operations. For example, if I want to add a uint8 to a float, I can convert the uint8 to a float and then add them together as a normal float+float operation.
I've added in converting from a string to uint8, int8, uint16, or int16, but more importantly, I've been working on converting more data types to strings. In this screenshot, I am displaying an 8-bit Gaussian integer (I foresee use in graphics), displaying a pixel (for now I am just displaying it's (x,y) coordinates), and finally there is a 16.16 fixed-point number displayed.