pic18/macro.ss
;; PIC18 code generator
#lang scheme/base

(require
 ;; constants are bound as macros, and in the scheme namespace.
 ;; don't re-export constants: require target to provide its own.
 "const.ss"
 "asm.ss"
 "../target.ss"
 "../asm.ss"
 "../tools.ss"
 "../scat.ss"
 "../coma.ss"
 "../control.ss"
 "../comp.ss")
; (for-syntax
;  "../macro.ss"))


;; Pattern matching macros refer to these stubs in case some
;; constructs cannot be optimized away at compile time. To support a
;; runtime construct, simply redefine these stubs with concrete words.
(declare-stubs
 ~bit! ~bit? ~toggle
 ~@ ~!
 ~>z
 )


;; These are referred in parsint-words.ss

(declare-stubs
 f->)



(provide
 (all-defined-out))


  ;; Simulation.

  (compositions
   (scat) scat:

   (truncate #xFF and) ;; truncate data word

   ;; these are infinite precision
   ;; FIXME: port
   ;(>>        1 >>>) 
   ;(<<        1 <<<)
   ;(2/        1 >>>)
   
   ;; these make no sense in infinite precision so are truncated
   (rot<<     truncate dup 7 >>> swap 1 <<< or truncate)
   (rot>>     truncate dup 7 <<< swap 1 >>> or truncate)


   )



  ;; *** CODE GENERATOR ***

  ;; These are asm-transforms generators, applied to regular classes
  ;; of instructions and optimisations.

  ;; UNARY
           
  ;; Unary operations can be combined with memory fetch.
  (meta-pattern  unary (word opcode)
                 (([movf f 0 0] word) ([opcode f 0 0]))
                 ((word)              ([opcode WREG 0 0])))

  ;; Unary operations storing result in variable.
  (meta-pattern  unary->mem (word opcode)
                 (([qw f] word)       ([opcode f 1 0])))
  
  (unary (macro)
         (1+     incf)
         (1-     decf)
         (rot<<c rlcf)
         (rot>>c rrcf)
         (rot<<  rlncf)
         (rot>>  rrncf)
         (swap-nibble
                 swapf))
  
  (unary->mem (macro)
              (1-! decf)
              (1+! incf))

  ;; BINARY
            
  ;; (1) binary ops combine with memory fetch: "123 @ +". only for
  ;; commutative ops!
  (meta-pattern binary (word metafn l-opcode s-opcode)
    (([qw a ] [qw b] word)         ([qw (tscat: a b metafn)]))
    (([l-opcode a] [qw b] word)    ([l-opcode (tscat: a b metafn)]))
    (([qw a] word)                 ([l-opcode a]))
    (([save] [movf a 0 0] word)    ([s-opcode a 0 0])) ;; (1)
    ((word)                        ([s-opcode POSTDEC0 0 0])))

  (meta-pattern binaryc (word s-opcode)
    (([save] [movf a 0 0] word)    ([s-opcode a 0 0])) ;; (1)
    ((word)                        ([s-opcode POSTDEC0 0 0])))

  ;; (1) like "dup 123 +!"
  (meta-pattern binary->mem (word opcode)
    (([dup] [qw f] word)  ([opcode f 1 0]))  ;; (1)
    (([qw f] word)        ([opcode f 1 0] [drop])))

  (binaryc (macro)
           (++ addwfc))
  
  ;; FIXME: add error handling for the patterns that are not defined:
  (binary (macro)
          
          (+    +    addlw  addwf)
          ;; no carry flag emulated, and no literal version
          ;; (++   #f   #f     addwfc)  
          (and  and  andlw  andwf)
          (or   or   iorlw  iorwf)
          (xor  xor  xorlw  xorwf)
          
          ;; These have compile time eval only, currently no
          ;; target equivalents defined.

          (pow pow   invalid     invalid)
          (>>> >>>   invalid     invalid)
          (<<< <<<   invalid     invalid)
          (/   /     invalid     invalid)
          (*   *     invalid     invalid))


   ;; Binary operations storing result in variable.
   (binary->mem  (macro)
                 (--!  subwfb)
                 (-!   subwf)
                 (++!  addwfc)
                 (+!   addwf)
                 (and! andwf)
                 (or!  iorwf)
                 (xor! xorwf))

  
  ;; RPN ASSEMBLER   
  (meta-pattern  asm-f00 opcode (([qw f] opcode) ([opcode f 0 0])))
  (meta-pattern  asm-f0  opcode (([qw f] opcode) ([opcode f 0])))
  (meta-pattern  asm-    opcode ((opcode) ([opcode])))
  

  (asm-f00  (macro)
            movf xorwf andwf iorwf
            subwf subfwb addwf addwfc
            comf rrcf rlcf rrncf rlncf
            )

  (asm-     (macro)
            push pop sleep reset nop clrwdt daw
            tblrd* tblrd*- tblrd*+
            tblwt* tblwt*- tblwt*+)
  
  (asm-f0   (macro)
            cpfseq cpfsgt cpfslt
            clrf setf
            movwf mulwf)

  (asm-     (macro)
            mark) ;; org-pop)

  
  

  ;; This is the bulk of the PIC18 peephole optimizing code
  ;; generator. Each line is a list (pattern expr), expressed in
  ;; RPN. The function name is at the end of the pattern. The
  ;; expression is a sequence of assembler instruction.

  ;; Note that this only 'emulates' algebraic types. You can't use
  ;; scheme functions to generate the assembler instructions, only to
  ;; generate the arguments to the 'type constructors'.

  ;; The syntax ",xxx" in type position means: match any instruction
  ;; type, and bind the variable xxx to the type name. The syntax
  ;; ",xxx" in expression position means: create a type represented by
  ;; the symbol bound to variable xxx.
  
  (patterns
   (macro)

   ;; DEBUG
   ((,word backspace) ())                  

   ;; POST PROCESSING

   ;; There is a single postprocess hook that runs multiple passes
   ;; over the first pass assembly output. These words are specified
   ;; as macros, and executed after pushing an assembly instruction to
   ;; the asm stack.

   ;; The target should leave pseudo ops like QW CW JW and EXIT so
   ;; generic optimizations can be performed on the intermediate asm
   ;; representation produced by the first compilation step. Left over
   ;; pseudo ops are then elimiated using the 'pseudo' macro.
   
   ;; Convert pseudo asm -> real asm
   (([qw a] pseudo)           ([save] [movlw a]))
   (([cw a] pseudo)           ([jsr 0 a]))
   (([jw a] pseudo)           ([jsr 1 a]))
   
   (([movlw a] [exit] pseudo) ([retlw a]))
   (([exit] pseudo)           ([return 0]))

   ((pseudo)          ())

   ;; 'save' elimination
   (([drop] [save] opti-save) ())
   (([,op (? (target = POSTDEC0)) 0 0] [save] opti-save)
    ([,op INDF0 1 0]))
   (([save] opti-save) ([dup]))   
   ((opti-save) ())

   
   ;; SUBTRACT
   ;; special because it's not commutative + sublw has arguments swapped!
   ;;   (-    -         sublw       subwf)
   ;;   (--   invalid   invalid     subfwb)

   (([qw a ] [qw b] -)             ([qw (tscat: a b -)]))
   (([addlw a] [qw b] -)           ([addlw (tscat: a b -)]))
   (([qw a] -)                     ([addlw (tscat: a -1 *)]))
   
   ;; there's no subfw
   (([save] [movf a 0 0] -)        ([bpf 0 STATUS 0 0] [subfwb a 0 0])) 
   ((-)                            ([subwf POSTDEC0 0 0]))

   (([save] [movf a 0 0] --)       ([subfwb a 0 0])) ;; (1)
   ((--)                           ([subwfb POSTDEC0 0 0]))
   

   ;; FETCH
   (([movlw a] @)  ([movf a 0 0]))  ;; register fetch
   (([qw a] @)     ([save] [movf a 0 0]))
   ((@)            (macro: ~@))


   ;; STORE
   (([qw 0]  [qw a] !) ([clrf a 0]))      ;; these 2 better done after assembly..
   (([qw -1] [qw a] !) ([setf a 0]))
   (([dup] [qw a] !)   ([movwf a 0]))     ;; dup a !


   (([qw x] [qw y] [qw a] !)
    (if (eq? x y)
        ;; literal DUP artifact
        (asm-reverse [qw x] [movwf a 0])                 
        ;; literal commutes (FIXME: commute rules)
        (asm-reverse [qw y] [movwf a 0] [drop] [qw x]))) 
   
   (([qw a] !)          ([movwf a 0] [drop]))  ;; simple literal op

   ((!)                 (macro: ~!))

   
   ;; the a reg
   (([qw lo] [qw hi] a!!) ([~lfsr 2 hi] [~nop lo]))
   ((a!!)                 (macro: ~a!!)) ;;(macro-egg '~a!!))


   

   ;; STACK

   (([qw r] swap!)   ([xorwf r 0 0]   ;; swap using the 3 xor trick
                      [xorwf r 1 0]
                      [xorwf r 0 0]))


   ;; RPN ASSEMBER

   (([,opc f d a] d=reg)  ([,opc f 1 a]))
   (([,opc f d a] d=w)    ([,opc f 0 a]))
   ((return)              ([return 0]))


   ;; The 'org' macro in forth requires byte addresses, while the
   ;; 'org' opcode in the assember requires word addresses as internal
   ;; representation. This is where we convert.
 
   ;; (([qw addr] org)      ([org (tscat: addr 2/)]))
   ;; (([qw addr] org-push) ([org-push (tscat: addr 2/)]))
   
   (([qw a] movlw)         ([movlw a]))
   (([qw a] retlw)         ([retlw a]))
   (([qw a] sublw)         ([sublw a]))  ;; subtract W from F

   
   (([qw s] [qw d] movff)  ([~movff s] [~nop d]))
   (([qw s] retfie)        ([retfie s]))

   (([qw addr] [qw reg] lfsr)
    ([~lfsr reg (tscat: addr 8 >>>)]
     [~nop addr])) ;; macro only
   
   ;; TABLES

   ;; Since this is mostly used for data tables it's probably best to
   ;; let it compile bytes instead of words: always use data word
   ;; size.
   
   (([db lo] [qw hi] |,|)  ([d2 lo hi]))
   (([qw lo] |,|)          ([db lo]))   

   (([qw w] |,,|)          ([dw w]))
   
   ;; CONDITIONALS

   ;; There is a lot of machine support for conditional branching, but
   ;; it is a bit non-orthogonal: there are 2 types of conditionals:

   ;; * btfsc and btfss instructions skip the next instruction based on any bit
   ;; * conditional jumps take the condition from the STATUS register directly

   ;; generic bit -> arguments for btfsp
   (([qw f] [qw b] [qw p] bit?) ([bit? f b p]))
   ((bit?)                      (macro: ~bit?))

   ;; STATUS flag -> conditional jump opcode
   (([qw p] pz?)      ([flag? 'bpz p]))    
   (([qw p] pc?)      ([flag? 'bpc p]))
   (([qw p] pn?)      ([flag? 'bpn p]))


   (([qw l] nzjump)  ([bpz 1 l])) ;; ( label -- )  

   ;; I had to put this back for some reason.. Don't remember.
   ;; ((['qw a] not)            `([qw (,@(wrap a) -1 xor)]))


   (([qw a] neg) ([qw (tscat: a -1 *)]))
   ((neg)        ([negf WREG 0]))

   
   ;; Conditional skip optimisation for 'then'.
   (([btfsp  p f b a] [bra l1] ,ins [label l2] swapbra)
    (if (eq? l1 l2)
        `([btfsp ,(flip p) ,f ,b ,a] ,ins)
        (error 'then-opti-error)))

   ((swapbra) ()) ;; FIXME: skip/jump optimization




   
 
   ;; NOTE; propagating drop is not always possible due to flag effect.
   ;; it might be interesting to think about this a bit
   ;; doesn't mess up flags
   ;; ((['movlw f] 1-!) `([drop] [decf ,f 0 1]))
   ;; propagate drop -> messes up flags
   ;; ((['movlw f] 1-!) `([decf ,f 0 1] [drop]))


   ;; Tests that do not consume their arguments: ( a b -- a b ? )

   ;; The polarity bit in the opcode is chosen such that the 'or-jump'
   ;; macro has a simple 'zero?' comparison for inserting the [bra 1] =
   ;; skip instruction.
 
   ((=?) ([cmp? 'cpfseq INDF0 0 1]))
   ((>?) ([cmp? 'cpfsgt INDF0 0 1]))
   ((<?) ([cmp? 'cpfslt INDF0 0 1]))

   ;; FIXME: single instruction then opti doesnt work here.

   ;; Direct bit operations. These to not modify top, so swap
   ;; instructions if there's a drop.
 
   ;; Doesn't work if f is WREG!!!
   
   ;; It is possible to use literal and/or to operate on WREG though, so
   ;; as a general rule, you are not allowed to touch WREG in forth!

   ;; FIXME: use permutation macro
   (([drop] [qw f] [qw b] [qw c] bit!) ([bpf (flip c) f b 0] [drop]))
   (([qw f] [qw b] [qw c] bit!)        ([bpf (flip c) f b 0]))
   ((bit!)                             (macro: ~bit!))
 
   (([qw f] [qw b] toggle) ([btg f b 0]))
   ((toggle)               (macro: ~toggle))
   




   ;; pre/postincrement variable fetch
   (([movf f 0 0] preinc)  ([incf f 1 0] [movf f 0 0]))
   (([movf f 0 0] postinc) ([movf f 0 0] [incf f 1 0]))
 
   ((umul>PROD) ([mulwf POSTDEC0 0] [drop]))
   ((xdrop)     ([movf POSTDEC1 1 0]))

   ;; unsigned min/max
   ((max)    ([cpfsgt INDF0 0] [movwf INDF0 0] [drop]))
   ((min)    ([cpfslt INDF0 0] [movwf INDF0 0] [drop]))



   ;; REDEFINED
   
   (([qw a] dup)  ([qw a] [qw a]))
   (([drop] dup)  ([movf INDF0 0 0]))
   ((dup)         ([dup]))

   ;;     ((dup)         (macro: super))
   
   (([qw a] drop) ())
   ((drop )       ([drop]))


   (([qw a] [qw b] swap)  ([qw b] [qw a]))
   ((swap)                ([xorwf INDF0 0 0]   ;; swap using the 3 xor trick
                           [xorwf INDF0 1 0]
                           [xorwf INDF0 0 0]))

   ;; The 'or-jump' macro recombines the pseudo ops from above into
   ;; jump constructs. These use 'r' instructions. (see assembler.ss)
   (([bit?  f b p] [qw l] or-jump)     ([btfsp (flip p) f b 0] [bra l]))
   (([flag? opc p] [qw l] or-jump)     ([,opc (flip p) l]))
   
   (([cmp? opc f a 0] [qw l] or-jump)  ([,opc f a] [bra 1] [bra l]))
   (([cmp? opc f a 1] [qw l] or-jump)  ([,opc f a] [bra l]))

   ;; FIXME: conditional assembly
   ;; (([qw flag] [qw l] or-jump) (error 'no-conditional-assembly))

   ;; FIXME: using carry is simpler, since it's not affected by 'drop'
   (([qw l] or-jump)  (macro: ~>z ,(insert `([bpz 0 ,l]))))

   ;; The 'not' macro is useful as predicate negation. Note that it's not the
   ;; same as "FF XOR" !
   (([bit?  f b p] not)     ([bit? f b (flip p)]))
   (([flag? opc p] not)     ([flag? opc (flip p)]))
   (([cmp?  opc f a p] not) ([cmp?  opc f a (flip p)]))


   ;; ??
   ;;(([cw word] jump)       ([jw word]))
   ;;(([rcall word] jump)    ([bra word])) ;; FIXME: probably not necessary
   

   

   ;; the name BADNOP comes from a very early implementation of the
   ;; compiler, which encoded error conditions in #xF000 NOP
   ;; instructions. the generic error was #xFBAD.
 
   ((badnop)  ([~nop #xBAD]))
   

   
   )

  ;; *** RECURSIVE MACROS ***


  (compositions
   (macro) macro:

   ;; In Forth code, org uses byte addresses.
   (org       1 >>> word-org)
   (org-push  1 >>> word-org-push)

   (then    m> label: swapbra)  ;; swapbra is an optimization hook for PIC18

   ;; control flow
   ;; simple for..next
   (for0  >x begin)
   (next0 sym label:      ;; split here
          x1- m> nzjump
          xdrop)

   (for for0)
   (next next0)

   ;; control stack
   (+x    PREINC1)
   (x-    POSTDEC1)
   (xtop  INDF1)
   
   (>x    +x !)
   (x     xtop @)
   (x>    x- @)

   ;; target is register, not wreg
   (x1-   xtop 1-!)

   ;; data stack registers
   (1st   WREG)
   (2nd   INDF0)
   (2nd-  POSTDEC0)

   ;; misc data stack ops
   (@!       movff)
   (nfdrop   2nd- 1st @!)  ;; drop without affecting flags
   (test     #xff and)
   (>flags   test nfdrop)

   (even?  1st 0 low?)
   (odd?   1st 0 high?)

   ;; the other conditions are defined as cmpf macros
   (>=?   <? not)
   (<=?   >? not)
 
   
   ;; the control stack is used to help with other data stack jugglings
   (swap>x  2nd- +x movff)
   (over>x  2nd  +x movff)
   
   ;; (over    over>x x>)

   ;; i completely forgot about this one, till i looked at brood 1 source.
   (pick  neg PLUSW0 movf)
   (over  1 pick)
   (nip   POSTDEC0 movwf)  ;; stores wreg in 2nd as side effect

   ;; bit ops
   (high   1 bit!)
   (low    0 bit!)

   ;; flags
   (clc    STATUS C low)
   (stc    STATUS C high)
   (c@     0 rot<<c)
   (c!     STATUS !)  ;; kills other flags
   (sign>c STATUS movwf STATUS rot<<!)

   (high? 1 bit?)
   (low?  0 bit?)

   (z?  0 pz?)
   (nz? 1 pz?)
   (c?  0 pc?)
   (nc? 1 pc?)
   (n?  0 pn?)
   (nn? 1 pn?)
   

   
   ;; shift
   (<< clc rot<<c)
   (2/ #x80 + rot>>c #x40 xor)
   (>> clc rot>>c)

   (rot<<c! rlcf  d=reg)
   (rot>>c! rrcf  d=reg)
   (rot<<!  rlncf d=reg)
   (rot>>!  rrncf d=reg)
   
   ;; multiply
   (u*   umul>PROD PRODL @)
   (u**  u* PRODH @)

   ;; return stack. it is 2 byte wide.
   
   (rl    TOSL)
   (rh    TOSH)
   (rdrop pop)
   (>r    push rl !)	   ;; these are inefficient
   (r>    rl @ pop)
   ;;(_>r   push rh ! rl !)   ;; these operate on 2 bytes at a time
   ;;(_r>   rl @ rh @ pop)

   ;; indirect memory access
   
   ;; second register is 'a'
   (ah FSR2H)
   (al FSR2L)

   ;; the 'f' register is similar but for flash program memory access
   (fh TBLPTRH)
   (fl TBLPTRL)
   
   ;; using double '!' and '@' to indicate a and p are 2-byte regs
   (a@@   al @ ah @)  ;; ( -- lo hi )
   (f@@   fl @ fh @)  ;; ( -- lo hi )

   (~a!!  ah ! al !)  ;; ( lo hi -- )

   
   (f!!   fh ! fl !)  ;; ( lo hi -- )
   
   (@a+  POSTINC2 @)
   (!a+  POSTINC2 !)
   
   (@a-  POSTDEC2 @)
   (!a-  POSTDEC2 !)

   (!+a  PREINC2 !)
   (@+a  PREINC2 @)
    
   (@a   INDF2 @)
   (!a   INDF2 !)

   ;; indirect addressing using FSR2 + WREG
   (@i   PLUSW2 movf)
   (!i   POSTDEC0 PLUSW2 movff drop)


   ;; save/drop at beginning/end to enable opti
   (@f+   save tblrd*+ TABLAT movf)
   (@f    save tblrd*  TABLAT movf)
   (!f+   TABLAT movwf tblwt*+ drop)
   
   ;; conditional branching
   (abs      1st 7 high? if neg then)

   ;; standard forth meaning: ( a b -- ? )
;   (=    xor nfdrop z?)
;   (>=   - nfdrop c?)
;   (<    - nfdrop nc?)

   (~not   z? if -1 else 0 then) ;; FIXME: do this better

   
   )




;; Runs the listed macros in order as an optimizer on the entire
;; assembly code list.
(define pic18-postprocess
  (macros->postprocess (macro)
                       pseudo 
                       opti-save))

(target-postprocess
 pic18-postprocess)



;; These are used as intermediate results for assembler transforms,
;; and do not have a target implementation.
(ir-ops
 (cmp?  opcode reg a d)
 (flag? opcode inverted)
 (bit?  f b p)
 (invalid . _)

 ;; FIXME!!
 (mark)
 ;; (org-push x)
 ;; (org x)
 ;; (org-pop)
 )

(check-opcodes asm-find)