comp/instantiate.ss
#lang scheme/base

;; Code to build structured assembly code graph from forth code. This
;; uses the 'macro' language, which uses scat's 2-stack model to
;; represent concatenative macros, extended with intermediate state
;; for collection of structured assembly code.


(require
 scheme/control
 scheme/match
 "../tools.ss"
 "../scat.ss"
 "../coma.ss"
 "../target.ss"
 "../control.ss"
 "postprocess.ss"
 )

(provide


 make-target-label   ;; create a label
 make-target-split   ;; label -> splitter macro
 
 target-compile-1
 
 wrap-macro/mexit
 wrap-macro/postponed-word
 wrap-macro/postponed-variable

 macro/semicolon
 macro/mexit
 
 macro/terminate-chain ;; indicates that control flow has left current chain
 
 macro/split-store     ;; current state > ctrl + set new state
 macro/combine-store   ;; combine all chains in store to one
 macro/merge-store     ;; combine split store with current
 
 macro/exit

 state:compiler
 (struct-out compiler)

 name-directive?  ;; for assembler
)


;; Representation for different target-word structures. See
;; purrr/purrr-lang.ss for a usage example.


;; Return 3 values:
;;   * a target wird struct (label)
;;   * a referring macro
;;   * a body code generator macro


(define (wrap-macro/postponed-word name loc macro)
  (let ((label (new-target-word #:name name
                                #:realm 'code
                                #:srcloc loc
                                #:postponed macro)))
    (values label
            (macro-prim: ',label compile)
            (compose macro (make-target-split label)))))

(define (wrap-macro/postponed-variable name loc macro)
  (let ((label (new-target-word #:name name
                                #:realm 'data
                                #:srcloc loc)))
    (values label
            (macro-prim: ',label literal)
            (compose
             ;; FIXME: data variables are not chained. This should be:
             ;; data variables are not chained with code chunks.
             terminate-chain 
             macro
             (make-target-split label)))))

;; Macro representations need to be wrapped to implement 'mexit',
;; which jumps past the end of the code generated by macro. For
;; languages that do not use mexit, do not wrap the macro, as this
;; requires at least one call to mexit at the end.

(define (wrap-macro/mexit name loc macro)
  (make-word
   (lambda (state)
     (parameterize-words-ns!
      (macro)
      ((semicolon (ns (macro) mexit)))
      (or
       (mleave (macro (menter state)))
       (match loc
              ((list file l c p s)
               (error
                'non-terminated-macro
                "~a:~a:~a: ~a"
                file l c name))))))))


;; *** COMPILE ***

;; During compilation the assembly code (the result of instantiating
;; macros) is organized in the following hierarchy:

;;   * A word is a single entry point, represented by a target-word
;;     structure associated to a chunk, which is a list of consecutive
;;     assembly code instructions. Code inside a word can only be
;;     reached through a jump to its label, and is thus not observable
;;     to the world. Words serve as the unit of code generation (and
;;     recombination). Any operation on code that doesn't alter
;;     semantics is legal within a chunk.

;;   * A chain is a list of words (chunks) with implicit
;;     fallthrough. Each word indicates a single entry point. Chains
;;     are terminated by exit points. Chains are the unit of target
;;     address allocation: each chain can be associated to an address
;;     independent of other chains. Some chains have fixed addresse
;;     (org).
;;
;;   * The store is a set of recently constructed chains (implemented
;;     as a stack)


;; So, chunks represent ENTRY points, chains represent EXIT
;; points. This hierarchy is necessary because Forth words can have
;; multiple entry and exit points. A Forth word then consists of
;; multipel chains, since a chain has a single EXIT point, but can
;; have multiple ENTRY points.

;; Organizing it this way gives maximum flexibility: the basic
;; operation is the jump, possibly conditional. The Forth-style
;; language is a thin layer on top of assembly code which sacrifices
;; no efficiency, and still allows for simple control flow analysis
;; that can re-arrange code in memory.


(define-struct dict
  (current  ;; current word label
   chain    ;; list of words with fallthrough
   store))  ;; set (stack?) of fallthrough lists


;; Save code under label, but drop if there is no label, which means
;; the code is not reachable.

(define (log-dead code)
  (log:
   (format
    "dead:~a\n"
    (apply
     string-append
     (map (lambda (ins)
            (format " ~a"
                    (instruction->string ins)))
          (reverse code))))))

(define (dict-label d new-word code)
  (match d
         ((struct dict (current chain store))
          (make-dict new-word
                     (if current
                         (cons (list current code) chain)
                         (begin
                           (unless (null? code)
                             (log-dead code))
                           chain))
                     store))))


;; Terminate current fallthrough chain by moving it to the store.
(define dict-terminate
  (match-lambda
   ((struct dict (current chain store))
    (make-dict current '()
               (if (null? chain) ;; drop empty chains
                   store
                   (cons chain store))))))


(define-struct (compiler 2stack)
  (dict      ;; dictionary object: keeps track of label -> code bindings
   rs))      ;; 'return stack' for macros: a list of exit labels + refcount

(define update-compiler
  (case-lambda
    ((state asm)
     (update-compiler state asm
                      (2stack-ctrl-list state)))
    ((state asm ctrl) 
     (update-compiler state asm ctrl
                      (compiler-dict state)
                      (compiler-rs state)))
    ((state asm ctrl dict rs)
     (make-state:compiler asm ctrl dict rs))))

(define (make-state:compiler ctrl asm dict rs)
  (make-compiler update-compiler
                 ctrl asm dict rs))

(define (state:compiler)
  (make-state:compiler '() '()
                       (make-dict #f '() '())
                       '()))

        



;; The 'split' state update function: save the word currently being
;; compiled on the word stack (dictionary) and continue with an empty
;; assembly stack and a new current word.
(define (make-target-split new-word)
  (state-lambda compiler
                (asm ctrl  dict rs)
                ;; =>
                (update '() ctrl
                        (dict-label dict new-word asm)
                        rs)))

;; 'compile-forth' sets up the internal state for compilation of
;; Forth-like functionality. Compared to ordinary 'macro->code' which
;; operates only on the parameter stack and an extra assembly stack,
;; Forth code has access to:

;;  * labels = entry points of target code chains
;;  * fallthrough words
;;  * macro return stack (local exit in macros)

(define print-state
  (match-lambda
   ((struct compiler (update asm ctrl
                             (struct dict (current chain store))
                             rs))
    (printf "STATE:\n~a\n~a\n~a\n~a\n~a\n~a\n"
            asm ctrl current chain store rs))))

(define (compile-forth macro)
  (parameterize-words-ns!
   (macro) ((semicolon (ns (macro) exit)))

   ;; Execute macro on empty state + terminate properly.
   (let ((state
          (terminate-chain
           (macro (state:compiler)))))

     ;; Type check (macros are not allowed to change the state type!)
     ;; and require empty compilation state.
     (unless (compiler? state)
       (error 'compile-state-type-error))

     ;; (print-state state)
     (assert-empty-ctrl state)

     ;; Return the dict-store list. This is then passed to
     ;; target-post! and onward to the assembler.
     (dict-store (compiler-dict state)))))





(define (make-target-label [name (next-label)])
  (new-target-word #:name name))






;; *** FALLTHROUGH CHAINS ***

(define terminate-chain
  (compose
   ;; Terminate the fallthrough chain.
   (state-lambda compiler
                 (asm ctrl dict rs)
                 ;; =>
                 (update
                  asm ctrl (dict-terminate dict) rs))
   ;; Code following the jump is not reachable.
   (make-target-split #f)))

(define-ns (macro) terminate-chain terminate-chain)

(compositions
 (macro) macro:

 ;; _<word> provide functionality but don't terminate chains
 (semicolon exit)
 (exit primitive-exit
       ,terminate-chain)
 )



;; *** ORG ***



;; Save the compilation state to the control stack.
;; FIXME: Maybe this needs to split off only the store?
(define-ns (macro) split-store
  (state-lambda compiler
                (asm ctrl dict rs)
                ;; =>
                (update '()
                        (cons (list asm rs dict) ctrl)
                        (make-dict #f '() '())
                        '())))

;; FIXME: This is currrently only defined for empty asm and rs, and a
;; #f word (i.e. the state after a split) simply because there's no
;; need for this in the middle of a word compilation, and i don't want
;; to invent one to test it.
(define-ns (macro) merge-store
  (state-lambda compiler
                 ('() ;; empty asm
                  (list-rest
                   (list asm rs (struct dict (current chain store)))
                   ctrl)
                  (struct dict (#f '() store+))
                  '()) ;; empty rs
                 (update asm
                         ctrl
                         (make-dict current chain
                                    (append store+ store))
                         rs)))

;; Combine the current word chain into one. This is used in the
;; definition of org-pop, for example to ensure fallthrough for an
;; interrupt vector table. Probably only meaningful after
;; terminate-chain, which results in a store that has all chains.

(define-ns (macro) combine-store
  (state-lambda compiler
                (asm ctrl (struct dict (current chain store)) rs)
                ;; =>
                (update asm ctrl
                        (make-dict current chain
                                   (list (apply append store)))
                        rs)))

(define (name-directive? dir name)
  (let ((dir? (lambda (x) (eq? dir x))))
    (match name
           ((list (? dir?) addr) #f)
           (_ #f))))


;; *** MEXIT ***

;; Multiple exit points for macros. This mechanism makes it possible
;; to turn words into macros, as long as they do not modify the return
;; stack.

(define-struct mexit (label refs))

(define-ns (macro) mexit
  (state-lambda compiler
                (asm ctrl
                 dict (list-rest (struct mexit (label refs)) rs+))
                ;; =>
                (update (cons `[jw ,label] asm) ;; compile jump
                        ctrl dict
                        (cons (make-mexit label (+ 1 refs)) rs+)))) ;; inc ref


;; push new label to rs
(define menter
  (state-lambda compiler
                (asm ctrl dict rs)
                (update asm ctrl dict
                        (cons (make-mexit (make-target-label) 0) rs))))

;; check termination, drop last jump + split if there are more
(define mleave
  (state-lambda compiler
                (asm ctrl dict
                 (list-rest (struct mexit (label refs)) rs+))
                (and
                 (terminated? asm label)
                 (let ((dropped
                        (update (cdr asm) ctrl dict rs+)))
                   (if (> refs 1)
                       ((make-target-split label) dropped)
                       dropped)))))


;; Check if the macro is properly terminated by looking at the
;; last instruction and the current exit label.
(define (terminated? asm exit-label)
  (match asm
         ((list-rest [list 'jw label] _) (eq? label exit-label))
         (else #f)))





;; Project word structs to the compiled state.
(define (target-compile-1 words)
  (append
   (compile-forth
    (apply compose (filter procedure? (cons (lambda (x) x) words))))
   (filter (lambda (x) (not (procedure? x))) words)))