#lang scheme/unit

;; Code to build structured assembly code graph from forth code. This
;; uses an extension to scat's 2-stack model to represent
;; concatenative macros with a Forth-style control stack.

;; The simple ideas behind the compiler is to delimit basic blocks by
;; identifying _entry_ and _exit_ points.

 "../"      ;; for make-word

(import machine^ stack^ code^)
(export jump^ cfg^ org^ instantiate^ ram^)

;; The compiler provides instantiation for the 'forth-begin macro,
;; which uses Forth style syntax to define macros, variables and
;; compiled words.

;; Implement the jump^ signature using the control flow graph compiler.

  (jw word)
;;  (jw/false word)


;; Target labels are target-word structs (tagged chunks).  See
;; for more info on the chain/chunk terminology.
(define (make-target-label [name (next-label)])
  (new-target-word #:name name))


 ;; Redefine the stubs from to introduce labels used to
 ;; construct structured code graphs.
 ((sym)               ([qw (make-target-label)]))
 (([qw name] >label)  ([qw (make-target-label name)]))
 (([qw label] label:) (entry-point label))

 ;; This is for code that's reachable through external means,
 ;; i.e. jump tables.
 ((reachable)         (entry-point (make-target-label))))


 (([qw label] cw)        ([cw label]))
;; (([qw label] jw)        ([jw label]))
;; (([qw label] jw/false)  ([jw/false label]))


 (macro) macro:
 (jw    cw exit))


;; Check if the macro is properly terminated (if ";" was called at
;; the end) by looking at the last instruction and the current exit
;; label.
;; (define (terminated? asm exit-label)
;;   (match asm
;;          ((list-rest [list (? (ns (op ?) jw)) label] _)
;;           (eq? label exit-label))
;;          (else #f)))

;; The ";" word inspects the macro return stack.  If there's context,
;; execute mexit.  Otherwise we're in straight line code and can
;; execute procedure-exit.
(define (semi state)
  (if (null? (compiler-rs state))
      ((ns (macro) procedure-exit) state)
      ((ns (macro) mexit) state)))

;; State update words in terms of the machine/vm macros.
(define-syntax-rule (state-update form)
   (mu-lambda-struct (compiler (update asm ctrl dict rs)) form)))

(define jw? (ns (op ?) jw))
(define jw  (ns (op asm) jw))
(define qw? (ns (op ?) qw))
(define qw  (ns (op asm) qw))

;; The 'split' state update function: save the word currently being
;; compiled on the word stack (dictionary) and continue with an empty
;; assembly stack and a new current word.
(define (entry-point new-word)
    ((asm  -> '())
     (dict -> (dict-label dict new-word asm)))))

 (macro) macro:

 (>label:   >label label:)


 ;; Push new label + reference count to the return stack.  
 (menter ,(state-update
           ((rs -> (cons (make-mcont (make-target-label) 0) rs)))))

 ;; Compile local exit + increment refcount.
 (mexit ,(state-update
          ((asm :  asm
                -> (cons [list jw label] asm))
           (rs  :  (cons (struct mcont (label refs)) rs+)
                -> (cons (make-mcont label (+ 1 refs)) rs+)))))

 ;; The ';' that terminates the macro can be removed.
 (mleave ,(state-update
           ((rs  : (cons (struct mcont (exit-label refs)) rs+) -> rs+)
            (asm : (cons instruction asm+)
                 -> (match
                     ;; Remove the last to the exit-label and
                     ;; leave a split continuation if there are
                     ;; more references.
                     ([list (? jw?)
                            (? (lambda (x) (eq? exit-label x)))]
                      (cons [list qw ;; save cont.
                                  (if (> refs 1)
                                      (entry-point exit-label)
                                      (macro nop))]
                      (error 'non-terminated-macro))))))
         i) ;; run cont.

 ;; CHUNK/CHAIN management

 ;; Closing a chain effecitively marks unreachable code.
 (close-chain ,(entry-point #f) ;; close current asm
              ,(state-update ((dict -> (dict-terminate dict))))) ;; move chain to store

 ;; Move compilation state to and from the control stack to compile
 ;; code in a fresh chain somewhere not related to the current state.
 ;; This is probably followed by an 'org' label.

 ;; FIXME: this is simpler expressed as a primitive which exchanges
 ;; the current state with an object on one of the stacks.  It's like
 ;; a coroutine yield stack switch.
 (begin-chain ,(state-update
                ((asm  -> '())
                 (rs   -> '())
                 (ctrl -> (cons (list asm rs dict) ctrl))
                 (dict -> (make-dict #f '() '())))))

 (end-chain    close-chain
                 ((asm  : '() -> saved-asm)
                  (rs   : '() -> saved-rs)
                  (ctrl : (cons (list saved-asm saved-rs
                                      (struct dict (saved-current
                        -> saved-ctrl)
                  (dict : (struct dict (#f '() store))
                        -> (make-dict saved-current
                                       (combine-chains store) ;; (*)

 ;; (*) The reason we combine here is is that org-push / org-pop
 ;; _guarantees_ the code will be compiled consecutively at a physical
 ;; address without re-arrangement by the optimizer.



;; Return 3 values:
;;   * a target word struct (label)
;;   * a referring macro
;;   * a body code generator macro

(define (wrap-word name loc macro)
  (let ((label (new-target-word #:name name
                                #:realm 'code
                                #:srcloc loc
                                #:postponed macro)))
    (values label
            (macro-prim: ',label compile)
            (macro: ,(entry-point label)

(define (wrap-variable name loc macro)
  (let ((label (new-target-word #:name name
                                #:realm 'data
                                #:srcloc loc)))
    (values label
            (macro-prim: ',label literal)
            (macro: ,(entry-point label)
                    close-chain ;; FIXME: data variables are not
                                ;; chained. This should be: data variables
                                ;; are not chained with code chunks.

;; Macro representations need to be wrapped to implement 'mexit',
;; which jumps past the end of the code generated by macro.

;; For languages that do not use mexit, do not wrap the macro, as
;; this requires at least one call to mexit at the end.  FIXME: this
;; is why Forth-style macros always need ";".

(define (wrap-macro name loc macro)
   (lambda (state)
      ((macro: menter ,macro mleave) state)
      (match loc
             ((list file l c p s)
               "~a:~a:~a: ~a"
               file l c name)))))))

(define (org-label tag address)
  (macro: ',(list tag address) >label:))

;; Combine all chains in store to one chain.  This might be necessary
;; when there is extra information determining that the chains have
;; falltrough.

(define (combine-chains store)
  (let ((combined (apply append store)))
    ;; (print-target-word combined)
    ;; (pretty-print combined)
    (list combined)))

 ;; EXIT

 (([cw word] procedure-exit) ([jw word]))
 ((procedure-exit)           ([exit]))
 ((exit)              (macro: procedure-exit close-chain))

 ;; Semicolon either compiles local macro exit (mexit) within macro
 ;; instantiation, or a procedure-exit. A dot is an 'exit' that doesnt
 ;; close the chain.  It means the code after the exit is
 ;; reachable. Mainly useful for jump tables using 'route'.

 ((";")               semi)
 ((".")               (macro: procedure-exit))

 ;; ORG
 ;; Since the compiler has no access to code addresses, handling a
 ;; physical address specification needs to be postponed to the
 ;; assembly phase in the form of an instruction encoded in the label.

 (([qw address] word-org-begin) (macro: begin-chain ',`(org ,address) >label:))
 ((org-end)                     (macro: end-chain))

 ;; RAM

 ;; While the rest of this file is about code graph construction,
 ;; there's also data to allocate.  This is done with a link to the
 ;; assembler: macros that execute in the assembler context can modify
 ;; its internal state.  (see asm/
 ;; (([qw realm] [qw n] allot) ([allot realm n]))
 (([qw n] allot) ([allot-data n]))
 ((here)         ([here]))

 (macro) macro:

 (org-begin  code-size / word-org-begin))

;; 'compile-forth' sets up the internal state for compilation of
;; Forth-like functionality. Compared to ordinary 'macro->code' which
;; operates only on the parameter stack and an extra assembly stack,
;; Forth code has access to:

;;  * labels = entry points of target code chains
;;  * fallthrough words
;;  * macro return stack (local exit in macros)

;; This code needs to be passed to target-postprocess! which will
;; turn it into a linked graph and perform post-processing
;; optimizations.

(define (compile-forth macro)
  ;; Execute macro on empty state + terminate properly.
  (let ((state
         ((macro: ,macro close-chain)

    ;; Type check (macros are not allowed to change the state type!)
    ;; and require empty compilation state.
    (unless (compiler? state)
      (error 'compile-state-type-error))

    ;; (comp-print-state state)
    (assert-empty-ctrl state)

    ;; Return the dict-store list. This is then passed to
    ;; target-post! and onward to the assembler.
    (dict-store (compiler-dict state))))

;; Project word structs to the compiled state.
(define (compile-words words)
   ;; compile one big macro from all procedures
    (apply compose
           (filter procedure?
                    (lambda (x) x) ;; sentinel
   ;; copy non-procedures (already compiled)
   (filter (lambda (x)
             (not (procedure? x)))