live/tethered.ss
#lang scheme/base

;; TETHERED INTERPRETER / COMPILER


;; FIXME: This is highly PIC/8bit-specific. Porting to a new
;; architecture is going to involve some disentangling, but the code
;; here is quite straightforward bit and buffer twiddling.

(provide (all-defined-out))

(require
 "../op.ss"
 "../tools.ss"
 "../ns.ss"
 "../scat.ss"
 "../target.ss"
 "../asm.ss"
 "../coma/macro.ss"
 "reflection.ss"
 "../comp/state.ss" ;; state for macro eval
 "../code.ss"

 scheme/system
 scheme/match)

;; CHIP SPECIFIC CONFIG
(define datastack (make-parameter #f))

(define (pic18-datastack)
  (list
   (macro-constant 'stack-data)
   (macro-constant 'stack-data-ptr)))
(datastack pic18-datastack)


;; Debug
(define-syntax-rule (d: fmt . e)
  (let ((val (begin . e)))
    ;; (printf fmt val)
    val))


;; CONNECTION

(define tethered-in    (make-parameter (lambda _ (error 'no-input-connected))))
(define tethered-out   (make-parameter (lambda _ (error 'no-output-connected))))
(define tethered-close (make-parameter void))
(define tethered-reset (make-parameter (lambda () (treset))))

(define (cold)
  ((tethered-reset))
  (msleep 100))

;; serial port

(define stty
  (let ((fmt #f)
        (fmts
         '(("Linux"         . "stty -F ~a ~a raw min 1 -echo")
           ("CYGWIN_NT-5.1" . "stty -F ~a ~a min 1 -echo ixon -icanon pass8")
           ("windows"       . "mode ~a: baud=~a parity=n data=8 stop=1 xon=off dtr=off rts=off"))))
    (lambda (name baud)
      (unless fmt
        ;; Fixme: do autodetect using 'uname' or something..
        (set! fmt (cdr (assoc "Linux" fmts))))
      (system (format fmt name baud)))))


(define (tethered-serial-port name baud)

  (define (standard-serial-port)
    (let-values
        (((i o)
          (open-input-output-file name #:exists 'append)))
      (file-stream-buffer-mode o 'none)
      (stty name baud)
      (tethered-reset (lambda () (treset)))
      (tethered-in    (lambda () (d: "in ~x\n"
                                     ;; (read-byte-timeout i 3)
                                     (read-byte i)
                                       )))
      (tethered-out   (lambda (b) (write-byte (d: "out ~x\n" b) o)))
      (tethered-close (lambda () (close-input-port i) (close-output-port o)))))

  ;; FIXME: re-enable pk2 support through external console interface.
  
  '(define (pk2-serial-port)
    (define (stop)
      (uart-stop) (target-off) (msleep 300))
    (define (start)
      (target-on)
      (uart-start baud)
      (tethered-in uart-read)
      (tethered-out uart-write)
      (tethered-close
       (lambda () (stop) (pk2-close)))
      (tethered-reset
       (lambda ()
         (printf "PK2: target cold reset.\n")
         (stop)     ;; don't pk2-close!
         (sleep 1)  ;; this ensures proper shutdown.  FIXME: fix thread sync!
         (start)))

      ;; Hack to start up communication.  For some reason, the first
      ;; ACK request doesn't seem to get through.  It does work when
      ;; proper PK2 shutdown is disabled (EXIT_UART_MODE), but then
      ;; programming doesn't work on next try.  This is probably a
      ;; glitch on the TX line...
      (begin
        (uart-write 1 6)
        (msleep 100)
        (let ((ret (uart-try-read)))
          ;; (printf "usart-start: ~a\n" ret)
          (unless ret
            (printf "uart-start hack\n"))))
      )
    (pk2-boot)
    (start))

  

  ((tethered-close))


  '(if (equal? "pk2" name)
       (pk2-serial-port)
       (standard-serial-port))
  (standard-serial-port)


  )
  

;; SIMULATOR

;; A simulator for testing and definition of target interpreter
;; semantics.  Obviously, this doesn't execute native code.

(define (make-memory [size #x4096] [filler #xFF])
  (define v (make-vector size filler))
  (define (addr x) (modulo x size))
  (case-lambda
    ((ref) (vector-ref v (addr ref)))
    ((ref val) (vector-set! v (addr ref) val))))

(define (make-simulator [amem (make-memory)]
                        [fmem (make-memory)])
  ;; machine state
  (define a 0)
  (define f 0)
  (define stack '())
  
  ;; functionality
  (define (p x) (printf "target: ~a\n" x))
  (define (push x) (push! stack x))
  (define (pop)
    (with-handlers
        ((void (lambda _
                 (p 'stack-underflow)
                 0)))
      (pop! stack)))
  (define I (make-channel))
  (define O (make-channel))
  (define (recv) (channel-get I))
  (define (trns x) (channel-put O x))
  (define (ack) (trns 0))
  (define (ferase) (p '(ferase)) (ack))
  (define (fprog)  (p '(fprog)) (ack))
  (define (recv2) (b->w (recv) (recv)))
  (define (b->w l h) (car (bytes->words (list l h))))

  (define (chkblk)
    (let ((x #xff))
      (for ((i (in-range 64)))
        (set! x (band x (fetch++ fmem f))))
      (trns 1)
      (trns x)))

  (define (stackptr)
    (trns 1)
    (trns (length stack)))

  (define-syntax-rule (fetch++ mem ptr)
    (let ((x (mem ptr)))
      (set! ptr (add1 ptr)) x))
  (define-syntax-rule (store-buf mem ptr)
    (begin
      (for ((i (in-range (recv))))
        (mem ptr (recv))
        (set! ptr (add1 ptr)))
      (ack)))
  (define-syntax-rule (fetch-buf mem ptr)
    (let ((n (recv)))
      (trns n)
      (for ((i (in-range n)))
         (trns (fetch++ mem ptr)))))
  
  (define (interpret cmd)
    (case cmd
      ((0) (ack))
      ((1) (push (recv)) (ack))
      ((2) (trns 1) (trns (pop)))
      ((3) (p (list 'jsr (recv2))) (ack))
      ((4) (set! a (recv2)) (ack))
      ((5) (set! f (recv2)) (ack))
      ((6) (ack))
      ((7) (p '(reset)))  ;; no ack here!
      ((8) (fetch-buf amem a))
      ((9) (fetch-buf fmem f))
      ((10) (store-buf amem a))
      ((11) (store-buf fmem f))
      ((12) (chkblk)) 
      ((13) (stackptr))
      ((14) (ferase))
      ((15) (fprog))))

  (define (interpreter)
    (unless (zero? (recv))
      (interpret (recv)))
    (interpreter))


  (thread interpreter)
  (values I O))
  
(define (tethered-simulator)
  (let-values (((to from) (make-simulator)))
    (tethered-in
     (lambda () (channel-get from)))
    (tethered-out
     (lambda (b) (channel-put to b)))))

;; (tethered-simulator)

;; Shortcut access.

;; (define (io-debug x) (x))
;; (define default-portspec '("/dev/ttyUSB0" 9600))
;; (define (io-debug x)
;;   (with-io-device default-portspec x))
;; (define-syntax-rule (io> . expr)
;;   (io-debug (lambda () . expr)))

(define (in/b)       ((tethered-in)))
(define (out/b byte) ((tethered-out) byte))

;; word/byte lists
(define (bytes->words lst) (join-nibble-list  lst 0 8))
(define (words->bytes lst) (split-nibble-list lst 0 8))

;; values
(define (void/values lst)
  (if (null? lst) (void) (apply values lst)))

;; MESSAGE TRANSPORT PROTOCOL

(define target-id (make-parameter 0)) ;; Current receiver
(define (target! id)
  (when (or (< id 0) (> id 254))
    (error 'invalid-target-id))
  (target-id id))

;; All messages are prepended with address + size to make them
;; self-delimited.  This is in order to facilitate routing without the
;; need for interpretation.

;; Asynchronous send / receive.  All communication code will be built
;; on top of this.

(define (target-send/b . bytes)
  (let ((len (length bytes)))
    (when (> len 255) (error 'message-too-long))
    (out/b (target-id))
    (out/b len)
    (for ((b bytes)) (out/b (int8 b)))))
(define (target-send/w id . words)
  (apply target-send/b id (words->bytes words)))

;; Receive returns lists.
(define (target-receive+id/b)
  (let* ((id (in/b))
         (size (in/b)))
    (values
     (for/list ((i (in-range size))) (in/b))
     id)))

(define (target-receive/b)
  (let-values (((lst id)
                (target-receive+id/b)))
    lst))

(define (target-count)
  (define max-id  255)
  (out/b max-id)
  (out/b 0)
  (let-values (((lst id)
                (target-receive+id/b)))
    (- max-id id)))

(define (scan)
  (let ((targets (target-count)))
    (printf "Found ~a targets(s).\n" targets)))


(define (target-receive/w)
  (bytes->words (target-receive/b)))

;; RPC returns values.
(define (target-rpc/b . args)
  (apply target-send/b args)
  (void/values (target-receive/b)))
(define (target-rpc/w . args)
  (apply target-send/w args)
  (void/values (target-receive/w)))


;; BOOT MONITOR COMMAND PROTOCOL


;; ASYNC COMMANDS

(define (tnop)           (target-send/b))         ;; nop = empty message
(define (treset)         (target-send/b 7))       ;; reset target
(define (tstart/b addr)  (target-send/w 3 addr)) 

;; (*) The command 'start in live/commands.ss uses this. Because the
;; console closes the serial port after every command, running 'start
;; on a word that DOES return an ack byte is ok: the ack byte is just
;; ignored.


;; RPC COMMANDS

(define (>t val)         (target-rpc/b 1 val))
(define (t>)             (target-rpc/b 2))
(define (~texec/b addr)  (target-send/w 3 addr))
(define (a! addr)        (target-rpc/w 4 addr))
(define (f! addr)        (target-rpc/w 5 addr))
(define (target-sync)    (target-rpc/b 6))  
(define (check-block)    (target-rpc/b 12))
(define (stackptr)       (target-rpc/b 13)) ;; get size of data stack
(define (erase)          (target-rpc/b 14)) ;; erase current flash block
(define (program)        (target-rpc/b 15)) ;; program current flash line


;; RPC LIST COMMANDS

;; Receive data packets as lists.  These postincrement the a and f
;; pointers, and are limited to 255 byte lists.  Use the composite
;; commands instead.
(define (~a>/b n)        (target-send/b 8 n) (target-receive/b))
(define (~f>/b n)        (target-send/b 9 n) (target-receive/b))
(define (~>a/b lst)      (apply target-rpc/b 10 (length lst) lst))
(define (~>f/b lst)      (apply target-rpc/b 11 (length lst) lst))






;; COMPOSITE COMMANDS

;; Written in terms of the low level commands defined above.

(define (chunked-receive command addr!)
  (lambda (total-size [at #f])
    (when at (addr! at))
    (if (<= total-size 0)
        '()
        (flatten
         (map command
              (chunk-size-list
               total-size
               #x80))))))  ;; allow for some header wrapping.

(define a>/b  (chunked-receive ~a>/b a!))
(define f>/b  (chunked-receive ~f>/b f!))

;; FIXME: implement chunked message send correctly.
(define (chunked-send command addr!)
  (lambda (lst [at #f])
    (when at (addr! at))
    (unless (zero? (length lst))
      (command lst))))

(define >a/b  (chunked-send ~>a/b a!))
(define >f/b  (chunked-send ~>f/b f!))

(define (f>/w n) (bytes->words (f>/b (<<< n 1))))

;; Target fetch/store + double.
(define (t@  addr)     (a! addr) (car (a>/b 1)))
(define (t!  val addr) (a! addr) (>a/b (list val)))
(define (_t@ addr)     (a! addr) (car (bytes->words (a>/b 2))))
(define (_t! val addr) (a! addr) (>a/b (words->bytes (list val))))

(define (stackbottom) #x80)
(define (stacksize) (- (stackptr) (stackbottom)))

(define (ts-copy)
  (reverse (a>/b (stacksize)
                 (+ 1 (stackbottom)))))
(define (_ts-copy)
  (join-nibble-list (ts-copy) 8 0))

;; Execute + reply message.
(define console-display
  (make-parameter (lambda (reply)
                    (display (list->bytes reply)))))

(define (console-log)
  (let ((reply (target-receive/b)))
    (unless (null? reply)
      ((console-display) reply)
      (console-log))))

(define (texec/b addr)
  (~texec/b addr)
  (console-log))


;; Initial state for macro evaluation.
(define (init-state [lst '()])
  (state:compiler lst))


;; Macros that perform compile time computations can be simulated at
;; the console using the current run-time stack as an input.  This
;; uses a "lazy stack" to avoid having to tunnel data back and forth
;; all the time.  (Communication to the device might be slow, but host
;; CPU time is free.)

;; Simulate a macro by wrapping lazy pops in target values.
(define (tsim coma)
  (define (eval-macro m lst)
    (state->code (m (init-state lst))))
  (let* ((lp (pop->lp (stacksize) t>))
         (stack-in (lp->lazy-stack lp))
         (stack-out (eval-macro coma stack-in)))
    ;; Skip code that has remained the same.
    (let-values (((in out)
                  (diff-lists (reverse stack-in) ;; code stack -> sequential code
                              (reverse stack-out))))
    
      ;; Dummy-interpret to force all values in the remaining output
      ;; _and_ input for the side effect of popping the stack.
      (interpret-cw/qw void void out)
      (interpret-cw/qw void void in)

      ;; Check which values were actually used, and take code from there.
      (let* ((used (lp-have lp))
             (not-used (- (length in) used))
             (nb-ins (- (length out) not-used)))
        (interpret-cw/qw
         texec/b >t  ;; proper interpret
         (reverse
          (take nb-ins stack-out)))))))




(define-struct lp (vector have pop!) #:mutable)

(define (pop->lp n pop!)
  (make-lp (make-vector n) 0 pop!))

(define (lp-ref lp i)
  (define v (lp-vector lp))
  (define (pop!)
    (let ((have (lp-have lp)))
      ;; (printf "popping ~a\n" have)
      (vector-set! v have ((lp-pop! lp)))
      (set-lp-have! lp (add1 have))))
  ;; (printf "deref ~a\n" i)
  (when (>= i (vector-length v))
    (error 'lazy-pop-underflow))
  (let next ()
    (if (< i (lp-have lp))
        (vector-ref v i)
        (begin (pop!) (next)))))

(define (lp->lazy-stack lp)
  (for/list ((i (in-range (vector-length (lp-vector lp)))))
    (op: qw (make-target-value
             (lambda () (lp-ref lp i))
             'lazy-pop))))

(define (interpret-cw/qw _cw _qw code)
  (define *stack* '())
  (define num target-value->number)
  (for ((ins code))
    ;; (printf "interpret ~a\n" ins)
    (match ins
      ([list (? qw?) n] (_qw (num n)))
      ([list (? cw?) a] (_cw (target-byte-addr (num a) 'code)))
      ([list-rest opc _]
       (error 'cannot-simulate-opcode "~a\n~a"
              (asm-name opc)
              (reverse code))))))








;; Double stack.
(define (_>t val)      (for ((w (words->bytes (list val))))
                         (>t w)))
(define (_t>)          (let* ((hi (t>))
                              (lo (t>)))
                         (car (bytes->words (list lo hi)))))

;; Block access

;; FIXME: PIC18 specific.
;; Flash blocks: 64 bytes (erase unit).
;; RAM blocks: 16 bytes (so 8 bits spans the 4KB RAM address space)

(define a-block-size (make-parameter 16))
(define f-block-size (make-parameter 64))

(define (bf! n) (f! (* (f-block-size) n)))
(define (ba! n) (a! (* (a-block-size) n)))
(define (free-block? b)
  (bf! b)
  (= #xff (check-block)))

;; To minimise mistakes, erase and program will all set f.  This as
;; opposed to the readout, which uses words relative to current
;; position.

(define (erase-block b) 
  (bf! b) (erase))

(define (erase-blocks b n)
  (unless (zero? n)
    (erase-block b)
    (erase-blocks (+ b 1) (- n 1))))

(define (erase-from-block b)
  (define erasing #f)
  (let next ((b b))
    (if (free-block? b)
        (when erasing
          (printf "memory clear.\n"))
        (begin
          (unless erasing
            (printf "erasing blocks: ")
            (set! erasing #t))
          (printf "~s " b)
          (flush-output)
          (erase-block b)
          (next (add1 b))))))

(define (erase-from/w addr)
  (erase-from-block
   (ceiling-block addr 32))) ;; 32 words in a block


(define (upload-bytes-line org bytes [bits 3])
  (define n (<<< 1 bits))
  (unless (= n (length bytes))
    (error 'non-normalized-line "~s" bytes))
  (target-sync) ;; make sure target is live
  (>f/b bytes org)
  (program))

;; Upload line-aligned bytes obtained from flattened code chunks.
;; Default is from PIC18, which uses 8 bytes per write line.
;; (require scheme/pretty)
(define (upload-bytes bin [align-bits 3])
  ;; (pretty-print bin)
  (for ((chunk (bin-flatten bin)))
    (for (((org line) (in-binchunk/lines chunk align-bits -1)))
       (display ".")
       (upload-bytes-line org line align-bits))))


;; PRINTING

(define printf-stack stack-print)

(define (psu lst)  (printf-stack lst " ~s"))
(define (psx lst)  (printf-stack (map byte->string lst) " ~a"))
(define (pss lst)  (printf-stack (map (sign-extender 8) lst) " ~s"))
(define (_psx lst) (printf-stack (map word->string lst) " ~a"))
(define (_pss lst) (printf-stack (map (sign-extender 16) lst) " ~s"))

;; Target stack printing.
(define (ts)  (psu (ts-copy)))
(define (tsx) (psx (ts-copy)))
(define (tss) (psu (ts-copy)))

(define (_ts)  (psu (_ts-copy)))
(define (_tsx) (_psx (_ts-copy)))
(define (_tss) (_pss (_ts-copy)))



;; Print a memory map of the first n kb.  Useful for getting a general
;; idea of chip content.
(define (kb n)
  (define (current-block)
    (if (= #xff (check-block)) ". " "x "))
  (define (print-line)
    (printf
     "~a\n"
     (apply
      string-append
      (for/list ((i (in-range 8))) (current-block)))))

  (bf! 0)
  (let ((lines (* 2 n)))
    (for ((i (in-range lines)))
         (print-line))))

;; Read raw bytes from input and print.  See tools/print.ss for args.
(define (hex-dump sequence . args)
  (for ((s sequence)
        (p (apply in-hex-printer args)))
    (p s)))

(define (abd b)
  (let ((bs (a-block-size)))
    (ba! b)
    (hex-dump (in-list (a>/b bs))
              (* bs b) 3 2 8)))
(define (fbd b)
  (let ((bs (f-block-size)))
    (bf! b)
    (hex-dump (in-list (f>/w (/ bs 2)))
              (* bs b) 4 4 4)))


;; Disassembly.
(define (tsee word [n #x10])
  (define addr
    (or
     (cond ((number? word) word)
           ((symbol? word) (target-find-code word))
           (else #f))
     (error 'not-found "~s" word)))
  (define dict (code-labels))
  (f! addr)
  (print-target-word
   (disassemble->word
    (eval 'dasm-collection)
    (f>/w n) (>>> addr 1) 16
    (lambda (addr)
      (let ((rec (code-resolve addr 'code)))
        (or (and rec (car rec))
            addr))))))

(define (bd block)
  (tsee (* 64 block) 32))

(define (print-wlist lst)
  (for ((w lst))
     (printf "~a " (symbol->string w)))
  (newline))

;; All words are accessible through macros.
(define (macros)   (print-wlist (ns-mapped-symbols '(macro))))
(define (commands) (print-wlist (ns-mapped-symbols '(target))))
(define (words)    (print-wlist
                    (filter (lambda (l)
                              (not (eq? #\. (car (string->list (symbol->string l))))))
                            (map car (code-labels)))))

;; Empty clears the target from the 'code pointer onwards.  This is
;; executed on startup to make sure the target is ready for upload.
(define (clear-flash [bits 5]) ;; 2^5 words in a block
  (define (get x) (cadr (assq x (code-pointers))))
  (let ((code (bit-ceil (get 'code) bits))  ;; (*)
        (data (get 'data)))
    (code-pointers-set! `((code ,code)
                          (data ,data)))
  
    (erase-from-block (>>> code bits))))
;; (*) We can only start compiling at the next block-erase boundary.



;; INCREMENTAL UPLOAD
(define *debug* #f)
(define (debug-on)  (set! *debug* #t))
(define (debug-off) (set! *debug* #f))
(define (commit [bin (code->binary)])
  (unless (null? bin)
    (when *debug* (code-print))
    (upload-bytes bin)
    (code-clear!)))


(define (OK)
  (code-clear!)     ;; don't keep stuff around
  (target-sync) 
  (display "OK\n"))

;; PIC18 specific
(define (access-bank x)
  (let ((x (band x #xFF)))
    (if (zero? (band x #x80))
        x
        (bior #xF80 x))))

(define (tfbuffer addr)
  (let ((n (car (f>/b 1 addr))))
    (f>/b n (+ addr 1))))

(define (tfstring addr)
  (list->bytes (tfbuffer addr)))

(define (clear-ram-block b [val 0])
  (>a/b (build-list 64 (lambda _ val))
        (* 64 b)))