asm/dasm.ss
#lang scheme/base

;; Assembler dictionary.

;; Separate file, because the assembler pattern transformer depends on
;; 'asm-find' to check symbols used in the patterns.

;; FIXME: check if assembler name resolution can be moved to compile
;; time + check if contracts can be used for assembler type checking.


(require
 scheme/promise
 ;; "decoder.ss"
 "../op.ss"
 "../scat.ss"
 "pointers.ss"
 "../tools.ss"
 "../target.ss"
 "../ns.ss"
 (for-syntax
  scheme/base
  "../ns.ss"))

(provide
 dasm-parse
 dasm-arity
 define-dasm-collection   ;; call another macro with the visible disassembler collection
 disassemble->word)


(define-syntax (define-dasm-collection stx)
  (syntax-case (list stx (datum->syntax stx (ns-mapped-symbols '(op dasm)))) ()
    (((_ name) (opcode ...))
     #`(define name
         (filter dasm? (list (ns (op dasm opcode)) ...))))))
                 

(define (dasm-arity d) (procedure-arity (dasm-fn d)))


;; The disassembler can't be decentralized like the assembler.
;; Therefore a disassembler is represented by an aggregate object
;; built of individual disassembler objects, one for each instruction.

;; The access method used to be a binary search tree based on the top
;; nb of bits (opcode) in the instruction, but this is inadequate
;; (i.e. for MIPS there is also a "function" field).

;; For now this uses simple incremental search.  Later we can use some
;; grouping that ensures that if an instruction doesn't match for the
;; group's predicate, it won't match for the whole group.


;; Dissembly works simplest as a lazy list operation.

(define (zeros) (delay (cons 0 (zeros))))

(define (dasm-parse ll-bin dasm-list [addr 0])
  (delay
    (let ((bin (force ll-bin)))
      (if (null? bin)
          '()
          (let next ((ds dasm-list))
            (if (null? ds)
                (error 'dasm-parse-error "~a" (car bin))  ;; (*)
                (let ((disassembler (car ds))
                      (ds+ (cdr ds)))
                  (let ((n (sub1 (dasm-arity disassembler)))) ;; don't count PC argument
                    (when (zero? n) (error 'dasm-arity-error))
                    (let-values (((instruction-words ll-bin+)
                                  (ll-take n ll-bin zeros)))  ;; pad with zeros
                      (let ((sym (apply (dasm-fn disassembler)
                                        addr
                                        instruction-words)))
                        (if sym
                            (begin
                              ;; (printf "sym: ~a\n" sym)
                              (cons (list sym instruction-words)
                                    (dasm-parse ll-bin+ dasm-list (+ addr n))))
                            (next ds+))))))))))))

;; (*) This shouldn't happen: the default disassembler should be a
;; "DW" form.

(define (disassemble->word dasm-list
                           bin addr wordsize
                           [resolve (lambda (x) x)])
  (let ((l (reverse (ll->l (dasm-parse (seq->ll bin) dasm-list addr)))))
   (new-target-word #:realm 'code
                    #:address addr
                    #:code    (map car l)
                    #:bin     (map cadr l))))

;; Note that this is for RISC instruction sets only. All instructions
;; have the same size and are word-addressed. Any mult-word
;; instructions need to be parsed in a later step. (This works well
;; for PIC18 because the 2nd word is a valid NOP instruction, but
;; might need some reworking).

;; (define (disassemble->word binary address wordsize
;;                            ;; + 1 because base is AFTER instruction. i think
;;                            ;; this is as good as universal, so hardcoded here.
;;                            [resolve (lambda (x) x)]
;;                            [rel->abs (lambda (addr rel) (+ 1 (+ addr rel)))])
;;   (define *bin* '())
;;   (define *code* '())

;;   (define (dasm addr ins)
;;     (match ((dasm-find ins wordsize) ins)
;;        ((rator . rands)
;;         (cons rator
;;               (map
;;                (match-lambda
;;                 ((type . value)
;;                  (case type
;;                    ((R) (resolve (rel->abs addr value)))
;;                    (else value))))
;;                rands)))))

;;   (for ((a (in-naturals address))
;;         (b binary))
;;     (push! *bin*  (list b))
;;     (push! *code* (dasm a b)))

;;   (new-target-word #:realm 'code
;;                    #:address address
;;                    #:code *code*
;;                    #:bin *bin*))