private/src/emu.ss
;; ---- 1996 Scheme Workshop -- Compiling Scheme

;; emu.ss -- Sparc emulator and other support code

;; ---- Assembly format

;; The instructions used are the following

;; inst := (ld   reg-off reg        )  ; load word                        
;;       | (st   reg     reg-off    )  ; store word                       
;;       | (ldb  reg-off reg        )  ; load byte                        
;;       | (stb  reg     reg-off    )  ; store byte                       
;;       | (set  number  reg        )  ; load immediate to register       
;;       | (mov  reg     reg        )  ; move register to register        
;;                                                                        
;;       | (sll  reg reg-or-imm reg )  ; logical left shift               
;;       | (srl  reg reg-or-imm reg )  ; logical right shift              
;;       | (sra  reg reg-or-imm reg )  ; arithmetic right shift           
;;       | (add  reg reg-or-imm reg )  ; addition                         
;;       | (sub  reg reg-or-imm reg )  ; subtraction                      
;;       | (and  reg reg-or-imm reg )  ; logical and                      
;;       | (or   reg reg-or-imm reg )  ; logical or                       
;;       | (andn reg reg-or-imm reg )  ; logical and-not                  
;;       | (smul reg reg-or-imm reg )  ; signed multiplication            
;;       | (sdiv reg reg-or-imm reg )  ; signed division                  
;;                                                                        
;;       | (nop                     )  ; nothing                          
;;       | (jmp  reg                )  ; jump to address in register      
;;                                                                        
;;       | (cmp  reg reg-or-imm     )  ; compare (sets condition codes,   
;;                                     ;          used with branches)     
;;       | (ba   label              )  ; branch always                    
;;       | (be   label              )  ; branch if last compare was equal 
;;       | (bne  label              )  ; branch if not equal              
;;       | (bl   label              )  ; branch if less than              
;;       | (bge  label              )  ; branch if not less than          
;;
;; directive := (label symbol) | (comment string)
;;                                                                        
;; reg        := fp | cp | ap | ac | t1 | t2 | t3                         
;; reg-off    := (reg number)                                             
;; reg-or-imm := reg | number                                             

;; All instructions may have an optional string at the end which
;; denotes a comment (this may go away)

;; ---- Top level

;; if you want to emulate a program returned by the code generator, use:

(define emu
  (lambda (program)
    (emu-startup (emu-assemble program))))


;; ignore this: it's _very_ chez specific, and will be removed when I
;; return from LA.

;(define sparc
;  (lambda (program)
;    (sparc-assemble program "/tmp/t.s")
;    (printf "running gcc~n")
;    (system "cd /tmp; gcc /u/ehilsdal/c3/startup.c /u/ehilsdal/c3/call_scheme.s /tmp/t.s -o /tmp/a.out")
;    (printf "running program~n")
;    (system "chmod a+w /tmp/a.out; time /tmp/a.out -h 20000")))

;; ---- Assembler

;; We have two ``assemblers''.  One assembles the code into a scheme
;; vector suitable for use with the emulator (replacing all labels
;; with addresses).  The other simply translates the list assembly
;; format into real sparc assembly code.

(define sparc-assemble
  (lambda (code file)
    (let ([op (open-output-file file 'replace)])
      (parameterize ([current-output-port op])
	(sparc-spit (registerize code)))
      (close-output-port op))))

(define emu-assemble
  (lambda (code)
    (emu-transform (registerize code))))

;; registerize does a really dumb substitution of the symbolic
;; registers with numerical registers.

(define registerize
  (letrec ([list-index
	     (lambda (item ls acc)
	       (if (null? ls) #f
		   (if (eq? (car ls) item) acc
		       (list-index item (cdr ls) (add1 acc)))))]
	   [regs '(pc fp cp ap ac t1 t2 t3)])
    (lambda (thing)
      (cond
	[(pair? thing)
	 (let ([x (list-index (car thing) regs 0)])
	   (if x
	       `(reg-off (reg ,x) ,(cadr thing))
	       (map registerize thing)))]
	[(and (symbol? thing) (list-index thing regs 0)) =>
	 (lambda (x)
	   `(reg ,x))]
	[else thing]))))

;; sparc-spit is a simple printer.

(define sparc-spit
  (lambda (ls)
    (define print-elem
      (lambda (obj delim)
	(cond
	  [(pair? obj)
	   (record-case obj
	     [reg (num)
	       (printf "~a%l~s" delim num)]
	     [reg-off (reg off)
	       (let ([num (cadr reg)])
		 (printf "~a[%l~s~a~s]" delim
		   num (if (nonnegative? off) "+" "") off))])]
	  [(string? obj)
	   (printf "~c! ~a " #\tab obj)]
	  [else (printf "~a~a" delim obj)])))
    (printf "~c.align 4~n" #\tab)
    (printf "~c.global _scheme_entry~n" #\tab)
    (let loop ([ls (cdr ls)])
      (unless (null? ls)
	(let ([inst (car ls)])
	  (case (car inst)
	    [(comment)
	     (printf "~c~c! ~a " #\tab #\tab (cadr inst))]
	    [(label)
	     (printf "~a:" (cadr inst))]
	    [(nop)
	     (printf "~c~s" #\tab 'nop)]
	    [else
	      (let ([first (cadr inst)]
		    [rest (cddr inst)])
		(printf "~c~s~c" #\tab (car inst) #\tab)
		(print-elem first "")
		(for-each (lambda (x) (print-elem x ", "))
		  rest))]))
	(newline)
	(loop (cdr ls))))))


;; emu-transform is a badly written two-pass assembler.

(define emu-transform
  (lambda (ls)
    (let ([code-vec (make-vector (sub1 (length ls)))])
      (let loop ([pos 0] [ls (cdr ls)] [atable '()] [back '()])
	(if (null? ls)
	    (for-each
	      (lambda (rec)
		(let ([pos (car rec)]
		      [inst (cadr rec)])
		  (case (car inst)
		    [(set)		; sets are sometimes to labels
		     (let ([label (cadr inst)])
		       (let ([addr (let ([p (assq label atable)])
				     (if p (cadr p) label))])
			 (vector-set! code-vec pos
			   `(set ,addr ,(caddr inst)))))]
		    [(ba bne be bg ble bge bl bpos bneg)
		     (let ([label (cadr inst)])
		       (let ([addr (cadr (assq label atable))])
			 (vector-set! code-vec pos
			   `(,(car inst) ,addr))))])))
	      back)
	    (let ([i (car ls)])
	      (case (car i)
		[(comment)
		 (loop pos (cdr ls) atable back)]
		[(label)
		 (loop pos (cdr ls) (cons (list (cadr i) (* pos 4)) atable)
		   back)]
		[(set ba bne be bg ble bge bl)
		 (loop (add1 pos) (cdr ls) atable
		   (cons (list pos i) back))]
		[else
		  (vector-set! code-vec pos i)
		  (loop (add1 pos) (cdr ls) atable back)]))))
      code-vec)))

;; ---- The emulator

(load "bitfield32.ss")

;; The code, stack and heap are scheme vectors.  The code vector holds
;; instructions which look suspiciously like the assembly format,
;; above.  The stack and heap hold ``32 bit'' numbers, except for the
;; base of the stack, which will hold the special number -1 which
;; represents the return pointer to the operating system.

;; There is one ``address space'' for these vectors.  The code starts
;; at zero, the stack starts at emu-stack-base, and the heap starts at
;; emu-heap-base. 

(define emu-code #f)
(define emu-stack #f)	(define emu-stack-base (* (expt 2 20) 4))
(define emu-heap #f)	(define emu-heap-base (* (expt 2 20) 8))

(define emu-regs (make-vector 8))
(define set-pc! (lambda (n) (vector-set! emu-regs 0 n)))
(define get-pc (lambda () (vector-ref emu-regs 0)))

(define reg-ref (lambda (reg) (vector-ref emu-regs (cadr reg))))
(define reg-set! (lambda (reg o) (vector-set! emu-regs (cadr reg) o)))

(define emu-cc #f)	; will turn into one of 'lt, 'gt, or 'eq

;; all loads and stores are byte-addressed, so when we do these
;; vector-refs we have to divide by 4, the word size.

(define emu-load
  (lambda (address)
    (cond
      [(>= address emu-heap-base)
       (vector-ref emu-heap (/ (- address emu-heap-base) 4))]
      [(>= address emu-stack-base)
       (vector-ref emu-stack (/ (- address emu-stack-base) 4))]
      [(vector-ref emu-code (/ address 4))])))

(define emu-store
  (lambda (address object)
    (cond
      [(>= address emu-heap-base)
       (vector-set! emu-heap (/ (- address emu-heap-base) 4) object)]
      [(>= address emu-stack-base)
       (vector-set! emu-stack (/ (- address emu-stack-base) 4) object)]
      [(vector-set! emu-code (/ address 4) object)])))

(define emu-load-byte
  (lambda (addr)
    (let ([subaddr (remainder addr 4)])
      (let ([word (emu-load (- addr subaddr))])
	(and32 (srl32 word (* (- 3 subaddr) 8))
	  #b11111111)))))

(define emu-store-byte
  (lambda (addr byte)
    (let ([subaddr (remainder addr 4)])
      (let ([word (emu-load (- addr subaddr))])
	(let ([holeyword (and32 (not32 (sll32 #b11111111
					 (* (- 3 subaddr) 8)))
			   word)])
	  (let ([newword
		  (or32 (sll32 byte (* (- 3 subaddr) 8))
		    word)])
	    (emu-store (- addr subaddr) newword)))))))

;; emu-startup eventually returns the ``emu-rebuild'' of whatever is
;; left in the accumulator.

(define emu-startup
  (lambda (program)
    (call/cc
      (lambda (k)
	(set! *reset* k)
	(set! emu-code program)
	(set! emu-stack (make-vector 1024))
	(set! emu-heap (make-vector 4096))
	(set-pc! 0)
	(reg-set! '(reg 1) emu-stack-base)
	(reg-set! '(reg 3) emu-heap-base)
	(emu-store emu-stack-base -1)
	(emu-run)))))

;; this certainly isn't fast, but it runs.  There are more
;; sophisticated solutions to this problem.

(define reg-off->address
  (lambda (reg-off)
    (record-case reg-off
      [reg-off (register offset)
	(+ (reg-ref register) offset)])))

(define reg-or-lit->value
  (lambda (thing)
    (if (number? thing)
	thing
	(reg-ref thing))))

(define emu-run
  (lambda ()
    (let ([pc (get-pc)])
      (when (negative? pc)
	(*reset* (emu-rebuild (reg-ref '(reg 4)))))
      (let ([inst (emu-load pc)])
	(set-pc! (+ pc 4))
	(record-case inst
	  [ld (src dest)
	    (reg-set! dest (emu-load (reg-off->address src)))]
	  [st (src dest)
	    (emu-store (reg-off->address dest) (reg-ref src))]
	  [ldb (src dest)
	    (reg-set! dest (emu-load-byte (reg-off->address src)))]
	  [stb (src dest)
	    (emu-store-byte (reg-off->address dest) (reg-ref src))]
	  [set (lit dest)
	    (reg-set! dest lit)]
	  [mov (src dest)
	    (reg-set! dest (reg-ref src))]
	  [sll (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (sll32 val0 val1)))]
	  [srl (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (srl32 val0 val1)))]
	  [sra (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (sra32 val0 val1)))]
	  [add (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (add32 val0 val1)))]
	  [sub (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (sub32 val0 val1)))]
	  [and (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (and32 val0 val1)))]
	  [or (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (or32 val0 val1)))]
	  [andn (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (and32 val0 (not32 val1))))]
	  [smul (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (times32 val0 val1)))]
	  [sdiv (src0 src1 dest)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (reg-set! dest (quotient32 val0 val1)))]
	  [cmp (src0 src1)
	    (let ([val0 (reg-ref src0)]
		  [val1 (reg-or-lit->value src1)])
	      (cond
		[(= val0 val1) (set! emu-cc 'eq)]
		[(less-than32 val0 val1) (set! emu-cc 'lt)]
		[else (set! emu-cc 'gt)]))]
	  [Nop () (void)]
	  [jmp (reg) (set-pc! (reg-ref reg))]
	  [ba (addr) (set-pc! addr)]
	  [be (addr) (if (eq? emu-cc 'eq)
			 (set-pc! addr))]
	  [bne (addr) (if (not (eq? emu-cc 'eq))
			  (set-pc! addr))]
	  [bl (addr) (if (eq? emu-cc 'lt)
			 (set-pc! addr))]
	  [bge (addr) (if (not (eq? emu-cc 'lt))
			  (set-pc! addr))]
	  [else
	    (error 'emu-run "unimplemented instruction: ~s" inst)])
	(emu-run)))))

;; emu-rebuild turns the number returned in the accumulator into a
;; real scheme value.

(define emu-rebuild
  (lambda (object)
    (let ([type (and32 object (- (expt 2 tag-len) 1))])
      (cond
	[(= type number-tag)
	 (uncomplement32 (sra32 object tag-len))]
	[(= type immed-tag)
	 (let ([im-type (and32 object (- (expt 2 imm-tag-len) 1))])
	   (cond
	     [(= im-type bool-tag) (if (zero? (sra32 object 8)) #f #t)]
	     [(= im-type null-tag) '()]
	     [(= im-type char-tag) (integer->char (sra32 object 8))]))]
	[(= type pair-tag)
	 (let ([addr (- object pair-tag)])
	   (cons (emu-rebuild (emu-load addr))
	     (emu-rebuild (emu-load (+ addr 4)))))]
	[(= type string-tag)
	 (let* ([addr (- object string-tag)]
		[length (emu-load addr)])
	   (let loop ([end length] [acc '()])
	     (if (zero? end) (list->string acc)
		 (loop (- end 1)
		   (cons (integer->char (emu-load-byte (+ addr end ws -1)))
		     acc)))))]
	[(= type symbol-tag)
	 (let ([addr (- object symbol-tag)])
	   (string->symbol (emu-rebuild (emu-load addr))))]
	[(= type vector-tag)
	 (let* ([addr (- object vector-tag)]
		[length (emu-load addr)])
	   (let loop ([end (* length 4)] [acc '()])
	     (if (zero? end) (list->vector acc)
		 (loop (- end 4)
		   (cons (emu-rebuild (emu-load (+ addr end)))
		     acc)))))]
	[(= type closure-tag) '<closure>]
	[else '<garbage>]))))