private/src/front.ss
;; ---- 1996 Scheme Workshop -- Compiling Scheme

;; -- front.ss

;;; This file implements the majority of transformations for the compiler.

; SOURCE FORM
;; <program>		--> <expression>
;; <expression>		--> <reference>
;;			  | <literal>
;;			  | <procedure call>
;;			  | <lambda expression>
;;			  | <conditional>
;;			  | <assignment>
;;			  | <begin expression>
;;			  | <derived expression>
;; <reference>		--> <variable>
;; <begin expression>	--> ( begin <sequence> )
;; <body>		--> <sequence>
;; <formals>		--> ( <variable>* )
;; <alternate>		--> <expression>
;; <derived expression> --> ( let (<binding spec>*) <body> )
;;			  | ( letrec (<binding spec>*) <body> )

; CORE FORM 
;; <begin expression>	--> ( begin <expression> <expression> )
;; <body>		--> <expression>
;; <literal>		--> <quotation>
;; <derived expression> --> <empty>
  
; ANALYZED FORM
;; <lambda expression>	--> ( lambda <formals>
;; 			      ( quote ( assigned <variable>*) )
;; 			      ( quote ( free     <variable>*) )
;; 			      <body> )
;; <literal>		--> ( quote <immediate datum> )
;; <immediate datum>	--> ( ) | <boolean> | <number> | <character>
;; <program>		--> ( let ( <quotation binding>* ) <expression> )
;; <quotation binding>	--> ( <variable> (quote <heap datum>) )
;; <heap datum>		--> <symbol> | <string>
;;			  | ( <datum>+ )
;;			  | ( <datum>+ . <datum> )
;;			  | #( <datum>* )
  
; ASSIGNMENTLESS FORM
;; <assignment>		--> <empty>
;; <lambda expression>	--> ( lambda <formals> ( quote ( free <variable>* ) )
;;			      <body> )

; IMMEDIATE LITERAL FORM
;; <program>		--> <symbol-less program>
;;			  | ( ( lambda ( <variable>+ ) <symbol-less program> )
;;			      <symbol expression>+ )
;; <symbol expression>	--> ( string->uninterned-symbol
;;			      ( string <character>+ ) )
;; <symbol-less program> --> <expression>
;;			  | ( ( lambda ( <variable>+ ) <expression> )
;;			      <expression> )
   
; CODE-GENERATION FORM
;; <reference>		--> ( bound <uinteger 10> <variable> )
;;			  | ( free <uinteger 10> <variable> )
;; <lambda expression>  --> ( build-closure ( lambda <formals> <body> )
;;			      <reference>* )

;; -------------------- the front end of the compiler.

(define-syntax mv-let
  (syntax-rules ()
    [(_ () b0 b1 ...)
     (begin b0 b1 ...)]
    [(_ ((formals e) decl ...) b0 b1 ...)
     (let ((t (lambda () e)))
       (mv-let (decl ...)
	 (call-with-values t (lambda formals b0 b1 ...))))]))

;; ---- Testing

(define front
  (lambda (exp)
    (code-generation-form
      (immediate-literal-form
	(assignmentless-form
	  (analyzed-form
	    (core-form exp)))))))

(define front-test
  (lambda (exp)
    (let* ([exp-co (core-form exp)]
	   [exp-an (analyzed-form exp-co)]
	   [exp-as (assignmentless-form exp-an)]
	   [exp-im (immediate-literal-form exp-as)]
	   [exp-cg (code-generation-form exp-im)]
	   )
      (printf "Core Form:~n")
      (pretty-print  exp-co)
      (printf "Analyzed Form:~n")
      (pretty-print  exp-an)
      (printf "Assignmentless Form:~n")      
      (pretty-print  exp-as)
      (printf "Immediate-literal Form:~n")
      (pretty-print  exp-im)
      (printf "Code-generation Form:~n")
      exp-cg)))

;; ---------- Core Form: Basic error checking and simplification.

(define *prim-names*
  '(+ - * / = < boolean? car cdr char? char->integer cons eq?
     integer? string->uninterned-symbol not null? pair? procedure?
     string string? string-length string-ref
     vector vector? vector-length vector-ref
     vector-set! symbol? symbol->string))

(define *keywords*
  '(quote begin if set! lambda))

(define core-form
  (lambda (exp)
    (core-convert exp)))

(define core-convert
  (lambda (exp)
    (if (not (pair? exp))
	(cond
	  [(symbol? exp) exp]
	  [(or (number? exp) (boolean? exp) (string? exp) (char? exp))
	   `(quote ,exp)]
	  [else
	    (error 'core-convert "Bad expression ~s" exp)])
	(record-case exp
	  [quote (obj)
	    `(quote ,obj)]
	  [begin (e0 . exps)
	    (if (null? exps)
		(core-convert e0)
		(let ([new-e0 (core-convert e0)]
		      [new-e1 (core-convert `(begin . ,exps))])
		  `(begin ,new-e0 ,new-e1)))]
	  [if (t c a)
	    (let ([new-t (core-convert t)]
		  [new-c (core-convert c)]
		  [new-a (core-convert a)])
	      `(if ,new-t ,new-c ,new-a))]
	  [set! (v e)
	    (cond
	      [(not (symbol? v))
	       (error 'core-convert "Bad expression ~s" exp)]
	      [else
		(let ([new-e (core-convert e)])
		  `(set! ,v ,new-e))])]
	  [lambda (formals . bodies)
	    (if (not (and (list? formals)
			  (andmap symbol? formals)
			  (andmap (lambda (x) (not (memq x *keywords*)))
			    formals)
			  (set? formals)))
		(error 'core-convert "Bad formals ~s in ~s" formals exp)
		(let ([new-body (core-convert `(begin ,@bodies))])
		  `(lambda ,formals ,new-body)))]
	  [let (decls . bodies)
	    (let ([vars (map car decls)]
		  [vals (map cadr decls)])
	      (core-convert `((lambda ,vars ,@bodies) ,@vals)))]
	  [letrec (decls . bodies)
	    (let ([vars (map car decls)]
		  [vals (map cadr decls)])
	      (let ([holders (map (lambda (x) #f) vars)]
		    [assigns (map (lambda (v e) `(set! ,v ,e)) vars vals)])
		(core-convert
		  `((lambda ,vars ,@assigns ,@bodies) ,@holders))))]
	  [else
	    (if (or (null? exp)
		    (not (list? exp))
		    (memq (car exp) *keywords*))
		(error 'core-convert "Bad expression ~s" exp)
		(let ([rator (car exp)]
		      [rands (cdr exp)])
		  (let ([new-rator (core-convert rator)]
			[new-rands (core-convert-list rands)])
		    `(,new-rator . ,new-rands))))]))))

(define core-convert-list
  (lambda (ls)
    (map core-convert ls)))

;; ---------- Analyzed Form

(define analyzed-form
  (lambda (exp)
    (mv-let ([(exp quotes poked free)
	      (analyze exp '())])
      `(let ,quotes ,exp))))

(define analyze  ;; returns: exp, quote-pairs, assigned, free
  (lambda (exp env)
    (if (not (pair? exp))
	(if (memq exp env)
	    (values exp '() '() (unit-set exp))
	    (if (memq exp *prim-names*)
		(error 'analyze "Primitive in non-application position ~s"
		  exp)
		(error 'analyze "Unbound variable ~s" exp)))
	(record-case exp
	  [quote (obj)
	    (if (or (number? obj) (null? obj) (boolean? obj) (char? obj))
		(values `(quote ,obj) '() '() '())
		(let ([var (gen-qsym)])
		  (values var (list (list var exp)) '() (unit-set var))))]
	  [begin (a b)
	    (mv-let ([(a-exp a-quotes a-poked a-free) (analyze a env)]
		     [(b-exp b-quotes b-poked b-free) (analyze b env)])
	      (values `(begin ,a-exp ,b-exp)
		(append a-quotes b-quotes)
		(union a-poked b-poked)
		(union a-free b-free)))]
	  [if (t c a)
	    (mv-let ([(t-exp t-quotes t-poked t-free) (analyze t env)]
		     [(c-exp c-quotes c-poked c-free) (analyze c env)]
		     [(a-exp a-quotes a-poked a-free) (analyze a env)])
	      (values `(if ,t-exp ,c-exp ,a-exp)
		(append t-quotes c-quotes a-quotes)
		(union (union t-poked c-poked) a-poked)
		(union (union t-free c-free) a-free)))]
	  [set! (v e)
	    (if (not (memq v env))
		(if (memq v *prim-names*)
		    (error 'analyze "Attempt to set! a primitive in ~s" exp)
		    (error 'analyze "Attempt to set! a free variable in ~s"
		      exp))
		(mv-let ([(e-exp e-quotes e-poked e-free) (analyze e env)])
		  (values `(set! ,v ,e-exp)
		    e-quotes
		    (union (unit-set v) e-poked)
		    (union (unit-set v) e-free))))]
	  [lambda (formals body)
	    (mv-let ([(body-exp body-quotes body-poked body-free)
		      (analyze body (append formals env))])
	      (let ([poked (intersection body-poked formals)]
		    [free-poked (difference body-poked formals)]
		    [free (difference body-free formals)])
		(values `(lambda ,formals (quote (assigned . ,poked))
			   (quote (free . ,free))
			   ,body-exp)
		  body-quotes
		  free-poked
		  free)))]
	  [else
	    (let ([rator (car exp)]
		  [rands (cdr exp)])
	      (mv-let ([(rand-exps rand-quotes rand-poked rand-free)
			(analyze-list rands env)])
		(if (and (symbol? rator)
			 (not (memq rator env))
			 (memq rator *prim-names*))
		    (values `(,rator . ,rand-exps)
		      rand-quotes rand-poked rand-free)
		    (mv-let ([(rator-exp rator-quotes rator-poked rator-free)
			      (analyze rator env)])
		      (values `(,rator-exp . ,rand-exps)
			(append rator-quotes rand-quotes)
			(union rator-poked rand-poked)
			(union rator-free rand-free))))))]))))

(define analyze-list
  (lambda (ls env)
    (if (null? ls)
	(values '() '() '() '())
	(mv-let ([(head-exp head-quotes head-poked head-free)
		  (analyze (car ls) env)]
		 [(tail-exps tail-quotes tail-poked tail-free)
		  (analyze-list (cdr ls) env)])
	  (values (cons head-exp tail-exps)
	    (append head-quotes tail-quotes)
	    (union head-poked tail-poked)
	    (union head-free tail-free))))))

;; ---------- assignmentless-form:  Removing the set! form.

(define assignmentless-form 
  (lambda (exp)
    (let ([qdecls (cadr exp)]
	  [subexp (caddr exp)])
      (let ([new-subexp (assignment-convert subexp '())])
	`(let ,qdecls ,new-subexp)))))

(define assignment-convert
  (lambda (exp env)
    (if (not (pair? exp))
	(if (memq exp env)
	    `(vector-ref ,exp (quote 0))
	    exp)
	(record-case exp
	  [quote (obj) `(quote ,obj)]
	  [begin (a b)
	    (let ([a-exp (assignment-convert a env)]
		  [b-exp (assignment-convert b env)])
	      `(begin ,a-exp ,b-exp))]
	  [if (t c a)
	    (let ([t-exp (assignment-convert t env)]
		  [c-exp (assignment-convert c env)]
		  [a-exp (assignment-convert a env)])
	      `(if ,t-exp ,c-exp ,a-exp))]
	  [set! (v e)
	    (let ([e-exp (assignment-convert e env)])
	      `(vector-set! ,v (quote 0) ,e-exp))]
	  [lambda (formals poked free body)
	    (let ([poked (cdadr poked)] ; remove the quote
		  [free (cdadr free)]) 
	      (let ([new-env (union poked (difference env formals))])
		(let ([body-exp (assignment-convert body new-env)])
		  (if (null? poked)
		      `(lambda ,formals (quote (free . ,free)) ,body-exp)
		      (let ([poked-exps
			      (map (lambda (pv) `(vector ,pv)) poked)]
			    [new-frees
			      (union free (difference formals poked))])
			`(lambda ,formals
			   (quote (free . ,free))
			   ((lambda ,poked
			      (quote (free . ,new-frees))
			      ,body-exp) . 
			      ,poked-exps)))))))]
	  [else
	    (let ([rator (car exp)]
		  [rands (cdr exp)])
	      (let ([rator-exp (assignment-convert rator env)]
		    [rand-exps (assignment-convert-list rands env)])
		`(,rator-exp . ,rand-exps)))]))))

(define assignment-convert-list
  (lambda (ls env)
    (map (lambda (e) (assignment-convert e env)) ls)))

;; ---------- Immediate-literal Form:  Lifting heap immediates

(define s-table '())

(define immediate-literal-form
  (lambda (exp)
    (set! s-table '())
    (let ([quoted (cadr exp)]
	  [exp (caddr exp)])
      (if (null? quoted) exp
	  (let ([q-exps (map heap-literal-destruct (map cadadr quoted))]
		[q-vars (map car quoted)])
	    (let ([exp `((lambda ,q-vars (quote (free)) ,exp) .
			 ,q-exps)])
	      (if (null? s-table) exp
		  (let ([s-exps
			  (map symbol-destruct (map car s-table))]
			[s-vars (map cadr s-table)])
		    `((lambda ,s-vars (quote (free)) ,exp) .
		      ,s-exps)))))))))

(define heap-literal-destruct
  (lambda (obj)
    (cond
      [(symbol? obj)
       (let ([entry (assq obj s-table)])
	 (if (pair? entry)
	     (cadr entry)
	     (let ([v (gen-ssym)])
	       (set! s-table (cons (list obj v) s-table))
	       v)))]
      [(or (boolean? obj) (number? obj) (char? obj) (null? obj))
       `(quote ,obj)]
      [(string? obj)
       (let ([char-exps (map (lambda (c) `(quote ,c)) (string->list obj))])
	 `(string . ,char-exps))]
      [(pair? obj)
       (let ([car-exp (heap-literal-destruct (car obj))]
	     [cdr-exp (heap-literal-destruct (cdr obj))])
	 `(cons ,car-exp ,cdr-exp))]
      [(vector? obj)
       (let ([contents-exps (map heap-literal-destruct (vector->list obj))])
	 `(vector . ,contents-exps))])))

(define symbol-destruct
  (lambda (sym)
    (let ([char-exps (map (lambda (x) `(quote ,x))
		       (string->list (symbol->string sym)))])
      `(string->uninterned-symbol (string .  ,char-exps)))))

;; ---------- Code-generation Form: converting variables and lambdas

(define code-generation-form  
  (lambda (exp)
    (cg-form-convert exp '() '())))

(define cg-form-convert
  (lambda (exp bounds frees)
    (if (not (pair? exp))
	(let ([i (list-index exp bounds)])
	  (if i
	      `(bound ,i ,exp)
	      (let ([i (list-index exp frees)])
		(if i
		    `(free ,i ,exp)
		    exp))))		; inline
	(record-case exp
	  [quote (obj)
	    `(quote ,obj)]
	  [begin (a b)
	    (let ([a-exp (cg-form-convert a bounds frees)]
		  [b-exp (cg-form-convert b bounds frees)])
	      `(begin ,a-exp ,b-exp))]
	  [if (t c a)
	    (let ([t-exp (cg-form-convert t bounds frees)]
		  [c-exp (cg-form-convert c bounds frees)]
		  [a-exp (cg-form-convert a bounds frees)])
	      `(if ,t-exp ,c-exp ,a-exp))]
	  [lambda (formals quoted-frees body)
	    (let ([free (cdadr quoted-frees)]) ; getting rid of the quote
	      (let ([free-exps (cg-form-convert-list free bounds frees)]
		    [body-exp (cg-form-convert body formals free)])
		`(build-closure (lambda ,formals ,body-exp) . 
		   ,free-exps)))]
	  [else
	    (let ([rator (car exp)] [rands (cdr exp)])
	      (let ([rator-exp (cg-form-convert rator bounds frees)]
		    [rand-exps (cg-form-convert-list rands bounds frees)])
		`(,rator-exp . ,rand-exps)))]))))

(define cg-form-convert-list
  (lambda (ls bounds frees)
    (map (lambda (e) (cg-form-convert e bounds frees)) ls)))

;; ---------- Utility procedures

(define list-index
  (lambda (v ls)
    (let loop ([ls ls] [acc 0])
      (cond
	[(null? ls) #f]
	[(eq? (car ls) v) acc]
	[else (loop (cdr ls) (add1 acc))]))))

(define union
  (lambda (a b)
    (cond
      [(null? a) b]
      [(memq (car a) b) (union (cdr a) b)]
      [else (cons (car a) (union (cdr a) b))])))

(define difference
  (lambda (a b)
    (cond
      [(null? a) '()]
      [(memq (car a) b) (difference (cdr a) b)]
      [else (cons (car a) (difference (cdr a) b))])))

(define intersection
  (lambda (a b)
    (cond
      [(null? a) '()]
      [(memq (car a) b) (cons (car a) (intersection (cdr a) b))]
      [else (intersection (cdr a) b)])))

(define unit-set
  (lambda (item)
    (list item)))

(define set?
  (lambda (ls)
    (or (null? ls)
	(and (not (memq (car ls) (cdr ls)))
	     (set? (cdr ls))))))

(define gen-qsym gensym)	; variables holding quoted data
(define gen-ssym gensym)	; variables holding symbols