private/src/scanparse.ss
;; ---- 1996 Scheme Workshop -- Compiling Scheme

;; -- scanparse.ss

;;; This file contains a scanner and a parser for R4RS Scheme (with
;;; integer being the only numeric type).

(define scan
  (let ([digit->value
	  (let ((zero (char->integer #\0)))
	    (lambda (c)
	      (- (char->integer c) zero)))]
	[scan-error
	  (lambda (c) (error 'scan "Unexpected character ~s" c))]
	[whitespace? (lambda (ch) (memv ch '(#\space #\newline #\tab)))]
	[digit? (lambda (ch) (<= (char->integer #\0)
			       (char->integer ch)
			       (char->integer #\9)))]
	[alpha?
	  (lambda (ch) (let ([n (char->integer ch)])
			 (or (<= (char->integer #\a) n (char->integer #\z))
			     (<= (char->integer #\A) n (char->integer #\Z)))))]
	[spec-initial?
	  (lambda (ch)
	    (memv ch '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\=
			#\> #\? #\~ #\_ #\^)))]
	[end-comment?
	  (lambda (ch)
	    (or (eof-object? ch) (eqv? #\newline ch)))]
	[delimeter?
	  (lambda (ch)
	    (or (eof-object? ch)
		(memv ch
		  '(#\space #\newline #\tab #\( #\[ #\) #\] #\" #\;))))])
    (letrec
      ([s0
	 (lambda (ip)
	   (let ([c (read-char ip)])
	     (cond
	       ((whitespace? c) (s0 ip))
	       ((eof-object? c) '(eof))
	       ((eqv? #\( c) '(lparen))
	       ((eqv? #\) c) '(rparen))
	       ((eqv? #\[ c) '(lbrack))
	       ((eqv? #\] c) '(rbrack))
	       ((eqv? #\' c) '(quote))
	       ((eqv? #\` c) '(grave))
	       ((eqv? #\. c) (s-dot 1 ip))
	       ((eqv? #\; c) (s-comment ip))
	       ((eqv? #\, c) (s-comma ip))
	       ((eqv? #\" c) (s-string '() ip))
	       ((eqv? #\# c) (s-hash ip))
	       ((eqv? #\+ c) (s-sign #t ip))
	       ((eqv? #\- c) (s-sign #f ip))
	       ((digit? c) (s-number #t (digit->value c) ip))
	       ((or (alpha? c) (spec-initial? c))
		(s-identifier (list c) ip))
	       (else (scan-error c)))))]
       [s-comment
	 (lambda (ip)
	   (let ([c (read-char ip)])
	     (cond
	       ((end-comment? c) (s0 ip))
	       (else (s-comment ip)))))]
       [s-dot
	 (lambda (n ip)
	   (let ([c (read-char ip)])
	     (cond
	       ((whitespace? c)
		(case n
		  ((1) '(dot))
		  ((3) '(ellipsis))
		  (else (error 'scan "Bad token ~a" (make-string n #\.)))))
	       ((eqv? #\. c) (s-dot (add1 n) ip))
	       (else (scan-error c)))))]
       [s-comma
	 (lambda (ip)
	   (let ([c (peek-char ip)])
	     (cond
	       ((eqv? #\@ c) (read-char ip) '(comma-at))
	       (else '(comma)))))]
       [s-string
	 (lambda (acc ip)
	   (let ([c (read-char ip)])
	     (cond
	       ((eqv? #\" c) `(datum ,(list->string (reverse acc))))
	       ((eqv? #\\ c) (s-string (cons (read-char ip) acc) ip))
	       (else (s-string (cons c acc) ip)))))]
       [s-hash
	 (lambda (ip)
	   (let ([c (read-char ip)])
	     (cond
	       ((eqv? #\t c) (s-need-delimiter '(boolean #t) ip))
	       ((eqv? #\f c) (s-need-delimiter '(boolean #f) ip))
	       ((eqv? #\( c) '(hash-lparen))
	       ((eqv? #\\ c) (s-char '() ip))
	       (else (scan-error c)))))]
       [s-need-delimiter
	 (lambda (acc ip)
	   (let ([c (peek-char ip)])
	     (cond
	       ((delimeter? c) acc)
	       (else (scan-error c)))))]
       [s-char
	 (lambda (acc ip)
	   (let ([c (peek-char ip)])
	     (cond
	       ((delimeter? c)
		(cond
		  ((null? acc) (read-char ip)
		   `(datum ,c))
		  ((null? (cdr acc))
		   `(datum ,(car acc)))
		  (else
		    (let ((name (list->string (reverse acc))))
		      (cond
			((string=? name "space")
			 '(datum #\space))
			((string=? name "tab")
			 '(datum #\tab))
			((string=? name "newline")
			 '(datum #\newline))
			(else
			  (error 'scan "Bad character name ~a" name)))))))
	       (else (read-char ip) (s-char (cons c acc) ip)))))]
       [s-sign
	 (lambda (plus? ip)
	   (let ([c (peek-char ip)])
	     (cond
	       ((delimeter? c)
		`(identifier ,(if plus? '+ '-)))
	       ((digit? c) (read-char ip)
		(s-number plus? (digit->value c) ip))
	       (else (scan-error c)))))]
       [s-number
	 (lambda (pos? acc ip)
	   (let ([c (peek-char ip)])
	     (cond
	       ((delimeter? c)
		`(datum ,(if pos? acc (- acc))))
	       ((digit? c)
		(read-char ip)
		(s-number pos? (+ (* acc 10) (digit->value c)) ip))
	       (else (scan-error c)))))]
       [s-identifier
	 (lambda (acc ip)
	   (let ([c (peek-char ip)])
	     (cond
	       ((delimeter? c)
		`(identifier ,(string->symbol
				(list->string (reverse acc)))))
	       ((or (alpha? c) (spec-initial? c) (digit? c)
		    (memv c '(#\. #\+ #\-)))
		(read-char ip)
		(s-identifier (cons c acc) ip))
	       (else (scan-error c)))))])
      (lambda (ip)
	(s0 ip)))))

(define parse
  (let ((parse-error
	  (lambda (tok)
	    (error 'parse "unexpected token ~s" tok))))
    (letrec ((reverse*			; needed for improper lists
	       (lambda (acc ls)
		 (if (null? ls)
		     acc
		     (reverse* (cons (car ls) acc) (cdr ls))))))
      (letrec
	((p-list0
	   (lambda (ip)
	     (let ([tok (scan ip)])
	       (record-case tok
		 (rparen () '())
		 (else
		   (let* ((head (p tok ip)))
		     (p-list (list head) ip)))))))
	 (p-list
	   (lambda (acc ip)
	     (let ([tok (scan ip)])
	       (record-case tok
		 (rparen () (reverse acc))
		 (dot ()
		   (let* ((tail (p (scan ip) ip))
			  (tok (scan ip)))
		     (record-case tok
		       (rparen () (reverse* tail acc))
		       (else (parse-error tok)))))
		 (else
		   (let* ((head (p tok ip)))
		     (p-list (cons head acc) ip)))))))
	 (p-vector
	   (lambda (acc ip)
	     (let ([tok (scan ip)])
	       (record-case tok
		 (rparen () (list->vector (reverse acc)))
		 (else 
		   (let ((head (p tok ip)))
		     (p-vector (cons head acc) ip)))))))
	 (p (lambda (tok ip)
	      (record-case tok
		(datum       (d) d)
		(identifier  (i) i)
		(lparen      () (p-list0 ip))
		(hash-lparen () (p-vector '() ip))
		(quote	     () (list 'quote (p (scan ip) ip)))
		(grave       () (list 'quasiquote (p (scan ip) ip)))
		(comma       () (list 'unquote (p (scan ip) ip)))
		(comma-at    () (list 'unquote-splicing (p (scan ip) ip)))
		(rparen      () (parse-error tok))
		(dot         () (parse-error tok))
		(eof         () (parse-error tok))
		(else (error 'parse "sanity-check: Bad token ~s" tok))))))
	(lambda (ip)
	  (p (scan ip) ip))))))