#lang racket ; modern.scm (Scheme), 2008-01-03 ; ; NOTE: NOT READY FOR PRODUCTION USE. ; ; Implements "modern Lisp expressions", aka mod-expressions. ; These implement "curly infix" and term-prefixing rules. E.G.: ; [x y z] => (x y z) ; {3 + 4 + 5} => (+ 3 4 5) ; f(x) => (f x) ; f{x + 3} => (f (+ x 3) ; x[z] => (bracketaccess x z) ; ; Call "modern-read" to read a "modern Lisp expression", aka mod-expression. ; ; Copyright (C) 2008 by David A. Wheeler. ; ; NOTE: This would be really easy to implement in Scheme, except for one ; quirk: most Scheme implementations' "read" function CONSUMES [, ], {, and }, ; instead of treating them as delimiters like space, (, or ). ; This is even true when the Scheme standards don't permit such characters ; at all, such as at the end of a number. ; The only solution is to re-implement "read" in Scheme, but one that ; DOES treat them as delimiters. So a simple re-implemention is provided. ; If your Scheme _does_ treat these characters as delimiters, ; you can eliminate all of that re-implementation code, and just use your ; built-in "read" function (which probably has additional extensions that ; this simple reader does not). ; ; If you DO use an ordinary Scheme reader, there is a limitation: ; the vector notation #(...) could not contain modern notation. ; In code, just use vector(...) instead. The best solution, of course, ; is to build this into your Scheme reader. ; ; You _could_ in a pinch use a standard Scheme reader that didn't ; consider {} or [] as delimiters. But then closing characters } and ] ; must be PRECEDED by a delimiter like a space, and you CANNOT invoke ; prefixed [] and {} at all. ; Released under the "MIT license": ; Permission is hereby granted, free of charge, to any person obtaining a ; copy of this software and associated documentation files (the "Software"), ; to deal in the Software without restriction, including without limitation ; the rights to use, copy, modify, merge, publish, distribute, sublicense, ; and/or sell copies of the Software, and to permit persons to whom the ; Software is furnished to do so, subject to the following conditions: ; ; The above copyright notice and this permission notice shall be included ; in all copies or substantial portions of the Software. ; ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR ; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ; OTHER DEALINGS IN THE SOFTWARE. ; Configuration: (define modern-backwards-compatible #f) ; If true, "(" triggers old reader. (define modern-bracketaccess #t) ; If true, "f[...]" => [bracketaccess f ...] ; if not, "f[...]" => [f ...]. ; Preserve original read. (define old-read read) ; A few useful utilities: (define (ismember? item lyst) ; Returns true if item is member of lyst, else false. (pair? (member item lyst))) (define (debug-result marker value) ; For debugging - you can insert this without adding let, etc., because ; after printing it returns the original value. (newline) (display "DEBUG: ") (display marker) (display " ") (write value) (newline) value) ; Define the tab character; a tab is immediately after the backslash. ; Unfortunately, this seems to be the only portable way to define the ; tab character in Scheme, so we'll do it once (here) and use it elsewhere. (define tab #\ ) (define (my-is-whitespace c) (ismember? c `(#\space #\newline ,tab))) ; TODO: Possibly support other whitespace chars, e.g.: ; #\return ; (code-char 10) (code-char 11) ; LF, VT ; (code-char 12) (code-char 13))))) ; FF, CR ; If so, also modify the "delimiters" list above. (define (skip-whitespace port) ; Consume whitespace. (cond ((my-is-whitespace (peek-char port)) (read-char port) (skip-whitespace port)))) ; Unfortunately, since most Scheme readers will consume [, {, }, and ], ; we have to re-implement our own Scheme reader. Ugh. ; If you fix your Scheme's "read" so that [, {, }, and ] are considered ; delimiters (and thus not consumed when reading symbols, numbers, etc.), ; you can just call old-read instead of using underlying-read below, ; with the limitation noted above about vector constants #(...). ; We WILL call old-read on string reading (that DOES seem to work ; in common cases, and lets us use the implementation's string extensions). (define modern-delimiters `(#\space #\newline #\( #\) #\[ #\] #\{ #\} ,tab)) (define (read-until-delim port delims) ; Read characters until eof or "delims" is seen; do not consume them. ; Returns a list of chars. (let ((c (peek-char port))) (cond ((eof-object? c) '()) ((ismember? (peek-char port) delims) '()) (#t (cons (read-char port) (read-until-delim port delims)))))) (define (read-error message) (display "Error: ") (display message) '()) (define (read-number port starting-lyst) (string->number (list->string (append starting-lyst (read-until-delim port modern-delimiters))))) (define (process-char port) ; We've read #\ - returns what it represents. (cond ((eof-object? (peek-char port)) (peek-char port)) (#t ; Not EOF. Read in the next character, and start acting on it. (let ((c (read-char port)) (rest (read-until-delim port modern-delimiters))) (cond ((null? rest) c) ; only one char after #\ - so that's it! (#t (let ((rest-string (list->string (cons c rest)))) (cond ((string-ci=? rest-string "space") #\space) ((string-ci=? rest-string "newline") #\newline) ((string-ci=? rest-string "ht") tab) ; Scheme extension. ((string-ci=? rest-string "tab") tab) ; Scheme extension. (#t (read-error "Invalid character name")))))))))) (define (process-sharp port) ; We've peeked a # character. Returns what it represents. ; Note: Since we have to re-implement process-sharp anyway, ; the vector representation #(...) uses my-read-delimited-list, which in ; turn calls modern-read2. Thus, modern-expressions CAN be used inside ; a vector expression. (read-char port) ; Remove # (cond ((eof-object? (peek-char port)) (peek-char port)) ; If eof, return eof. (#t ; Not EOF. Read in the next character, and start acting on it. (let ((c (read-char port))) (cond ((char=? c #\t) #t) ((char=? c #\f) #f) ((ismember? c '(#\i #\e #\b #\o #\d #\x)) (read-number port (list #\# c))) ((char=? c #\( ) ; Vector. (list->vector (my-read-delimited-list #\) port))) ((char=? c #\\) (process-char port)) (#t (read-error "Invalid #-prefixed string"))))))) (define digits '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (define (process-period port) ; We've peeked a period character. Returns what it represents. (read-char port) ; Remove . (let ((c (peek-char port))) (cond ((eof-object? c) '|.|) ; period eof; return period. ((ismember? c digits) (read-number port (list #\.))) ; period digit - it's a number. (#t ; At this point, Scheme only requires support for "." or "...". ; As an extension we can support them all. (string->symbol (list->string (cons #\. (read-until-delim port modern-delimiters)))))))) (define (underlying-read port) ; This tiny reader implementation REQUIRES a port value. ; That way, while writing/modifying it, we ; won't forget to pass the port to it. ; Note: This reader is case-sensitive, which is consistent with R6RS ; and guile, but NOT with R5RS. Most people won't notice, and I ; _like_ case-sensitivity. (skip-whitespace port) (let ((c (peek-char port))) (cond ((eof-object? c) c) ((char=? c #\") ; old readers tend to handle strings okay, call it. (old-read port)) ; (guile 1.8 and gauche/gosh 1.8.11 are fine) ((ismember? c digits) ; Initial digit. (read-number port '())) ((char=? c #\#) (process-sharp port)) ((char=? c #\.) (process-period port)) ((or (char=? c #\+) (char=? c #\-)) ; Initial + or - (read-char port) (if (ismember? (peek-char port) digits) (read-number port (list c)) (string->symbol (list->string (cons c (read-until-delim port modern-delimiters)))))) ; We'll reimplement abbreviations, (, and ;. ; These actually should be done by modern-read (and thus ; we won't see them), but redoing it here doesn't cost us anything, ; and it makes some kinds of testing simpler. It also means that ; this function is a fully-usable Scheme reader, and thus perhaps ; useful for other purposes. ((char=? c #\') (read-char port) (list 'quote (underlying-read port))) ((char=? c #\`) (read-char port) (list 'quasiquote (underlying-read port))) ((char=? c #\,) (read-char port) (cond ((char=? #\@ (peek-char port)) (read-char port) (list 'unquote-splicing (underlying-read port))) (#t (list 'unquote (underlying-read port))))) ; The "(" calls modern-read, but since this one shouldn't normally ; be used anyway (modern-read will get first crack at it), it ; doesn't matter: ((char=? c #\( ) (read-char port) (my-read-delimited-list #\) port)) ((char=? c #\; ) (skip-line port) (underlying-read port)) ((char=? c #\| ) ; Scheme extension, |...| symbol (like Common Lisp) (read-char port) ; Skip | (let ((newsymbol (string->symbol (list->string (read-until-delim port '(#\|)))))) (read-char port) newsymbol)) (#t ; Nothing else. Must be a symbol start. (string->symbol (list->string (read-until-delim port modern-delimiters))))))) ; End of Scheme reader re-implementation. ; Utility functions to implement the simple infix system: ; Return true if lyst has an even # of parameters, and the (alternating) first ; ones are "op". Used to determine if a longer lyst is infix. ; Otherwise it returns false. ; If passed empty list, returns true (so recursion works correctly). (define (even-and-op-prefix op lyst) (cond ((null? lyst) #t) ((not (pair? lyst)) #f) ; Not a list. ((not (eq? op (car lyst))) #f) ; fail - operators not all equal?. ((null? (cdr lyst)) #f) ; fail - odd # of parameters in lyst. (#t (even-and-op-prefix op (cddr lyst))))) ; recurse. ; Return True if the lyst is in simple infix format (and should be converted ; at read time). Else returns NIL. (define (simple-infix-listp lyst) (and (pair? lyst) ; Must have list; '() doesn't count. (pair? (cdr lyst)) ; Must have a second argument. (pair? (cddr lyst)) ; Must have a third argument (we check it ; this way for performance) (symbol? (cadr lyst)) ; 2nd parameter must be a symbol. (even-and-op-prefix (cadr lyst) (cdr lyst)))) ; even parameters equal?? ; Return alternating parameters in a lyst (1st, 3rd, 5th, etc.) (define (alternating-parameters lyst) (if (or (null? lyst) (null? (cdr lyst))) lyst (cons (car lyst) (alternating-parameters (cddr lyst))))) ; Transform a simple infix list - move the 2nd parameter into first position, ; followed by all the odd parameters. Thus (3 + 4 + 5) => (+ 3 4 5). (define (transform-simple-infix lyst) (cons (cadr lyst) (alternating-parameters lyst))) (define (process-curly lyst) (if (simple-infix-listp lyst) (transform-simple-infix lyst) ; Simple infix expression. (cons 'nfx lyst))) ; Non-simple; prepend "nfx" to the list. (define (my-read-delimited-list stop-char port) ; like read-delimited-list of Common Lisp, but calls modern-read instead. ; read the "inside" of a list until its matching stop-char, returning list. ; This implements a common extension: (. b) return b. ; That could be important for I-expressions, e.g., (. group) (skip-whitespace port) (let ((c (peek-char port))) (cond ((eof-object? c) (read-error "EOF in middle of list") c) ((char=? c stop-char) (read-char port) '()) ;( ((ismember? c '(#\) #\] #\})) (read-error "Bad closing character") c) (#t (let ((datum (modern-read2 port))) (cond ((eq? datum '|.|) (let ((datum2 (modern-read2 port))) (skip-whitespace port) (cond ((not (eqv? (peek-char port) stop-char)) (read-error "Bad closing character after . datum")) (#t (read-char port) datum2)))) (#t (cons datum (my-read-delimited-list stop-char port))))))))) (define (modern-process-tail port prefix) ; See if we've just finished reading a prefix, and if so, process. ; This recurses, to handle formats like f(x)(y). ; This implements prefixed (), [], and {} (if (not (or (symbol? prefix) (pair? prefix))) prefix ; Prefixes MUST be symbol or cons; return original value. (let ((c (peek-char port))) (cond ((eof-object? c) c) ((char=? c #\( ) ; ). Implement f(x). (read-char port) (modern-process-tail port ;( (cons prefix (my-read-delimited-list #\) port)))) ((char=? c #\[ ) ; Implement f[x] (read-char port) (modern-process-tail port (if modern-bracketaccess (cons 'bracketaccess (cons prefix (my-read-delimited-list #\] port))) (cons prefix (my-read-delimited-list #\] port))))) ((char=? c #\{ ) ; Implement f{x} (read-char port) (modern-process-tail port (list prefix (process-curly (my-read-delimited-list #\} port))))) (#t prefix))))) (define (skip-line port) ; Skip every character in the line - end on EOF or newline. (let ((c (peek-char port))) (cond ((not (or (eof-object? c) (char=? c #\newline))) (read-char port) (skip-line port))))) (define (modern-read2 port) ; Read using "modern Lisp notation". ; This implements unprefixed (), [], and {} (skip-whitespace port) (modern-process-tail port (let ((c (peek-char port))) ; (display "modern-read2 peeked at: ") ; (write c) (cond ; We need to directly implement abbreviations ', etc., so that ; we retain control over the reading process. ((eof-object? c) c) ((char=? c #\') (read-char port) (list 'quote (modern-read2 port))) ((char=? c #\`) (read-char port) (list 'quasiquote (modern-read2 port))) ((char=? c #\,) (read-char port) (cond ((char=? #\@ (peek-char port)) (read-char port) (list 'unquote-splicing (modern-read2 port))) (#t (list 'unquote (modern-read2 port))))) ((char=? c #\( ) ; ) (if modern-backwards-compatible (underlying-read port) (begin (read-char port) ; ( (my-read-delimited-list #\) port)))) ((char=? c #\[ ) (read-char port) (my-read-delimited-list #\] port)) ((char=? c #\{ ) (read-char port) (process-curly (my-read-delimited-list #\} port))) ((char=? c #\; ) ; Handle ";" directly, so we don't lose control. (skip-line port) (modern-read2 port)) (#t (let ((result (underlying-read port))) ; (display "DEBUG result = ") ; (write result) ; (display "\nDEBUG peek after= ") ; (write (peek-char port)) result)))))) (define (modern-read . port) (if (null? port) (modern-read2 (current-input-port)) (modern-read2 (car port)))) (define (modern-filter) (let ((result (modern-read (current-input-port)))) (if (eof-object? result) result (begin (write result) (newline) (modern-filter))))) (define (modern-load filename) (define (load port) (let ((inp (modern-read port))) (if (eof-object? inp) #t (begin (eval inp) (load port))))) (load (open-input-file filename))) (provide modern-read modern-load)