lexing.ss
(module lexing mzscheme
  (provide (all-defined))
  
  (require (lib "lex.ss" "parser-tools")
           (prefix : (lib "lex-sre.ss" "parser-tools"))
           (lib "etc.ss")
           (lib "string.ss"))
  
  (define-lex-abbrevs
    (delimiter "$")
    (non-delimiter (:+ (:or (:& any-char (complement "$"))
                            "\\$")))
    
    (lparen "(")
    (rparen ")")
    
    (identifier (:+ (:or (:+ alphabetic)
                         (:/ "0" "9"))))
    
    (quoted-string (:: "\""  
                       (:* string-element)
                       "\""))
    
    [string-element (:or (:~ "\"" "\\")
                         "\\\""
                         "\\\\"
                         "\\a"
                         "\\b"
                         "\\t"
                         "\\n"
                         "\\v"
                         "\\f"
                         "\\r"
                         "\\e"
                         "\\'")])
  
  ;; interpret-string-elements: string -> string
  ;; Turns the escape character sequences into
  ;; their respective escape characters.
  (define (interpret-string-elements s)
    (define lookup-table
      (hash-table (#\" #\")
                  (#\\ #\\)
                  (#\a (integer->char 7))
                  (#\b #\backspace)
                  (#\t #\tab)
                  (#\n #\newline)
                  (#\v #\vtab)
                  (#\f (integer->char 12))
                  (#\r #\return)
                  (#\e (integer->char 27))
                  (#\' #\')))
    (let loop ([elts '()]
               [i 0])
      (define (interpret-char ch)
        (cond [(char=? #\\ (string-ref s i))
               (cond [(< (add1 i) (string-length s))
                      (loop (cons (hash-table-get lookup-table
                                                  (string-ref s (add1 i)))
                                  elts)
                            (+ i 2))]
                     [else
                      (error 'interpret-char "unbalanced escape char")])]
              [else (loop (cons ch elts) (add1 i))]))
      (cond [(< i (string-length s))
             (interpret-char (string-ref s i))]
            [else
             (list->string (reverse elts))])))
  
  
  (define-tokens template-tokens
    (delimiter non-delimiter eof
               identifier semicolon equals string))
  
  
  ;; unquote-escapes: string -> string
  ;; Removes the escape character from any \\$'s that are in
  ;; plain, undelimited text.
  (define (unquote-escapes text)
    (regexp-replace* (regexp-quote "\\$") text "$"))
  
  
  ;; toplevel-lexer: input-port -> token
  (define toplevel-lexer
    (lexer-src-pos (delimiter (token-delimiter lexeme))
                   (non-delimiter (token-non-delimiter
                                   (unquote-escapes lexeme)))
                   ((eof) (token-eof lexeme))))
  
  
  ;; expression-lexer: input-port -> token
  (define expression-lexer
    (lexer-src-pos (identifier (token-identifier lexeme))
                   (delimiter (token-delimiter lexeme))
                   (";" (token-semicolon lexeme))
                   ("=" (token-equals lexeme))
                   (quoted-string
                    (token-string
                     (interpret-string-elements
                      (substring lexeme 1 (sub1 (string-length lexeme))))))
                   
                   (whitespace (return-without-pos
                                (expression-lexer input-port)))
                   ((eof) (token-eof lexeme)))))