lang/parser.rkt
#lang racket/base
(provide poppl-read poppl-read-syntax)
(require parser-tools/lex parser-tools/yacc
         (prefix-in : parser-tools/lex-sre)
         rackunit
         syntax/readerr)

(define-tokens tokens (id num binop unop str Base-type))
(define-empty-tokens mt-tokens (when prompt 
                                 open-brace close-brace open-paren close-paren
                                 semi bars eoft
                                 Or
                                 if then else fi 
                                 dot n/a <-
                                 equal comma))

(define current-source (make-parameter #f))

(define lex
  (lexer-src-pos
   ["when" (token-when)]
   ["prompt" (token-prompt)]
   [(:or "Boolean" "Unknown" "Number" "N/A")
    (token-Base-type (string->symbol lexeme))]
   ["Or" (token-Or)]
   [(:or "known?" "unknown?" "n/a?" "not") (token-unop (string->symbol lexeme))]
   ["=" (token-equal)]
   ["<-" (token-<-)]
   [(:or #\< #\!) (token-binop (string->symbol lexeme))]
   ["or" (token-binop 'or)]
   ["if" (token-if)]
   ["n/a" (token-n/a)]
   ["then" (token-then)]
   ["else" (token-else)]
   ["fi" (token-fi)]
   [#\; (token-semi)]
   [#\{ (token-open-brace)]
   [#\} (token-close-brace)]
   [#\, (token-comma)]
   [#\( (token-open-paren)]
   [#\) (token-close-paren)]
   [#\. (token-dot)]
   [(:: (:/ #\a #\z #\A #\Z)
        (:* #\_ #\? (:/ #\a #\z #\A #\Z #\0 #\9)))
    (token-id (string->symbol lexeme))]
   [(:: #\= (:: #\= (:: #\= (:: #\= (:* #\=)))))
    (token-bars)]
   [(:+ (:/ #\0 #\9))
    (token-num (string->number lexeme))]
   [(:: #\" (:* (char-complement #\")) #\")
    (token-str (substring lexeme 1 (- (string-length lexeme) 1)))]
   [(:+ whitespace) (return-without-pos (lex input-port))]
   [(eof) (token-eoft)]
   [(char-complement (union)) 
    (raise-read-error (format "unexpected character ~a" lexeme)
                      (object-name input-port)
                      (position-line start-pos)
                      (position-col start-pos)
                      (position-offset start-pos)
                      (- (position-offset end-pos)
                         (position-offset start-pos)))]))

(define (str->toks str)
  (let ([p (open-input-string str)])
    (let loop ()
      (let ([next (lex p)])
        (cons (token-name (position-token-token next))
              (if (eq? 'eoft (token-name (position-token-token next)))
                  '()
                  (loop)))))))

(check-equal? (str->toks "when (x) { prompt(); }")
              '(when open-paren id close-paren open-brace prompt open-paren close-paren semi close-brace eoft))

(define parse
  (parser
   [grammar 
    (start [(decls bars stmts) (add-srcloc (append $1 $3) $1-start-pos $3-end-pos)])
    (decls [(decl semi decls) (cons (add-srcloc $1 $1-start-pos $1-end-pos) $3)]
           [(decl semi) (list (add-srcloc $1 $1-start-pos $1-end-pos))])
    (decl [(type id equal expr) `(decl ,$1 ,(add-srcloc $2 $2-start-pos $2-end-pos) ,$4)])
    (type [(Base-type) (add-srcloc $1 $1-start-pos $n-start-pos)]
          [(type Or type) (prec Or) (add-srcloc `(Or ,$1 ,$3) $1-start-pos $n-end-pos)])
    (stmt [(when open-paren expr close-paren stmt)
           (add-srcloc `(when ,$3 ,$5) $1-start-pos $n-end-pos)]
          [(prompt open-paren id comma id comma expr close-paren semi)
           (add-srcloc `(prompt ,$3 ,(add-srcloc $5 $5-start-pos $5-end-pos #t) ,$7)
                       $1-start-pos $n-end-pos)]
          [(if expr then stmt else stmt fi)
           (add-srcloc `(if ,$2 ,$4 ,$6) $1-start-pos $n-end-pos)]
          [(id <- expr semi)
           (add-srcloc `(bang! ,(add-srcloc $1 $1-start-pos $1-end-pos) ,$3)
                       $1-start-pos $n-end-pos)]
          [(open-brace stmts close-brace)
           (add-srcloc `(begin ,@$2) $1-start-pos $n-end-pos)])
    (stmts [(stmt stmts) (cons (add-srcloc $1 $1-start-pos $1-end-pos) $2)]
           [(stmt) (list (add-srcloc $1 $1-start-pos $1-end-pos))])
    (expr [(expr op expr) (prec binop) (add-srcloc `(,$2 ,$1 ,$3) $1-start-pos $n-end-pos)]
          [(if expr then expr else expr fi) (add-srcloc `(if ,$2 ,$4 ,$6) $1-start-pos $n-end-pos)]
          [(num) (add-srcloc $1 $1-start-pos $n-end-pos)]
          [(id) (add-srcloc $1 $1-start-pos $n-end-pos #t)]
          [(n/a) (add-srcloc 'n/a $1-start-pos $n-end-pos)]
          [(str) (add-srcloc $1 $1-start-pos $n-end-pos)]
          [(unop open-paren expr close-paren) (add-srcloc `(,(add-srcloc $1 $1-start-pos $n-end-pos) ,$3) $1-start-pos $n-end-pos)]
          [(open-paren expr close-paren) (add-srcloc $2 $1-start-pos $n-end-pos)])
    (op [(equal) '=]
        [(binop) $1])]
   [precs (right binop) (right Or)]
   [tokens mt-tokens tokens]
   [src-pos]
   [start start]
   [end eoft]
   [error 
    (lambda (tok-ok? tok-name tok-value start-pos end-pos)
      (raise-syntax-error 
       'parse-error 
       (format "~s" (if tok-ok?
                        tok-value
                        'unknown))
       (add-srcloc (if tok-ok?
                       tok-value
                       'unknown)
                   start-pos
                   end-pos)))]))

(define (add-srcloc stuff start-pos end-pos [id? #f])
  (cond
    [id?
     (define str (symbol->string stuff))
     (define prt (open-input-string str))
     (port-count-lines! prt)
     (set-port-next-location! prt 
                              (position-line start-pos)
                              (position-col start-pos)
                              (position-offset start-pos))
     (read-syntax (current-source) prt)]
    [else
     (datum->syntax #f stuff (locs->vec start-pos end-pos))]))

(define (locs->vec start-pos end-pos)
  (vector
   (current-source)
   (position-line start-pos)
   (position-col start-pos)
   (position-offset start-pos)
   (- (position-offset end-pos)
      (position-offset start-pos))))

(define (run-p src p)
  (parameterize ([current-source src])
    (parse (λ () (lex p)))))

(define (poppl-read [port (current-input-port)])
  (syntax->datum (run-p #f port)))

(define (poppl-read-syntax [name #f] [port (current-input-port)])
  (run-p (or name (object-name port))
         port))