bsl/parser.rkt
#lang racket

#|

File: parser.rkt
Author: Bill Turtle (wrturtle)

This is the parser module for the Pyret BSL language.

|#

(require parser-tools/lex
         parser-tools/yacc
         "lexer.rkt")

(provide/contract
  [program-parser (-> (-> position-token?) (listof syntax?))]
  [repl-parser (-> (-> position-token?) syntax?)])

(define parser-source-name (make-parameter #f))
(provide parser-source-name)

;; -----------------------------------------------------------------------------
;; Our own error structure (again)

(define-struct (exn:fail:pyret:parser 
                exn:fail)
  (a-srcloc)
  #:property prop:exn:srclocs
  (lambda (a-struct)
    (match a-struct
      [(struct exn:fail:pyret:parser 
         (msg marks a-srcloc))
       (list a-srcloc)])))

(define (start-and-end->srcloc start end)
  (let ([sn (if (parser-source-name)
                (parser-source-name)
                #f)])
    (srcloc sn
            (position-line start)
            (position-col start)
            (position-offset start) 
            (- (position-offset end) (position-offset start)))))

(define (pyret-parser-error message sl)
  (raise
   (make-exn:fail:pyret:parser
    message
    (current-continuation-marks)
    sl)))

;; -----------------------------------------------------------------------------
;; Error messages

(define MSG-ERROR-PARSER-UNEXPECTED-EOF
  (string-append "reached the end of the program, but was still in the middle of a "
                 "definition or expr"))
(define (MSG-ERROR-PARSER-UNEXPECTED-NUMBER num)
  (format "found ~a, but was not expecting a number" num))
(define MSG-ERROR-PARSER-UNEXPECTED-SEMI
  (string-append "found a semi-colon, but was not expecting one"))
(define MSG-ERROR-PARSER-UNEXPECTED-IMAGE
  (string-append "found an image, but was not expecting one"))
(define MSG-ERROR-PARSER-UNEXPECTED-STRING
  (string-append "found a string, but was not expecting one"))
(define MSG-ERROR-PARSER-UNEXPECTED-COMMA
  "found a comma, but was not expecting one")
(define MSG-ERROR-PARSER-UNEXPECTED-OB
  "found a left-bracket, but was not expecting one")
(define MSG-ERROR-PARSER-UNEXPECTED-CB
  "found a right-bracket, but was not expecting one")

(define (produce-readable-error-msg tok-name tok-val)
  (case tok-name
    [(EOF) MSG-ERROR-PARSER-UNEXPECTED-EOF]
    [(NUMBER) (MSG-ERROR-PARSER-UNEXPECTED-NUMBER tok-val)]
    [(SEMI) MSG-ERROR-PARSER-UNEXPECTED-SEMI]
    [(IMAGE) MSG-ERROR-PARSER-UNEXPECTED-IMAGE]
    [(STRING) MSG-ERROR-PARSER-UNEXPECTED-STRING]
    [(COMMA) MSG-ERROR-PARSER-UNEXPECTED-COMMA]
    [(OB) MSG-ERROR-PARSER-UNEXPECTED-OB]
    [(CB) MSG-ERROR-PARSER-UNEXPECTED-CB]
    [else (format "unexpected token: ~S" tok-name)]))

;; -----------------------------------------------------------------------------
;; The program-parser
              
(define (make-srcloc-list start-pos end-pos)
  (let ([sn (if (parser-source-name)
                (parser-source-name)
                #f)])
  (list sn
        (position-line start-pos)
        (position-col start-pos)
        (position-offset start-pos)
        (add1 (- (position-offset end-pos) (position-offset start-pos))))))

(define program-parser
  (parser
   (tokens value-tokens op-tokens keyword-tokens)
   (start file_input)
   (end EOF)
   (src-pos)
   #;(suppress)
   
   (error
      (lambda (tok-ok? tok-name tok-val start-pos end-pos)
        (raise-syntax-error 'pyret/bsl
                            (if tok-ok?
                              (produce-readable-error-msg tok-name tok-val)
                              (format "invalid token: ~S" tok-name))
                            (datum->syntax #f 
                                           tok-val 
                                           (make-srcloc-list 
                                             start-pos end-pos)))))
    (precs
      (left + -)
      (left * / %)
      (right **)
      #;(left < <= = != >= >)
      (left DOT)
      (left OR-KW)
      (left AND-KW)
      (right NOT-KW)
    )
    
    (grammar
     ;; <program> ::= <newline-or-def-or-expr>*
     (file_input
      [(newline-or-def-or-expr-star) $1])
     
     ;; <newline-or-def-or-expr> ::= <def-or-expr>
     ;;                           |  <newline>
     (newline-or-def-or-expr-star
      [() empty]
      [(newline newline-or-def-or-expr-star) $2]
      [(def-or-expr newline-or-def-or-expr-star)
       (cons $1 $2)])
     
     ;; <def-or-expr> ::= <definition>
     ;;                |  <expr>
     ;;                |  <test-case>
     ;;                |  <big-bang>
     (def-or-expr
      [(definition) $1]
      [(expr) $1]
      [(test-case) $1]
      [(big-bang) $1])

     ;; <definition> ::= <def>
     ;;               |  <fun>
     ;;               |  struct: <id> (<id> {, <id>}*) <newline>
     (definition
       [(fun) $1]
       [(def) $1]
       [(STRUCT-KW COLON id OP id-list CP newline)
        (datum->syntax #f
                       (list (datum->syntax #f
                                            'struct
                                            (make-srcloc-list $1-start-pos $1-end-pos))
                             $3
                             $5)
                       (make-srcloc-list $1-start-pos $6-end-pos))])
     
     ;; <def> ::= def <id>: <expr>
     (def
       [(DEF-KW IDENTIFIER COLON expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           'def
                                           (make-srcloc-list $1-start-pos $1-end-pos))
                            (datum->syntax #f
                                           $2
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $4)
                      (make-srcloc-list $1-start-pos $4-end-pos))])
     ;; <fun> ::= fun <id>(<id-list>): <test-expr> <newline>
     ;;        |  fun <id>(<id-list>): <newline> <local-defs>* <expr>
     (fun
      [(FUN-KW id OP id-list CP COLON test-expr newline)
       (datum->syntax #f
                      (list (datum->syntax #f 'fun (make-srcloc-list $1-start-pos $1-end-pos))
                            $2
                            $4
                            $7)
                      (make-srcloc-list $1-start-pos $7-end-pos))]
      [(FUN-KW id OP id-list CP COLON newline local-def-star expr)
       (datum->syntax #f
                      (cons (datum->syntax #f 'fun (make-srcloc-list $1-start-pos $1-end-pos))
                            (cons $2
                                  (cons $4
                                        (append $8 (list $9)))))
                      (make-srcloc-list $1-start-pos $9-start-pos))])
                            
     
     ;; <local-def> ::= <def>
     ;;              |  <fun>
     (local-def
      [(def) $1]
      [(fun) $1])
     (local-def-plus
      [(local-def local-def-star) (cons $1 $2)])
     (local-def-star
      [() empty]
      [(local-def local-def-star) (cons $1 $2)])
     
     ;; We don't want structures or functions to have zero arguments, however
     ;; this definition will parse such a definition. Therefore, it will be up
     ;; to the definition macros to check for such an error. We could throw an
     ;; error in either location -- however we have to change some definition
     ;; macros as we progress through the language levels anyway, so we might as
     ;; well keep the parser the same and just change one more small thing
     ;; about the macro.
     (id-list
      [() empty]
      [(id comma-id-list) (cons $1 $2)])
     (comma-id-list
      [() empty]
      [(COMMA id comma-id-list) (cons $2 $3)])
     
     ;; <expr> ::= <test-expr> newline
     ;;         |  <compound-expr>
     (expr
      [(test-expr newline) $1]
      [(compound-expr) $1]) ;; compound expressions always end with
                            ;; <expr> so there is no need to for the <newline>
     
     
     ;; <test-expr> ::= <simple-expr>
     ;;              |  <boolean-comparison>
     ;;              |  <test-expr> and <test-expr>
     ;;              |  <test-expr> or <test-expr>
     ;;              |  not <test-expr>
     (test-expr
      [(simple-expr) $1]
      [(boolean-comparison) $1]
      [(test-expr AND-KW test-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           'and
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(test-expr OR-KW test-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           'or
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(NOT-KW test-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           'not
                                           (make-srcloc-list $1-start-pos $1-end-pos))
                            $2)
                      (make-srcloc-list $1-start-pos $2-end-pos))])
     
     ;; <simple-expr> ::= <value-expr>
     ;;                |  + <simple-expr>
     ;;                |  - <simple-expr>
     ;;                |  <simple-expr> + <simple-expr>
     ;;                |  <simple-expr> - <simple-expr>
     ;;                |  <simple-expr> * <simple-expr>
     ;;                |  <simple-expr> / <simple-expr>
     ;;                |  <simple-expr> % <simple-expr>
     ;;                |  <simple-expr> ** <simple-expr>
     ;;                |  <value-expr>[<simple-expr>]
     ;;                |  <value-expr>[<simple-expr>:<simple-expr>]
     ;;                |  <value-expr>[<simple-expr>:<simple-expr>:<simple-expr>]
     ;;                |  <template-expr>
     (simple-expr
      [(value-expr) $1]
      
      [(+ simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '+
                                           (make-srcloc-list $1-start-pos $1-end-pos))
                            $2)
                      (make-srcloc-list $1-start-pos $2-end-pos))]
      [(- simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '-
                                           (make-srcloc-list $1-start-pos $1-end-pos))
                            $2)
                      (make-srcloc-list $1-start-pos $2-end-pos))]
      [(simple-expr + simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '+
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $2-end-pos))]
      [(simple-expr - simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '-
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $2-end-pos))]
      [(simple-expr * simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '*
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(simple-expr / simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '/
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(simple-expr % simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '%
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(simple-expr ** simple-expr)
       (datum->syntax #f
                      (list (datum->syntax #f
                                           '**
                                           (make-srcloc-list $2-start-pos $2-end-pos))
                            $1
                            $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
     
      [(simple-expr DOT id)
       (datum->syntax #f
                      (list
                       (datum->syntax #f 'lookup (make-srcloc-list $2-start-pos $2-end-pos))
                       $1
                       $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      ;; indexing
      [(value-expr OB simple-expr CB)
       (datum->syntax #f
                      (list
                       (datum->syntax #f 'pyret-ref (make-srcloc-list $1-start-pos $4-end-pos))
                       $1
                       $3)
                      (make-srcloc-list $1-start-pos $4-end-pos))]
      
      ;; slicing, part 1
      [(value-expr OB simple-expr COLON simple-expr CB)
       (datum->syntax #f
                      (list
                       (datum->syntax #f 'pyret-slice (make-srcloc-list $1-start-pos $6-end-pos))
                       $1
                       $3
                       $5))]
      
      ;; slicing, part 2
      [(value-expr OB simple-expr COLON simple-expr COLON simple-expr CB)
       (datum->syntax #f
                      (list
                       (datum->syntax #f 'pyret-slice (make-srcloc-list $1-start-pos $8-end-pos))
                       $1
                       $3
                       $5
                       $7))]
      ;; templates
      [(template-expr) $1]
      
      ;; These are just here so that we can generate better error messages
      [(+) 
       (datum->syntax #f
                      (list 
                       (datum->syntax #f 
                                      '+ 
                                      (make-srcloc-list $1-start-pos $1-end-pos)))
                      (make-srcloc-list $1-start-pos $1-end-pos))]
      [(-) 
       (datum->syntax #f
                      (list
                       (datum->syntax #f 
                                      '- 
                                      (make-srcloc-list $1-start-pos $1-end-pos)))
                      (make-srcloc-list $1-start-pos $1-end-pos))]
      [(*)
       (datum->syntax #f
                      (list
                       (datum->syntax #f
                                      '*
                                      (make-srcloc-list $1-start-pos $1-end-pos)))
                      (make-srcloc-list $1-start-pos $1-end-pos))]
      [(/)
       (datum->syntax #f
                      (list
                       (datum->syntax #f
                                      '/
                                      (make-srcloc-list $1-start-pos $1-end-pos)))
                      (make-srcloc-list $1-start-pos $1-end-pos))]
      [(%)
       (datum->syntax #f
                      (list
                       (datum->syntax #f
                                      '%
                                      (make-srcloc-list $1-start-pos $1-end-pos)))
                      (make-srcloc-list $1-start-pos $1-end-pos))]
      [(**)
       (datum->syntax #f
                      (list
                       (datum->syntax #f
                                      '**
                                      (make-srcloc-list $1-start-pos $1-end-pos)))
                      (make-srcloc-list $1-start-pos $1-end-pos))]
     )
     
     ;; <template-expr> ::= ..
     ;;                  |  ...
     ;;                  |  ....
     ;;                  |  .....
     ;;                  |  ......
     (template-expr
      [(TWO-DOTS)
       (datum->syntax #f '.. (make-srcloc-list $1-start-pos $1-end-pos))]
      [(THREE-DOTS)
       (datum->syntax #f '... (make-srcloc-list $1-start-pos $1-end-pos))]
      [(FOUR-DOTS)
       (datum->syntax #f '.... (make-srcloc-list $1-start-pos $1-end-pos))]
      [(FIVE-DOTS)
       (datum->syntax #f '..... (make-srcloc-list $1-start-pos $1-end-pos))]
      [(SIX-DOTS)
       (datum->syntax #f '...... (make-srcloc-list $1-start-pos $1-end-pos))])
     
     ;;  <value-expr> ::= empty
     ;;                |  True
     ;;                |  False
     ;;                |  <id>
     ;;                |  <number>
     ;;                |  <image>
     ;;                |  <string>
     ;;                |  <list-expr>
     ;;                |  (<test-expr>)
     ;;                |  <id> (<arglist>)
     (value-expr
       [(EMPTY-KW)
        (datum->syntax #f
                       'empty
                       (make-srcloc-list $1-start-pos $1-end-pos))]
      [(TRUE-KW)
       (datum->syntax #f
                      'True
                      (make-srcloc-list $1-start-pos $1-end-pos))]
      [(FALSE-KW)
        (datum->syntax #f
                       'False
                       (make-srcloc-list $1-start-pos $1-end-pos))]
      [(id)
       $1]
      [(NUMBER) (datum->syntax #f
                               $1
                               (make-srcloc-list $1-start-pos $1-end-pos))]
      [(IMAGE) (datum->syntax #f
                              $1
                              (make-srcloc-list $1-start-pos $1-end-pos))]
      [(STRING) (datum->syntax #f
                               $1
                               (make-srcloc-list $1-start-pos $1-end-pos))]
      [(list-expr) $1]
      [(OP test-expr CP)
       (datum->syntax #f
                      $2
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(id OP list-of-exprs CP)
       (datum->syntax #f
                      (cons $1 $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
     )
     
     ;; <list-expr> ::= [ <simple-expr> {, <simple-expr>}* ]
     ;;              |  [ ]
     (list-expr
      [(OB list-of-exprs CB)
       (datum->syntax #f
                      (cons (datum->syntax #f
                                           'list
                                           (make-srcloc-list $1-start-pos $3-end-pos))
                            $2)
                      (make-srcloc-list $1-start-pos $3-end-pos))])
     
     ;; <simple-expr> {, <simple-expr>}*
     (list-of-exprs
      [() empty]
      [(simple-expr comma-simple-expr-star) (cons $1 $2)])
     (comma-simple-expr-star
      [() empty]
      [(COMMA simple-expr comma-simple-expr-star)
       (cons $2 $3)])
     
     ;; <id>
     (id
      [(IDENTIFIER)
       (datum->syntax #f
                      $1
                      (make-srcloc-list $1-start-pos $1-end-pos))])
     
     ;; <boolean-comparison> ::= <simple-expr> <boolean-comp-op> <simple-expr>
     ;;                       |  <simple-expr> <boolean-comp-op> <simple-expr> {<boolean-comp-op> <simple-expr>}+
     (boolean-comparison
      [(simple-expr boolean-comp-op simple-expr)
       (datum->syntax #f
                      (list $2 $1 $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))]
      [(simple-expr boolean-comp-op simple-expr boolean-comp-op-simple-expr-plus)
       (datum->syntax #f
                      (cons (datum->syntax #f
                                           'and
                                           (make-srcloc-list $1-start-pos $4-end-pos))
                            (cons (list $2 $1 $3)
                                  (cons (list (car (car $4)) $3 (car (cdr (car $4))))
                                        (cdr $4))))
                      (make-srcloc-list $1-start-pos $4-end-pos))])
     (boolean-comp-op-simple-expr-plus
      [(boolean-comp-op simple-expr)
       (list (list $1 $2))]
      [(boolean-comp-op simple-expr boolean-comp-op-simple-expr-plus)
       (cons (list $1 $2)
             (cons (list (car (car $3)) $2 (car (cdr (car $3))))
                   (cdr $3)))])
     (boolean-comp-op
      [(<) (datum->syntax #f '< (make-srcloc-list $1-start-pos $1-end-pos))]
      [(<=) (datum->syntax #f '<= (make-srcloc-list $1-start-pos $1-end-pos))]
      [(=) (datum->syntax #f '= (make-srcloc-list $1-start-pos $1-end-pos))]
      [(!=) (datum->syntax #f '!= (make-srcloc-list $1-start-pos $1-end-pos))]
      [(>=) (datum->syntax #f '>= (make-srcloc-list $1-start-pos $1-end-pos))]
      [(>) (datum->syntax #f '> (make-srcloc-list $1-start-pos $1-end-pos))]
      ;; not really a boolean comparison operator, but it is treated as such in
      ;; python
      [(IN-KW) (datum->syntax #f 'pyret-in (make-srcloc-list $1-start-pos $1-end-pos))]
      [(NOT-KW IN-KW) (datum->syntax #f 'pyret-not-in (make-srcloc-list $1-start-pos $1-end-pos))])

;; -----------------------------------------------------------------------------
;; Compound expressions, conditional
     
     ;; <compound-expr> ::= <conditional>
     (compound-expr
      [(conditional) $1])
     
     (maybe-newline
      [() empty]
      [(newline) empty])
     
     ;; <conditional>   ::= <if-expr> <else-expr>
     ;;                  |  <if-expr> <elif-expr>+ <if-end-marker>
     ;; <if-end-marker> ::= <else-expr>
     ;;                  |  :done
     (conditional
      [(if-expr else-expr)
       (datum->syntax #f
                      (append $1 (list $2))
                      (make-srcloc-list $1-start-pos $2-end-pos))]
      [(if-expr elif-expr-plus if-end-marker)
       (datum->syntax #f
                      (append $1 $2 $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))])
     (if-end-marker
      [(else-expr) (list $1)]
      [(DONE-KW) (list (datum->syntax #f
                                      'done
                                      (make-srcloc-list $1-start-pos $1-end-pos)))])
     ;; <if-expr> ::= if <test-expr>: <newline>? <expr>
     (if-expr
      [(IF-KW test-expr COLON maybe-newline expr)
       (list (datum->syntax #f
                            'if
                            (make-srcloc-list $1-start-pos $1-end-pos))
             $2
             $5)])
     ;; <elif-expr> :: elif <test-expr>: <newline>? <expr>
     (elif-expr
      [(ELIF-KW test-expr COLON maybe-newline expr)
       (list (datum->syntax #f
                            'elif
                            (make-srcloc-list $1-start-pos $1-end-pos))
             $2
             $5)])
     (elif-expr-plus
      [(elif-expr elif-expr-star)
       (cons $1 $2)])
     (elif-expr-star
      [() empty]
      [(elif-expr elif-expr-star)
       (cons $1 $2)])
     ;; <else-expr> :: else: <newline>? <expr>
     (else-expr
      [(ELSE-KW COLON maybe-newline expr)
       (list (datum->syntax #f
                            'else
                            (make-srcloc-list $1-start-pos $1-end-pos))
             $4)])
     
;; -----------------------------------------------------------------------------
;; Test cases
     
     ;; <test-case> ::= test: <test-expr> is: <test-expr>
     (test-case
       [(TEST-KW COLON test-expr TEST-IS-KW COLON test-expr newline)
        (datum->syntax #f
                       (list 
                        (datum->syntax #f 'test (make-srcloc-list $1-start-pos $1-end-pos))
                        $3
                        (datum->syntax #f 'is (make-srcloc-list $4-start-pos $4-end-pos))
                        $6)
                       (make-srcloc-list $1-start-pos $6-end-pos))])
     
;; -----------------------------------------------------------------------------
;; BIG BANG
     
     ;; <big-bang> ::= big_bang(<big-bang-clause>*) <newline>
     (big-bang
      [(BIG-BANG-KW OP big-bang-clause-star CP newline)
       (datum->syntax #f
                      (cons
                       (datum->syntax #f 'big-bang (make-srcloc-list $1-start-pos $1-end-pos))
                       $3)
                      (make-srcloc-list $1-start-pos $4-end-pos))])
     (big-bang-clause-star
      [() empty]
      [(id-equals-test-expr comma-big-bang-clause-star)
       (datum->syntax #f
                      (cons $1 $2))])
     (id-equals-test-expr
      [(id = test-expr)
       (datum->syntax #f
                      (list $1 $3)
                      (make-srcloc-list $1-start-pos $3-end-pos))])
     (comma-big-bang-clause-star
      [() empty]
      [(COMMA id-equals-test-expr comma-big-bang-clause-star)
       (datum->syntax #f
                      (cons $2 $3)
                      (make-srcloc-list $2-start-pos $3-end-pos))])
    )
   )
)

(define (repl-parser ip)
  (let ([result (program-parser ip)])
    (if (empty? result)
        result
        (car result))))