bsl/parse.rkt
#lang racket

;; file: parse.rkt
;; author: Bill Turtle
;;
;; parses a Python expression, and converts it to "parenthesized Python"
;; which is somewhere in between Python and Racket. Arithmetic in the
;; intermediate form looks exactly the same as it would in Racket.
;;

(require parser-tools/lex
         parser-tools/yacc
         "form-1.rkt"
         "lex.rkt")
(provide current-source-name program-parser)

#;(print-only-errors true)

(define current-source-name (make-parameter #f))

;; make-srcloc -: position : position -> list
;; Generate the list needed as the third argument to
;; datum->syntax
(define (make-srcloc start-pos end-pos)
  (list (current-source-name)
        (position-line start-pos)
        (position-col start-pos)
        (position-offset start-pos)
        (- (position-offset end-pos) (position-offset start-pos))))

;; macro to build syntax object
;; If our parser has a clause such as:
;; (foo
;;  [(FOO BAR BAZ) ...])
;; then we can get the start position of FOO with $1-start-pos,
;; it's end position with $1-end-pos, and likewise, BAR's start
;; position with $2-start-pos...
;; This very unhygenic macro keeps us from having to write that
;; long phrase over and over and over...
(define-syntax (b-syn stx)
  (syntax-case stx ()
    [(_ ctxt v start end)
     (with-syntax
         ([start-pos-syn (datum->syntax #'start
                                        (string->symbol
                                         (format "$~a-start-pos" 
                                                 (syntax->datum #'start))))]
          [end-pos-syn (datum->syntax #'end
                                      (string->symbol
                                       (format "$~a-end-pos" 
                                               (syntax->datum #'end))))])
       #'(datum->syntax #f
                        v
                        (list (current-source-name)
                              (position-line start-pos-syn)
                              (position-col start-pos-syn)
                              (position-offset start-pos-syn)
                              (- (position-offset end-pos-syn) (position-offset start-pos-syn)))))]))

;; program-parser : (thunk -> stage-1-parse)
;;
;; Note: the thunk is expected to return src-pos structures
;; TODO: semi-colons, doc strings, structs, local inside of def
(define program-parser
  (parser
   (start program)
   (end EOF)
   (tokens value-tokens keyword-tokens op-tokens)
   (src-pos) ; lexer was created with lexer-src-pos
   #;(debug "parse.rkt.debug")
   (suppress) ; currently, we have 6 shift/reduce errors
   ; when support for multiple programs in a file
   ; is turned on. They don't seem to be causing any
   ; interpretation errors, so I'm ignoring them for
   ; now.
   (error
    (lambda (tok-ok? tok-name tok-val start-pos end-pos)
      (raise-syntax-error 'pyret/bsl
                          (if tok-ok?
                              (format "unexpected token: ~S" tok-name)
                              (format "invalid token: ~S" tok-name))
                          (datum->syntax false tok-val (make-srcloc start-pos end-pos)))))
   (precs
    (nonassoc SEMI)
    (nonassoc RETURN)
    (left IF) ; precedence for the if-statement
    (right =)
    (left OR)
    (left AND)
    (right NOT)
    (left == IS < > LESS-EQUAL GREATER-EQUAL)
    (left +)
    (left / *)
    (left %)
    (right **)
    (left -) ; this is the unary '-'
    )
   (grammar
    (program
     [(def-or-expr) $1]
     [(def-or-expr def-or-expr program) 
        (stx-begin (cons $1 (cons $2 (list $3))))]
     [() (stx-eof)])
    
    (def-or-expr
      [(definition) $1]
      [(expr) $1])
    
    (definition
      [(binding) $1]
      [(structdef) $1]
      [(fundef) $1]
      [(procdef) $1])
    
    (binding
     [(IDENTIFIER = expr)
      (stx-binding (stx-id (b-syn false $1 1 1))
                   (b-syn false '= 2 2)
                   $3)])

    (structdef
      [(STRUCT IDENTIFIER args) ; e.g. "struct point (x,y)"
       (stx-struct (b-syn false 'struct 1 1)
                   (stx-id (b-syn false $2 2 2))
                   $3)])
    
    (fundef
     [(FUN IDENTIFIER args COLON expr)
      (stx-fun (b-syn false 'fun 1 1)
               (stx-id (b-syn false $2 2 2))
               $3
               (b-syn false ': 4 4)
               $5)]
     #;[(FUN IDENTIFIER args COLON let-expr)
      (stx-fun (b-syn false 'fun 1 1)
               (stx-id (b-syn false $2 2 2))
               $3
               (b-syn false ': 4 4)
               $5)]
     #;[(FUN IDENTIFIER args COLON where-expr)
      (stx-fun (b-syn false 'fun 1 1)
               (stx-id (b-syn false $2 2 2))
               $3
               (b-syn false ': 4 4)
               $5)])
    
    (procdef ; no procedures at BSL, but still allowed for
     ; Python users' convenience
     [(DEF IDENTIFIER args COLON expr)
      (stx-def (b-syn false 'def 1 1)
               (stx-id (b-syn false $2 2 2))
               $3
               (b-syn false ': 4 4)
               $5)])
    
    (args
     [(OP CP) empty]
     [(OP expr expr-list CP)
      (cons $2 $3)])
    
    (expr-list
     [() empty]
     [(COMMA expr expr-list)
      (cons $2 $3)])
    
    (application-expr
     [(IDENTIFIER args)
      (stx-app (stx-id (b-syn false $1 1 1))
               $2)])
      
    (negation-expr
     [(- expr)
      (stx-neg (b-syn false '- 1 1) $2)])
    
    (arithmetic-expr
     [(negation-expr) $1]
     [(expr ** expr) 
      (stx-binop (b-syn false 'expt 2 2) $1 $3)]
     [(expr + expr) 
      (stx-binop (b-syn false '+ 2 2) $1 $3)]
     [(expr - expr) (prec +)
                    (stx-binop (b-syn false '- 2 2) $1 $3)]
     [(expr * expr) 
      (stx-binop (b-syn false '* 2 2) $1 $3)]
     [(expr / expr) 
      (stx-binop (b-syn false '/ 2 2) $1 $3)]
     [(expr % expr) 
      (stx-binop (b-syn false 'modulo 2 2) $1 $3)])
    
    (boolean-expr
     [(expr == expr) 
      (stx-binop (b-syn false '= 2 2) $1 $3)]
     [(expr IS expr)
      (stx-binop (b-syn false '= 2 2) $1 $3)]
     [(expr < expr) 
      (stx-binop (b-syn false '< 2 2) $1 $3)]
     [(expr > expr) 
      (stx-binop (b-syn false '> 2 2) $1 $3)]
     [(expr LESS-EQUAL expr)
      (stx-binop (b-syn false '<= 2 2) $1 $3)]
     [(expr GREATER-EQUAL expr)
      (stx-binop (b-syn false '>= 2 2) $1 $3)]
     [(expr AND expr)
      (stx-binop (b-syn false 'and 2 2) $1 $3)]
     [(expr OR expr)
      (stx-binop (b-syn false 'or 2 2) $1 $3)]
     [(NOT expr)
      (stx-not (b-syn false 'not 1 1) $2)])
    
    (conditional-expr
     [(IF expr COLON expr ELSE COLON expr) (prec IF)
                                           (stx-if-else (stx-if (b-syn false 'if 1 1)
                                                                $2
                                                                (b-syn false ': 3 3)
                                                                $4)
                                                        (stx-else (b-syn false 'else 5 5)
                                                                  (b-syn false ': 6 6)
                                                                  $7))]
     [(IF expr COLON expr elif+ ELSE COLON expr) (prec IF)
                                                 (stx-cond (stx-if (b-syn false 'if 1 1)
                                                                   $2
                                                                   (b-syn false ': 3 3)
                                                                   $4)
                                                           $5
                                                           (stx-else (b-syn false 'else 6 6)
                                                                     (b-syn false ': 7 7)
                                                                     $8))])
     
     (elif
      [(ELIF expr COLON expr)
       (stx-elif (b-syn false 'elif 1 1)
                 $2
                 (b-syn false ': 3 3)
                 $4)])
     (elif*
      [( ) empty]
      [(elif elif*)
       (cons $1 $2)])
     (elif+
      [(elif elif*)
       (cons $1 $2)])
     
     (expr-comma*
      [( ) empty]
      [( expr ) `(,$1)]
      [( expr COMMA expr-comma* )
       (cons $1 $3)])
     
     (listmaker
      [(OB expr-comma* CB)
       (stx-lst $2)])
     
     (struct-access
      [(IDENTIFIER PERIOD IDENTIFIER)
       (stx-app (stx-id (b-syn false $1 1 1))
                (list (stx-app (stx-id (b-syn false 'quote 3 3))
                               (list (stx-id (b-syn false $3 3 3))))))])
                               
     (expr
      [(IDENTIFIER) (stx-id (b-syn false $1 1 1))]
      [(NUMBER) (stx-num (b-syn false $1 1 1))]
      [(STRING) (stx-str (b-syn false $1 1 1))]
      [(CHAR) (stx-char (b-syn false $1 1 1))]
      [(RETURN expr) (stx-return (b-syn false 'return 1 1) $2)]
      [(struct-access) $1]
      [(OP expr CP) $2] ; nested expression
      [(arithmetic-expr) $1]
      [(boolean-expr) $1]
      [(application-expr) $1]
      [(conditional-expr) $1]
      [(listmaker) $1]
      [(TRUE) (stx-bool (b-syn false #t 1 1))]
      [(FALSE) (stx-bool (b-syn false #f 1 1))]))))

(define (test-parser string)
  (let ([p (open-input-string string)])
    (port-count-lines! p)
    (let ([val (program-parser (lambda () (expression-lexer p)))])
      (close-input-port p)
      val)))