bsl/parse.rkt
#lang racket

(require parser-tools/lex
         parser-tools/yacc
         syntax/stx
         plai/test-harness
         "lex.rkt"
         racket/runtime-path)
(provide program-parser check-parse current-source-name)

(print-only-errors true)

 

(define-runtime-path this-path ".")

;; TODO: fix x-1 bug: this gets lexed as
;; (token-IDENTIFIER x) (token-NUMBER -1)
;; instead of (token-IDENTIFER x) '- (token-NUMBER 1)
;;
;; check for return statement at the end of expressions
;; indentation check
;;
;; at the moment, we have to assume that 'else' is always
;; at the same location as 'if', until I figure out how to
;; get it in the syntax object.
;;
;; indentation checks for cond statement
;;
;; support for strings and characters (shouldn't be too difficult)

;; make-srcloc -: syntax : position : position -> list
;; Generate the list needed as the third argument to
;; datum->syntax
(define (make-srcloc orig-stx start-pos end-pos)
  (list (if (syntax? orig-stx) orig-stx 'no-syntax-available)
        (position-line start-pos)
        (position-col start-pos)
        (position-offset start-pos)
        (- (position-offset end-pos) (position-offset start-pos))))

(define current-source-name (make-parameter #f))

;; macro to build syntax object
(define-syntax (b-syn stx)
  (syntax-case stx ()
    [(_ ctxt v start end)
     (with-syntax
         ([start-pos-syn (datum->syntax #'start
                                        (string->symbol
                                         (format "$~a-start-pos" 
                                                 (syntax->datum #'start))))]
          [end-pos-syn (datum->syntax #'end
                                      (string->symbol
                                       (format "$~a-end-pos" 
                                               (syntax->datum #'end))))])
       #'(datum->syntax ctxt
                        v
                        (list (current-source-name)
                              (position-line start-pos-syn)
                              (position-col start-pos-syn)
                              (position-offset start-pos-syn)
                              (- (position-offset end-pos-syn) (position-offset start-pos-syn)))))]))

(define (program-parser ctxt)
  (parser
   (start program)
   (end EOF)
   (tokens value-tokens keyword-tokens op-tokens)
   (src-pos) ; the lexer was created with lexer-src-pos, so we need this
   (suppress) ; not good
   (error
    (lambda (tok-ok? tok-name tok-val start-pos end-pos)
      (raise-syntax-error 'pyret/bsl
                          (if tok-ok?
                              (format "unexpected token: ~S" tok-name)
                              (format "invalid token: ~S" tok-name))
                          (datum->syntax false tok-val (make-srcloc false start-pos end-pos)))))
   (precs
    (left OR)
    (left AND)
    (right NOT)
    (left == < > LESS-EQUAL GREATER-EQUAL)
    (left - +)
    (left / *)
    (left %)
    (right **)
    (right NEG)
    (left SEMI))
   (grammar
    (program 
     [(def-or-expr program)
      (b-syn ctxt `(begin ,$1 ,$2) 1 2)]
     [(def-or-expr) $1]
     [() eof])
    
    (def-or-expr
      [(definition) $1]
      [(expr) $1])
    
    (definition
      [(IDENTIFIER = expr) (b-syn ctxt `(define ,$1 ,$3) 1 3)]
      [(funcdef) $1])
    
    (funcdef
     [(DEF IDENTIFIER args COLON expr)
      (local [(define head (cons (b-syn ctxt $2 2 2) $3))]
        (b-syn ctxt `(define ,head ,$5) 1 5))])
    
    (args
     [(OP CP) empty]
     [(OP expr expr-list CP)
      (cons (b-syn ctxt $2 2 2) $3)])
    
    (expr-list
     [() empty]
     [(COMMA expr expr-list)
      (cons (b-syn ctxt $2 2 2) $3)])
    
    (application-expr
     [(IDENTIFIER args) (b-syn ctxt (cons (b-syn ctxt $1 1 1) $2) 1 2)])
    
    (negation-expr
     [(- expr) (b-syn ctxt (list (b-syn ctxt '- 1 1) $2) 1 2)])
    
    (arithmetic-expr
     [(negation-expr) $1]
     [(expr ** expr) (prec **) (b-syn ctxt (list (b-syn ctxt 'exp 2 2) $1 $3) 1 3)]
     [(expr + expr) (prec +) (b-syn ctxt (list (b-syn ctxt '+ 2 2) $1 $3) 1 3)]
     [(expr - expr) (prec -) (b-syn ctxt (list (b-syn ctxt '- 2 2) $1 $3) 1 3)]
     [(expr * expr) (prec *) (b-syn ctxt (list (b-syn ctxt '* 2 2) $1 $3) 1 3)]
     [(expr / expr) (prec /) (b-syn ctxt (list (b-syn ctxt '/ 2 2) $1 $3) 1 3)]
     [(expr % expr) (prec %) (b-syn ctxt (list (b-syn ctxt 'modulo 2 2) $1 $3) 1 3)])
    
    (boolean-expr
     [(expr == expr) (prec ==) (b-syn ctxt (list (b-syn ctxt '= 2 2) $1 $3) 1 3)]
     [(expr < expr) (prec <) (b-syn ctxt (list (b-syn ctxt '< 2 2) $1 $3) 1 3)]
     [(expr > expr) (prec >) (b-syn ctxt (list (b-syn ctxt '> 2 2) $1 $3) 1 3)]
     [(expr LESS-EQUAL expr)
      (prec LESS-EQUAL)
      (b-syn ctxt (list (b-syn ctxt '<= 2 2) $1 $3) 1 3)]
     [(expr GREATER-EQUAL expr)
      (prec GREATER-EQUAL)
      (b-syn ctxt (list (b-syn ctxt '>= 2 2) $1 $3) 1 3)]
     [(NOT expr) (prec NOT) (b-syn ctxt (list (b-syn ctxt 'not 1 1) $2) 1 2)]
     [(expr AND expr)
      (prec AND)
      (b-syn ctxt (list (b-syn ctxt 'and 2 2) $1 $3) 1 3)]
     [(expr OR expr)
      (prec OR)
      (b-syn ctxt (list (b-syn ctxt 'or 2 2) $1 $3) 1 3)])
    
    (conditional-expr
     [(IF expr COLON expr ELSE COLON expr) 
      (b-syn ctxt (list (b-syn ctxt 'if 1 1) $2 $4 $7) 1 7)]
     [(IF expr COLON expr elif-plus-else) 
      (local [(define first-clause (b-syn ctxt (list (b-syn ctxt $2 2 2) (b-syn ctxt $4 4 4)) 1 4))
              (define clauses (cons first-clause $5))]
        (b-syn ctxt (cons (b-syn ctxt 'cond 1 5) clauses) 1 5))])
    
    (elif-plus-else ; evaluates to a list of syntax objects;
     ; the value of each of these syntax objects is a list containing two
     ; more syntax objects
     [(ELSE COLON expr) (list (b-syn ctxt (list (b-syn ctxt 'else 1 1) (b-syn ctxt $3 3 3)) 1 3))]
     [(ELIF expr COLON expr elif-plus-else)
      (cons (b-syn ctxt (list (b-syn ctxt $2 2 2) (b-syn ctxt $4 4 4)) 1 4)
            $5)])
    (expr
     [(IDENTIFIER) (b-syn ctxt $1 1 1)]
     [(NUMBER) (b-syn ctxt $1 1 1)]
     [(OP expr CP) $2] ; parenthesized expressions
     [(RETURN expr)
      (let ([ret-syn (b-syn ctxt 'return 1 1)]
            [expr-syn (b-syn ctxt $2 2 2)])
        (b-syn ctxt (list ret-syn expr-syn) 1 2))]
     [(arithmetic-expr) $1]
     [(boolean-expr) $1]
     [(application-expr) $1]
     [(conditional-expr) $1]
     [(TRUE) (b-syn ctxt true 1 1)]
     [(FALSE) (b-syn ctxt false 1 1)]))))


(define (test-parser string)
  (let ([p (open-input-string string)])
    (port-count-lines! p)
    ((program-parser #'h) (lambda () (expression-lexer p)))))

(define (test-parser/cip)
  (port-count-lines! (current-input-port))
  ((program-parser #'h) (lambda () (expression-lexer (current-input-port)))))

(define (test-parser/file f)
  (let ([p (open-input-file (build-path this-path f))])
    (port-count-lines! p)
    (let ([expr ((program-parser #'h) (λ () (expression-lexer p)))])
      (close-input-port p)
      expr)))

(define (s-obj=? test-stx)
  (lambda (parsed-stx)
    (equal? (syntax->datum parsed-stx) (syntax->datum test-stx))))


;; check-indent-line -: syntax-object : syntax-object -> boolean
;; given expression1, make sure that expression2 is properly
;; indented (either on the same line, or indented)
(define (check-indent-line expr1 expr2)
  (if (or (equal? (syntax-line expr1) (syntax-line expr2))
          (>= (- (syntax-column expr2) (syntax-column expr1)) 2))
      true
      (error 'pyret/bsl "bad indentation at line ~a, column ~a"
             (syntax-line expr2) (syntax-column expr2)))) ; this error message is temporary


#|
(let ([good (test-parser "if x == 0: return 0 else: return 1")]
      [bad (test-parser/file "bad-indent.test")])
  (test (check-indent-line (stx-car (stx-cdr good)) (stx-car (stx-cdr (stx-cdr good)))) true)
  #;(test/exn (check-indent-line (stx-car (stx-cdr bad)) (stx-car (stx-cdr (stx-cdr bad))))
            "bad indentation")) ; test/exn isn't working for some reason
|#


;; check-parse -: syntax-object -> syntax-object
;; after we parse the program the first time, we run this
;; checker, which checks for indentation and return statements
(define (check-parse stx)
  (if (syntax? stx)
      (let ([val (syntax-e stx)]
            [line (syntax-line stx)]
            [col (syntax-column stx)]
            [pos (syntax-position stx)]
            [span (syntax-span stx)]
            [src (syntax-source stx)])
        (let ([loc (list src line col pos span)])
          (cond
            [(syntax? val) (datum->syntax stx (syntax-e (check-parse val)) loc)]
            [(list? val)
             (match (map syntax->datum val)
               [(list 'define head body)
                (begin
                  (check-indent-line (stx-car val) (stx-car (stx-cdr (stx-cdr val))))
                  (datum->syntax stx (map check-parse val) loc))]
               [(list 'if test-expr then-expr else-expr)
                (begin
                  (check-indent-line (stx-car val) (stx-car (stx-cdr val))) ; check if:
                  (check-indent-line (stx-car val)
                                     (stx-car (stx-cdr (stx-cdr (stx-cdr val))))); check else:
                  (datum->syntax stx (map check-parse val) loc))]
               
               [(list 'return expr) (datum->syntax stx (check-parse (stx-car (stx-cdr val))) loc)]
               [_ (datum->syntax stx (map check-parse val) loc)])]
            [else stx])))
      stx))

#|
;; interp -: string -> s-expression
;; interpret a string of pyret/bsl and return the resulting Racket s-expression
(define (interp string)
  (let ([p (open-input-string string)])
    (port-count-lines! p)
    (eval (check-parse ((program-parser #'h) (lambda () (expression-lexer p)))))))

;; interp/file -: string -> s-expression
;; given the name of a file containing pyret/bsl, return the resulting Racket s-expression
(define (interp/file f)
  (let ([p (open-input-file (build-path this-path f))])
    (port-count-lines! p)
    (parameterize ([current-namespace (make-base-namespace)])
      (let ([result (eval 
                     
                     (check-parse ((program-parser #'h) (lambda () (expression-lexer p)))))])
        (close-input-port p)
        result))))
|#

;; syntax-equal? -: syntax-object : syntax-object -> boolean
;; recursively checks to make sure that two syntax objects are equal
(define (syntax-equal? stx1 stx2)
  (if (and (syntax? stx1) (syntax? stx2)) ; first, they must both be syntax!
      (if (and (equal? (syntax->datum stx1) (syntax->datum stx2))
               (equal? (syntax-line stx1) (syntax-line stx2))
               (equal? (syntax-column stx1) (syntax-column stx2))
               (equal? (syntax-position stx1) (syntax-position stx2))
               (equal? (syntax-span stx1) (syntax-span stx2)))
          (cond
            [(stx-null? stx1) (stx-null? stx2)]
            [(stx-list? stx1)
             (local [(define (list-cross list-1 list-2)
                       (if (empty? list-1)
                           empty
                           (cons (cons (car list-1) (car list-2)) (list-cross (cdr list-1) (cdr list-2)))))]
               (andmap (lambda (v) (syntax-equal? (stx-car v) (stx-cdr v))) 
                       (list-cross (stx->list stx1) (stx->list stx2))))]
            [(stx-pair? stx1) (and (syntax-equal? (stx-car stx1) (stx-car stx2))
                                   (syntax-equal? (stx-cdr stx1) (stx-cdr stx2)))]
            [else true])
                                       
          #f)
      #f))




(local [(define ex #'(+ 1 2))]
  (test (syntax-equal? ex ex) true))
(local [(define ex #'(cons #'car #'cdr))]
  (test (syntax-equal? ex ex) true))

; basics
(test/pred (test-parser "true") (s-obj=? #'#t))
(test/pred (test-parser "false") (s-obj=? #'#f))
(test/pred (test-parser "5") (s-obj=? #'5))
(test/pred (test-parser "42.5") (s-obj=? #'42.5))
(test/pred (test-parser "-5") (s-obj=? #'-5))
(test/pred (test-parser "foo") (s-obj=? #'foo))
(test/pred (test-parser "foo_bar_baz") (s-obj=? #'foo-bar-baz))
(test/pred (test-parser "number?") (s-obj=? #'number?))

; binding
(test/pred (test-parser "x = 5") (s-obj=? #'(define x 5)))
(test/pred (test-parser "x = 5") (λ (v) (list? (syntax-e v))))

; arithmetic
(test/pred (test-parser "5 ** 4 ** 3") (s-obj=? #'(exp 5 (exp 4 3))))
(test/pred (test-parser "5 ** 5") (s-obj=? #'(exp 5 5)))
(test/pred (test-parser "-5 ** 3") (s-obj=? #'(exp -5 3)))
(test/pred (test-parser "- (5 ** 3)") (s-obj=? #'(- (exp 5 3))))
(test/pred (test-parser "4 + 3 ** 5") (s-obj=? #'(+ 4 (exp 3 5))))
(test/pred (test-parser "4 ** 3 + 5") (s-obj=? #'(+ (exp 4 3) 5)))
(let ([ex (test-parser "4 ** (3 + 5)")])
  (test/pred ex (s-obj=? #'(exp 4 (+ 3 5))))
  (test (syntax-span ex) 12)
  (test (syntax-span (stx-car (stx-cdr (stx-cdr ex)))) 5)
  (test (syntax-position (stx-car (stx-cdr (stx-cdr ex)))) 7))
(test/pred (test-parser "4 + 3 * 5") (s-obj=? #'(+ 4 (* 3 5))))

; boolean expressions
(test/pred (test-parser "x == 5 and y == 2") (s-obj=? #'(and (= x 5) (= y 2))))
(test/pred (test-parser "x == 5 and y == 3 or z == 4")
           (s-obj=? #'(or (and (= x 5) (= y 3)) (= z 4))))
(test/pred (test-parser "x == 5 or y == 3 and z == 4")
           (s-obj=? #'(or (= x 5) (and (= y 3) (= z 4)))))
(test/pred (test-parser "not (x)") (s-obj=? #'(not x)))
(test/pred (test-parser "not (x == 5 and y == 3)")
           (s-obj=? #'(not (and (= x 5) (= y 3)))))
(test/pred (test-parser "not x == 5 and y == 3")
           (s-obj=? #'(and (not (= x 5)) (= y 3))))

; function definition
(test/pred (test-parser "def idty(x): return x") (s-obj=? #'(define (idty x) (return x))))
(test/pred (test-parser "def first_arg(x,y): return x")
           (s-obj=? #'(define (first-arg x y) (return x))))
(let ([ex (test-parser "def middle_arg(x,y,z): return y")])
  (test/pred ex (s-obj=? #'(define (middle-arg x y z) (return y))))
  (test/pred ex (λ (v) (list? (syntax-e ex))))
  (test/pred ex (λ (v) (identifier? (stx-car ex))))
  (let ([head (stx-car (stx-cdr ex))]
        [body (stx-car (stx-cdr (stx-cdr ex)))])
    (test/pred head (λ (v) (list? (syntax-e v))))
    (test/pred body (λ (v) (list? (syntax-e v))))
    (test (syntax-position (stx-car (stx-cdr head))) 16) ; x
    (test (syntax-span (stx-car (stx-cdr head))) 1)
    (test (syntax-position (stx-car (stx-cdr (stx-cdr head)))) 18) ; y
    (test (syntax-span (stx-car body)) 6)
    (test (syntax-span (stx-car (stx-cdr body))) 1)))


; function application
(test/pred (test-parser "add3(1,2,3)") (s-obj=? #'(add3 1 2 3)))

; conditionals
(test/pred (test-parser "if x == 0: return 0 else: return 1")
           (s-obj=? #'(if (= x 0) (return 0) (return 1))))

(test/pred (test-parser/file "fib.test")
           (s-obj=?
            #'(define (fib n)
                (cond
                  [(= n 0) (return 1)]
                  [(= n 1) (return 1)]
                  [else (return (+ (fib (- n 1)) (fib (- n 2))))]))))

;; tests for check-parse
(test (syntax->datum (check-parse (test-parser "5"))) 5)
(define (test-syntax-equal stx1 stx2)
  (test (syntax-equal? stx1 stx2) true))
(test-syntax-equal (check-parse (test-parser "1 + 3"))
                   (test-parser "1 + 3"))
(test-syntax-equal (check-parse (test-parser
                                 "if x == 0: 0 else: 1"))
                   (test-parser  "if x == 0: 0 else: 1"))
(test (syntax->datum (check-parse (test-parser
                                   "if x == 0: return 0 else: return 1")))
      '(if (= x 0) 0 1))


;; tests for overall interpretation
#;(interp/file "fac.test")
#;(test (interp "fac(0)") 1)
#;(test (interp "fac(1)") 1)
#;(test (interp "fac(4)") 24)