json/packrat.ss
;; Packrat Parser Library
;;
;; Copyright (c) 2004, 2005 Tony Garnock-Jones <tonyg@kcbbs.gen.nz>
;; Copyright (c) 2005 LShift Ltd. <query@lshift.net>
;;
;; Permission is hereby granted, free of charge, to any person
;; obtaining a copy of this software and associated documentation
;; files (the "Software"), to deal in the Software without
;; restriction, including without limitation the rights to use, copy,
;; modify, merge, publish, distribute, sublicense, and/or sell copies
;; of the Software, and to permit persons to whom the Software is
;; furnished to do so, subject to the following conditions:
;;
;; The above copyright notice and this permission notice shall be
;; included in all copies or substantial portions of the Software.
;;
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
;; BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
;; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
;; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
;; SOFTWARE.

;;-- MzScheme-specific header
#lang scheme/base

(require srfi/1/list
         srfi/9/record)

(provide parse-result?
         parse-result-successful?
         parse-result-semantic-value
         parse-result-next
         parse-result-error
         
         parse-results?
         parse-results-position
         parse-results-base
         parse-results-next
         
         parse-error?
         parse-error-position
         parse-error-expected
         parse-error-messages
         
         make-parse-position
         parse-position?
         parse-position-file
         parse-position-line
         parse-position-column
         
         top-parse-position
         update-parse-position
         parse-position->string
         
         ;;empty-results
         ;;make-results
         
         make-error-expected
         make-error-message
         make-result
         make-expected-result
         make-message-result
         
         prepend-base
         prepend-semantic-value
         
         base-generator->results
         results->result
         
         parse-position>?
         parse-error-empty?
         merge-parse-errors
         merge-result-errors
         
         parse-results-token-kind
         parse-results-token-value
         
         packrat-check-base
         packrat-check
         packrat-or
         packrat-unless
         
         packrat-parser)

;;-- BEGIN PORTABLE BODY

(define-record-type parse-result
  (make-parse-result successful? semantic-value next error)
  parse-result?
  (successful? parse-result-successful?)
  (semantic-value parse-result-semantic-value)
  (next parse-result-next) ;; #f, if eof or error; otherwise a parse-results
  (error parse-result-error)
  ;; ^^ #f if none, but usually a parse-error structure
  )

(define-record-type parse-results
  (make-parse-results position base next map)
  parse-results?
  (position parse-results-position) ;; a parse-position or #f if unknown
  (base parse-results-base) ;; a value, #f indicating 'none' or 'eof'
  (next parse-results-next* set-parse-results-next!)
  ;; ^^ a parse-results, or a nullary function delivering same, or #f for nothing next (eof)
  (map parse-results-map set-parse-results-map!)
  ;; ^^ an alist mapping a nonterminal to a parse-result
  )

(define-record-type parse-error
  (make-parse-error position expected messages)
  parse-error?
  (position parse-error-position) ;; a parse-position or #f if unknown
  (expected parse-error-expected) ;; set of things (lset)
  (messages parse-error-messages) ;; list of strings
  )

(define-record-type parse-position
  (make-parse-position file line column)
  parse-position?
  (file parse-position-file)
  (line parse-position-line)
  (column parse-position-column))

(define (top-parse-position filename)
  (make-parse-position filename 1 0))

(define (update-parse-position pos ch)
  (if (not pos)
      #f
      (let ((file (parse-position-file pos))
            (line (parse-position-line pos))
            (column (parse-position-column pos)))
        (case ch
          ((#\return) (make-parse-position file line 0))
          ((#\newline) (make-parse-position file (+ line 1) 0))
          ((#\tab) (make-parse-position file line (* (quotient (+ column 8) 8) 8)))
          (else (make-parse-position file line (+ column 1)))))))

(define (parse-position->string pos)
  (if (not pos)
      "<??>"
      (string-append (parse-position-file pos) ":"
                     (number->string (parse-position-line pos)) ":"
                     (number->string (parse-position-column pos)))))

(define (empty-results pos)
  (make-parse-results pos #f #f '()))

(define (make-results pos base next-generator)
  (make-parse-results pos base next-generator '()))

(define (make-error-expected pos str)
  (make-parse-error pos (list str) '()))

(define (make-error-message pos msg)
  (make-parse-error pos '() (list msg)))

(define (make-result semantic-value next)
  (make-parse-result #t semantic-value next #f))

(define (make-expected-result pos str)
  (make-parse-result #f #f #f (make-error-expected pos str)))

(define (make-message-result pos msg)
  (make-parse-result #f #f #f (make-error-message pos msg)))

(define (prepend-base pos base next)
  (make-parse-results pos base next '()))

(define (prepend-semantic-value pos key result next)
  (make-parse-results pos #f #f
                      (list (cons key (make-result result next)))))

(define (base-generator->results generator)
  ;; Note: applies first next-generator, to get first result
  (define (results-generator)
    (let-values (((pos base) (generator)))
      (if (not base)
          (empty-results pos)
          (make-results pos base results-generator))))
  (results-generator))

(define (parse-results-next results)
  (let ((next (parse-results-next* results)))
    (if (procedure? next)
        (let ((next-value (next)))
          (set-parse-results-next! results next-value)
          next-value)
        next)))

(define (results->result results key fn)
  (let ((results-map (parse-results-map results)))
    (cond
      ((assv key results-map) => cdr)
      (else (let ((result (fn)))
              (set-parse-results-map! results (cons (cons key result) results-map))
              result)))))

(define (parse-position>? a b)
  (cond
    ((not a) #f)
    ((not b) #t)
    (else (let ((la (parse-position-line a)) (lb (parse-position-line b)))
            (or (> la lb)
                (and (= la lb)
                     (> (parse-position-column a) (parse-position-column b))))))))

(define (parse-error-empty? e)
  (and (null? (parse-error-expected e))
       (null? (parse-error-messages e))))

(define (merge-parse-errors e1 e2)
  (cond
    ((not e1) e2)
    ((not e2) e1)
    (else
     (let ((p1 (parse-error-position e1))
           (p2 (parse-error-position e2)))
       (cond
         ((or (parse-position>? p1 p2) (parse-error-empty? e2)) e1)
         ((or (parse-position>? p2 p1) (parse-error-empty? e1)) e2)
         (else (make-parse-error p1
                                 (lset-union equal?
                                             (parse-error-expected e1)
                                             (parse-error-expected e2))
                                 (append (parse-error-messages e1) (parse-error-messages e2)))))))))

(define (merge-result-errors result errs)
  (make-parse-result (parse-result-successful? result)
                     (parse-result-semantic-value result)
                     (parse-result-next result)
                     (merge-parse-errors (parse-result-error result) errs)))

;---------------------------------------------------------------------------

(define (parse-results-token-kind results)
  (let ((base (parse-results-base results)))
    (and base (car base))))

(define (parse-results-token-value results)
  (let ((base (parse-results-base results)))
    (and base (cdr base))))

(define (packrat-check-base token-kind k)
  (lambda (results)
    (let ((base (parse-results-base results)))
      (if (eqv? (and base (car base)) token-kind)
          ((k (and base (cdr base))) (parse-results-next results))
          (make-expected-result (parse-results-position results)
                                (if (not token-kind)
                                    "end-of-file"
                                    token-kind))))))

(define (packrat-check parser k)
  (lambda (results)
    (let ((result (parser results)))
      (if (parse-result-successful? result)
          (merge-result-errors ((k (parse-result-semantic-value result))
                                (parse-result-next result))
                               (parse-result-error result))
          result))))

(define (packrat-or p1 p2)
  (lambda (results)
    (let ((result (p1 results)))
      (if (parse-result-successful? result)
          result
          (merge-result-errors (p2 results)
                               (parse-result-error result))))))

(define (packrat-unless explanation p1 p2)
  (lambda (results)
    (let ((result (p1 results)))
      (if (parse-result-successful? result)
          (make-message-result (parse-results-position results)
                               explanation)
          (p2 results)))))

;---------------------------------------------------------------------------

(define (object->external-representation o)
  (let ((s (open-output-string)))
    (write o s)
    (get-output-string s)))

(define-syntax packrat-parser
  (syntax-rules (<- quote ! @ /)
    ((_ start (nonterminal (alternative body0 body ...) ...) ...)
     (let ()
       (define nonterminal
         (lambda (results)
           (results->result results 'nonterminal
                            (lambda ()
                              ((packrat-parser #f "alts" nonterminal
                                               ((begin body0 body ...) alternative) ...)
                               results)))))
       ...
       start))
    
    ((_ #f "alts" nt (body alternative))
     (packrat-parser #f "alt" nt body alternative))
    
    ((_ #f "alts" nt (body alternative) rest0 rest ...)
     (packrat-or (packrat-parser #f "alt" nt body alternative)
                 (packrat-parser #f "alts" nt rest0 rest ...)))
    
    ((_ #f "alt" nt body ())
     (lambda (results) (make-result body results)))
    
    ((_ #f "alt" nt body ((! fails ...) rest ...))
     (packrat-unless (string-append "Nonterminal " (symbol->string 'nt)
                                    " expected to fail "
                                    (object->external-representation '(fails ...)))
                     (packrat-parser #f "alt" nt #t (fails ...))
                     (packrat-parser #f "alt" nt body (rest ...))))
    
    ((_ #f "alt" nt body ((/ alternative ...) rest ...))
     (packrat-check (packrat-parser #f "alts" nt (#t alternative) ...)
                    (lambda (result) (packrat-parser #f "alt" nt body (rest ...)))))
    
    ((_ #f "alt" nt body (var <- 'val rest ...))
     (packrat-check-base 'val
                         (lambda (var)
                           (packrat-parser #f "alt" nt body (rest ...)))))
    
    ((_ #f "alt" nt body (var <- @ rest ...))
     (lambda (results)
       (let ((var (parse-results-position results)))
         ((packrat-parser #f "alt" nt body (rest ...)) results))))
    
    ((_ #f "alt" nt body (var <- val rest ...))
     (packrat-check val
                    (lambda (var)
                      (packrat-parser #f "alt" nt body (rest ...)))))
    
    ((_ #f "alt" nt body ('val rest ...))
     (packrat-check-base 'val
                         (lambda (dummy)
                           (packrat-parser #f "alt" nt body (rest ...)))))
    
    ((_ #f "alt" nt body (val rest ...))
     (packrat-check val
                    (lambda (dummy)
                      (packrat-parser #f "alt" nt body (rest ...)))))))

#;(define (x)
    (sc-expand
     '(packrat-parser expr
                      (expr ((a <- mulexp '+ b <- mulexp)
                             (+ a b))
                            ((a <- mulexp) a))
                      (mulexp ((a <- simple '* b <- simple)
                               (* a b))
                              ((a <- simple) a))
                      (simple ((a <- 'num) a)
                              (('oparen a <- expr 'cparen) a)))))

;;-- END PORTABLE BODY