#lang scheme
(require (planet "main.ss" ("dherman" "memoize.plt" 3 1)))
(define-for-syntax verbose 0)
(define-syntax (log stx)
(syntax-case stx ()
((_ num x ...)
(with-syntax ((verbose verbose))
#'(when (>= verbose num)
(apply printf (list x ...)))))))
(define (log . v)
(when verbose
(apply printf v)))
(define nothing (lambda () #f))
(define end-of-input (lambda () #f))
(define-for-syntax (do-literal string answer)
(with-syntax ((string string)
(answer answer))
#'(lambda (input column last)
(let loop ((current column)
(letters (if (char? string)
(list string)
(string->list string))))
(log 3 "Letters ~a\n" letters)
(if (null? letters)
(begin
(log 3 "Continuing to the next part after matching '~a'\n" string)
(answer input current string))
(begin
(log 4 "Matching '~a' to input '~a' at column ~a\n" (car letters)
(input current)
current)
(if (equal? (car letters) (input current))
(loop (add1 current) (cdr letters))
#f)))))))
(define-for-syntax (do-eof answer)
(with-syntax ((answer answer))
#'(lambda (input column last)
(if (eq? (input column) end-of-input)
(answer input (add1 column) eof)
#f))))
(define-for-syntax (do-epsilon answer)
(with-syntax ((answer answer))
#'(lambda (input column last)
(answer input column last))))
(define-for-syntax (do-any answer)
(with-syntax ((answer answer))
#'(lambda (input column last)
(let ((this (input column)))
(answer input (add1 column) this)))))
(define-for-syntax (do-nonterminal nt answer)
(with-syntax ((nt nt)
(answer answer))
#'(lambda (input column last)
(let ((result (nt input column last)))
(if (not result)
#f
(answer input (cadr result) (car result)))))))
(define-for-syntax (do-or pattern answer)
(syntax-case pattern ()
((sub)
(with-syntax ((answer answer))
#'(lambda (input column last)
(let ((result ((translate-choice (sub) _) input column last)))
(if result
(answer input (cadr result) (car result))
#f)))))
((sub1 sub ...)
(with-syntax ((answer answer)
(rest (do-or #'(sub ...) answer)))
#'(lambda (input column last)
(let ((result ((translate-choice (sub1) _) input column last)))
(if result
(answer input (cadr result) (car result))
(rest input column last))))))))
(define-for-syntax (do-apply nt args answer)
(with-syntax ((nt nt)
((fargs ...) args)
(answer answer))
#'(lambda (input column last)
(let* ((func (nt fargs ...))
(result (func input column last)))
(if result
(answer input (cadr result) (car result))
#f)))))
(define-for-syntax (do-foreign fpeg nt answer)
(with-syntax ((nt nt)
(fpeg fpeg)
(answer answer))
#'(lambda (input column last)
(let* ((result (fpeg input #:nonterminal 'nt #:output #t #:column column)))
(if result
(answer input (cadr result) (car result))
#f)))))
(define-for-syntax (do-not pattern answer)
(with-syntax ((pattern pattern)
(answer answer))
#'(lambda (input column last)
(let ((result ((translate-choice pattern _) input column last)))
(if result
#f
(answer input column last))))))
(define-for-syntax (do-ensure pattern answer)
(with-syntax ((pattern pattern)
(answer answer))
#'(lambda (input column last)
(let ((result ((translate-choice pattern _) input column last)))
(if result
(answer input column last)
#f)))))
(define-for-syntax (do-bind var pattern answer)
(with-syntax ((var var)
(pattern pattern)
(answer answer))
#'(lambda (input column last)
(let ((result ((translate-choice pattern _) input column last)))
(if result
(let ((var (car result))
(next-column (cadr result)))
(answer input next-column var))
#f)))))
(define-for-syntax (do-predicate predicate answer)
(with-syntax ((predicate predicate)
(answer answer))
#'(lambda (input column last)
(let ((next predicate))
(if next
(answer input column next)
#f)))))
(define-for-syntax (do-repeat pattern answer)
(with-syntax ((pattern pattern)
(answer answer))
#'(lambda (input column last)
(let ((proc (translate-choice pattern _)))
(let loop ((column column)
(all '()))
(let ((result (proc input column last)))
(if (not result)
(answer input column (reverse all))
(loop (cadr result) (cons (car result) all)))))))))
(define-for-syntax (do-maybe pattern answer)
(with-syntax ((pattern pattern)
(answer answer))
#'(lambda (input column last)
(let ((proc (translate-choice pattern _))
(nothing '()))
(let ((result (proc input column last)))
(if (not result)
(answer input column nothing)
(answer input (cadr result) (car result))))))))
(define (parse-choice choice input column last)
(choice input column last))
(define (create-parser symbol productions)
(memo-lambda (input column last)
(log 1 "Parse input with symbol ~a at column ~a char '~a'\n" symbol column (input column))
(let loop ((choices productions)
(num 1))
(log 1 "Current choice ~a of ~a\n" num symbol)
(if (null? choices)
(begin
(log 1 "Nonterminal ~a failed to parse\n" symbol)
#f)
(let* ((result (parse-choice (car choices)
input column
last)))
(if result
(begin
(log 1 "Parsed with ~a = ~a\n" symbol result)
result)
(loop (cdr choices) (add1 num))))))))
(define-syntax (translate-choice stx)
(syntax-case stx (raw)
((_ () action)
(if (eq? (syntax->datum #'action) '_)
#'(lambda (input column last)
(log 2 "Returning last matched ~a\n" last)
(list last column))
#'(lambda (input column last)
(log 2 "Performing action ~a\n" action)
(list action column))))
((_ (choice choice* ...) action)
(with-syntax ((rest #'(translate-choice (choice* ...) action)))
(syntax-case #'choice (bind except + * ? predicate not ensure or apply foreign)
(()
(do-epsilon #'action))
(eof (equal? (syntax->datum #'eof) 'eof)
(do-eof #'rest))
(nt (and (identifier? #'nt)
(not (equal? (syntax->datum #'nt) '_)))
(do-nonterminal #'nt #'rest))
(nt (equal? (syntax->datum #'nt) '_)
(do-any #'rest))
((bind var . next)
(do-bind #'var #'next #'rest))
((* . pattern)
(do-repeat #'pattern #'rest))
((+ . pattern)
(with-syntax (((patterns ...) #'pattern))
#'(translate-choice ((bind first patterns ...)
(bind next (* patterns ...))
(predicate (cons first next))
choice* ...)
action)))
((foreign peg nt)
(do-foreign #'peg #'nt #'rest))
((or . pattern)
(do-or #'pattern #'rest))
((? . pattern)
(do-maybe #'pattern #'rest))
((predicate what)
(do-predicate #'what #'rest))
((apply nt . args)
(do-apply #'nt #'args #'rest))
((not . pattern)
(do-not #'pattern #'rest))
((ensure . pattern)
(do-ensure #'pattern #'rest))
((except . patterns)
(with-syntax (((patterns ...) #'patterns))
#'(translate-choice ((not patterns ...) _ choice* ...) action)))
(lit
(or (string? (syntax->datum #'lit))
(char? (syntax->datum #'lit)))
(do-literal #'lit #'rest)))))))
(provide peg)
(define-syntax (peg stx)
(syntax-case stx (start grammar)
((peg (start start-nt) (grammar (nonterminal choice ...) ...))
(with-syntax ((((translated-choices ...) ...)
(map (lambda (choices)
(map (lambda (choice)
(syntax-case choice ()
((action) #'(translate-choice () action))
(((element element-rest ...) action)
#'(translate-choice (element element-rest ...) action))))
(syntax->list choices)))
(syntax->list #'((choice ...) ...)))))
(with-syntax (((nt-func ...)
(map (lambda (nt choices)
(syntax-case nt ()
((name args ...)
(with-syntax (((cs ...) choices))
#'(lambda (args ...)
(create-parser 'name (list cs ...)))))
(name
(with-syntax (((cs ...) choices))
#'(create-parser 'name (list cs ...))))))
(syntax->list #'(nonterminal ...))
(syntax->list #'((translated-choices ...) ...))))
((nt-name ...)
(map (lambda (nt)
(syntax-case nt ()
((name args ...) #'name)
(name #'name)))
(syntax->list #'(nonterminal ...)))))
#'(letrec ((nt-name nt-func) ...)
(lambda (input #:nonterminal (nt 'start-nt) #:output (output #f) #:column (column 0))
(log 1 "Start parsing with nonterminal ~a at column ~a\n" nt column)
(let* ((names->functions (let ((h (make-hash)))
(hash-set! h 'nt-name nt-func)
...
h))
(result ((hash-ref names->functions nt) input column #f)))
(log 1 "Result of parsing is ~a\n" result)
(if output
result
(if result
(car result)
#f))))))))))
(define (parse-string parser string)
(let* ((s (string->list string))
(v (list->vector s))
(max (length s)))
(parser (lambda (i)
(if (>= i max)
end-of-input
(vector-ref v i))))))
(define (parse-file parser file)
(define max-length 4096)
(with-input-from-file file
(lambda ()
(let ((strings (make-hash)))
(parser (lambda (i)
(let* ((index (floor (/ i max-length)))
(str (hash-ref strings index
(lambda ()
(log 5 "Reading next ~a characters\n" max-length)
(let ((str (read-string max-length)))
(log 5 "Read ~a\n" str)
(hash-set! strings index str)
str)))))
(if (or (eof-object? str)
(>= (modulo i max-length) (string-length str)))
end-of-input
(string-ref str (modulo i max-length))))))))))
(provide parse)
(define (parse parser obj)
(cond
((string? obj) (parse-string parser obj))
((path? obj) (parse-file parser obj))
(else (error "You gave me a ~a. Please pass a string or a path to the parse method.\n" obj))))
(define (test1)
(define p
(peg
(start blah)
(grammar
(blah ((foobar " " "1") 23)
((foobar (bind x " ") (bind y "2")) (string-append x y))
((foobar (bind x " ") "3") 40)
)
(foobar (("hello" "animals") 99)
(("hello" " " "world") 9)))))
(let ((s (string->list "hello world 2")))
(p (lambda (i)
(list-ref s i)))))
(define (test2)
(define p
(peg
(start blah)
(grammar
(blah (((bind x ones) (bind z (? "food")) (bind y twos)) (list x z y))
)
(ones (((bind x (* "1"))) x))
(twos (((bind x (* "2"))) x))
)))
(let* ((s (string->list "111112222"))
(max (length s)))
(p (lambda (i)
(if (>= i max)
'you-cant-possibly-match-this
(list-ref s i)))))
)
(test2)