peg.ss
#lang scheme

(require (planet "main.ss" ("dherman" "memoize.plt" 3 1)))

#|
(pattern action)

pattern :=
nonterminal
"literal"
#\char
(bind foo pattern)
(* pattern)
(? pattern)
(predicate action pattern)
|#

#|
  0 1 2 3 4 5 6
a 
b
c
d
e
f
g
|#

#|
(* foo) = foo1
foo1 = (((bind f foo) (bind f* foo1)) (cons f f*)
	() (list))
|#

;; (((nonterminal result index) ...) ...)

#|
(peg
  (start foobar)
  (grammar
    (foobar ((hello world) 1)
	    ((blah) 2))
    (hello ((go away) 3))))
|#

(define-for-syntax verbose 0)

(define-syntax (log stx)
  (syntax-case stx ()
    ((_ num x ...)
     (with-syntax ((verbose verbose))
       #'(when (>= verbose num)
	   (apply printf (list x ...)))))))

#;
(define (log . v)
  (when verbose
    (apply printf v)))

(define nothing (lambda () #f))
(define end-of-input (lambda () #f))

(define-for-syntax (do-literal string answer)
  (with-syntax ((string string)
		(answer answer))
    #'(lambda (input column last)
	(let loop ((current column)
		   (letters (if (char? string)
			      (list string)
			      (string->list string))))
	  (log 3 "Letters ~a\n" letters)
	  (if (null? letters)
	    (begin
	      (log 3 "Continuing to the next part after matching '~a'\n" string)
	      (answer input current string))
	    ;; (list string current)
	    (begin
	      (log 4 "Matching '~a' to input '~a' at column ~a\n" (car letters)
		      (input current)
		      current)
	      (if (equal? (car letters) (input current))
		(loop (add1 current) (cdr letters))
		#f)))))))

;; matches eof and nothing else
(define-for-syntax (do-eof answer)
  (with-syntax ((answer answer))
    #'(lambda (input column last)
	(if (eq? (input column) end-of-input)
	  (answer input (add1 column) eof)
	  #f))))

;; matches nothing
(define-for-syntax (do-epsilon answer)
  (with-syntax ((answer answer))
    #'(lambda (input column last)
	(answer input column last))))

;; matches any 1 character
(define-for-syntax (do-any answer)
  (with-syntax ((answer answer))
    #'(lambda (input column last)
	(let ((this (input column)))
	  (answer input (add1 column) this)))))

;; matches some nonterminal
(define-for-syntax (do-nonterminal nt answer)
  (with-syntax ((nt nt)
		(answer answer))
    #'(lambda (input column last)
	(let ((result (nt input column last)))
	  ; (log "Result of ~a was ~a\n" 'nt result)
	  ; (log 2 "Result of ~a was ~a\n" 'nt result)
	  (if (not result)
	    #f
	    (answer input (cadr result) (car result)))))))

;; returns the result of the first pattern in (pattern ...) to match
(define-for-syntax (do-or pattern answer)
  (syntax-case pattern ()
    ((sub)
     (with-syntax ((answer answer))
       #'(lambda (input column last)
	   (let ((result ((translate-choice (sub) _) input column last)))
	     (if result
	       (answer input (cadr result) (car result))
	       #f)))))
    ((sub1 sub ...)
     (with-syntax ((answer answer)
		   (rest (do-or #'(sub ...) answer)))
       #'(lambda (input column last)
	   (let ((result ((translate-choice (sub1) _) input column last)))
	     (if result
	       (answer input (cadr result) (car result))
	       (rest input column last))))))))

#|
;; or using a sub-peg
(define-for-syntax (do-or pattern answer)
  (syntax-case pattern ()
    ((sub ...)
     (with-syntax ((answer answer)
		   (first-nt (gensym)))
       #'(lambda (input column last)
	   (let ((sub-peg (peg
			    (start first-nt)
			    (grammar (first-nt ((sub) _) ...)))))
	     (answer input column 
		     (sub-peg (lambda (c)
				(input (+ column c)))))))))))
|#

;; apply some arguments to a nonterminal and invoke the nonterminal
(define-for-syntax (do-apply nt args answer)
  (with-syntax ((nt nt)
		((fargs ...) args)
		(answer answer))
    #'(lambda (input column last)
	(let* ((func (nt fargs ...))
	       (result (func input column last)))
	  (if result
	    (answer input (cadr result) (car result))
	    #f)))))

(define-for-syntax (do-foreign fpeg nt answer)
  (with-syntax ((nt nt)
		(fpeg fpeg)
		(answer answer))
    #'(lambda (input column last)
	(let* ((result (fpeg input #:nonterminal 'nt #:output #t #:column column)))
	  (if result
	    (answer input (cadr result) (car result))
	    #f)))))

;; doesn't match pattern
;; consumes no input
(define-for-syntax (do-not pattern answer)
  (with-syntax ((pattern pattern)
		(answer answer))
    #'(lambda (input column last)
	(let ((result ((translate-choice pattern _) input column last)))
	  (if result
	    #f
	    (answer input column last))))))

;; ensures that pattern will match
;; consumes no input
(define-for-syntax (do-ensure pattern answer)
  (with-syntax ((pattern pattern)
		(answer answer))
    #'(lambda (input column last)
	(let ((result ((translate-choice pattern _) input column last)))
	  (if result
	    (answer input column last)
	    #f)))))

;; binds a variable to the result of pattern
(define-for-syntax (do-bind var pattern answer)
  (with-syntax ((var var)
		(pattern pattern)
		(answer answer))
    #'(lambda (input column last)
	(let ((result ((translate-choice pattern _) input column last)))
	  (if result
	    (let ((var (car result))
		  (next-column (cadr result)))
	      (answer input next-column var))
	    #f)))))

;; performs arbitrary computation, if the computation is anything
;; besides #f the parse will continue. the result of the computation
;; is given to the next pattern as its result
(define-for-syntax (do-predicate predicate answer)
  (with-syntax ((predicate predicate)
		(answer answer))
    #'(lambda (input column last)
	(let ((next predicate))
	  (if next
	    (answer input column next)
	    #f)))))

;; repeat a pattern 0 or more times
;; returns a list
(define-for-syntax (do-repeat pattern answer)
  (with-syntax ((pattern pattern)
		(answer answer))
    #'(lambda (input column last)
	(let ((proc (translate-choice pattern _)))
	  (let loop ((column column)
		     (all '()))
	    (let ((result (proc input column last)))
	      (if (not result)
		(answer input column (reverse all))
		(loop (cadr result) (cons (car result) all)))))))))

;; matches a pattern in which case it returns the result of the pattern
;; or doesn't match the pattern in which case '() is returned
(define-for-syntax (do-maybe pattern answer)
  (with-syntax ((pattern pattern)
		(answer answer))
    #'(lambda (input column last)
	(let ((proc (translate-choice pattern _))
	      ;; what should nothing be?
	      (nothing '()))
	    (let ((result (proc input column last)))
	      (if (not result)
		(answer input column nothing)
		(answer input (cadr result) (car result))))))))

(define (parse-choice choice input column last)
  (choice input column last))

(define (create-parser symbol productions)
  (memo-lambda (input column last)
    (log 1 "Parse input with symbol ~a at column ~a char '~a'\n" symbol column (input column))
    (let loop ((choices productions)
	       (num 1))
      (log 1 "Current choice ~a of ~a\n" num symbol)
      (if (null? choices)
	(begin
	  (log 1 "Nonterminal ~a failed to parse\n" symbol)
	  #f)
	(let* ((result (parse-choice (car choices)
				     input column
				     last)))
	  (if result
	    (begin
	      (log 1 "Parsed with ~a = ~a\n" symbol result)
	      result)
	    (loop (cdr choices) (add1 num))))))))

(define-syntax (translate-choice stx)
  (syntax-case stx (raw)
    ((_ () action)
     (if (eq? (syntax->datum #'action) '_)
       #'(lambda (input column last)
	   (log 2 "Returning last matched ~a\n" last)
	   (list last column))
       #'(lambda (input column last)
	   (log 2 "Performing action ~a\n" action)
	   (list action column))))
    ((_ (choice choice* ...) action)
     (with-syntax ((rest #'(translate-choice (choice* ...) action)))
       (syntax-case #'choice (bind except + * ? predicate not ensure or apply foreign)
	 (()
	  (do-epsilon #'action))
	 (eof (equal? (syntax->datum #'eof) 'eof)
	      (do-eof #'rest))
	 (nt (and (identifier? #'nt)
		  (not (equal? (syntax->datum #'nt) '_)))
	     (do-nonterminal #'nt #'rest))
	 (nt (equal? (syntax->datum #'nt) '_)
	     (do-any #'rest))
	 ((bind var . next)
	  (do-bind #'var #'next #'rest))
	 ((* . pattern)
	  (do-repeat #'pattern #'rest))
	 ((+ . pattern)
	  (with-syntax (((patterns ...) #'pattern))
	    #'(translate-choice ((bind first patterns ...)
				 (bind next (* patterns ...))
				 (predicate (cons first next))
				 choice* ...)
				action)))
	 ((foreign peg nt)
	  (do-foreign #'peg #'nt #'rest))
	 ((or . pattern)
	  (do-or #'pattern #'rest))
	 ((? . pattern)
	  (do-maybe #'pattern #'rest))
	 ((predicate what)
	  (do-predicate #'what #'rest))
	 ((apply nt . args)
	  (do-apply #'nt #'args #'rest))
	 ((not . pattern)
	  (do-not #'pattern #'rest))
	 ((ensure . pattern)
	  (do-ensure #'pattern #'rest))
	 ((except . patterns)
	  (with-syntax (((patterns ...) #'patterns))
	    #'(translate-choice ((not patterns ...) _ choice* ...) action)))
	 (lit
	   (or (string? (syntax->datum #'lit))
	       (char? (syntax->datum #'lit)))
	   (do-literal #'lit #'rest)))))))

(provide peg)
(define-syntax (peg stx)
  (syntax-case stx (start grammar)
    ((peg (start start-nt) (grammar (nonterminal choice ...) ...))
     (with-syntax ((((translated-choices ...) ...)
		    (map (lambda (choices)
			   (map (lambda (choice)
				  (syntax-case choice ()
				    ((action) #'(translate-choice () action))
				    (((element element-rest ...) action)
				     #'(translate-choice (element element-rest ...) action))))
				(syntax->list choices)))
			 (syntax->list #'((choice ...) ...)))))
       (with-syntax (((nt-func ...)
		      (map (lambda (nt choices)
			     (syntax-case nt ()
			       ((name args ...)
				(with-syntax (((cs ...) choices))
				  #'(lambda (args ...)
				      (create-parser 'name (list cs ...)))))
			       (name
				 (with-syntax (((cs ...) choices))
				   #'(create-parser 'name (list cs ...))))))
			   (syntax->list #'(nonterminal ...))
			   (syntax->list #'((translated-choices ...) ...))))
		     ((nt-name ...)
		      (map (lambda (nt)
			     (syntax-case nt ()
			       ((name args ...) #'name)
			       (name #'name)))
			   (syntax->list #'(nonterminal ...)))))
	 #'(letrec ((nt-name nt-func) ...)
	     (lambda (input #:nonterminal (nt 'start-nt) #:output (output #f) #:column (column 0))
	       (log 1 "Start parsing with nonterminal ~a at column ~a\n" nt column)
	       (let* ((names->functions (let ((h (make-hash)))
					 (hash-set! h 'nt-name nt-func)
					 ...
					 h))
		      (result ((hash-ref names->functions nt) input column #f)))
		 (log 1 "Result of parsing is ~a\n" result)
		 (if output
		   result
		   (if result
		     (car result)
		     #f))))))))))

(define (parse-string parser string)
  (let* ((s (string->list string))
	 (v (list->vector s))
	 (max (length s)))
    (parser (lambda (i)
	      (if (>= i max)
		end-of-input
		(vector-ref v i))))))

(define (parse-file parser file)
  (define max-length 4096)
  (with-input-from-file file
    (lambda ()
      (let ((strings (make-hash)))
	(parser (lambda (i)
		  (let* ((index (floor (/ i max-length)))
			 (str (hash-ref strings index
					(lambda ()
					  (log 5 "Reading next ~a characters\n" max-length)
					  (let ((str (read-string max-length)))
					    (log 5 "Read ~a\n" str)
					    (hash-set! strings index str)
					    str)))))
		    (if (or (eof-object? str)
			    (>= (modulo i max-length) (string-length str)))
		      end-of-input
		      (string-ref str (modulo i max-length))))))))))

(provide parse)
(define (parse parser obj)
  (cond
    ((string? obj) (parse-string parser obj))
    ((path? obj) (parse-file parser obj))
    (else (error "You gave me a ~a. Please pass a string or a path to the parse method.\n" obj))))

#;
(define (test1)
  (define p
    (peg
      (start blah)
      (grammar
	(blah ((foobar " " "1") 23)
	      ((foobar (bind x " ") (bind y "2")) (string-append x y))
	      ((foobar (bind x " ") "3") 40)
	      )
	(foobar (("hello" "animals") 99)
		(("hello" " " "world") 9)))))

  (let ((s (string->list "hello world 2")))
    (p (lambda (i)
	 (list-ref s i)))))

#;
(define (test2)
  (define p
    (peg
      (start blah)
      (grammar
	(blah (((bind x ones) (bind z (? "food")) (bind y twos)) (list x z y))
	      )
	(ones (((bind x (* "1"))) x))
	(twos (((bind x (* "2"))) x))
	      )))

  (let* ((s (string->list "111112222"))
	 (max (length s)))
    (p (lambda (i)
	 (if (>= i max)
	   'you-cant-possibly-match-this
	   (list-ref s i)))))
  )

#;
(test2)