#lang racket/base
(require (for-template racket/base)
racket/list
racket/set
"../stx-types.rkt"
"flatten.rkt")
(provide rules-codegen)
(define (rules-codegen stx)
(syntax-case stx ()
[(_)
(raise-syntax-error #f "The set of grammatical rules can't be empty." stx)]
[(_ r ...)
(begin
(define rules (syntax->list #'(r ...)))
(define flattened-rules (flatten-rules rules))
(define generated-rule-codes (map flat-rule->yacc-rule flattened-rules))
(define start-id (syntax-case (first rules) (rule)
[(rule id pattern)
#'id]))
(define-values (implicit-tokens explicit-tokens) (rules-collect-token-types rules))
(define implicit-token-types
(set->list (list->set (map syntax-e implicit-tokens))))
(define explicit-token-types
(set->list (list->set (map syntax-e explicit-tokens))))
(define token-types
(set->list (list->set (append (map (lambda (x) (string->symbol (syntax-e x)))
implicit-tokens)
(map syntax-e explicit-tokens)))))
(with-syntax ([start-id start-id]
[(token-type ...)
token-types]
[(token-type-constructor ...)
(map (lambda (x) (string->symbol (format "token-~a" x)))
token-types)]
[(explicit-token-types ...) explicit-token-types]
[(implicit-token-types ...) implicit-token-types]
[(implicit-token-type-constructor ...)
(map (lambda (x) (string->symbol (format "token-~a" x)))
implicit-token-types)]
[(generated-rule-code ...) generated-rule-codes])
(syntax/loc stx
(begin
(require parser-tools/lex
parser-tools/yacc)
(provide parse
default-lex/1
tokens
all-tokens-hash
token-EOF
token-type-constructor ...
current-source
current-parser-error-handler
[struct-out exn:fail:parsing])
(define-tokens tokens (EOF token-type ...))
(define all-tokens-hash
(make-hash (list (cons 'EOF token-EOF)
(cons 'token-type token-type-constructor) ...)))
(define default-lex/1
(lexer-src-pos [implicit-token-types
(implicit-token-type-constructor lexeme)]
...
[(eof) (token-EOF eof)]))
(define current-source (make-parameter #f))
(struct exn:fail:parsing exn:fail (srclocs)
#:transparent
#:property prop:exn:srclocs (lambda (instance)
(exn:fail:parsing-srclocs instance)))
(define current-parser-error-handler
(make-parameter
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
(raise (exn:fail:parsing
(format "Encountered error while parsing, near: ~e [line=~a, column=~a, position=~a]"
tok-value
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos))
(current-continuation-marks)
(list (srcloc (current-source)
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos)
(if (and (number? (position-offset end-pos))
(number? (position-offset start-pos)))
(- (position-offset end-pos)
(position-offset start-pos))
#f))))))))
(define (coerse-to-position-token t)
(cond
[(position-token? t)
t]
[else
(position-token t
(position #f #f #f)
(position #f #f #f))]))
(define parse
(let (
[THE-GRAMMAR
(parser
(tokens tokens)
(src-pos)
(start start-id)
(end EOF)
(error (lambda (tok-ok? tok-name tok-value start-pos end-pos)
((current-parser-error-handler) tok-ok? tok-name tok-value start-pos end-pos)))
(grammar
generated-rule-code ...))]
)
(lambda (source tokenizer)
(parameterize ([current-source source])
(THE-GRAMMAR (lambda ()
(coerse-to-position-token (tokenizer))))))))))))]))
(define (flat-rule->yacc-rule a-flat-rule)
(syntax-case a-flat-rule ()
[(rule-type origin name clauses ...)
(begin
(define translated-clauses
(map (lambda (clause) (translate-clause clause #'name #'origin))
(syntax->list #'(clauses ...))))
(with-syntax ([(translated-clause ...) translated-clauses])
#`[name translated-clause ...]))]))
(define (translate-clause a-clause rule-name/false origin)
(define translated-patterns
(let loop ([primitive-patterns (syntax->list a-clause)])
(cond
[(empty? primitive-patterns)
'()]
[else
(cons (syntax-case (first primitive-patterns) (id lit token inferred-id)
[(id val)
#'val]
[(lit val)
(datum->syntax #f (string->symbol (syntax-e #'val)) #'val)]
[(token val)
#'val]
[(inferred-id val reason)
#'val])
(loop (rest primitive-patterns)))])))
(define translated-actions
(for/list ([translated-pattern (in-list translated-patterns)]
[primitive-pattern (syntax->list a-clause)]
[pos (in-naturals 1)])
(with-syntax ([$X (datum->syntax translated-pattern (string->symbol (format "$~a" pos)))]
[$X-start-pos (datum->syntax translated-pattern (string->symbol (format "$~a-start-pos" pos)))]
[$X-end-pos (datum->syntax translated-pattern (string->symbol (format "$~a-end-pos" pos)))])
(with-syntax ([primitive-loc
#'(list (current-source)
(position-line $X-start-pos)
(position-col $X-start-pos)
(position-offset $X-start-pos)
(if (and (number? (position-offset $X-start-pos))
(number? (position-offset $X-end-pos)))
(- (position-offset $X-end-pos)
(position-offset $X-start-pos))
#f))])
(syntax-case primitive-pattern (id lit token inferred-id)
[(inferred-id val reason)
#'(syntax-case $X ()
[(inferred-rule-name rest (... ...))
(syntax->list #'(rest (... ...)))])]
[(id val)
#`(list $X)]
[(lit val)
#`(list (datum->syntax #f $X primitive-loc))]
[(token val)
#`(list (datum->syntax #f $X primitive-loc))])))))
(define whole-rule-loc
(if (> (length translated-patterns) 0)
(with-syntax ([$1-start-pos
(datum->syntax (first translated-patterns)
(string->symbol "$1-start-pos"))]
[$n-end-pos
(datum->syntax (last translated-patterns)
(string->symbol (format "$~a-end-pos"
(length translated-patterns))))])
#`(list (current-source)
(position-line $1-start-pos)
(position-col $1-start-pos)
(position-offset $1-start-pos)
(if (and (number? (position-offset $1-start-pos))
(number? (position-offset $n-end-pos)))
(- (position-offset $n-end-pos)
(position-offset $1-start-pos))
#f)))
#'(list (current-source) #f #f #f #f)))
(with-syntax ([(translated-pattern ...) translated-patterns]
[(translated-action ...) translated-actions])
#`[(translated-pattern ...)
(datum->syntax #f
(append (list (datum->syntax #f '#,rule-name/false #,whole-rule-loc))
translated-action ...)
#,whole-rule-loc)]))
(define (rules-collect-token-types rules)
(define-values (implicit explicit)
(for/fold ([implicit '()]
[explicit '()])
([r (in-list rules)])
(rule-collect-token-types r implicit explicit)))
(values (reverse implicit) (reverse explicit)))
(define (rule-collect-token-types a-rule implicit explicit)
(syntax-case a-rule (rule)
[(rule id a-pattern)
(pattern-collect-implicit-token-types #'a-pattern implicit explicit)]))
(define (pattern-collect-implicit-token-types a-pattern implicit explicit)
(let loop ([a-pattern a-pattern]
[implicit implicit]
[explicit explicit])
(syntax-case a-pattern (id lit token choice repeat maybe seq)
[(id val)
(values implicit explicit)]
[(lit val)
(values (cons #'val implicit) explicit)]
[(token val)
(values implicit (cons #'val explicit))]
[(choice vals ...)
(for/fold ([implicit implicit]
[explicit explicit])
([v (in-list (syntax->list #'(vals ...)))])
(loop v implicit explicit))]
[(repeat min val)
(loop #'val implicit explicit)]
[(maybe val)
(loop #'val implicit explicit)]
[(seq vals ...)
(for/fold ([implicit implicit]
[explicit explicit])
([v (in-list (syntax->list #'(vals ...)))])
(loop v implicit explicit))])))