parser.ss
(module parser mzscheme
  (require (lib "lex.ss" "parser-tools")
           (lib "yacc.ss" "parser-tools"))
  (require (lib "etc.ss"))
  (require "lexer.ss")
  (require "ast.ss")

  ;; http://www.cs.man.ac.uk/~pjj/bnf/c_syntax.bnf
  ;; http://www.open-std.org/JTC1/SC22/WG14/www/docs/n1256.pdf

  ;; ===========================================================================
  ;; PARSER
  ;; ===========================================================================

  (define (position->string p)
    (format "(~a,~a)" (position-line p) (position-col p)))

  (define (c-parsers src-file)
    (define-syntax (build-src stx)
      (syntax-case stx ()
        ((_ end)
         (syntax (build-src 1 end)))
        ((_ start end)
         (with-syntax ((start-pos (datum->syntax-object
                                   (syntax end)
                                   (string->symbol
                                    (format "$~a-start-pos"
                                            (syntax-object->datum (syntax start))))))
                       (end-pos (datum->syntax-object
                                 (syntax end)
                                 (string->symbol
                                  (format "$~a-end-pos"
                                          (syntax-object->datum (syntax end)))))))
           (syntax
            (make-src src-file
                      (position-line start-pos)
                      (position-col start-pos)
                      (- (position-offset end-pos)
                         (position-offset start-pos))
                      (position-offset start-pos)))))))
    (parser
     (start Decl Decls Read)
     (end EOF)
     (src-pos)
     (tokens BasicTokens Keywords Operators Separators EmptyLiterals)
     ;; TODO: define a syntax-error exception and raise it
     (error (lambda (token-ok? token-name token-value start-pos end-pos)
              (if token-ok?
                  (printf "unexpected token: ~a=~a : ~a - ~a~n"
                          token-name token-value (position->string start-pos) (position->string end-pos))
                  (printf "invalid token: ~a, ~a : ~a - ~a~n"
                          token-name token-value (position->string start-pos) (position->string end-pos)))))
     (grammar
      (Read
       [()
        '()]
       [(Decls)
        $1]
       [(O_BRACE)
        '()]
       [(O_BRACE Decls)
        $2])
      (Decls
       [(Decl)
        (list $1)]
       [(Decls Decl)
        (append $1 (list $2))])
      (Decl
       [(TypeDef) $1]
       [(TypeDecl) $1])
      (TypeDecl
       [(struct IDENTIFIER SEMI_COLON)
        (make-decl:type:tagged (make-type:struct $2 #f))]
       [(struct IDENTIFIER O_BRACE StructFieldList C_BRACE SEMI_COLON)
        (make-decl:type:tagged (make-type:struct $2 $4))]
       [(union IDENTIFIER SEMI_COLON)
        (make-decl:type:tagged (make-type:union $2 #f))]
       [(union IDENTIFIER O_BRACE StructFieldList C_BRACE SEMI_COLON)
        (make-decl:type:tagged (make-type:union $2 $4))])
;      (TypeDefs
;       [(TypeDef)
;        (list $1)]
;       [(TypeDefs TypeDef)
;        (append $1 (list $2))])
      (TypeDef
       [(typedef Type TypeDefName SEMI_COLON)
        (make-decl:type:def $2 (list $3))])
;      (TypeDefNames
;       [(TypeDefName) (list $1)]
;       [(TypeDefNames COMMA TypeDefName) (append $1 (list $3))])
      (TypeDefName
       ;; XXX: generalize this representation
       [(IDENTIFIER) (cons $1 #f)]
       #;[(* IDENTIFIER) (cons $2 #t)])
      (Type
       [(struct IDENTIFIER)
        (make-type:struct $2 #f)]
       [(struct IDENTIFIER O_BRACE StructFieldList C_BRACE)
        (make-type:struct $2 $4)]
       [(struct O_BRACE StructFieldList C_BRACE)
        (make-type:struct #f $3)]
       [(union IDENTIFIER)
        (make-type:union $2 #f)]
       [(union O_BRACE StructFieldList C_BRACE)
        (make-type:union #f $3)]
       ;; XXX: wrap these in (make-type:ref ----)?
       [(IDENTIFIER)
        (make-type:ref $1)]
       [(int) (make-type:ref 'int)]
       [(char) (make-type:ref 'char)]
       [(double) (make-type:ref 'double)]
       [(float) (make-type:ref 'float)]
       [(long) (make-type:ref 'long)]
       [(short) (make-type:ref 'short)]
       [(Type *)
        (make-type:pointer $1)]
       ;; XXX: generalize to arbitrary number of brackets
       [(Type O_BRACKET Expression C_BRACKET)
        (make-type:array $1 $3)])
      (StructFieldList
       [() null]
       [(StructFieldList StructField)
        (append $1 (list $2))])
      (StructField
       [(Type IDENTIFIER SEMI_COLON)
        (cons $2 $1)]
;        (make-structure-field $1 $2)]
       ;; XXX: generalize this, lord have mercy
       [(Type IDENTIFIER O_BRACKET Expression C_BRACKET SEMI_COLON)
        (cons $2 (make-type:array $1 $4))])
;        (make-structure-field (make-array-type $1 $4) $2)])
      (Expression
       [(AddExpression) $1])
      (AddExpression
       [(MultExpression) $1]
       [(AddExpression + MultExpression)
        (make-expr:binop '+ $1 $3)]
       [(AddExpression - MultExpression)
        (make-expr:binop '- $1 $3)])
      (MultExpression
       [(IDENTIFIER) $1]
       [(Literal) $1]
       [(MultExpression * Literal)
        (make-expr:binop '* $1 $3)]
       [(MultExpression / Literal)
        (make-expr:binop '/ $1 $3)])
      (Literal
       [(INTEGER_LIT) (make-expr:lit 'int $1)]
       [(HEX_LIT) (make-expr:lit 'int $1)]
       [(OCT_LIT) (make-expr:lit 'int $1)]
       [(LONG_LIT) (make-expr:lit 'long $1)]
       [(HEXL_LIT) (make-expr:lit 'long $1)]
       [(OCTL_LIT) (make-expr:lit 'long $1)]
       [(FLOAT_LIT) (make-expr:lit 'float $1)]
       [(DOUBLE_LIT) (make-expr:lit 'double  $1)]
       [(CHAR_LIT) (make-expr:lit 'char $1)]
       [(STRING_LIT) (make-expr:lit 'string $1)])
      )))

  (define (input-source->input-port in)
    (cond
      [(path? in) (open-input-file in)]
      [(string? in) (open-input-string in)]
      [else in]))

  (define (parse-decl in)
    (let ([in (input-source->input-port in)])
      (port-count-lines! in)
      (let ([parsers (c-parsers (object-name in))])
        ((car parsers) (lambda () (c-lexer in))))))

  (define (parse-decls in)
    (let ([in (input-source->input-port in)])
      (port-count-lines! in)
      (let ([parsers (c-parsers (object-name in))])
        ((cadr parsers) (lambda () (c-lexer in))))))

  (define (parse-read in)
    (let* ([in (input-source->input-port in)]
           [nesting (box 0)]
           [initial? #t]
           [lexer (make-c-lexer nesting)])
      (port-count-lines! in)
      (let ([parsers (c-parsers (object-name in))])
        ((caddr parsers)
         (lambda ()
           (begin0
             (lexer in)
             (when (and initial? (zero? (unbox nesting)))
               (set! lexer c-lexer))
             (set! initial? #f)))))))

  (provide parse-decl parse-decls parse-read))