ical-parser.ss
(module ical-parser mzscheme
  (require (lib "yacc.ss" "parser-tools")
           (prefix : (lib "lex-sre.ss" "parser-tools"))
           (lib "lex.ss" "parser-tools"))
  (provide (all-defined))
  
  ;; Helpers
  
  ; Totally-lex returns the entire token stream
  (define (totally-lex lexer input-port)
    (let loop ([r (list)])
      (let ([c (lexer input-port)])
        (if (eq? c 'EOF)
            r
            (loop (append r (list c)))))))
  (define (totally-lex/string lexer string)
    (totally-lex lexer (open-input-string string)))
  
  ; Parse calls the parser
  (define (parse parser lexer port)
    (parser (lambda () (lexer port))))
  (define (parse/string parser lexer string)
    (parse parser lexer (open-input-string string)))
  ; Useful for handwritten token streams to mess with the parser
  (define (parse/fake-lexer parser token-stream)
    (let ([r token-stream])
      (parser
       (lambda ()
         (if (null? r)
             'EOF
             (let ([c (car r)])
               (set! r (cdr r))
               c))))))
  
  ;; The lexer and parser
  
  ; Possible modes:
  ; (NORMAL VCARD VCAL VEVENT VTODO VALUES BASE64* QUOTED_PRINTABLE*)
  ; * = unused
  (define current-lexmode (make-parameter (list 'NORMAL)))
  (define (pop-lexmode! lexmode)
    (current-lexmode
     (let loop ([r (current-lexmode)])
       (if (null? r)
           r
           (let ([c (car r)])
             (if (eq? c lexmode)
                 (cdr r)
                 (loop (cdr r))))))))
  (define (push-lexmode! lexmode)
    (current-lexmode (append (list lexmode) (current-lexmode))))
  
  (define-tokens
    tokens
    (ID STRING))
  (define-empty-tokens
    empty-tokens
    (EOF
     EQ COLON DOT SEMICOLON SPACE HTAB
     LINESEP NEWLINE FAKE-BREAK
     BEGIN_VCARD END_VCARD
     BEGIN_VCAL END_VCAL
     BEGIN_VEVENT END_VEVENT
     BEGIN_VTODO END_VTODO))
  
  (define icalvcal-lexer/normal-mode
    (lexer
     [(eof) 'EOF]
     [(:+ (:: #\: (:? (:or #\newline #\return)))) (begin (push-lexmode! 'VALUES)
                                                         'COLON)]
     [#\; 'SEMICOLON]
     [#\= 'EQ]
     [(:or #\tab #\space #\newline #\return) (icalvcal-lexer/normal-mode input-port)]
     ["BEGIN:VCARD" (begin (push-lexmode! 'VCARD)
                           'BEGIN_VCARD)] 
     ["END:VCARD" (begin (pop-lexmode! 'VCARD)
                         'END_VCARD)]
     [(:or "BEGIN:VCAL" "BEGIN:VCALENDAR") (begin (push-lexmode! 'VCAL)
                                                  'BEGIN_VCAL)] 
     [(:or "END:VCAL" "END:VCALENDAR") (begin (pop-lexmode! 'VCAL)
                                              'END_VCAL)]
     ["BEGIN:VEVENT" (begin (push-lexmode! 'VEVENT)
                            'BEGIN_VEVENT)] 
     ["END:VEVENT" (begin (pop-lexmode! 'VEVENT)
                          'END_VEVENT)]
     ["BEGIN:VTODO" (begin (push-lexmode! 'VTODO)
                           'BEGIN_VTODO)] 
     ["END:VTODO" (begin (pop-lexmode! 'VTODO)
                         'END_VTODO)]
     [(:: (:* (:or #\space #\tab))
          #\"
          (complement (:: any-string (:or #\newline #\return #\; #\: #\=) any-string))
          #\")
      (token-ID lexeme)]
     [(:: (:* (:or #\space #\tab))
          (complement (:: any-string (:or #\tab #\newline #\return #\space #\; #\: #\=) any-string)))
      (token-ID lexeme)]))
  
  (define icalvcal-lexer/values-mode
    (lexer
     [(eof) 'EOF]
     ; handleRFC822LineBreak (case 2)
     ; \cr\lf\space is a fake linebreak
     ; RFC822 has this idea that lines should only be a certain length... great idea guys.
     [(:: (:or #\newline
               (:: #\return #\linefeed))
          (:or #\space #\tab))
      'FAKE-BREAK]
     ; handleRFC822LineBreak (case 1)
     ; ";" is the escape character for lines
     [(:: #\; (:: (:* (:or #\space #\tab))
                  (:+ (:or #\newline
                           (:: #\return #\linefeed)))))
      (icalvcal-lexer/values-mode input-port)]
     [(:: (:or #\newline
               (:: #\return #\linefeed))
          (char-complement (:or #\space #\tab)))
      (begin (pop-lexmode! 'VALUES)
             (file-position input-port 
                            (- (file-position input-port) 1))
             'LINESEP)]
     [(complement (:or (::)
                       (:: any-string (:or #\return #\newline) any-string)))
      (token-STRING lexeme)]))
  
  (define icalvcal-lexer
    (lambda (input-port)
      (cond
        [(eq? (car (current-lexmode)) 'VALUES)
         ; XXX: Doesn't handle Base64 data here (you could do it yourself)
         ; XXX: May not handle QUOTED_PRINTABLEs, but not sure (i.e. send me a broken file)
         (icalvcal-lexer/values-mode input-port)
         ]
        [else
         (icalvcal-lexer/normal-mode input-port)])))
  
  (define icalvcal-parser
    (parser
;     (debug "~/out.ss")
     (suppress)
     (error (lambda args (printf "~a~n" args)))
     
     (start mime)
     (end EOF)
     (tokens tokens empty-tokens)
     
     (grammar
      [mime ((vobjects) $1)]
      [vobjects ((vobject vobjects) (append (list $1) $2))
                ((vobject) (list $1))]
      [vobject ((vcard) $1)
               ((vcal) $1)]
      [items ((item items) (append (list $1) $2))
             ((item) (list $1))]
      [item ((prop COLON values LINESEP) `(,(car $1) ,@(cdr $1) ,@$3))] 
      [prop ((name attr_params) (list $1 $2))
            ((name) (list $1 (list)))]
      [attr_params ((attr_param attr_params) (append (list $1) $2))
                   ((attr_param) (list $1))]
      [attr_param ((SEMICOLON attr) $2)]
      [attr ((name) (list $1 #f))
            ((name EQ name) (list $1 (symbol->string $3)))]
      [name ((ID) (string->symbol $1))]
      [values ((value SEMICOLON values) (append (list $1) $3))
              ((value) (list $1))]
      [value ((value-part value) (string-append $1 $2))
             ((value-part) $1)]
      [value-part ((STRING) $1)
                  ((FAKE-BREAK) "")]
      [calitems ((calitem calitems) (append (list $1) $2))
                ((calitem) (list $1))
                ; These two lines are different from libicalvcal to return x-exprs
                ((items calitems) (append $1 $2)) 
                ((items) $1) ]
      [calitem ((eventitem) $1)
               ((todoitem) $1)]
      [vcard ((BEGIN_VCARD items END_VCARD) `(VCARD ,@$2)) 
             ((BEGIN_VCARD END_VCARD) `(VCARD))]
      [vcal ((BEGIN_VCAL calitems END_VCAL) `(VCAL ,@$2))
            ((BEGIN_VCAL END_VCAL) `(VCAL))]
      [eventitem ((BEGIN_VEVENT items END_VEVENT) `(VEVENT ,@$2))
                 ((BEGIN_VEVENT END_VEVENT) `(VEVENT))]
      [todoitem ((BEGIN_VTODO items END_VTODO) `(VTODO ,@$2))
                ((BEGIN_VTODO END_VTODO) `(VTODO))]))))