rfc822.ss
#lang scheme/base
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; parsing and generating rfc2/822 dates.
;; rfc2822 is a more limited version of rfc822.
;; below is the syntax for both rfc822 & rfc2822. from
;; http://tools.ietf.org/html/rfc2822
;; http://tools.ietf.org/html/rfc822
;;
;; NOTE we do not have to worry about CFWS - those would be filtered out by
;; read-network-line module. 
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; yc 10/12/2009 - first version

#|
RFC822 syntax. we'll allow lower case. 
     date-time   =  [ day "," ] date time        ; dd mm yy
                                                 ;  hh:mm:ss zzz
     
     day         =  "Mon"  / "Tue" /  "Wed"  / "Thu"
                 /  "Fri"  / "Sat" /  "Sun"
     
     date        =  1*2DIGIT month 2DIGIT        ; day month year
                                                 ;  e.g. 20 Jun 82
     
     month       =  "Jan"  /  "Feb" /  "Mar"  /  "Apr"
                 /  "May"  /  "Jun" /  "Jul"  /  "Aug"
                 /  "Sep"  /  "Oct" /  "Nov"  /  "Dec"
     
     time        =  hour zone                    ; ANSI and Military
     
     hour        =  2DIGIT ":" 2DIGIT [":" 2DIGIT]
                                                 ; 00:00:00 - 23:59:59
     
     zone        =  "UT"  / "GMT"                ; Universal Time
                                                 ; North American : UT
                 /  "EST" / "EDT"                ;  Eastern:  - 5/ - 4
                 /  "CST" / "CDT"                ;  Central:  - 6/ - 5
                 /  "MST" / "MDT"                ;  Mountain: - 7/ - 6
                 /  "PST" / "PDT"                ;  Pacific:  - 8/ - 7
                 /  1ALPHA                       ; Military: Z = UT;
                                                 ;  A:-1; (J not used)
                                                 ;  M:-12; N:+1; Y:+12
                 / ( ("+" / "-") 4DIGIT )        ; Local differential
                                                 ;  hours+min. (HHMM)

;;|#
#|

RFC 2822 (allowing lower case).

date-time       =       [ day-of-week "," ] date FWS time [CFWS]

day-of-week     =       ([FWS] day-name) / obs-day-of-week

day-name        =       "Mon" / "Tue" / "Wed" / "Thu" /
                        "Fri" / "Sat" / "Sun"

date            =       day month year

year            =       4*DIGIT / obs-year

month           =       (FWS month-name FWS) / obs-month

month-name      =       "Jan" / "Feb" / "Mar" / "Apr" /
                        "May" / "Jun" / "Jul" / "Aug" /
                        "Sep" / "Oct" / "Nov" / "Dec"

day             =       ([FWS] 1*2DIGIT) / obs-day

time            =       time-of-day FWS zone

time-of-day     =       hour ":" minute [ ":" second ]

hour            =       2DIGIT / obs-hour

minute          =       2DIGIT / obs-minute

second          =       2DIGIT / obs-second

zone            =       (( "+" / "-" ) 4DIGIT) / obs-zone

obs-day-of-week =       [CFWS] day-name [CFWS]

obs-year        =       [CFWS] 2*DIGIT [CFWS]

obs-month       =       CFWS month-name CFWS

obs-day         =       [CFWS] 1*2DIGIT [CFWS]

obs-hour        =       [CFWS] 2DIGIT [CFWS]

obs-minute      =       [CFWS] 2DIGIT [CFWS]

obs-second      =       [CFWS] 2DIGIT [CFWS]

obs-zone        =       "UT" / "GMT" /          ; Universal Time

;;|#
(require "base.ss" "date.ss" "depend.ss")
;; handling wday (sun, mon, tue, wed, thu, fri, sat) - we already have this function in
;; weekday->num.
;; handling month - we already have month->num.
;; handling zone - we need to handle the following timezone offsets.
;; UT GMT +-HHMM EST EDT CST CDT MST MDT PST PDT A - I, I - M, N-Z; M = Y???
(define (tz-abbr->offset abbr)
  (define (number-helper abbr)
    (let-values (((q r)
                  (quotient/remainder abbr 100)))
      (+ (* q 3600) (* r 60))))
  (define (helper abbr)
    (cond ((symbol? abbr)
           (helper (symbol->string abbr)))
          ((string? abbr)
           (string-downcase abbr))
          (else #f)))
  (number-helper (if (number? abbr) abbr 
                     (assoc/cdr (helper abbr)
                                '(("ut" . 0)
                                  ("gmt" . 0)
                                  ("z" . 0)
                                  ("est" . -0500)
                                  ("edt" . -0400)
                                  ("cst" . -0600)
                                  ("cdt" . -0500)
                                  ("mst" . -0700)
                                  ("mdt" . -0600)
                                  ("pst" . -0800)
                                  ("pdt" . -0700)
                                  ("a" . 0100)
                                  ("b" . 0200)
                                  ("c" . 0300)
                                  ("d" . 0400)
                                  ("e" . 0500)
                                  ("f" . 0600)
                                  ("g" . 0700)
                                  ("h" . 0800)
                                  ("i" . 0900)
                                  ("k" . 1000)
                                  ("l" . 1100)
                                  ("m" . 1200)
                                  ("n" . -0100)
                                  ("o" . -0200)
                                  ("p" . -0300)
                                  ("q" . -0400)
                                  ("r" . -0500)
                                  ("s" . -0600)
                                  ("t" . -0700)
                                  ("u" . -0800)
                                  ("v" . -0900)
                                  ("w" . -1000)
                                  ("x" . -1100)
                                  ("y" . -1200))))))
;; (trace tz-abbr->offset)
;; what is the best way to parse the date?
;; it isn't that hard to parse but should I write a parser or regular expression based parser?

(define rfc822 #px"^\\s*(?i:(mon|tue|wed|thu|fri|sat),\\s+)?(\\d{1,2})\\s+(?i:(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec))\\s*((\\d\\d)?\\d\\d)\\s+(\\d\\d):(\\d\\d)(:(\\d\\d))?\\s+(?i:([+-]?\\d\\d\\d\\d|ut|gmt|z|est|edt|cst|cdt|mst|mdt|pst|pdt|a|b|c|d|e|f|g|h|i|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y))\\s*$")

(define (rfc822->date str)
  (define (year-helper year)
    (if (< year 100) 
        (+ year (if (< year 70) 2000 1900))
        year))
  (define (tz-helper tz)
    (tz-abbr->offset (if (regexp-match #px"[+-]?\\d\\d\\d\\d" tz)
                         (string->number tz)
                         tz)))
  (define (helper wday day mon year y2 hh mm :ss ss tz)
    (build-date (year-helper (string->number year))
                (month->num  mon)
                (string->number day)
                (string->number hh) 
                (string->number mm) 
                (string->number (if (not ss) "0" ss))
                #:tz (tz-helper tz)))
  (if-it (rfc822? str) 
         (apply helper (cdr it))
         (error 'rfc822->date "invalid rfc822 date: ~a" str)))

(define (rfc822? str)
  (regexp-match rfc822 str))

;; what do you do when going backwards?
;; we would not be using any of the abbreviated timezones...
(define (date->rfc822 date)
  (define (weekday-helper date)
    (assoc/cdr (date-week-day date)
               '((0 . Sun) (1 . Mon) (2 . Tue) (3 . Wed) (4 . Thu) (5 . Fri) (6 . Sat))))
  (define (month-helper date)
    (assoc/cdr (date-month date)
               '((1 . Jan) (2 . Feb) (3 . Mar) (4 . Apr) (5 . May) (6 . Jun)
                           (7 . Jul) (8 . Aug) (9 . Sep) (10 . Oct) (11 . Nov) (12 . Dec))))
  (define (two-digit x)
    (format (if (< x 10) "0~a" "~a") x))
  (define (tz-helper date)
    (let-values (((q r)
                  (quotient/remainder (date-zone-offset date) 3600)))
      (let ((q (abs q))
            (r (/ (abs r) 10)))
        (format "~a~a~a" (if (>= (date-zone-offset date) 0) "+" "-")
                (two-digit q) (two-digit r)))))
  (format "~a, ~a ~a ~a ~a:~a:~a ~a" 
          (weekday-helper date)
          (date-day date)
          (month-helper date)
          (date-year date)
          (two-digit (date-hour date))
          (two-digit (date-minute date))
          (two-digit (date-second date))
          (tz-helper date)))

#|
(define dates
  '("sat, 2 feb 2009 10:10 ut"
    "2 feb 2009 10:10 ut"
    "2 feb 2009 10:10:10 ut"
    "2 feb 2009 10:10:10 +0400"
    "2 feb 2009 10:10:10 0400"
    "2 feb 2009 10:10:10 pdt"
    "2 feb 09 10:10:10 -0400"
    ))

;;(map (lambda (d) (regexp-match re d)) dates)
(map (lambda (d) (rfc822->date d)) dates)
;;|#
;; CONTRACT
(provide/contract
 (rfc822->date (-> string? date?))
 (rfc822? (-> string? any))
 (date->rfc822 (-> date? string?))
 )