#lang scheme/base ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; parsing and generating rfc2/822 dates. ;; rfc2822 is a more limited version of rfc822. ;; below is the syntax for both rfc822 & rfc2822. from ;; http://tools.ietf.org/html/rfc2822 ;; http://tools.ietf.org/html/rfc822 ;; ;; NOTE we do not have to worry about CFWS - those would be filtered out by ;; read-network-line module. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; yc 10/12/2009 - first version #| RFC822 syntax. we'll allow lower case. date-time = [ day "," ] date time ; dd mm yy ; hh:mm:ss zzz day = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" date = 1*2DIGIT month 2DIGIT ; day month year ; e.g. 20 Jun 82 month = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" time = hour zone ; ANSI and Military hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT] ; 00:00:00 - 23:59:59 zone = "UT" / "GMT" ; Universal Time ; North American : UT / "EST" / "EDT" ; Eastern: - 5/ - 4 / "CST" / "CDT" ; Central: - 6/ - 5 / "MST" / "MDT" ; Mountain: - 7/ - 6 / "PST" / "PDT" ; Pacific: - 8/ - 7 / 1ALPHA ; Military: Z = UT; ; A:-1; (J not used) ; M:-12; N:+1; Y:+12 / ( ("+" / "-") 4DIGIT ) ; Local differential ; hours+min. (HHMM) ;;|# #| RFC 2822 (allowing lower case). date-time = [ day-of-week "," ] date FWS time [CFWS] day-of-week = ([FWS] day-name) / obs-day-of-week day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" date = day month year year = 4*DIGIT / obs-year month = (FWS month-name FWS) / obs-month month-name = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" day = ([FWS] 1*2DIGIT) / obs-day time = time-of-day FWS zone time-of-day = hour ":" minute [ ":" second ] hour = 2DIGIT / obs-hour minute = 2DIGIT / obs-minute second = 2DIGIT / obs-second zone = (( "+" / "-" ) 4DIGIT) / obs-zone obs-day-of-week = [CFWS] day-name [CFWS] obs-year = [CFWS] 2*DIGIT [CFWS] obs-month = CFWS month-name CFWS obs-day = [CFWS] 1*2DIGIT [CFWS] obs-hour = [CFWS] 2DIGIT [CFWS] obs-minute = [CFWS] 2DIGIT [CFWS] obs-second = [CFWS] 2DIGIT [CFWS] obs-zone = "UT" / "GMT" / ; Universal Time ;;|# (require "base.ss" "date.ss" "depend.ss") ;; handling wday (sun, mon, tue, wed, thu, fri, sat) - we already have this function in ;; weekday->num. ;; handling month - we already have month->num. ;; handling zone - we need to handle the following timezone offsets. ;; UT GMT +-HHMM EST EDT CST CDT MST MDT PST PDT A - I, I - M, N-Z; M = Y??? (define (tz-abbr->offset abbr) (define (number-helper abbr) (let-values (((q r) (quotient/remainder abbr 100))) (+ (* q 3600) (* r 60)))) (define (helper abbr) (cond ((symbol? abbr) (helper (symbol->string abbr))) ((string? abbr) (string-downcase abbr)) (else #f))) (number-helper (if (number? abbr) abbr (assoc/cdr (helper abbr) '(("ut" . 0) ("gmt" . 0) ("z" . 0) ("est" . -0500) ("edt" . -0400) ("cst" . -0600) ("cdt" . -0500) ("mst" . -0700) ("mdt" . -0600) ("pst" . -0800) ("pdt" . -0700) ("a" . 0100) ("b" . 0200) ("c" . 0300) ("d" . 0400) ("e" . 0500) ("f" . 0600) ("g" . 0700) ("h" . 0800) ("i" . 0900) ("k" . 1000) ("l" . 1100) ("m" . 1200) ("n" . -0100) ("o" . -0200) ("p" . -0300) ("q" . -0400) ("r" . -0500) ("s" . -0600) ("t" . -0700) ("u" . -0800) ("v" . -0900) ("w" . -1000) ("x" . -1100) ("y" . -1200)))))) ;; (trace tz-abbr->offset) ;; what is the best way to parse the date? ;; it isn't that hard to parse but should I write a parser or regular expression based parser? (define rfc822 #px"^\\s*(?i:(mon|tue|wed|thu|fri|sat),\\s+)?(\\d{1,2})\\s+(?i:(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec))\\s*((\\d\\d)?\\d\\d)\\s+(\\d\\d):(\\d\\d)(:(\\d\\d))?\\s+(?i:([+-]?\\d\\d\\d\\d|ut|gmt|z|est|edt|cst|cdt|mst|mdt|pst|pdt|a|b|c|d|e|f|g|h|i|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y))\\s*$") (define (rfc822->date str) (define (year-helper year) (if (< year 100) (+ year (if (< year 70) 2000 1900)) year)) (define (tz-helper tz) (tz-abbr->offset (if (regexp-match #px"[+-]?\\d\\d\\d\\d" tz) (string->number tz) tz))) (define (helper wday day mon year y2 hh mm :ss ss tz) (build-date (year-helper (string->number year)) (month->num mon) (string->number day) (string->number hh) (string->number mm) (string->number (if (not ss) "0" ss)) #:tz (tz-helper tz))) (if-it (rfc822? str) (apply helper (cdr it)) (error 'rfc822->date "invalid rfc822 date: ~a" str))) (define (rfc822? str) (regexp-match rfc822 str)) ;; what do you do when going backwards? ;; we would not be using any of the abbreviated timezones... (define (date->rfc822 date) (define (weekday-helper date) (assoc/cdr (date-week-day date) '((0 . Sun) (1 . Mon) (2 . Tue) (3 . Wed) (4 . Thu) (5 . Fri) (6 . Sat)))) (define (month-helper date) (assoc/cdr (date-month date) '((1 . Jan) (2 . Feb) (3 . Mar) (4 . Apr) (5 . May) (6 . Jun) (7 . Jul) (8 . Aug) (9 . Sep) (10 . Oct) (11 . Nov) (12 . Dec)))) (define (two-digit x) (format (if (< x 10) "0~a" "~a") x)) (define (tz-helper date) (let-values (((q r) (quotient/remainder (date-zone-offset date) 3600))) (let ((q (abs q)) (r (/ (abs r) 10))) (format "~a~a~a" (if (>= (date-zone-offset date) 0) "+" "-") (two-digit q) (two-digit r))))) (format "~a, ~a ~a ~a ~a:~a:~a ~a" (weekday-helper date) (date-day date) (month-helper date) (date-year date) (two-digit (date-hour date)) (two-digit (date-minute date)) (two-digit (date-second date)) (tz-helper date))) #| (define dates '("sat, 2 feb 2009 10:10 ut" "2 feb 2009 10:10 ut" "2 feb 2009 10:10:10 ut" "2 feb 2009 10:10:10 +0400" "2 feb 2009 10:10:10 0400" "2 feb 2009 10:10:10 pdt" "2 feb 09 10:10:10 -0400" )) ;;(map (lambda (d) (regexp-match re d)) dates) (map (lambda (d) (rfc822->date d)) dates) ;;|# ;; CONTRACT (provide/contract (rfc822->date (-> string? date?)) (rfc822? (-> string? any)) (date->rfc822 (-> date? string?)) )