reader.ss
#lang scheme/base
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; DATE-TZ.plt - provides time-zone-based date calculations
;;
;; Bonzai Lab, LLC.  All rights reserved.
;;
;; released under LGPL.
;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reader.ss - converts a zoneinfo file into a list of zones, rules, and links.
;; yc 10/2/2009 - first version
;; yc 10/19/2009 - use depend.ss for centralized dependency

(require "base.ss" (except-in "depend.ss" make-time time? time-second)
         scheme/pretty
         )

(define-struct time (hour minute second nano tz))

(define (parse-time str (make-if-not-time? #t))
  (if-it (regexp-match #px"^[+-]?(\\d+)(:(\\d+)(:(\\d+))?)?" str)
         (apply make-time 
                (map (lambda (n)
                       (if (not n) 0
                           (string->number n)))
                     (list (second it)
                           (fourth it)
                           (sixth it) #f #f)))
         (if (not make-if-not-time?) #f
             (make-time 0 0 0 0 0))))

;; time to write a function that'll read a whole zoneinfo file.
;; rules
;; a line starting with # means it'll be skipped.
;;
;; we need special ways of reading the file I think... perhaps just using regular read would work...
;; and we need to skip the comments...

(define (read-zoneinfo in)
  (define (read-helper in)
    (define (helper acc)
      (parameterize ((current-readtable
                      (make-readtable (current-readtable)
                                      #\# #\; #f)))
        (let ((term (read in)))
          (cond ((eof-object? term)
                 (reverse acc))
                (else (helper (cons term acc)))))))
    (helper '()))
  (define (helper acc)
    (let ((l (read-line in 'any)))
      (cond ((eof-object? l)
             (reverse acc))
            ((equal? l "")
             (helper acc))
            (else
             (let ((res (read-helper (open-input-string l))))
               (cond ((null? res)
                      (helper acc))
                     ((equal? (car res) 'Rule)
                      (helper (cons (apply parse-rule (cdr res))
                                    acc)))
                     ((equal? (car res) 'Zone)
                      (helper (cons (parse-zone (cdr res)) acc)))
                     ((equal? (car res) 'Link)
                      (helper (cons (apply parse-link (cdr res)) acc)))
                     ((equal? (caar acc) 'zone)
                      (helper (cons (parse-zone res (cdar acc)) acc)))
                     (else
                      (helper (cons res acc)))))))))
  (helper '()))
;; (trace read-zoneinfo)

(provide/contract
 (read-zoneinfo (-> input-port? any))
 )

;; we want a function to parse the time...
;; seems like there are multiple of such functions, though.
;; 1 regular time - 2am,2pm, etc.
;; 2 hours (for offsets) -2
;; 3 the *special time* such as 2:00s(standard time), 2:00u(universal time), 2:00w(wall time)
;; the common part is the numbers.
;; the non common part is the precedent & trailings.
;; so what we want to do is to parse that part out...
(define (parse-offset str (make-if-not-time? #t))
  (let ((time (parse-time str make-if-not-time?))
        (sign (regexp-match #px"^\\s*(\\+|-)?\\d+" str)))
    (if (not time) #f
        ((cond ((not sign) +)
               ((equal? (second sign) "-") -)
               (else +)) (h->s (time-hour time)
                               (time-minute time)
                               (time-second time))))))

(define (parse-extended-time str)
  (let ((time (parse-time str))
        (unit (regexp-match #px"\\d+\\s*(w|u|g|z|s)?\\s*$" str)))
    (list (time-hour time)
          (time-minute time)
          (time-second time)
          (if unit
              (if-it (second unit)
                     (string->symbol it)
                     'w)
              'w))))
;; this ought to produce a particular time (no tz attached)
(define (extended-time-helper t)
  (if (number? t)
      (list t 0 0 #f)
      (case t
        ((-) (list 0 0 0 #f))
        (else
         (parse-extended-time (symbol->string t))))))

(define (on-the-day-helper on month)
  (define (days-greater-helper days)
    ;; we need to spit the day up
    (if-it (regexp-match #px"(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(>|<|>=|<=)(\\d+)"
                         days)
           (let ((wday
                  (if-it (weekday->num (cadr it) #t)
                         it
                         (error 'wday-helper "Unkonwn weekday ~a" (cadr it))))
                 (match (string->symbol (caddr it)))
                 (day (string->number (cadddr it))))
             (list 'match wday match day))
           (error 'on-the-day-helper
                  "Unrecognized pattern ~a in ~a"
                  days
                  (list on month))))
  (cond ((symbol? on)
         (case on
           ((lastSun) ;; means the last sunday.
            (list 'last 0))
           ((lastMon) ;; means last monday.
            (list 'last 1))
           ((lastTue) ;; means last monday.
            (list 'last 2))
           ((lastWed) ;; means last monday.
            (list 'last 3))
           ((lastThu) ;; means last monday.
            (list 'last 4))
           ((lastFri) ;; means last monday.
            (list 'last 5))
           ((lastSat) ;; means last monday.
            (list 'last 6))
           (else ;; we need to split it up...
            (days-greater-helper (symbol->string on)))))
        ((number? on)
         ;; (list 'day on)
         on)
        (else
         (error 'on-the-day-helper
                "unknown on the day ~a in ~a"
                on
                (list on month)))))


;; first thing - convert rules into their correct values.
;; name = string
;; from = (or/c integer? maximum minimum max min) = anything else ignored.
;; (min = epoch)
;; to = integer, max, min, only (means repeating from)
;; type = '-' (inclusive from - to), otherwise 'not sure for now' (string)
;; in = the month name (or number)? maybe abbreviated. (string)
(define (parse-rule name from to type in on at save letter/s)
  (define name-helper symbol->string)
  (define (from-year-helper year)
    (case year
      ((maximum max) +inf.0)
      ((min minimum) -inf.0)
      (else year)))
  (define (to-year-helper from to)
    (case to
      ((maximum max) +inf.0)
      ((min minimum) -inf.0)
      ((only) from)
      (else to)))
  (define (type-helper type from/y to/y)
    (case type
      ((-) type)
      (else
       (error 'type-helper
              "unknown type ~a in ~a"
              type
              (list name from to type in on at save letter/s)))))
  (define (month-helper in)
    (if-it (month->num in #t)
           it
           (error 'in-month-helper
                  "unknown month ~a in ~a"
                  in
                  (list name from to type in on at save letter/s))))
  (define (save-helper save)
    (if (number? save)
        (h->s save)
        (case save
          ((-) 0)
          (else
           (parse-offset (symbol->string save)
                         )))))
  (define (letter/s-helper letter/s)
    (case letter/s
      ((-) "")
      (else (symbol->string letter/s))))
  (let* ((name (name-helper name))
         (from-year (from-year-helper from))
         (to-year (to-year-helper from-year to))
         (type (type-helper type from-year to-year))
         (in-month (month-helper in))
         (on-the-day (on-the-day-helper on in-month))
         (at-when (extended-time-helper at))
         (offsets (save-helper save))
         (letter/s (letter/s-helper letter/s)))
    (list 'rule name from-year to-year type in-month on-the-day
          at-when offsets letter/s)))


#|
;; according to ZIC (8)

       A rule line has the form

            Rule  NAME  FROM  TO    TYPE  IN   ON       AT    SAVE  LETTER/S

       For example:

            Rule  US    1967  1973  -     Apr  lastSun  2:00  1:00  D


       NAME    Gives the (arbitrary) name of the set of rules this rule is part of.

       FROM    Gives the first year in which the rule applies.  Any integer year can be supplied; the  Gregorian
               calendar  is assumed.  The word minimum (or an abbreviation) means the minimum year representable
               as an integer.  The word maximum (or an abbreviation) means the maximum year representable as  an
               integer.   Rules  can describe times that are not representable as time values, with the unrepre-
               sentable times ignored; this allows rules to be portable among hosts with  differing  time  value
               types.

       TO      Gives  the  final year in which the rule applies.  In addition to minimum and maximum (as above),
               the word only (or an abbreviation) may be used to repeat the value of the FROM field.

       TYPE    Gives the type of year in which the rule applies.  If TYPE is - then  the  rule  applies  in  all
               years between FROM and TO inclusive.  If TYPE is something else, then zic executes the command
                    yearistype year type
               to  check  the  type  of  a year: an exit status of zero is taken to mean that the year is of the
               given type; an exit status of one is taken to mean that the year is not of the given type.

       IN      Names the month in which the rule takes effect.  Month names may be abbreviated.

       ON      Gives the day on which the rule takes effect.  Recognized forms include:

                    5        the fifth of the month
                    lastSun  the last Sunday in the month
                    lastMon  the last Monday in the month
                    Sun>=8   first Sunday on or after the eighth
                    Sun<=25  last Sunday on or before the 25th

               Names of days of the week may be abbreviated or spelled out in full.  Note that there must be  no
               spaces within the ON field.

       AT      Gives the time of day at which the rule takes effect.  Recognized forms include:

                    2        time in hours
                    2:00     time in hours and minutes
                    15:00    24-hour format time (for times after noon)
                    1:28:14  time in hours, minutes, and seconds
                    -        equivalent to 0

               where  hour 0 is midnight at the start of the day, and hour 24 is midnight at the end of the day.
               Any of these forms may be followed by the letter w if the given time is local "wall clock"  time,
               s  if  the  given  time is local "standard" time, or u (or g or z) if the given time is universal
               time; in the absence of an indicator, wall clock time is assumed.

       SAVE    Gives the amount of time to be added to local standard time when the rule  is  in  effect.   This
               field  has  the  same  format  as the AT field (although, of course, the w and s suffixes are not
               used).

       LETTER/S
               Gives the "variable part" (for example, the "S" or "D" in "EST" or "EDT") of time zone  abbrevia-
               tions to be used when this rule is in effect.  If this field is -, the variable part is null.

;;|#

;; second - time to parse zones.
#|
       A zone line has the form

            Zone  NAME                GMTOFF  RULES/SAVE  FORMAT  [UNTIL]

       For example:

            Zone  Australia/Adelaide  9:30    Aus         CST     1971 Oct 31 2:00

       The fields that make up a zone line are:

       NAME  The  name of the time zone.  This is the name used in creating the time conversion information file
             for the zone.

       GMTOFF
             The amount of time to add to UTC to get standard time in this zone.  This field has the same format
             as  the  AT  and  SAVE fields of rule lines; begin the field with a minus sign if time must be sub-
             tracted from UTC.

       RULES/SAVE
             The name of the rule(s) that apply in the time zone or, alternately, an amount of time  to  add  to
             local standard time.  If this field is - then standard time always applies in the time zone.

       FORMAT
             The  format  for  time  zone abbreviations in this time zone.  The pair of characters %s is used to
             show where the "variable part" of the time zone abbreviation goes.  Alternately, a slash (/)  sepa-
             rates standard and daylight abbreviations.

       UNTIL The  time at which the UTC offset or the rule(s) change for a location.  It is specified as a year,
             a month, a day, and a time of day.  If this is specified, the time zone  information  is  generated
             from  the  given  UTC offset and rule change until the time specified.  The month, day, and time of
             day have the same format as the IN, ON, and AT columns of a rule; trailing columns can be  omitted,
             and default to the earliest possible value for the missing columns.

             The  next line must be a "continuation" line; this has the same form as a zone line except that the
             string "Zone" and the name are omitted, as the continuation line will place information starting at
             the  time  specified as the UNTIL field in the previous line in the file used by the previous line.
             Continuation lines may contain an UNTIL field, just as zone lines do, indicating that the next line
             is a further continuation.

;;|#
(define (parse-zone args (prev #f))
  (if (not prev) ;; this is not a continuation line.
      (apply parse-zone-helper args)
      ;; we'll take the first value and apply to the zone itself.
      (apply parse-zone-helper (cons (car prev) args))))

(define (parse-zone-helper name gmtoff rules/save format . until)
  (define (offset-helper offset)
    (if (number? offset)
        (h->s offset)
        (case offset
          ((-) 0)
          (else (parse-offset (symbol->string offset))))))
  (define (rules/save-helper rules/save)
    (case rules/save
      ((-) #f)
      (else
       (let ((save (symbol->string rules/save)))
         (if-it (parse-offset save #f)
                it
                save)))))
  (define (until-helper (year #f) (month 1) (day 1) (time '0)) ;; this can be
    (if (not year) ;; nothing is passed in.
        #f
        (let ((month (month->num month #t)))
          (append (list year month (on-the-day-helper day month))
                  (extended-time-helper time)))))
  (define (format-helper format)
    (if (symbol? format)
        (format-helper (symbol->string format))
        (regexp-replace* #px"%s" format "~a")))
  (list 'zone
        (if (symbol? name) (symbol->string name) name)
        (offset-helper gmtoff)
        (rules/save-helper rules/save)
        (format-helper format)
        (apply until-helper until)))

(define (parse-link from to)
  (list 'link (symbol->string from)
        (symbol->string to)))

;; now that we have the zone.  the question is - how do we handle
;; the usage of the zones?