rfc3339.ss
;;; @Package     rfc3339
;;; @Subtitle    RFC3339 Date and Time Format in Scheme
;;; @HomePage    http://www.neilvandyke.org/rfc3339-scheme/
;;; @Author      Neil Van Dyke
;;; @Version     0.3
;;; @Date        2009-03-03
;;; @PLaneT      neil/rfc3339:1:1

;; $Id: rfc3339.ss,v 1.76 2009/03/04 01:55:04 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2005--2009 Neil Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

#lang scheme/base

(require srfi/9)

;;; @section Introduction

;;; The @b{rfc3339} package implements parsing, formatting, and simple
;;; validation of @uref{ftp://ftp.rfc-editor.org/in-notes/rfc3339.txt, RFC3339}
;;; date and time format, which is a subset of
;;; @uref{http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html,
;;; ISO 8601}, intended for use in Internet protocols.
;;;
;;; Note that full Scheme support of ISO 8601 and various time-related
;;; computation is a very different project of the author, and not at all the
;;; intention of @b{rfc3339}.

;;; @b{rfc3339} requires R5RS, SRFI-6, SRFI-9, and two particular regular
;;; expression functions.  Note that the regular expression functions in
;;; Pregexp 1e9 will not work, but are expected to work in subsequent versions
;;; of Pregexp.  Thus far, @b{rfc3339} has only been tested under PLT Scheme.

;;; @section Record Type

;;; @code{rfc3339-record} is an abstract data type for the information in an
;;; RFC3339 format time and date.  (``@code{rfc3339-string}'' is used in
;;; identifiers to denote the RFC3339 format as a Scheme string.)

(define-record-type rfc3339-record
  (make-rfc3339-record year month mday hour minute second secfrac offsetmin)
  rfc3339-record?
  (year      rfc3339-record:year      rfc3339-record:set-year!)
  (month     rfc3339-record:month     rfc3339-record:set-month!)
  (mday      rfc3339-record:mday      rfc3339-record:set-mday!)
  (hour      rfc3339-record:hour      rfc3339-record:set-hour!)
  (minute    rfc3339-record:minute    rfc3339-record:set-minute!)
  (second    rfc3339-record:second    rfc3339-record:set-second!)
  (secfrac   rfc3339-record:secfrac   rfc3339-record:set-secfrac!)
  (offsetmin rfc3339-record:offsetmin rfc3339-record:set-offsetmin!))

;;; @defproc make-rfc3339-record year month mday hour minute second secfrac offsetmin
;;;
;;; Construct an @code{rfc3339-record} with the given field values.  Each of
;;; @var{year}, @var{month}, @var{mday}, @var{hour}, @var{minute}, and
;;; @var{second} is @code{#f} or a nonnegative integer.  @var{secfrac} is
;;; @code{#f} or a real number that is greater than or equal to 0.0 and less
;;; than 1.0.  @var{offsetmin} is @code{#f} or a nonnegative integer.  Note
;;; that @var{offsetmin} represents both the hour and minute components of an
;;; RFC3339 string.

;;; @defproc rfc3339-record? x
;;;
;;; Predicate for @code{rfc3339-record}.

;;; @defproc  rfc3339-record:year      rec
;;; @defprocx rfc3339-record:month     rec
;;; @defprocx rfc3339-record:mday      rec
;;; @defprocx rfc3339-record:hour      rec
;;; @defprocx rfc3339-record:minute    rec
;;; @defprocx rfc3339-record:second    rec
;;; @defprocx rfc3339-record:secfrac   rec
;;; @defprocx rfc3339-record:offsetmin rec
;;;
;;; Get the respective field value of @code{rfc3339-record} @var{rec}.

;;; @defproc  rfc3339-record:set-year!      rec val
;;; @defprocx rfc3339-record:set-month!     rec val
;;; @defprocx rfc3339-record:set-mday!      rec val
;;; @defprocx rfc3339-record:set-hour!      rec val
;;; @defprocx rfc3339-record:set-minute!    rec val
;;; @defprocx rfc3339-record:set-second!    rec val
;;; @defprocx rfc3339-record:set-secfrac!   rec val
;;; @defprocx rfc3339-record:set-offsetmin! rec val
;;;
;;; Set the respective field value of @code{rfc3339-record} @var{rec} to
;;; @var{val}.

;;; @defproc rfc3339-record->list rec
;;;
;;; Yields a list of the @code{rfc3339-record} @var{rec} fields, corresponding
;;; to the arguments of the @code{make-rfc3339-record} procedure.
;;;
;;; @lisp
;;; (rfc3339-record->list
;;;  (make-rfc3339-record 1985 4 12 23 20 50 0.52 0))
;;; @result{} (1985 4 12 23 20 50 0.52 0)
;;; @end lisp

(define (rfc3339-record->list rec)
  (list (rfc3339-record:year      rec)
        (rfc3339-record:month     rec)
        (rfc3339-record:mday      rec)
        (rfc3339-record:hour      rec)
        (rfc3339-record:minute    rec)
        (rfc3339-record:second    rec)
        (rfc3339-record:secfrac   rec)
        (rfc3339-record:offsetmin rec)))

;;; @section Parsing

;;; The parsing procedures are for constructing a @code{rfc3339-record}s,
;;; lists, and vectors from RFC3339 strings.  The underlying parser can also
;;; apply a user-supplied closure directly.

;;; @defproc parse-rfc3339-string str constructor
;;;
;;; Parses RFC3339 string @var{str} and applies procedure @var{constructor}
;;; with the parsed values.  The arguments of @var{constructor} are the same
;;; as those of @code{make-rfc3339-record}.

(define parse-rfc3339-string
  (let ((rx (regexp
             (string-append
              "^ *"
              "(?:([0-9]+)(?:-([0-9]+)(?:-([0-9]+))?)?)?"
              "([Tt]| +)?"
              "(?:([0-9]+)(?::([0-9]+)(?::([0-9]+)(?:(.[0-9]+))?)?)?)?"
              ;;" *"
              "(?:([Zz])|(?:(["
              "-" ; regexp-cc-minus for pregexp 1e9
              "+])(?:([0-9]+)?(?::([0-9]+)?))))?"
              " *$"))))
    (lambda (str constructor)
      (cond ((regexp-match rx str)
             =>
             (lambda (result)
               (apply (lambda (whole dy dm dd t th tm ts tf z os oh om)
                        ;; Note: Very gross, but we want to use the limited
                        ;; (and hopefully fast native) regexp.
                        (and (and dy (not dm) (not t) th)
                             (begin (set! th (string-append dy th))
                                    (set! dy #f)))
                        (constructor
                         (if dy (string->number dy) #f)
                         (if dm (string->number dm) #f)
                         (if dd (string->number dd) #f)
                         (if th (string->number th) #f)
                         (if tm (string->number tm) #f)
                         (if ts (string->number ts) #f)
                         (if tf (string->number tf) #f)
                         (cond (z 0)
                               ((or oh om)
                                (* (if (equal? os "-") -1 1)
                                   (+ (* (if oh (string->number oh) 0) 60)
                                      (if om (string->number om) 0))))
                               (else #f))))
                      result)))
            (else #f)))))

;; TODO: Wait for Pregexp workaround or fix for alternations of clusters, which
;; have a known bug in Pregexp 1e9:
;;
;; (pregexp-match "(b)"     "b")  ==>  ("b" "b")
;; (pregexp-match "(a)|(b)" "b")  ==>  ("b" "b")
;;
;; (require (lib "pregexp.ss"))
;; (define regexp          pregexp)
;; (define regexp-match    pregexp-match)
;; (define regexp-cc-minus "\\-")

;;; @defproc  string->rfc3339-record str
;;;
;;; Yields an @code{rfc3339-record} from RFC3339 string @var{str}.

(define (string->rfc3339-record str)
  (parse-rfc3339-string str make-rfc3339-record))

;;; @defproc  rfc3339-string->list   str
;;; @defprocx rfc3339-string->vector str
;;;
;;; Yields a list or vector (respectively) from the parsed values of RFC3339
;;; string @var{str}.  The list and vector elements correspond to the arguments
;;; of @code{make-rfc3339-record}.
;;;
;;; @lisp
;;; (rfc3339-string->list   "1985-04-12T23:20:69.52+5:0")
;;; @result{}  (1985 4 12 23 20 69 0.52 300)
;;; (rfc3339-string->vector "1985-04-12T23:20:69.52+5:0")
;;; @result{} #(1985 4 12 23 20 69 0.52 300)
;;; @end lisp

(define (rfc3339-string->list   str) (parse-rfc3339-string str list))
(define (rfc3339-string->vector str) (parse-rfc3339-string str vector))

;;; @section Formatting

;;; An RFC3339 string format can be obtained from an @code{rfc3339-record}.

(define %write-zpad
  (let ((zeroes '#("" "0" "00" "000" "0000")))
    (lambda (num width port)
      (if num
          (let ((pad (cond ((< num 10)   (- width 1))
                           ((< num 100)  (- width 2))
                           ((< num 1000) (- width 3))
                           (else         0))))
            (and (> pad 0)
                 (display (vector-ref zeroes pad) port))
            (display num port))
          (display (vector-ref zeroes width) port)))))

(define (%write-date port dy dm dd)
  (%write-zpad dy  4 port)
  (write-char                  #\-   port)
  (%write-zpad dm  2 port)
  (write-char                  #\-   port)
  (%write-zpad dd  2 port))

(define (%write-time port th tm ts tf)
  (%write-zpad th  2 port)
  (write-char                  #\:   port)
  (%write-zpad tm  2 port)
  (write-char                  #\:   port)
  (%write-zpad ts  2 port)
  (and tf
       (if (zero? tf)
           (display ".0" port)
           (let ((str (number->string tf)))
             (if (equal? str "0.0")
                 (display ".0" port)
                 (display (substring str 1 (string-length str)) port))))))

(define (%write-offset port om)
  (case om
    ((0)  (write-char #\Z port))
    ((#f) (void))
    (else (let ((om (if (< om 0)
                        (begin (write-char #\- port)
                               (- om))
                        (begin (write-char #\+ port)
                               om))))
            (display (quotient om 60) port)
            (write-char #\: port)
            (%write-zpad (remainder om 60) 2 port)))))

(define (%write-full port dy dm dd th tm ts tf om)
  (let ((d? (or dy dm dd))
        (t? (or th tm ts tf)))
    (and d?
         (%write-date port dy dm dd))
    (and d?
         t?
         (write-char #\T port))
    (and t?
         (%write-time port th tm ts tf))
    (and om
         (%write-offset port om))))

;;; @defproc write-rfc3339 rec port
;;;
;;; Write an RFC3339 string format of @code{rfc3339-record} @var{rec} to
;;; output port @var{port}.

(define (write-rfc3339-record rec port)
  ;; TODO: Maybe lose this "apply".
  (apply %write-full port (rfc3339-record->list rec)))

;;; @defproc rfc3339-record->string rec
;;;
;;; Yield an RFC3339 string format of @code{rfc3339-record} @var{rec} as a
;;; Scheme string.

(define (rfc3339-record->string rec)
  (let ((os (open-output-string)))
    (write-rfc3339-record rec os)
    (let ((str (get-output-string os)))
      (close-output-port os)
      str)))

;;; @section Validation

;;; A few procedures are provided for validating @code{rfc3339-record}s.

(define-syntax %field-check
  (syntax-rules ()
    ((_ ?explain?-var ?var ?valid-expr)
     (cond ((not ?var)  (if ?explain?-var
                            (list 'missing (quote ?var))
                            #t))
           (?valid-expr #f)
           (else        (if ?explain?-var
                            (list 'invalid
                                  (quote ?var)
                                  ?var
                                  (quote ?valid-expr))
                            #t))))))

(define (%leap-year? year)
  (and (zero? (modulo year 4))
       (if (zero? (modulo year 100))
           (zero? (modulo year 400))
           #t)))

(define (%month-days year month)
  (case month
    ((1 3 5 7 8 10 12) 31)
    ((4 6 9 11)        30)
    ((2)               (if (%leap-year? year) 29 28))
    (else              #f)))

;;; @defproc  check-rfc3339-record-date   rec explain?
;;; @defprocx check-rfc3339-record-time   rec explain?
;;; @defprocx check-rfc3339-record-offset rec explain?
;;;
;;; Check the respective component of @code{rfc3339-record} @var{rec} for
;;; completeness and correctness, yielding @code{#f} iff no problems were
;;; detected.  If @var{explain?} is true, then true values of these procedures
;;; are lists that ``explain'' the error detected.  For example:
;;;
;;; @lisp
;;; (check-rfc3339-record-date
;;;  (string->rfc3339-record "1999-02")    #t)
;;; @result{} (missing mday)
;;;
;;; (check-rfc3339-record-date
;;;  (string->rfc3339-record "1999-02-29") #t)
;;; @result{}
;;; (invalid mday 29 (and (integer? mday)
;;;                       (<= 1 mday (month-days year month))))
;;;
;;; (check-rfc3339-record-date
;;;  (string->rfc3339-record "2000-02-29") #t)
;;; @result{} #f
;;; @end lisp
;;;
;;; Leap years are calculated correctly.  Leap seconds (61st seconds in
;;; minutes) are tolerated in any date and time.

(define check-rfc3339-record-date
  (let ((month-days %month-days))
    (lambda (rec explain?)
      (let ((year  (rfc3339-record:year  rec))
            (month (rfc3339-record:month rec))
            (mday  (rfc3339-record:mday  rec)))
        (or (%field-check
             explain? year  (and (integer? year) (>= year 1)))
            (%field-check
             explain? month (and (integer? month) (<= 1 month 12)))
            (%field-check
             explain? mday  (and (integer? mday)
                                 (<= 1 mday (month-days year month)))))))))

(define (check-rfc3339-record-time rec explain?)
  (let ((hour    (rfc3339-record:hour    rec))
        (minute  (rfc3339-record:minute  rec))
        (second  (rfc3339-record:second  rec))
        (secfrac (rfc3339-record:secfrac rec)))
    (or (%field-check
         explain? hour    (and (integer? hour)   (<= 0 hour   23)))
        (%field-check
         explain? minute  (and (integer? minute) (<= 0 minute 59)))
        (%field-check
         explain? second  (and (integer? second) (<= 0 second 60)))
        (%field-check
         explain? secfrac (and (real? secfrac)
                               (<= 0.0 secfrac)
                               (< secfrac 1.0))))))

(define (check-rfc3339-record-offset rec explain?)
  (let ((offsetmin (rfc3339-record:offsetmin rec)))
    (%field-check explain?
                  offsetmin
                  (integer? offsetmin))))

;;; @defproc check-rfc3339-record-full rec explain?
;;;
;;; Checks all three components.  See @code{check-rfc3339-record-date} et al.

(define (check-rfc3339-record-full rec explain?)
  (or (check-rfc3339-record-date   rec explain?)
      (check-rfc3339-record-time   rec explain?)
      (check-rfc3339-record-offset rec explain?)))

;;; @defproc valid-full-rfc3339-record? rec
;;;
;;; Yields a true value iff @code{check-rfc3339-record-full} yields a false
;;; value.

(define (valid-full-rfc3339-record? rec)
  (not (check-rfc3339-record-full rec #f)))

;;; @section SRFI-19 Interoperability

;;; @b{rfc3339} has no dependency on SRFI-19, but a procedure is provided for
;;; constructing a SRFI-19 @code{date}.

;;; @defproc rfc3339-string->srfi19-date/constructor str make-date
;;;
;;; Contruct a SRFI-19 @code{date} from an RFC3339 string, where @var{str} is
;;; the string, and @var{make-date} is the SRFI-19 @code{date} constructor.
;;; Applications using SRFI-19 may wish to define an
;;; @code{rfc3339-string->date} procedure:
;;;
;;; @lisp
;;; (define (rfc3339-string->date str)
;;;   (rfc3339-string->srfi19-date/constructor str make-date))
;;; @end lisp

(define (rfc3339-string->srfi19-date/constructor str make-date)
  (parse-rfc3339-string
   str
   (lambda (dy dm dd th tm ts tf om)
     ;; TODO: Do we have the right "make-date" here, and do we really want to
     ;; do this?
     (make-date (if tf (inexact->exact (truncate (* 100 tf))) 0) ; nanosecond
                (or ts 0)               ; second
                (or tm 0)               ; minute
                (or th 0)               ; hour
                (or dd 0)               ; day
                (or dm 0)               ; month
                (or dy 0)               ; year
                (if om (* 60 om) 0)     ; zone-offset
                ))))

;; TODO: rfc3339-record->srfi19-date/constructor

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.3 --- 2009-03-03 --- PLaneT @code{(1 1)}
;;; License is now LGPL 3.  Converted to author's new Scheme adminsitration
;;; system.  Changes for PLT 4.x.
;;;
;;; @item Version 0.2 --- 2005-12-05 --- PLaneT @code{(1 0)}
;;; Release for PLT 299/3xx.  Changed portability note in light of Pregexp
;;; post-1e9 bug fix.  Minor documentation changes.
;;;
;;; @item Version 0.1 --- 2005-01-30
;;; Initial release.
;;;
;;; @end table

(provide
 check-rfc3339-record-date
 check-rfc3339-record-full
 check-rfc3339-record-offset
 check-rfc3339-record-time
 make-rfc3339-record
 parse-rfc3339-string
 rfc3339-record->list
 rfc3339-record->string
 rfc3339-record:hour
 rfc3339-record:mday
 rfc3339-record:minute
 rfc3339-record:month
 rfc3339-record:offsetmin
 rfc3339-record:secfrac
 rfc3339-record:second
 rfc3339-record:set-hour!
 rfc3339-record:set-mday!
 rfc3339-record:set-minute!
 rfc3339-record:set-month!
 rfc3339-record:set-offsetmin!
 rfc3339-record:set-secfrac!
 rfc3339-record:set-second!
 rfc3339-record:set-year!
 rfc3339-record:year
 rfc3339-record?
 rfc3339-string->list
 rfc3339-string->srfi19-date/constructor
 rfc3339-string->vector
 string->rfc3339-record
 valid-full-rfc3339-record?
 write-rfc3339-record)