zip.ss
(module zip mzscheme
  (require (planet "io.ss" ("dherman" "io.plt" 1)))
  (require (planet "file.ss" ("dherman" "io.plt" 1)))
  (require (lib "contract.ss"))
  (require (lib "deflate.ss"))
  (require (lib "etc.ss"))
  (require (all-except (lib "list.ss" "srfi" "1") zip any))
  (require "private/zip-constants.ss")
  (require "private/crc-32.ss")

  ;; TODO:
  ;;   - need utilities for:
  ;;       * get me the list of all files, no directories
  ;;       * get me the list of all files and directories
  ;;       * get me the list of all files and directories except empty ones

  ;; ===========================================================================
  ;; DATA DEFINITIONS
  ;; ===========================================================================

  ;; An msdos-time or an msdos-date is an exact-integer in the respective format
  ;; described at:
  ;;
  ;;     http://msdn.microsoft.com/library/en-us/com/htm/cmf_a2c_25gl.asp

  (print-struct #t)
  (define old (current-inspector))
  (current-inspector (make-inspector))

  ;; metadata : path * bytes * boolean * exact-integer * exact-integer * nat
  (define-struct metadata (path name directory? time date compression))

  ;; header : metadata * exact-integer * nat * nat * nat
  (define-struct header (metadata crc compressed uncompressed size))

  (current-inspector old)

  ;; ===========================================================================
  ;; CONSTANTS
  ;; ===========================================================================

  (define *spec-version* #x3e)     ; version 6.2
  (define *required-version* 20)   ; version 2.0
  (define *compression-level* 8)   ; I don't think this is configurable
  (define *zip-comment* #"packed by zip.plt - http://planet.plt-scheme.org")

  ;; ===========================================================================
  ;; FILE CREATION
  ;; ===========================================================================

  ;; date->msdos-time : date -> msdos-time
  (define (date->msdos-time date)
    (bitwise-ior
     (ceiling (/ (date-second date) 2))
     (arithmetic-shift (date-minute date) 5)
     (arithmetic-shift (date-hour date) 11)))

  ;; date->msdos-date : date -> msdos-date
  (define (date->msdos-date date)
    (bitwise-ior
     (date-day date)
     (arithmetic-shift (date-month date) 5)
     (arithmetic-shift (- (date-year date) 1980) 9)))

  ;; NOTE: the use of set! emphasizes that an expression can't be refactored out

  ;; zip-one-entry : output-stream metadata -> header
  (define (zip-one-entry out metadata)
    (let* ([directory? (metadata-directory? metadata)]
           [filename (metadata-name metadata)]
           [filename-length (bytes-length filename)]
           [seekable? (seekable-port? out)]
           [bits (if seekable? 0 #b1000)]
           [time (metadata-time metadata)]
           [date (metadata-date metadata)]
           [compression (metadata-compression metadata)]
           [mark1 #f]
           [mark2 #f]
           [write-int (lambda (n size) (write-integer n #f out #f size))])
      ;; write the contents to the output stream:
      (write-int *local-file-header*    4)  ; signature
      (write-int *required-version*     2)  ; version
      (write-int bits                   2)  ; bits
      (write-int compression            2)  ; compression
      (write-int time                   2)  ; time
      (write-int date                   2)  ; date
      (if seekable? (set! mark1 (file-position out)))
      (write-int 0                      4)  ; crc-32
      (write-int 0                      4)  ; compressed
      (write-int 0                      4)  ; uncompressed
      (write-int filename-length        2)  ; filename-length
      (write-int 0                      2)  ; extra-length
      (write-bytes filename out)            ; filename
      (if (not directory?)
          (let-values ([(uncompressed compressed bogus-crc)
                        (with-input-from-file (metadata-path metadata)
                          (lambda ()
                            (deflate (current-input-port) out)))])
            ;; TODO: deflate.ss seems to be creating a bad CRC, so I have to
            ;;       compute it myself. This should eventually go away.
            (let ([crc (with-input-from-file (metadata-path metadata) crc-32)])
              (if seekable?
                  (begin
                    (set! mark2 (file-position out))
                    (file-position out mark1))
                  (write-int #x08074b50 4)) ; EXT signature
              (write-int crc            4)  ; crc-32
              (write-int compressed     4)  ; compressed
              (write-int uncompressed   4)  ; uncompressed
              (if seekable? (file-position out mark2))

              ;; return the header information
              (make-header metadata crc compressed uncompressed
                           (+ filename-length compressed (if seekable? 30 46)))))
          (make-header metadata 0 0 0 (+ filename-length 30)))))

  ;; write-end-of-central-directory : output-port nat nat nat ->
  (define (write-end-of-central-directory out count start size)
    (let ([comment-length (bytes-length *zip-comment*)]
          [write-int (lambda (n size) (write-integer n #f out #f size))])
      (write-int #x06054b50     4) ; signature
      (write-int 0              2) ; # this disk
      (write-int 0              2) ; # disk with start of central dir.
      (write-int count          2) ; # entries in central dir. on this disk
      (write-int count          2) ; # entries in central dir.
      (write-int size           4) ; size of central dir.
      (write-int start          4) ; offset of start of central dir.
      (write-int comment-length 2)
      (write-bytes *zip-comment* out)))

  ;; write-central-directory : output-port (listof header) ->
  (define (write-central-directory out headers)
    (let ([count (length headers)]
          [write-int (lambda (n size) (write-integer n #f out #f size))])
      (let loop ([headers headers] [offset 0] [size 0])
        (if (null? headers)
            ;; no digital signature (why?)
            (write-end-of-central-directory out count offset size)
            (let* ([header (car headers)]
                   [metadata (header-metadata header)]
                   [filename-length (bytes-length (metadata-name metadata))]
                   [attributes (if (metadata-directory? metadata)
                                   *external-attributes:directory*
                                   *external-attributes:file*)]
                   [compression (metadata-compression metadata)]
                   [version (bitwise-ior
                             *spec-version*
                             (arithmetic-shift *system* 8))])
              (write-int #x02014b50                   4)
              (write-int version                      2)
              (write-int *required-version*           2)
              (write-int 0                            2)
              (write-int compression                  2)
              (write-int (metadata-time metadata)     2)
              (write-int (metadata-date metadata)     2)
              (write-int (header-crc header)          4)
              (write-int (header-compressed header)   4)
              (write-int (header-uncompressed header) 4)
              (write-int filename-length              2)
              (write-int 0                            2)
              (write-int 0                            2) ; comment length
              (write-int 0                            2)
              (write-int 0                            2) ; internal attributes
              (write-int attributes                   4) ; external attributes
              (write-int offset                       4)
              (write-bytes (metadata-name metadata) out)
              (loop (cdr headers)
                    (+ offset (header-size header))
                    (+ size filename-length 46)))))))

  ;; ===========================================================================
  ;; FRONT END
  ;; ===========================================================================

  ;; with-trailing-slash : bytes -> bytes
  (define (with-trailing-slash bytes)
    (if (= (bytes-ref bytes (sub1 (bytes-length bytes)))
           (char->integer #\/))
        bytes
        (bytes-append bytes #"/")))

  ;; with-slash-separator : bytes -> bytes
  (define (with-slash-separator bytes)
    (regexp-replace* *os-specific-separator-regexp* bytes #"/"))

  ;; path->entry : path -> bytes
  (define (path->entry path)
    (with-slash-separator (path->bytes path)))

  ;; build-metadata : relative-path -> metadata
  (define (build-metadata path)
    (let* ([mod (seconds->date (file-or-directory-modify-seconds path))]
           [dir? (directory-exists? path)]
           [time (date->msdos-time mod)]
           [date (date->msdos-date mod)])
      (make-metadata path
                     (if dir?
                         (with-trailing-slash (path->entry path))
                         (path->entry path))
                     dir?
                     time
                     date
                     (if dir? 0 *compression-level*))))

  ;; TODO: can you make this work for absolute paths with a little more work?

  ;; zip : (listof relative-path) [output-port] ->
  (define zip
    (opt-lambda (files [out (current-output-port)])
      (let ([headers (map-in-order (lambda (file) (zip-one-entry out file))
                                   (map build-metadata files))])
        (write-central-directory out headers))))

  (provide/contract
   [zip (((listof relative-path/c)) (output-port?) . opt-> . any)]))