unzip.ss
(module unzip mzscheme
  (require (planet "io.ss" ("dherman" "io.plt" 1)))
  (require (planet "file.ss" ("dherman" "io.plt" 1)))
  (require (lib "contract.ss"))
  (require (lib "etc.ss"))
  (require (lib "inflate.ss"))
  (require (lib "port.ss"))
  (require (lib "file.ss"))
  (require "private/zip-constants.ss")

  ;; ===========================================================================
  ;; DATATYPES AND UTILITIES
  ;; ===========================================================================

  ;; (alistof bytes zip-entry)
  (define-struct zip-directory (contents))

  ;; nat * boolean
  (define-struct zip-entry (offset dir?))

  (define-struct (exn:fail:unzip exn:fail) ())
  (define-struct (exn:fail:unzip:no-such-entry exn:fail:unzip) (entry))
  
  (define (raise-unzip-error message)
    (raise
     (make-exn:fail:unzip (string->immutable-string (format "unzip: ~a" message))
                          (current-continuation-marks))))
  
  (define (raise-entry-not-found entry)
    (raise
     (make-exn:fail:unzip:no-such-entry
      (string->immutable-string (format "unzip: entry not found: \"~a\"" (bytes->string/latin-1 entry)))
      (current-continuation-marks)
      entry)))

  ;; zip-directory-entries : zip-directory -> (listof bytes)
  (define (zip-directory-entries zipdir)
    (map car (zip-directory-contents zipdir)))

  ;; zip-directory-lookup : bytes zip-directory -> (option zip-entry)
  (define (zip-directory-lookup entry zipdir)
    (let loop ([contents (zip-directory-contents zipdir)])
      (cond
        [(null? contents) #f]
        [(or (bytes=? entry (caar contents))
             (bytes=? (bytes-append entry #"/") (caar contents)))
         (cdar contents)]
        [else (loop (cdr contents))])))

  ;; zip-directory-contains? : (union string path bytes) zip-directory -> boolean
  (define (zip-directory-contains? entry zipdir)
    (if (bytes? entry)
        (and (zip-directory-lookup entry zipdir) #t)
        (zip-directory-contains? (path->zip-path entry) zipdir)))

  ;; matches-directory? : bytes bytes -> boolean
  (define (bytes-prefix? dirname entry-name)
    (let ([dirname-len (bytes-length dirname)]
          [entry-name-len (bytes-length entry-name)])
      (and (>= entry-name-len dirname-len)
           (bytes=? (subbytes entry-name 0 dirname-len) dirname))))

  ;; zip-directory-includes-directory? : (union string path bytes) zip-directory -> boolean
  (define (zip-directory-includes-directory? dirname zipdir)
    (if (bytes? dirname)
        (ormap (lambda (pair)
                 (bytes-prefix? dirname (car pair)))
               (zip-directory-contents zipdir))
        (zip-directory-includes-directory? (path->zip-path dirname) zipdir)))

  ;; path->zip-path : (union path string) -> bytes
  (define (path->zip-path p)
    (if (path? p)
        (bytes->zip-bytes (path->bytes p))
        (bytes->zip-bytes (string->bytes/latin-1 p))))
  
  (define (bytes->zip-bytes b)
    (regexp-replace* *os-specific-separator-regexp* b #"/"))

  ;; ===========================================================================
  ;; UNZIPPING ENGINE
  ;; ===========================================================================

  (define *slash-byte* (char->integer #\/))

  (define (directory-entry? name)
    (= (bytes-ref name (sub1 (bytes-length name))) *slash-byte*))

  ;; unzip-one-entry : input-port (bytes boolean input-port -> a) -> a
  (define (unzip-one-entry in read-entry)
    (let ([read-int (lambda (count) (read-integer count #f in #f))])
      (let* ([signature            (read-int 4)]
             [version              (read-bytes 2 in)]
             [bits                 (read-int 2)]
             [compression          (read-int 2)]
             [time                 (read-int 2)]
             [date                 (read-int 2)]
             [crc-32               (read-int 4)]
             [compressed           (read-int 4)]
             [uncompressed         (read-int 4)]
             [filename-length      (read-int 2)]
             [extra-length         (read-int 2)]
             [filename             (read-bytes filename-length in)]
             [extra                (read-bytes extra-length in)])
        (let* ([mark (file-position in)]
               [dir? (directory-entry? filename)]
               [in0 (if (bit-set? 3 bits)
                        in
                        (make-limited-input-port in compressed #f))])
          (dynamic-wind
           void
           (lambda ()
             (read-entry filename
                         dir?
                         (if (zero? compression)
                             in0
                             (make-filter-input-port inflate in0))))
           (lambda ()
             (file-position in (+ mark compressed))))))))

  ;; find-central-directory : input-port nat -> nat nat nat
  (define (find-central-directory in size)
    (let loop ([pos (- size 18)])
      (unless (positive? pos)
        (raise-unzip-error "no central directory"))
      (file-position in pos)
      (let* ([read-int (lambda (count) (read-integer count #f in #f))]
             [signature (read-int 4)])
        (if (= signature *end-of-central-directory-record*)
            (let ([disk-number       (read-int 2)]
                  [directory-disk    (read-int 2)]
                  [disk-entries      (read-int 2)]
                  [entry-count       (read-int 2)]
                  [directory-length  (read-int 4)]
                  [directory-offset  (read-int 4)]
                  [comment-length    (read-int 2)])
              (if (= (- size (file-position in)) comment-length)
                  (values directory-offset directory-length entry-count)
                  (loop (sub1 pos))))
            (loop (sub1 pos))))))
  
  ;; read-central-directory : input-port nat -> (alistof bytes zip-entry)
  (define (read-central-directory in size)
    (let-values ([(offset length count) (find-central-directory in size)])
      (file-position in offset)
      (build-list count
                  (lambda (i)
                    (let* ([read-int (lambda (count)
                                       (read-integer count #f in #f))]
                           [signature (read-int 4)])
                      (unless (= signature *central-file-header*)
                        (raise-unzip-error
                         (format "bad central file header signature: ~a"
                                 signature)))
                      (let ([version             (read-int 2)]
                            [required            (read-int 2)]
                            [bits                (read-int 2)]
                            [compression         (read-int 2)]
                            [time                (read-int 2)]
                            [date                (read-int 2)]
                            [crc-32              (read-int 4)]
                            [compressed          (read-int 4)]
                            [uncompressed        (read-int 4)]
                            [filename-length     (read-int 2)]
                            [extra-length        (read-int 2)]
                            [comment-length      (read-int 2)]
                            [disk-number         (read-int 2)]
                            [internal-attributes (read-int 2)]
                            [external-attributes (read-int 4)]
                            [relative-offset     (read-int 4)])
                        (let* ([filename (read-bytes filename-length)]
                               [dir? (directory-entry? filename)])
                          (skip-bytes (+ extra-length comment-length) in)
                          (cons filename (make-zip-entry relative-offset dir?)))))))))

  ;; ===========================================================================
  ;; FRONT END
  ;; ===========================================================================

  ;; unzip : [input-port (bytes boolean input-port -> any)] -> any
  (define unzip
    (opt-lambda ([in (current-input-port)] [read-entry *default-entry-reader*])
      (when (= (peek-integer 4 #f in #f) *local-file-header*)
        (unzip-one-entry in read-entry)
        (unzip in read-entry))))

  ;; read-zip-directory : (union string path) -> zip-directory
  (define (read-zip-directory path)
    (make-zip-directory
     (with-input-from-file path
       (lambda ()
         (read-central-directory (current-input-port)
                                 (file-size path))))))

  ;; unzip-entry : (union string path) zip-directory bytes [(bytes boolean input-port -> a)] -> a
  (define unzip-entry
    (opt-lambda (path dir entry-name [read-entry *default-entry-reader*])
      (cond
        [(zip-directory-lookup entry-name dir)
         => (lambda (entry)
              (with-input-from-file path
                (lambda ()
                  (file-position (current-input-port) (zip-entry-offset entry))
                  (unzip-one-entry (current-input-port) read-entry))))]
        [else (raise-entry-not-found entry-name)])))
  
  ;; ===========================================================================
  ;; ENTRY PARSERS
  ;; ===========================================================================

  ;; make-filesystem-entry-reader : [output-flag] -> (bytes boolean input-port -> any)
  (define make-filesystem-entry-reader
    (opt-lambda ([flag 'error])
      (lambda (name dir? in)
        (let ([path (bytes->path name)])
          (if dir?
              (unless (directory-exists? path)
                (make-directory* path))
              (let ([parent (dirname path)])
                (unless (directory-exists? parent)
                  (make-directory* parent))
                (with-output-to-file path
                  (lambda ()
                    (copy-port in (current-output-port)))
                  flag)))))))
  
  (define *default-entry-reader* (make-filesystem-entry-reader))

  ;; make-piped-entry-reader : output-port -> (bytes boolean input-port -> any)
  (define (make-piped-entry-reader out)
    (lambda (name dir? in)
      (unless dir?
        (copy-port in out))))
  
  (define output-flag/c
    (symbols 'error 'replace 'truncate 'truncate/replace 'append 'update))
  
  (provide/contract
   [exn:fail:unzip? (any/c . -> . boolean?)]
   [exn:fail:unzip:no-such-entry? (any/c . -> . boolean?)]
   [make-exn:fail:unzip (string? continuation-mark-set? . -> . exn:fail:unzip?)]
   [make-exn:fail:unzip:no-such-entry (string? continuation-mark-set? bytes? . -> . exn:fail:unzip:no-such-entry?)]
   [exn:fail:unzip:no-such-entry-entry (exn:fail:unzip:no-such-entry? . -> . bytes?)]
   [zip-directory? (any/c . -> . boolean?)]
   [zip-directory-entries (zip-directory? . -> . (listof bytes?))]
   [zip-directory-contains? ((union string? path? bytes?) zip-directory? . -> . boolean?)]
   [zip-directory-includes-directory? ((union string? path? bytes?) zip-directory? . -> . boolean?)])

  (provide/contract
   [output-flag/c contract?])

  (provide/contract
   [unzip (() (input-port? (bytes? boolean? input-port? . -> . any)) . opt-> . any)]
   [read-zip-directory ((union string? path?) . -> . zip-directory?)]
   [unzip-entry (((union string? path?) zip-directory? bytes?)
                 ((bytes? boolean? input-port? . -> . any))
                 . opt-> .
                 any)]
   [path->zip-path ((union string? path?) . -> . bytes?)]
   [make-filesystem-entry-reader (() (output-flag/c) . opt-> . (bytes? boolean? input-port? . -> . any))]
   [make-piped-entry-reader (output-port? . -> . (bytes? boolean? input-port? . -> . any))]))