main.rkt
#lang racket

;;; gzip.ss  --  Jens Axel Søgaard

;;;
;;; PURPOSE
;;;

; This package provides bindings for zlib a general purpose
; compression library written by Jean-loup Gailly and Mark Adler.

;     <http://www.zlib.net/manual.html>

;;;
;;; HISTORY
;;;

; 2007-feb-17
;   - version 1.0 released
;       exports:
;           open-input-gz-file, open-output-gz-file
;           compress-bytes, uncompress-bytes
;           read-gz-file

;;;
;;; DOCUMENTATION
;;;

; See doc.txt

(require ffi/unsafe
         (except-in (lib "contract.ss") ->)
         (rename-in (lib "contract.ss") [-> c->])
         rackunit
         racket/runtime-path)

(define-runtime-path windows-dll-path "./zlib1.dll")
  
  ; open the shared library
  (define libzlib
    (case (system-type)
      [(windows)  (ffi-lib windows-dll-path)]
      [(macosx)   (ffi-lib "libz")]
      [else       (ffi-lib "zlib")]))
  
  (define error-codes
    (map reverse
         '((Z_OK 0)
           (Z_STREAM_END 1)
           (Z_NEED_DICT  2)
           (Z_ERRNO     -1)
           (Z_STREAM_ERROR -2)
           (Z_DATA_ERROR   -3)
           (Z_MEM_ERROR    -4)
           (Z_BUF_ERROR    -5)
           (Z_VERSION_ERROR -6))))
  
  (define (error-code->symbol code)
    (cond
      [(assoc code error-codes) => cadr]
      [else                        #f]))
  
  (define zlib-compress
    ; int compress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen);
    (get-ffi-obj 'compress libzlib
                 (_fun _pointer (destLen : (_ptr io _long)) _pointer _long 
                       -> (error-code : _int)
                       -> (let ([sym (error-code->symbol error-code)])
                            (if (eq? sym 'Z_OK)
                                destLen
                                sym)))))
  
  (define zlib-uncompress
    ;  int uncompress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen);
    (get-ffi-obj 'uncompress libzlib
                 (_fun _pointer (destLen : (_ptr io _long)) _pointer _long
                       -> (error-code : _int)
                       -> (let ([sym (error-code->symbol error-code)])
                            (if (eq? sym 'Z_OK)
                                destLen
                                sym)))))
  
  (define zlib-version
    ;  const char * zlibVersion (void);
    (get-ffi-obj 'zlibVersion libzlib
                 (_fun -> _string)))
  
  
  ; I can't get gzopen to return anything but #f :-(
  (define zlib-gzopen
    ; gzFile gzopen (const char *path, const char *mode)
    (get-ffi-obj 'gzopen libzlib
                 (_fun _file _string -> _pointer)))
  
  (define zlib-gzread
    ; Reads the given number of uncompressed bytes from the compressed file.
    ; If the input file was not in gzip format, gzread copies the given
    ; number of bytes into the buffer.
    
    ; gzread returns the number of uncompressed bytes actually read (0 for end of file, -1 for error).
    
    ; int gzread (gzFile file, voidp buf, unsigned len);
    (get-ffi-obj 'gzread libzlib
                 (_fun _pointer _bytes _uint 
                       -> (result : _int)
                       -> (cond
                            [(= result 0)  eof]
                            [(= result -1) #f]
                            [else          result]))))
  
  (define zlib-gzwrite
    ; Writes the given number of uncompressed bytes into the compressed file.
    ; gzwrite returns the number of uncompressed bytes actually written (0 in case of error).
    
    ; int gzwrite (gzFile file, const voidp buf, unsigned len);
    (get-ffi-obj 'gzwrite libzlib
                 (_fun _pointer _pointer _uint 
                       -> (result : _int)
                       -> (cond
                            [(= result 0)  #f]
                            [else          result]))))
  
  (define zlib-gzclose 
    ; Flushes all pending output if necessary, closes the compressed file
    ; and deallocates all the (de)compression state. The return value is
    ; the zlib error number (see function gzerror below).
    ; int gzclose (gzFile file);
    (get-ffi-obj 'gzclose libzlib
                 (_fun _pointer 
                       -> (error-code : _int)
                       -> (error-code->symbol error-code))))
  
  ; compress-bytes : bytes -> bytes or symbol 
  (provide/contract [compress-bytes (bytes? . c-> . (or/c bytes? symbol?))])
  (define (compress-bytes uncompressed-bytes)
    (let* ([in-len     (bytes-length uncompressed-bytes)]
           [out-len    (inexact->exact (ceiling (+ in-len (* 0.1 in-len) 12)))]
           [out-buffer (make-bytes out-len)]
           [len/error-code (zlib-compress out-buffer out-len uncompressed-bytes in-len)])
      (if (number? len/error-code)
          (make-sized-byte-string out-buffer len/error-code)
          len/error-code)))
  
  ; uncompress-bytes : bytes -> bytes or symbol
  (provide/contract [uncompress-bytes (bytes? integer? . c-> . (or/c bytes? symbol?))])
  (define (uncompress-bytes compressed-bytes uncompressed-size)
    (let* ([in-len     (bytes-length compressed-bytes)]
           [out-len    uncompressed-size]
           [out-buffer (make-bytes out-len)]
           [len/error-code (zlib-uncompress out-buffer out-len compressed-bytes in-len)])
      (if (number? len/error-code)
          out-buffer
          len/error-code)))
  
  (define (path->absolute-path path)
    (cond 
      [(absolute-path? path) 
       path]
      [(relative-path? path)
       (build-path (current-directory) path)]
      [else
       (error 'open-output-gz-file "Strange path, given: ~a: " path)]))
  
  (define (bytes-append* bs)
    (let ([result (make-bytes (apply + (map bytes-length bs)))])
      (let loop ([bs bs] [i 0])
        (cond [(null? bs) result]
              [else       (let ([len (bytes-length (car bs))])
                            (memmove result i (car bs) len)
                            (loop (cdr bs) (+ i len)))]))))
  
  ; more efficient than opening an gz-port and reading
  (provide/contract [read-gz-file (path? . c-> . bytes?)])
  (define (read-gz-file path)
    (define BLOCK-SIZE 4096)
    (let ([gzfile (zlib-gzopen (path->absolute-path path) "rb")])
      (unless gzfile
        (raise (make-exn:fail:filesystem 
                (format "read-bytes-from-gz-file: cannot open input file, given ~a" (path->string path))
                (current-continuation-marks))))
      (let loop ([bs '()])
        (let* ([buf (make-bytes BLOCK-SIZE)]
               [len (zlib-gzread gzfile buf BLOCK-SIZE)])
          (cond
            [(eof-object? len)
             (bytes-append* (reverse bs))]
            [(= len BLOCK-SIZE)
             (loop (cons buf bs))]
            [(<= 0 len BLOCK-SIZE)
             (loop (cons (make-sized-byte-string buf len) bs))])))))
  
  (provide/contract [open-input-gz-file (path? . c-> . port?)])
  ; open-input-gz-file : path -> port
  (define (open-input-gz-file path)
    (let ([gzfile (zlib-gzopen (path->absolute-path path) "rb")])
      (unless gzfile
        (raise (make-exn:fail:filesystem 
                (format "open-input-gz-file: cannot open input file, given ~a" (path->string path))
                (current-continuation-marks))))
      (let ()
        (define name-v path)
        (define (read-proc buf)
          (let ([len/eof (zlib-gzread gzfile buf (bytes-length buf))])
            len/eof))
        (define optional-peek-proc #f)
        (define (close-proc)
          (zlib-gzclose gzfile))
        (make-input-port name-v read-proc optional-peek-proc close-proc))))
  
  (provide open-output-gz-file)
  (define (open-output-gz-file path #:replace [replace? #f])
    (let ([path (path->absolute-path path)])
      (unless replace?
        (when (file-exists? path)
          (raise (make-exn:fail:filesystem:exists
                  (format "open-output-gz-file: output file already exists, given ~a"
                          (path->string path))
                  (current-continuation-marks)))))
      (let ([gzfile (zlib-gzopen path "wb")])
        (unless gzfile
          (raise (make-exn:fail:filesystem 
                  (format "open-output-gz-file: cannot open output file, given ~a" 
                          (path->string path))
                  (current-continuation-marks))))
        (let ()
          (define name-v path)
          (define (write-proc buf start ending keep? break-while-blocking?)
            (if (= start ending)
                0
                (let ([len/eof (zlib-gzwrite gzfile (ptr-add buf start) (- ending start))])
                  len/eof)))
          (define (close-proc)
            (zlib-gzclose gzfile))
          
          (make-output-port name-v always-evt write-proc close-proc)))))
 
  ;; testing: 
  
  ;; we could comment this out, but why bother? the overhead is pretty low.
  ;; with these tests, it looks like all the code is covered except for the
  ;; exception cases.

  ;; check that uncompress(compress(b)) = b
  (let ([test-bytes #"otoh ethon tohesunt osn.t soc  so thson.th "])
    (check-equal? (uncompress-bytes (compress-bytes test-bytes)
                                    (bytes-length test-bytes))
                  test-bytes))
  
  ;; compress two separate byte-strings to a temp file
  (define temp-file (make-temporary-file))
  (define out (open-output-gz-file temp-file #:replace #t))
  (define first-part #"12aothe3")
  (define second-part #"342321")
  (check-equal? (write-bytes first-part out)
                (bytes-length first-part))
  (check-equal? (write-bytes second-part out)
                (bytes-length second-part))
  (close-output-port out)
  
  ;; read the byte-string from the file
  (define in (open-input-gz-file temp-file))
  (define expected-out (bytes-append first-part second-part))
  (check-equal? (read-bytes (bytes-length expected-out) in) expected-out)
  (close-input-port in)
  
  ;; do it again using read-gz-file
  (check-equal? (read-gz-file temp-file)
                expected-out)
  
  (delete-file temp-file)