gzip.ss
;;; gzip.ss  --  Jens Axel Søgaard

;;;
;;; PURPOSE
;;;

; This package provides bindings for zlib a general purpose
; compression library written by Jean-loup Gailly and Mark Adler.

;     <http://www.zlib.net/manual.html>

;;;
;;; HISTORY
;;;

; 2007-feb-17
;   - version 1.0 released
;       exports:
;           open-input-gz-file, open-output-gz-file
;           compress-bytes, uncompress-bytes
;           read-gz-file

;;;
;;; DOCUMENTATION
;;;

; See doc.txt

(module gzip mzscheme
  (require (lib "foreign.ss")
           (all-except (lib "contract.ss") ->)
           (rename (lib "contract.ss") c-> ->)
           (lib "etc.ss"))
  (unsafe!)
  
  ; open the shared library
  (define libzlib
    (case (system-type)
      [(windows)  (ffi-lib (build-path 
                            (this-expression-source-directory)
                            "zlib1.dll"))]
      [(macosx)   (ffi-lib "libz")]
      [else       (ffi-lib "zlib")]))
  
  (define error-codes
    (map reverse
         '((Z_OK 0)
           (Z_STREAM_END 1)
           (Z_NEED_DICT  2)
           (Z_ERRNO     -1)
           (Z_STREAM_ERROR -2)
           (Z_DATA_ERROR   -3)
           (Z_MEM_ERROR    -4)
           (Z_BUF_ERROR    -5)
           (Z_VERSION_ERROR -6))))
  
  (define (error-code->symbol code)
    (cond
      [(assoc code error-codes) => cadr]
      [else                        #f]))
  
  (define zlib-compress
    ; int compress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen);
    (get-ffi-obj 'compress libzlib
                 (_fun _pointer (destLen : (_ptr io _long)) _pointer _long 
                       -> (error-code : _int)
                       -> (let ([sym (error-code->symbol error-code)])
                            (if (eq? sym 'Z_OK)
                                destLen
                                sym)))))
  
  (define zlib-uncompress
    ;  int uncompress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen);
    (get-ffi-obj 'uncompress libzlib
                 (_fun _pointer (destLen : (_ptr io _long)) _pointer _long
                       -> (error-code : _int)
                       -> (let ([sym (error-code->symbol error-code)])
                            (if (eq? sym 'Z_OK)
                                destLen
                                sym)))))
  
  (define zlib-version
    ;  const char * zlibVersion (void);
    (get-ffi-obj 'zlibVersion libzlib
                 (_fun -> _string)))
  
  
  ; I can't get gzopen to return anything but #f :-(
  (define zlib-gzopen
    ; gzFile gzopen (const char *path, const char *mode)
    (get-ffi-obj 'gzopen libzlib
                 (_fun _file _string -> _pointer)))
  
  (define zlib-gzread
    ; Reads the given number of uncompressed bytes from the compressed file.
    ; If the input file was not in gzip format, gzread copies the given
    ; number of bytes into the buffer.
    
    ; gzread returns the number of uncompressed bytes actually read (0 for end of file, -1 for error).
    
    ; int gzread (gzFile file, voidp buf, unsigned len);
    (get-ffi-obj 'gzread libzlib
                 (_fun _pointer _bytes _uint 
                       -> (result : _int)
                       -> (cond
                            [(= result 0)  eof]
                            [(= result -1) #f]
                            [else          result]))))
  
  (define zlib-gzwrite
    ; Writes the given number of uncompressed bytes into the compressed file.
    ; gzwrite returns the number of uncompressed bytes actually written (0 in case of error).
    
    ; int gzwrite (gzFile file, const voidp buf, unsigned len);
    (get-ffi-obj 'gzwrite libzlib
                 (_fun _pointer _pointer _uint 
                       -> (result : _int)
                       -> (cond
                            [(= result 0)  #f]
                            [else          result]))))
  
  (define zlib-gzclose 
    ; Flushes all pending output if necessary, closes the compressed file
    ; and deallocates all the (de)compression state. The return value is
    ; the zlib error number (see function gzerror below).
    ; int gzclose (gzFile file);
    (get-ffi-obj 'gzclose libzlib
                 (_fun _pointer 
                       -> (error-code : _int)
                       -> (error-code->symbol error-code))))
  
  ; compress-bytes : bytes -> bytes or symbol 
  (provide/contract [compress-bytes (bytes? . c-> . (or/c bytes? symbol?))])
  (define (compress-bytes uncompressed-bytes)
    (let* ([in-len     (bytes-length uncompressed-bytes)]
           [out-len    (inexact->exact (ceiling (+ in-len (* 0.1 in-len) 12)))]
           [out-buffer (make-bytes out-len)]
           [len/error-code (zlib-compress out-buffer out-len uncompressed-bytes in-len)])
      (if (number? len/error-code)
          (make-sized-byte-string out-buffer len/error-code)
          len/error-code)))
  
  ; uncompress-bytes : bytes -> bytes or symbol
  (provide/contract [uncompress-bytes (bytes? integer? . c-> . (or/c bytes? symbol?))])
  (define (uncompress-bytes compressed-bytes uncompressed-size)
    (let* ([in-len     (bytes-length compressed-bytes)]
           [out-len    uncompressed-size]
           [out-buffer (make-bytes out-len)]
           [len/error-code (zlib-uncompress out-buffer out-len compressed-bytes in-len)])
      (if (number? len/error-code)
          out-buffer
          len/error-code)))
  
  (define (path->absolute-path path)
    (cond 
      [(absolute-path? path) 
       path]
      [(relative-path? path)
       (build-path (current-directory) path)]
      [else
       (error 'open-output-gz-file "Strange path, given: ~a: " path)]))
  
  (define (bytes-append* bs)
    (let ([result (make-bytes (apply + (map bytes-length bs)))])
      (let loop ([bs bs] [i 0])
        (cond [(null? bs) result]
              [else       (let ([len (bytes-length (car bs))])
                            (memmove result i (car bs) len)
                            (loop (cdr bs) (+ i len)))]))))
  
  ; more efficient than opening an gz-port and reading
  (provide/contract [read-gz-file (path? . c-> . bytes?)])
  (define (read-gz-file path)
    (define BLOCK-SIZE 4096)
    (let ([gzfile (zlib-gzopen (path->absolute-path path) "rb")])
      (unless gzfile
        (raise (make-exn:fail:filesystem 
                (format "read-bytes-from-gz-file: cannot open input file, given ~a" (path->string path))
                (current-continuation-marks))))
      (let loop ([bs '()])
        (let* ([buf (make-bytes BLOCK-SIZE)]
               [len (zlib-gzread gzfile buf BLOCK-SIZE)])
          (cond
            [(eof-object? len)
             (bytes-append* (reverse bs))]
            [(= len BLOCK-SIZE)
             (loop (cons buf bs))]
            [(<= 0 len BLOCK-SIZE)
             (loop (cons (make-sized-byte-string buf len) bs))])))))
  
  (provide/contract [open-input-gz-file (path? . c-> . port?)])
  ; open-input-gz-file : path -> port
  (define (open-input-gz-file path)
    (let ([gzfile (zlib-gzopen (path->absolute-path path) "rb")])
      (unless gzfile
        (raise (make-exn:fail:filesystem 
                (format "open-input-gz-file: cannot open input file, given ~a" (path->string path))
                (current-continuation-marks))))
      (let ()
        (define name-v path)
        (define (read-proc buf)
          (let ([len/eof (zlib-gzread gzfile buf (bytes-length buf))])
            len/eof))
        (define optional-peek-proc #f)
        (define (close-proc)
          (zlib-gzclose gzfile))
        (make-input-port name-v read-proc optional-peek-proc close-proc))))
  
  (provide/contract [open-output-gz-file (->* (port?) (listof symbol?)   (port?))])
  ; open-output-gz-file : path ['replace] -> port
  (define (open-output-gz-file path . options)
    (let ([path (path->absolute-path path)])
      (unless (member 'replace options)
        (if (file-exists? path)
            (raise (make-exn:fail:filesystem:exists
                    (format "open-output-gz-file: output file already exists, given ~a"
                            (path->string path))
                    (current-continuation-marks)))))
      (let ([gzfile (zlib-gzopen path "wb")])
        (unless gzfile
          (raise (make-exn:fail:filesystem 
                  (format "open-output-gz-file: cannot open output file, given ~a" 
                          (path->string path))
                  (current-continuation-marks))))
        (let ()
          (define name-v path)
          (define (write-proc buf start ending keep? break-while-blocking?)
            (if (= start ending)
                0
                (let ([len/eof (zlib-gzwrite gzfile (ptr-add buf start) (- ending start))])
                  len/eof)))
          (define (close-proc)
            (zlib-gzclose gzfile))
          
          (make-output-port name-v always-evt write-proc close-proc)))))
  
  ;  (current-directory "c:")
  ; 
  ;  (define out (open-output-gz-file "fooo.gz" 'replace))
  ;  (write-bytes #"123" out)
  ;  (write-bytes #"344445" out)
  ;  (close-output-port out)
  ; 
  ;  (define in (open-input-gz-file "fooo.gz"))
  ;  (display (read-bytes (bytes-length #"123344445") in))
  ;  (close-input-port in)
  
  )