packed-binary.ss
#lang scheme
;;; packed-binary.ss
;;; Copyright (c) 2008 M. Douglas Williams
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 2.1 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA.
;;;
;;; -------------------------------------------------------------------
;;;
;;; This module performs conversions between PLT Scheme values and C structs
;;; represented as PLT Scheme byte strings. It uses format strings (explained
;;; below) as compact descriptions of the layout of the C structs and the
;;; intended conversion to/from PLT Scheme values. This can be used in handling
;;; binary data stored in files or from network connections, among other
;;; sources.
;;;
;;; -------------------------------------------------------------------
;;;
;;; Version  Date      Description
;;; 1.0.0    09/02/08  Initial release. (Doug Williams)

(require srfi/13)

;;; Pack routines

;;; pack-char:
;;; Pack a character into the buffer at the specified position.
(define (pack-char buffer start c size signed? big-endian?)
  (bytes-set! buffer start (char->integer c)))

;;; pack-integer:
;;; Pack an integer into the buffer at the specified position.
(define (pack-integer buffer start n size signed? big-endian?)
  (if (= size 1)
      ;; single byte
      (if signed?
          (error "Signed byte integers not supported")
          (bytes-set! buffer start n))
      ;; 2, 4, or 8 bytes
      (integer->integer-bytes n size signed? big-endian? buffer start)))

;;; pack-real:
;;; Pack a floating-point number into the buffer at the specified position.
(define (pack-real buffer start x size signed? big-endian?)
  (real->floating-point-bytes x size big-endian? buffer start))

;;; pack-string:
;;; Pack a string into the buffer at the specified position.
(define (pack-string buffer start s size)
  (let* ((bytes (string->bytes/latin-1 s))
         (copy-size (min size (bytes-length bytes))))
    (bytes-copy! buffer start bytes 0 copy-size)))
    
;;; Unpack routines

;;; unpack-char:
;;; Unpack a character from the specified position.
(define (unpack-char buffer start size signed? big-endian?)
  (integer->char (bytes-ref buffer start)))

;;; unpack-integer:
;;; Unpack an integer from the specified position.
(define (unpack-integer buffer start size signed? big-endian?)
  (if (= size 1)
      ;; single byte
      (if signed?
          (error "Signed byte integers not supported")
          (bytes-ref buffer start))
      ;; 2, 4, or 8 bytes
      (integer-bytes->integer buffer signed? big-endian? start (+ start size))))

;;; unpack-real:
;;; Unpack a floating point number from the specified position.
(define (unpack-real buffer start size signed? big-endian?)
  (floating-point-bytes->real buffer big-endian? start (+ start size)))

;;; unpack-string:
;;; Unpack a string from the specified position.
(define (unpack-string buffer start size)
  (bytes->string/latin-1 (subbytes buffer start (+ start size))))

;;; packed-format: (struct/c packed-format ((specifier char?)
;;;                                         (size nonnegative-integer?)
;;;                                         (signed? boolean?)
;;;                                         (native-alignment nonnegative-integer?)
;;;                                         (pack-handler proc?)
;;;                                         (unpack-handler proc?)))
(define-struct packed-format
  (specifier
   size
   signed?
   native-alignment
   pack-handler
   unpack-handler))

;;; packed-format-x: packed-format?
;;; Defines the pad byte packed binary format.
(define packed-format-x
  (make-packed-format
   #\x
   1 #f 1
   #f
   #f))

;;; packed-format-c: packed-format?
;;; Defines the char (char) packed binary format.
(define packed-format-c
  (make-packed-format
   #\c
   1 #f 1
   pack-char
   unpack-char))

;;; packed-format-b
;;; Defines the signed byte integer (signed char) packed binary format.
(define packed-format-b
  (make-packed-format
   #\b
   1 #t 1
   pack-integer
   unpack-integer))

;;; packed-format-B
;;; Defines the unsigned byte integer (unsigned char) packed binary format.
(define packed-format-B
  (make-packed-format
   #\B
   1 #f 1
   pack-integer
   unpack-integer))

;;; packed-format-h
;;; Defines the signed two byte integer (short) packed binary format.
(define packed-format-h
  (make-packed-format
   #\h
   2 #t 1
   pack-integer
   unpack-integer))

;;; packed-format-H
;;; Defines the unsigned two byte integer (unsigned short) packed binary format.
(define packed-format-H
  (make-packed-format
   #\H
   2 #f 2
   pack-integer
   unpack-integer))

;;; packed-format-i
;;; Defines the signed four byte integer (int) packed binary format.
(define packed-format-i
  (make-packed-format
   #\i
   4 #t 4
   pack-integer
   unpack-integer))

;;; packed-format-I
;;; Defines the unsigned four byte integer (unsigned int) packed binary format.
(define packed-format-I
  (make-packed-format
   #\I
   4 #f 4
   pack-integer
   unpack-integer))

;;; packed-format-l
;;; Defines the signed four byte integer (long) packed binary format.
(define packed-format-l
  (make-packed-format
   #\l
   4 #t 4
   pack-integer
   unpack-integer))

;;; packed-format-L
;;; Defines the unsigned four byte integer (unsigned long) packed binary format.
(define packed-format-L
  (make-packed-format
   #\L
   4 #f 4
   pack-integer
   unpack-integer))

;;; packed-format-q
;;; Defines the signed eight byte integer (long long) packed binary format.
(define packed-format-q
  (make-packed-format
   #\q
   8 #t 8
   pack-integer
   unpack-integer))

;;; packed-format-Q
;;; Defines the unsigned eight byte integer (unsigned long long) packed binary format.
(define packed-format-Q
  (make-packed-format
   #\Q
   8 #f 8
   pack-integer
   unpack-integer))

;;; packed-format-f
;;; Defines the four byte floating point (float) packed binary format.
(define packed-format-f
  (make-packed-format
   #\f
   4 #t 4
   pack-real
   unpack-real))

;;; packed-format-d
;;; Defines the eight byte floating point (double) packed binary format.
(define packed-format-d
  (make-packed-format
   #\d
   8 #t 8
   pack-real
   unpack-real))

;;; packed-format-s
;;; Defines the string (char[]) packed binary format.  This format is unique in
;;; that the count is the length of the string and not a repeat count.
(define packed-format-s
  (make-packed-format
   #\s
   1 #f 1
   pack-string
   unpack-string))

(define packed-format-specifiers
  '(#\x #\c #\b #\B #\h #\H #\i #\I #\l #\L #\q #\Q #\f #\d))

(define packed-format-alist
  `((#\x . ,packed-format-x)
    (#\c . ,packed-format-c)
    (#\b . ,packed-format-b)
    (#\B . ,packed-format-B)
    (#\h . ,packed-format-h)
    (#\H . ,packed-format-H)
    (#\i . ,packed-format-i)
    (#\I . ,packed-format-I)
    (#\l . ,packed-format-l)
    (#\L . ,packed-format-L)
    (#\q . ,packed-format-q)
    (#\Q . ,packed-format-Q)
    (#\f . ,packed-format-f)
    (#\d . ,packed-format-d)
    (#\s . ,packed-format-s)))

;;; char->packed-format: char? -> (or/c packed-format? false/c)
;;; Return the packed format descriptor for the specified character or #f if
;;; the character is not a valid format specified.
(define (char->packed-format char)
  (let ((acell (assv char packed-format-alist)))
    (if acell (cdr acell) #f)))

;;; Packed Format String Processing

;;; packed-format-byte-order-regexp: regexp?
;;; A regular expression that matches the byte order, size, and alignment
;;; specifier at the beginning of a packed format string.
(define packed-format-byte-order-regexp #px"^@|=|<|>|!")

;;; packed-format-regexp: regexp?
;;; A regular expression that matches packed format specifications in a
;;; packed format string.
(define packed-format-regexp
  #px"\\s*(\\d*)(x|c|b|B|h|H|i|I|l|L|q|Q|f|d|s)\\s*")

;;; packed-format-for-each: proc? x string?
;;; Applies a procedure to all of the packed format specifications in a
;;; packed format string.  The procedure supplied takes four arguments:
;;; byte-order-spec (char?), spec (string?), count (integer?), and format spec
;;; (char?).
(define (packed-format-for-each proc packed-format-string)
  (let* ((byte-order-match
          (regexp-match packed-format-byte-order-regexp packed-format-string))
         (byte-order-spec
          (if byte-order-match
              (string-ref (first byte-order-match) 0)
              #\@))
         (start
          (if byte-order-match 1 0))
         (packed-format-specs
          (regexp-match* packed-format-regexp packed-format-string start)))
    (unless (= (foldl + start (map string-length packed-format-specs))
               (string-length packed-format-string))
      (error "Packed format string error."))
    (for-each
     (lambda (packed-format-spec)
       (let* ((packed-format-spec-match
               (regexp-match packed-format-regexp packed-format-spec))
              (count
               (if (> (string-length (second packed-format-spec-match)) 0)
                   (string->number (second packed-format-spec-match))
                   1))
              (specifier
               (string-ref (third packed-format-spec-match) 0)))
         (proc byte-order-spec packed-format-spec count specifier)))
     (map string-trim-both packed-format-specs))))

;;; pad-count: nonnegative-integer? nonnegative-integer? ->
;;;            nonnegative-integer?
;;; Returns the number of pad bytes to add to the offset for the specified
;;; alignment.
(define (pad-count offset alignment)
  (modulo (- alignment (modulo offset alignment)) alignment))

;;; calculate-size: packed-format-string? -> nonnegative-integer?
;;; Returns the number of bytes required to store packed binary data for the
;;; specified format string.
(define (calculate-size packed-format-string)
  (let ((size 0))
    (packed-format-for-each
     (lambda (byte-order-spec packed-format-spec count specifier)
       (let ((packed-format (char->packed-format specifier)))
         ;; Align for native mode packing
         (when (eqv? byte-order-spec #\@)
           (set! size (+ size (pad-count size (packed-format-native-alignment packed-format)))))
         ;; Calculate size for this entry
         (set! size (+ size (* count (packed-format-size packed-format))))))
     packed-format-string)
    size))

;;; pack-into:
(define (pack-into packed-format-string buffer offset . values)
  (packed-format-for-each
   (lambda (byte-order-spec packed-format-spec count specifier)
     (let* ((packed-format (char->packed-format specifier))
            (size (packed-format-size packed-format))
            (signed? (packed-format-signed? packed-format))
            (native-alignment (packed-format-native-alignment packed-format))
            (pack-handler (packed-format-pack-handler packed-format))
            (aligned? (if (eqv? byte-order-spec #\@) #t #f))
            (big-endian? (if (memv byte-order-spec '(#\@ #\=))
                             (system-big-endian?)
                             (if (eqv? byte-order-spec #\<) #f #t))))
       ;; Align for native mode packing
       (when aligned?
         (set! offset (+ offset (pad-count offset native-alignment))))
       ;; Pack values
       (cond ((eqv? specifier #\x)
              ;; Add count pad bytes
              (set! offset (+ offset count)))
             ((eqv? specifier #\s)
              ;; String, so count is the size
              (pack-handler buffer offset (car values) count)
              (set! values (cdr values))
              (set! offset (+ offset count)))
             (else
              (for ((i (in-range count)))
                (pack-handler buffer offset (car values) size signed? big-endian?)
                (set! values (cdr values))
                (set! offset (+ offset size)))))))
   packed-format-string)
  buffer)

;;; pack:
(define (pack packed-format-string . values)
  (let* ((buffer-size (calculate-size packed-format-string))
         (buffer (make-bytes buffer-size)))
    (apply pack-into packed-format-string buffer 0 values)))

;;; unpack-from:
(define (unpack-from packed-format-string buffer (offset 0))
  (let ((buffer-size (bytes-length buffer))
         (values '()))
    (packed-format-for-each
     (lambda (byte-order-spec packed-format-spec count specifier)
       (let* ((packed-format (char->packed-format specifier))
              (size (packed-format-size packed-format))
              (signed? (packed-format-signed? packed-format))
              (native-alignment (packed-format-native-alignment packed-format))
              (unpack-handler (packed-format-unpack-handler packed-format))
              (aligned? (if (eqv? byte-order-spec #\@) #t #f))
              (big-endian? (if (memv byte-order-spec '(#\@ #\=))
                              (system-big-endian?)
                              (if (eqv? byte-order-spec #\<) #f #t))))
         ;; Align for native mode packing
         (when aligned?
           (set! offset (+ offset (pad-count offset native-alignment))))
         ;; unpack values
         (cond ((eqv? specifier #\x)
                ;; Add count pad bytes
                (set! offset (+ offset count)))
               ((eqv? specifier #\s)
                ;; String, so count is the size
                (set! values (append values (list (unpack-handler buffer offset count))))
                ;; [PLaneT Issue Tracking System]
                ;; #198: file position doesn't get updated correctly for string type
                ;;(set! offset (+ offset size)))
                (set! offset (+ offset count)))
               (else
                (for ((i (in-range count)))
                  (set! values (append values (list (unpack-handler buffer offset size signed? big-endian?))))
                  (set! offset (+ offset size)))))))
     packed-format-string)
    values))

;;; unpack:
(define (unpack packed-format-string buffer)
  (unpack-from packed-format-string buffer 0))

;;; write-packed:
(define (write-packed packed-format-string port . values)
  (let ((bytes (apply pack packed-format-string values)))
    (write-bytes bytes port)))

;;; read-packed:
(define (read-packed packed-format-string port)
  (let* ((buffer-size (calculate-size packed-format-string))
         (buffer (read-bytes buffer-size port)))
    (unpack packed-format-string buffer)))

;;; valid-packed-format-string-regexp: regexp?
;;; A regular expression that matches a valid packed format string.
(define valid-packed-format-string-regexp
  #px"^(?:@|=|<|>|!)?(?:\\s*\\d*(?:x|c|b|B|h|H|i|I|l|L|q|Q|f|d|s)\\s*)*$")

;;; packed-format-string?: any/c -> boolean?
;;; A predicate function that returns #t if it's argument is a valid packed
;;; format string, and #f otherwise.
(define (packed-format-string? string)
  (and (string? string)
       (regexp-match-exact? valid-packed-format-string-regexp string)))

;;; packed-format-string/c: flat-contract?
;;; A flat contract for a packed format string for error reporting.
(define packed-format-string/c
  (flat-named-contract 'packed-format-string/c packed-format-string?))

;;; Interface contracts

(provide/contract
 (packed-format-string? 
  (-> any/c boolean?))
 (calculate-size
  (-> packed-format-string/c (and/c integer? exact? (>=/c 0))))
 (pack
  (->* (packed-format-string/c) () #:rest (listof any/c) bytes?))
 (pack-into
  (->* (packed-format-string/c bytes? (and/c integer? exact? (>=/c 0)))
       () #:rest (listof any/c)
       bytes?))
 (unpack
  (-> packed-format-string/c bytes? (listof any/c)))
 (unpack-from
  (->* (packed-format-string/c bytes?) ((and/c integer? exact? (>=/c 0)))
       (listof any/c)))
 (write-packed
  (->* (packed-format-string/c output-port?) () #:rest (listof any/c) any))
 (read-packed
  (-> packed-format-string/c input-port? (listof any/c))))