uuid.rkt
#lang racket
;;; uuid.rkt

;;; This module implements Universally Unique Identifiers (UUIDs) in accordance
;;; with RFC 4122: A Universally Unique Identifier (UUID) URN Namespace.

(require (planet soegaard/digest/digest))

;;; Miscellaneous Functions

;;; (char-hexadecimal? char) -> boolean?
;;;   char : char?
;;; Returns #t if char is a hexadecimal character.
(define (char-hexadecimal? char)
  (or (char<=? #\0 char #\9)
      (char<=? #\a char #\f)
      (char<=? #\A char #\F)))

;;; (nibble? x) -> boolean?
;;;   x : any/c
;;; Returns #t if x is a nibble (i.e., an exact, nonnegative integer between
;;; 0 and 15, inclusive).
(define (nibble? x)
  (and (integer? x)
       (exact? x)
       (<= 0 x 15)))

;;; (hex-string? x) -> boolean?
;;;   x : any/c
;;; Returns #t if x is a string composed strictly of hexadecimal characters.
(define (hex-string? x)
  (and (string? x)
       (let/ec exit
         (for ((char (in-string x)))
           (unless (char-hexadecimal? char)
             (exit #f)))
         #t)))

;;; (hex-digit->nibble char) -> nibble?
;;;   char : char?
;;; Returns the nibble (i.e., an exact, nonnegative integer between 0 and 15,
;;; inclusive) represented by char.
(define (hex-digit->nibble char)
  (cond ((char<=? #\0 char #\9)
         (- (char->integer char) (char->integer #\0)))
        ((char<=? #\a char #\f)
         (+ (- (char->integer char) (char->integer #\a)) 10))
        ((char<=? #\A char #\F)
         (+ (- (char->integer char) (char->integer #\A)) 10))
        (else
         (error 'hex-digit->nibble
                "expected a hexadecimal digit, given ~a" char))))

;;; hex-digits : (vectorof char?)
;;; A vector containing the hexadecimal characters.
(define hex-digits
  #(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))

;;; (byte->hex-string byte) -> hex-string?
;;;   byte : byte?
;;; Returns a string with the two-character hexadecimal representation of the
;;; byte (i.e., exact, nonnegative integer between 0 and 255 inclusive), byte.
(define (byte->hex-string byte)
  (let-values (((q r) (quotient/remainder byte 16)))
    (string (vector-ref hex-digits q) (vector-ref hex-digits r))))

;;; (hex-string->byte hex-string) -> byte?
;;;   hex-string : hex-string?
;;; Returns the byte (i.e., exact, nonnegative integer value between 0 and 255,
;;; inclusive) represented by the hex-string. The hex-string must contain
;;; exactly 2 hexadecimal digits.
(define (hex-string->byte hex-string)
  (unless (= (string-length hex-string) 2)
    (error 'hex-string->byte
           "expected a string of heximal digits of length 2, given ~a"
           hex-string))
  (+ (* (hex-digit->nibble (string-ref hex-string 0)) 16)
     (hex-digit->nibble (string-ref hex-string 1))))

;;; (bytes->hex-string bytes) -> hex-string?
;;;   bytes : bytes?
;;; Returns a hex-string equivalent to bytes.
;(define (bytes->hex-string bytes)
;  (for/fold ((hex-string ""))
;    ((byte (in-bytes bytes)))
;    (string-append hex-string (byte->hex-string byte))))

;;; (hex-string->bytes hex-string) -> bytes?
;;;   hex-string : hex-string?
;;; Returns a byte string equivalent to hex-string. The hex-string must have an
;;; even number of hexadecimal digits.
(define (hex-string->bytes hex-string)
  (unless (even? (string-length hex-string))
    (error 'hex-string->bytes
           "expected an even length string of hexidecimal characters, given ~a"
           hex-string))
  (apply bytes (for/list ((i (in-range 0 (string-length hex-string) 2)))
                 (hex-string->byte (substring hex-string i (+ i 2))))))

;;; (host-to-network bytes) -> bytes?
;;;   bytes : bytes?
;;; Returns the network encoding for the host encoded unsigned 2, 4, or 8 byte
;;; string, bytes.
;(define (host-to-network bytes)
;  (integer->integer-bytes
;   (integer-bytes->integer bytes #f)
;   (bytes-length bytes)
;   #f #t))

;;; (network-to-host bytes) -> bytes?
;;;   bytes : bytes?
;;; Returns the host encoding for the network encoded unsigned 2, 4, or 8 byte
;;; string, bytes.
;(define (network-to-host bytes)
;  (integer->integer-bytes
;   (integer-bytes->integer bytes #f #t)
;   (bytes-length bytes)
;   #f))

;;; (struct uuid (bytes))
;;;   bytes : bytes?
;;; A structure representing a Universally Unique Identifier (UUID). A UUID is
;;; 128 bits long and is represented as a byte string of length 16. All of the
;;; fields are stored internally in little-endian format. Therefore, the must
;;; be converted to network format for certain operations.
;;;
;;;  Field                      Data Type                Octet
;;;                             Description
;;;
;;;  time-low                   unsigned 32-bit integer  0-3
;;;                             The low field of the timestamp
;;;
;;;  time-mid                   unsigned 16-bit integer  4-5
;;;                             The middle field of the timestamp
;;;
;;;  time-hi-and-version        unsigned 16-bit integer  6-7
;;;                             The high field of the timestamp multiplexed with
;;;                             the version number
;;;
;;;  clock-seq-hi-and-reserved  unsigned 8-bit integer   8
;;;                             The high field of the clock sequence multiplexed
;;;                             with the variant
;;;
;;;  clock-seq-low              unsigned 8-bit integer   9
;;;                             The low field of the clock sequence
;;;
;;;  node                       unsigned 48-bit integer  10-15
;;;                             The spatially unique node identifier
(define-struct uuid
  (bytes)
  #:property prop:custom-write
  (lambda (uuid port write?)
    (write-string (format "#<uuid ~a>" (uuid->string uuid)) port)))

;;; (uuid-RFC-4122? uuid) -> boolean?
;;;   uuid : uuid?
;;; Returns #t if uuid is the variant defined by RFC-4122.
(define (uuid-RFC-4122? uuid)
  (= (bitwise-and (bytes-ref (uuid-bytes uuid) 8) #xc0) #x80))

;;; (uuid-version uuid) -> exact-nonnegative-integer?
;;;   uuid : uuid?
;;; Returns the version (i.e., type, or more accurately, sub-type) of the uuid.
(define (uuid-version uuid)
  (bitwise-bit-field (bytes-ref (uuid-bytes uuid) 6) 4 8))

;;; (bytes->uuid bytes) -> uuid?
;;;   bytes : bytes?
;;; Returns a uuid with the contents specified by bytes.
;(define (bytes->uuid bytes)
;  (unless (= (bytes-length bytes) 16)
;    (error 'bytes->uuid
;           "expected a byte string of length 16, given ~a"
;           bytes))
;  (make-uuid (bytes->immutable-bytes bytes)))

;;; (uuid->bytes uuid) -> bytes?
;;;   uuid : uuid?
;;; Returns the byte string that represents uuid. The result is immutable.
(define (uuid->bytes uuid)
  (uuid-bytes uuid))

;;; (uuid-bytes<->uuid-network-bytes uuid-bytes) -> bytes?
;;;   uuid-bytes : bytes?
;;; Converts to or from network format for a byte string. This operation is its
;;; own inverse. The time-low, time-mid, and time-hi-and-version fields are
;;; transposed (little endian <-> big endian). The other fields are not
;;; transposed.
(define (uuid-bytes<->uuid-network-bytes uuid-bytes)
  (bytes
   (bytes-ref uuid-bytes 3)
   (bytes-ref uuid-bytes 2)
   (bytes-ref uuid-bytes 1)
   (bytes-ref uuid-bytes 0)
   (bytes-ref uuid-bytes 5)
   (bytes-ref uuid-bytes 4)
   (bytes-ref uuid-bytes 7)
   (bytes-ref uuid-bytes 6)
   (bytes-ref uuid-bytes 8)
   (bytes-ref uuid-bytes 9)
   (bytes-ref uuid-bytes 10)
   (bytes-ref uuid-bytes 11)
   (bytes-ref uuid-bytes 12)
   (bytes-ref uuid-bytes 13)
   (bytes-ref uuid-bytes 14)
   (bytes-ref uuid-bytes 15)))

;;; (hex-string->uuid hex-string) -> uuid?
;;;   hex-string : hex-string?
;;; Returns a uuid whose value is equivalent to the specified hex string. An
;;; error is raised if the hex string is not of length 32.
(define (hex-string->uuid hex-string)
  (unless (= (string-length hex-string) 32)
    (error 'hex-string->uuid
           "expected a string of hexidecimal characters of length 32, given ~a"
           hex-string))
  (make-uuid (hex-string->bytes hex-string)))

;;; (uuid->hex-string uuid) -> hex-string?
;;;   uuid : uuid?
;;; Returns a hex string equivalent to the uuid.
(define (uuid->hex-string uuid)
  (bytes->hex-string (uuid-bytes uuid)))

;;; (uuid->string uuid) -> string?
;;;   uuid : uuid?
;;; Returns a string equivalent to the uuid. The string is in 8-4-4-4-12 format,
;;; that is "xxxxxxxx-xxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx".
(define (uuid->string uuid)
  (let ((bytes (uuid-bytes uuid)))
    (string-append
     (bytes->hex-string (subbytes bytes 0 4))
     "-"
     (bytes->hex-string (subbytes bytes 4 6))
     "-"
     (bytes->hex-string (subbytes bytes 6 8))
     "-"
     (bytes->hex-string (subbytes bytes 8 10))
     "-"
     (bytes->hex-string (subbytes bytes 10 16)))))

;;; (uuid->urn-string uuid) -> string?
;;;   uuid : uuid?
;;; Returns the urn equivalent to the uuid. This is the string "urn:uuid:"
;;; prepended on to the 8-4-4-4-12 formatted string.
(define (uuid->urn-string uuid)
  (string-append "urn:uuid:" (uuid->string uuid)))

;;; uuid-string-regexp : pregexp?
;;; A regular expression to recognize and parse UUIDs.
(define uuid-string-regexp
  #px"^(?i:(urn:uuid:))?([[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12})$")

;;; (uuid-string? x) -> boolean?
;;;   x : any/c
;;; Returns #t if x is a UUID string.
(define (uuid-string? x)
  (and (string? x)
       (regexp-match? uuid-string-regexp x)))

;;; (string->uuid string) -> uuid?
;;;   string : string?
;;; Parses a UUID string. An exception is raised if the string is not recognized
;;; as a UUID.
(define (string->uuid string)
  (let ((match (regexp-match uuid-string-regexp string)))
    (unless match
      (error 'string->uuid
             "expected a valid uuid string, given ~s" string))
    (let* ((uuid-string (third match))
           (hex-string (string-append
                        (substring uuid-string 0 8)
                        (substring uuid-string 9 13)
                        (substring uuid-string 14 18)
                        (substring uuid-string 19 23)
                        (substring uuid-string 24 36))))
      (hex-string->uuid hex-string))))

;;; (current-RFC-4122-time) -> exact-nonnegative-integer?
;;; Returns the current time in RFC 4122 format, which is the number of 100
;;; nanosecond intervals since 00:00:00.00, 15 Oct 1582.
;;; MDW - 02/05/2010 - This results in a failure on some platforms because the
;;; result of current-inexact-milliseconds is not necessarily an integer.
(define (current-RFC-4122-time)
;  (+ 122192928000000000
;     (* (inexact->exact (current-inexact-milliseconds)) 10000))
  (+ 122192928000000000
     (inexact->exact (round (* (current-inexact-milliseconds) 10000)))))
  

;;; (make-uuid-1) -> uuid?
;;; Generate a time-based (type 1) UUID using the current time (in number of 100
;;; nanosecond intervals since 00:00:00.00, 10 Oct 1582) and the MAC address of
;;; the host. This implementation does not maintain state information on the UUID
;;; generator and always uses a random sequence number. This is fine for low
;;; volume generation. If the MAC accress of the host cannot be determined, a
;;; random (broadcast) MAC address is used, which cannot clash with any real MAC
;;; address.
(define (make-uuid-1 (mac #f))
  (let* ((uuid-bytes (make-bytes 16))
         (time (current-RFC-4122-time))
         (time-low (bitwise-bit-field time 0 32))
         (time-mid (bitwise-bit-field time 32 48))
         (time-hi (bitwise-bit-field time 48 64)))
    (integer->integer-bytes time-low 4 #f #t uuid-bytes 0)
    (integer->integer-bytes time-mid 2 #f #t uuid-bytes 4)
    (integer->integer-bytes time-hi 2 #f #t uuid-bytes 6)
    (bytes-set! uuid-bytes 8 (random 256))
    (bytes-set! uuid-bytes 9 (random 256))
    (if mac
        (for ((i (in-range 6)))
          (let* ((start (* i 3))
                 (end (+ start 2)))
            (bytes-set! uuid-bytes (+ i 10) (hex-string->byte (substring mac start end)))))
        (begin
          (for ((i (in-range 10 16)))
            (bytes-set! uuid-bytes i (random 256)))
          (bytes-set!
           uuid-bytes 10
           (bitwise-ior (bytes-ref uuid-bytes 10) #x01))))
    (bytes-set!
     uuid-bytes 8
     (bitwise-ior (bitwise-and (bytes-ref uuid-bytes 8) #x3f) #x80))
    (bytes-set!
     uuid-bytes 6
     (bitwise-ior (bitwise-and (bytes-ref uuid-bytes 6) #x0f) #x10))
    (make-uuid uuid-bytes)))

;;; (uuid-1->date uuid) -> date?
;;;   uuid : uuid?
;;; Returns the date and time that the time-based (type 1) uuid was created.
(define (uuid-1->date uuid)
  (unless (and (uuid-RFC-4122? uuid)
               (= (uuid-version uuid) 1))
    (error 'uuid-1->date
           "expected a time-based (version 1) RFC 4122 UUID, given ~a"
           uuid))
  (let* ((uuid-bytes (uuid-bytes uuid))
         (time-low-bytes (subbytes uuid-bytes 0 4))
         (time-mid-bytes (subbytes uuid-bytes 4 6))
         (time-hi-bytes (subbytes uuid-bytes 6 8)))
    (bytes-set!
     time-hi-bytes 0
     (bitwise-and (bytes-ref time-hi-bytes 0) #x0f))
    (let* ((time (+ (* (integer-bytes->integer time-hi-bytes #f #t) 281474976710656)
                    (* (integer-bytes->integer time-mid-bytes #f #t) 4294967296)
                    (integer-bytes->integer time-low-bytes #f #t)))
           (inexact-milliseconds (round (/ (- time 122192928000000000) 10000.0)))
           (milliseconds (inexact->exact inexact-milliseconds)))
      (seconds->date (quotient milliseconds 1000)))))

;;; (make-uuid-3 namespace-uuid name #:legacy (force? #f)) -> uuid?
;;;   namespace-uuid : uuid?
;;;   name : string?
;;;   force? : boolean? = #f
;;; Create a name-based (type 3) UUID using MD5 hashing.
(define (make-uuid-3 namespace-uuid name #:legacy (force? #f))
  (let* ((namespace-bytes
          (if force?
              (uuid-bytes<->uuid-network-bytes (uuid->bytes namespace-uuid))
              (uuid->bytes namespace-uuid)))
         (name-bytes (string->bytes/utf-8 name))
         (namespace-name-bytes (bytes-append namespace-bytes name-bytes))
         (hash (md5 namespace-name-bytes))
         (hash-bytes (hex-string->bytes hash))
         (uuid-bytes
          (if force?
              (uuid-bytes<->uuid-network-bytes hash-bytes)
              hash-bytes)))
    (bytes-set!
     uuid-bytes 8
     (bitwise-ior (bitwise-and (bytes-ref uuid-bytes 8) #x3f) #x80))
    (bytes-set!
     uuid-bytes 6
     (bitwise-ior (bitwise-and (bytes-ref uuid-bytes 6) #x0f) #x30))
    (make-uuid uuid-bytes)))

;;; (make-uuid-4) -> uuid?
;;; Returns a new (pseudo-)random (type 4) UUID.
(define (make-uuid-4)
  (let ((random-bytes (apply bytes (for/list ((i (in-range 16)))
                                     (random 256)))))
    (bytes-set!
     random-bytes 8
     (bitwise-ior (bitwise-and (bytes-ref random-bytes 8) #x3f) #x80))
    (bytes-set!
     random-bytes 6
     (bitwise-ior (bitwise-and (bytes-ref random-bytes 6) #x0f) #x40))
    (make-uuid random-bytes)))

;;; (make-uuid-5 namespace-uuid name #:legacy (force? #f)) -> uuid?
;;;   namespace-uuid : uuid?
;;;   name : string?
;;; Create a name-based (type 5) UUID based on SHA-1 hashing.
(define (make-uuid-5 namespace-uuid name #:legacy (force? #f))
  (let* ((namespace-bytes
          (if force?
              (uuid-bytes<->uuid-network-bytes (uuid->bytes namespace-uuid))
              (uuid->bytes namespace-uuid)))
         (name-bytes (string->bytes/utf-8 name))
         (namespace-name-bytes (bytes-append namespace-bytes name-bytes))
         (hash (sha1 namespace-name-bytes))
         (hash-bytes (hex-string->bytes hash))
         (uuid-bytes
          (if force?
              (uuid-bytes<->uuid-network-bytes hash-bytes)
              hash-bytes)))
    (bytes-set!
     uuid-bytes 8
     (bitwise-ior (bitwise-and (bytes-ref uuid-bytes 8) #x3f) #x80))
    (bytes-set!
     uuid-bytes 6
     (bitwise-ior (bitwise-and (bytes-ref uuid-bytes 6) #x0f) #x50))
    (make-uuid uuid-bytes)))

;;; UUID Comparison Functions

;;; (uuid=? uuid-1 uuid-2) -> boolean?
;;;   uuid-1 : uuid?
;;;   uuid-2 : uuid?
;;; Returns #t if uuid-1 and uuid-2 are equal.
(define (uuid=? uuid-1 uuid-2)
  (bytes=? (uuid->bytes uuid-1) (uuid->bytes uuid-2)))

;;; (uuid<? uuid-1 uuid-2) -> boolean?
;;;   uuid-1 : uuid?
;;;   uuid-2 : uuid?
;;; Returns #t if uuid-1 is (lexically) less than uuid-2.
(define (uuid<? uuid-1 uuid-2)
  (bytes<? (uuid->bytes uuid-1) (uuid->bytes uuid-2)))

;;; (uuid>? uuid-1 uuid-2) -> boolean?
;;;   uuid-1 : uuid?
;;;   uuid-2 : uuid?
;;; Returns #t if uuid-1 is (lexically) greater than uuid-2.
(define (uuid>? uuid-1 uuid-2)
  (bytes>? (uuid->bytes uuid-1) (uuid->bytes uuid-2)))

;;; nil-uuid : uuid?
(define nil-uuid (hex-string->uuid "00000000000000000000000000000000"))

;;; namespace-DNS : uuid?
;;; namespace-URL : uuid?
;;; namespace-OID : uuid?
;;; namespace-X500 : uuid?
(define namespace-DNS  (hex-string->uuid "6ba7b8109dad11d180b400c04fd430c8"))
(define namespace-URL  (hex-string->uuid "6ba7b8119dad11d180b400c04fd430c8"))
(define namespace-OID  (hex-string->uuid "6ba7b8129dad11d180b400c04fd430c8"))
(define namespace-X500 (hex-string->uuid "6ba7b8149dad11d180b400c04fd430c8"))

;;; Module Contracts

(provide
 nil-uuid
 namespace-DNS
 namespace-URL
 namespace-OID
 namespace-X500)

(provide/contract
 (uuid?
  (-> any/c boolean?))
 (uuid-RFC-4122?
  (-> uuid? boolean?))
 (uuid-version
  (-> uuid? exact-nonnegative-integer?))
 (make-uuid-1
  (->* () (bytes?) uuid?))
 (uuid-1->date
  (-> uuid? date?))
 (make-uuid-3
  (->* (uuid? string?) (#:legacy boolean?) uuid?))
 (make-uuid-4
  (-> uuid?))
 (make-uuid-5
  (->* (uuid? string?) (#:legacy boolean?) uuid?))
 (hex-string?
  (-> any/c boolean?))
 (hex-string->uuid
  (-> hex-string? uuid?))
 (uuid-string?
  (-> any/c boolean?))
 (string->uuid
  (-> string? (or/c uuid? false/c)))
 (uuid->hex-string
  (-> uuid? hex-string?))
 (uuid->string
  (-> uuid? string?))
 (uuid->urn-string
  (-> uuid? string?))
 (uuid=?
  (-> uuid? uuid? boolean?))
 (uuid<?
  (-> uuid? uuid? boolean?))
 (uuid>?
  (-> uuid? uuid? boolean?)))