bencode.rkt
#lang racket/base
(require racket/contract
         (planet neil/mcfly:1:0))

(doc (section "Introduction"))

(doc "The "
      (bold "bencode")
      " package is for parsing the "
      (italic "bencoding")
      " format of the BitTorrent network protocol into basic Racket data types.
       This is useful for inspecting "
      (tt ".torrent")
      " files, and might be useful in the implementation of a BitTorrent client
       or protocol analyzer.")

(doc "The format interpretation is based on the undated "
     (hyperlink "http://www.bittorrent.com/protocol.html"
                "BitTorrent protocol documentation Web page")
     " as viewed on 2005-04-17.  The mapping from those bencoding types to
      Scheme types is:")

(doc (tabular
      (list
       (list (italic "String")
             "Racket byte string.")

       (list (italic "Integer")
             "Scheme integer.")

       (list (italic "List")
             "Scheme list.")

       (list (italic "Dictionary")
             (list "Scheme list with the symbol "
                   (tt "dictionary")
                   " as its head, and an association list as its tail.")))))

(doc "For example, a parse of a certain real-world "
     (tt ".torrent")
     " file:")

(doc (racketinput (unbencode (open-input-file "debian.torrent")))
     (racketresultblock
      ((dictionary
        (#"announce"      . #"http://cdimage.debian.org:6969/announce")
        (#"comment"       . #"Debian CD from cdimage.debian.org")
        (#"creation date" . 1105009474)
        (#"info"
         dictionary
         (#"length"       . 600158208)
         (#"name"         . #"debian-30r4-i386-binary-1.iso")
         (#"piece length" . 524288)
         (#"pieces"       . #,(italic "[...large byte string...]")))))))

;; Note: This package is currently specific to PLT 299/3xx, due to the need for
;; byte I/O and some representation for byte sequences.  Otherwise, the code
;; has been written to require only R5RS, SRFI-6, and SRFI-23.

;; Byte Operations Portability:

(define-syntax %bencode:peek-byte
  (syntax-rules () ((_ PORT) (peek-byte PORT))))

(define-syntax %bencode:read-byte
  (syntax-rules () ((_ PORT) (read-byte PORT))))

(define-syntax %bencode:write-byte
  (syntax-rules () ((_ BYTE PORT) (write-byte BYTE PORT))))

(define-syntax %bencode:eat-byte
  (syntax-rules ()
    ((_ PORT) (%bencode:read-byte PORT))))

(define-syntax %bencode:open-output-bytes
  (syntax-rules ()
    ((_) (open-output-bytes))))

(define-syntax %bencode:get-output-bytes
  (syntax-rules ()
    ((_ PORT) (get-output-bytes PORT))))

;; Errors:

(define-syntax %bencode:premature-eof-error
  (syntax-rules ()
    ((_) (error "bencoding premature eof"))))

(define-syntax %bencode:invalid-char-error
  (syntax-rules ()
    ((_ CHAR) (error "bencode invalid char:" CHAR))))

(doc (section "API"))

(doc procedure unbencode-single
     "Parses a single bencoding object (and any child objects, in the case of
      a list or dictionary) from input port "
     (racket port)
     " and yields the Scheme representation.")
(provide/contract (unbencode-single (-> input-port? list?)))
;; TODO: Make contract for what we return, rather than "list?"
(define unbencode-single
  (letrec ((do-digits
            (lambda (port term num)
              (let ((c (%bencode:read-byte port)))
                ;; TODO: Maybe shift the "case" into the "cond", with range
                ;; test for the digits.  Profile.
                (cond ((eof-object? c) (%bencode:premature-eof-error))
                      ((eqv? term   c) num)
                      (else (case c
                              ((48) (do-digits port term    (* 10 num)   ))
                              ((49) (do-digits port term (+ (* 10 num) 1)))
                              ((50) (do-digits port term (+ (* 10 num) 2)))
                              ((51) (do-digits port term (+ (* 10 num) 3)))
                              ((52) (do-digits port term (+ (* 10 num) 4)))
                              ((53) (do-digits port term (+ (* 10 num) 5)))
                              ((54) (do-digits port term (+ (* 10 num) 6)))
                              ((55) (do-digits port term (+ (* 10 num) 7)))
                              ((56) (do-digits port term (+ (* 10 num) 8)))
                              ((57) (do-digits port term (+ (* 10 num) 9)))
                              (else (%bencode:invalid-char-error c))))))))
           (do-string
            (lambda (port num)
              (let ((os (%bencode:open-output-bytes)))
                (let loop ((len (do-digits port 58 num)))
                  (if (zero? len)
                      (let ((bytes (%bencode:get-output-bytes os)))
                        (close-output-port os)
                        bytes)
                      (let ((b (%bencode:read-byte port)))
                        (if (eof-object? b)
                            (%bencode:premature-eof-error)
                            (begin (%bencode:write-byte b os)
                                   (loop (- len 1)))))))))))
    (lambda (port)
      (let ((c (%bencode:read-byte port)))
        ;; TODO: Maybe turn this if-case into a "cond", with range test for the
        ;; digits.  Profile.
        (if (eof-object? c)
            #f
            (case c
              ((105) ;; "i"
               (let ((c (%bencode:peek-byte port)))
                 (if (eqv? 45 c) ;; "-"
                     (begin (%bencode:eat-byte port)
                            (- (do-digits port 101 0)))
                     (do-digits port 101 0))))
              ((108) ;; "l"
               (let loop ()
                 (let ((c (%bencode:peek-byte port)))
                   (cond ((eof-object? c) (%bencode:premature-eof-error))
                         ((eqv? 101    c) (%bencode:eat-byte port) '())
                         (else (cons (or (unbencode-single port)
                                         (%bencode:premature-eof-error))
                                     (loop)))))))
              ((100) ;; "d"
               (cons
                'dictionary
                (let loop ()
                  (let ((c (%bencode:peek-byte port)))
                    (cond ((eof-object? c)
                           (%bencode:premature-eof-error))
                          ((eqv? 101    c) (%bencode:eat-byte port) '())
                          (else
                           (cons (cons (or (unbencode-single port)
                                           (%bencode:premature-eof-error))
                                       (or (unbencode-single port)
                                           (%bencode:premature-eof-error)))
                                 (loop))))))))
              ((48) (do-string port 0))
              ((49) (do-string port 1))
              ((50) (do-string port 2))
              ((51) (do-string port 3))
              ((52) (do-string port 4))
              ((53) (do-string port 5))
              ((54) (do-string port 6))
              ((55) (do-string port 7))
              ((56) (do-string port 8))
              ((57) (do-string port 9))
              (else (%bencode:invalid-char-error c))))))))

(doc procedure unbencode
     "Yields a list of the Scheme representations of all bencoding objects parsed
      from input port "
     (racket port)
     ".")
(provide/contract (unbencode (-> input-port? list?)))
(define (unbencode port)
  (let ((obj (unbencode-single port)))
    (if obj
        (cons obj (unbencode port))
        '())))

(doc history

     (#:planet 2:0 #:date "2012-06-12"

               "Converted to McFly.  Changed package home page URL.")

     (#:version "0.2" #:planet 1:1 #:date "2009-03-03"

                "Library is now LPGL 3.  Converted to author's new Scheme administration system.")

     (#:version "0.1" #:planet 1:0 #:date "2005-04-17"

                "Initial release."))