bencode.ss
;; THIS FILE IS GENERATED

(module bencode mzscheme


;;; @Package     bencode.scm
;;; @Subtitle    BitTorrent Bencode Decoding in Scheme
;;; @HomePage    http://www.neilvandyke.org/bencode-scm/
;;; @Author      Neil W. Van Dyke
;;; @AuthorEmail neil@@neilvandyke.org
;;; @Version     0.1
;;; @Date        2005-04-17

;; $Id: bencode.scm,v 1.26 2005/04/18 11:15:38 neil Exp $

;;; @legal
;;; Copyright @copyright{} 2005 Neil W. Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 2.1 of the License, or (at your option) any
;;; later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @url{http://www.gnu.org/copyleft/lesser.html} for details.  For other
;;; license options and consulting, contact the author.
;;; @end legal

;; (load "../testeez/testeez.scm")
(define-syntax %bencode:testeez
  (syntax-rules () ((_ x ...)
                    ;; (testeez x ...)
                    (error "Tests disabled.")
                    )))

;;; @section Introduction

;;; @code{bencode.scm} parses the @dfn{bencoding} format of the BitTorrent
;;; network protocol into basic Scheme data types (and currently PLT-specific
;;; byte strings).  This is useful for inspecting @code{.torrent} files, and
;;; might later be used in the implementation of a BitTorrent client or
;;; protocol analyzer.
;;;
;;; The format interpretation is based on the undated
;;; @uref{http://www.bittorrent.com/protocol.html, BitTorrent protocol
;;; documentation Web page} as viewed on 2005-04-17.  The mapping from
;;; those bencoding types to Scheme types is:
;;;
;;; @table @dfn
;;;
;;; @item String
;;; PLT byte string.
;;;
;;; @item Integer
;;; Scheme integer.
;;;
;;; @item List
;;; Scheme list.
;;;
;;; @item Dictionary
;;; Scheme list with the symbol @code{dictionary} as its head, and an
;;; association list as its tail.
;;;
;;; @end table
;;;
;;; @noindent
;;; For example, a parse of a certain real-world @code{.torrent} file:
;;;
;;; @lisp
;;; (unbencode (open-input-file "debian.torrent")))
;;; @result{}
;;; ((dictionary
;;;   (#"announce"      . #"http://cdimage.debian.org:6969/announce")
;;;   (#"comment"       . #"Debian CD from cdimage.debian.org")
;;;   (#"creation date" . 1105009474)
;;;   (#"info"
;;;    dictionary
;;;    (#"length"       . 600158208)
;;;    (#"name"         . #"debian-30r4-i386-binary-1.iso")
;;;    (#"piece length" . 524288)
;;;    (#"pieces"       . @r{@i{[...large byte string...]}}))))
;;; @end lisp
;;;
;;; @code{bencode.scm} is currently specific to PLT 299/3xx, due to the need
;;; for byte I/O and some representation for byte sequences.  Otherwise, the
;;; code has been written to require only R5RS, SRFI-6, and SRFI-23.

;; Byte Operations Portability:

(define-syntax %bencode:peek-byte
  (syntax-rules () ((_ PORT) (peek-byte PORT))))

(define-syntax %bencode:read-byte
  (syntax-rules () ((_ PORT) (read-byte PORT))))

(define-syntax %bencode:write-byte
  (syntax-rules () ((_ BYTE PORT) (write-byte BYTE PORT))))

(define-syntax %bencode:eat-byte
  (syntax-rules ()
    ((_ PORT) (%bencode:read-byte PORT))))

(define-syntax %bencode:open-output-bytes
  (syntax-rules ()
    ((_) (open-output-bytes))))

(define-syntax %bencode:get-output-bytes
  (syntax-rules ()
    ((_ PORT) (get-output-bytes PORT))))

;; Errors:

(define-syntax %bencode:premature-eof-error
  (syntax-rules ()
    ((_) (error "bencoding premature eof"))))

(define-syntax %bencode:invalid-char-error
  (syntax-rules ()
    ((_ CHAR) (error "bencode invalid char:" CHAR))))

;;; @section API

;;; @defproc unbencode-single port
;;;
;;; Parses a single bencoding object (and any child objects, in the case of a
;;; list or dictionary) from input port @var{port} and yields the Scheme
;;; representation.

(define unbencode-single
  (letrec ((do-digits
            (lambda (port term num)
              (let ((c (%bencode:read-byte port)))
                ;; TODO: Maybe shift the "case" into the "cond", with range
                ;; test for the digits.  Profile.
                (cond ((eof-object? c) (%bencode:premature-eof-error))
                      ((eqv? term   c) num)
                      (else (case c
                              ((48) (do-digits port term    (* 10 num)   ))
                              ((49) (do-digits port term (+ (* 10 num) 1)))
                              ((50) (do-digits port term (+ (* 10 num) 2)))
                              ((51) (do-digits port term (+ (* 10 num) 3)))
                              ((52) (do-digits port term (+ (* 10 num) 4)))
                              ((53) (do-digits port term (+ (* 10 num) 5)))
                              ((54) (do-digits port term (+ (* 10 num) 6)))
                              ((55) (do-digits port term (+ (* 10 num) 7)))
                              ((56) (do-digits port term (+ (* 10 num) 8)))
                              ((57) (do-digits port term (+ (* 10 num) 9)))
                              (else (%bencode:invalid-char-error c))))))))
           (do-string
            (lambda (port num)
              (let ((os (%bencode:open-output-bytes)))
                (let loop ((len (do-digits port 58 num)))
                  (if (zero? len)
                      (let ((bytes (%bencode:get-output-bytes os)))
                        (close-output-port os)
                        bytes)
                      (let ((b (%bencode:read-byte port)))
                        (if (eof-object? b)
                            (%bencode:premature-eof-error)
                            (begin (%bencode:write-byte b os)
                                   (loop (- len 1)))))))))))
    (lambda (port)
      (let ((c (%bencode:read-byte port)))
        ;; TODO: Maybe turn this if-case into a "cond", with range test for the
        ;; digits.  Profile.
        (if (eof-object? c)
            #f
            (case c
              ((105) ;; "i"
               (let ((c (%bencode:peek-byte port)))
                 (if (eqv? 45 c) ;; "-"
                     (begin (%bencode:eat-byte port)
                            (- (do-digits port 101 0)))
                     (do-digits port 101 0))))
              ((108) ;; "l"
               (let loop ()
                 (let ((c (%bencode:peek-byte port)))
                   (cond ((eof-object? c) (%bencode:premature-eof-error))
                         ((eqv? 101    c) (%bencode:eat-byte port) '())
                         (else (cons (or (unbencode-single port)
                                         (%bencode:premature-eof-error))
                                     (loop)))))))
              ((100) ;; "d"
               (cons
                'dictionary
                (let loop ()
                  (let ((c (%bencode:peek-byte port)))
                    (cond ((eof-object? c)
                           (%bencode:premature-eof-error))
                          ((eqv? 101    c) (%bencode:eat-byte port) '())
                          (else
                           (cons (cons (or (unbencode-single port)
                                           (%bencode:premature-eof-error))
                                       (or (unbencode-single port)
                                           (%bencode:premature-eof-error)))
                                 (loop))))))))
              ((48) (do-string port 0))
              ((49) (do-string port 1))
              ((50) (do-string port 2))
              ((51) (do-string port 3))
              ((52) (do-string port 4))
              ((53) (do-string port 5))
              ((54) (do-string port 6))
              ((55) (do-string port 7))
              ((56) (do-string port 8))
              ((57) (do-string port 9))
              (else (%bencode:invalid-char-error c))))))))

;;; @defproc unbencode port
;;;
;;; Yields a list of the Scheme representations of all bencoding objects parsed
;;; from input port @var{port}.

(define (unbencode port)
  (let ((obj (unbencode-single port)))
    (if obj
        (cons obj (unbencode port))
        '())))

;;; @section Tests

;;; The @code{bencode.scm} test suite can be enabled by editing the source code
;;; file and loading @uref{http://www.neilvandyke.org/testeez/, Testeez}.

(define (%bencode:unbencode-string str)
  ;; TODO: This is at least internal-use-only because reading bytes from a
  ;; string is even messier, portability-wise.
  (let* ((port   (open-input-string str))
         (result (unbencode port)))
    (close-input-port port)
    result))

(define (%bencode:test)
  (%bencode:testeez
   "bencode.scm"

   (test/equal "" (%bencode:unbencode-string "4:spam") '(#"spam"))

   (test/equal "" (%bencode:unbencode-string "i3e")  '(3))
   (test/equal "" (%bencode:unbencode-string "i-3e") '(-3))
   (test/equal "" (%bencode:unbencode-string "i0e")  '(0))

   (test/equal "" (%bencode:unbencode-string "i123e")  '(123))
   (test/equal "" (%bencode:unbencode-string "i-123e") '(-123))

   (test/equal ""
               (%bencode:unbencode-string "l4:spam4:eggse")
               '((#"spam" #"eggs")))

   (test/equal ""
               (%bencode:unbencode-string "d3:cow3:moo4:spam4:eggse")
               '((dictionary (#"cow" . #"moo") (#"spam" . #"eggs"))))

   (test/equal ""
               (%bencode:unbencode-string "d4:spaml1:a1:bee")
               '((dictionary (#"spam" . (#"a" #"b")))))

   (test/equal ""
               (%bencode:unbencode-string
                (string-append "4:spami3ei-3ei0ei123ei-123el4:spam4:eggsed3:co"
                               "w3:moo4:spam4:eggsed4:spaml1:a1:bee"))
               '(#"spam" 3 -3 0 123 -123 (#"spam" #"eggs")
                 (dictionary (#"cow". #"moo") (#"spam" . #"eggs"))
                 (dictionary (#"spam". (#"a" #"b")))))

   ))

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.1 --- 2005-04-17
;;; Initial release.
;;;
;;; @end table

(provide (all-defined)))