;;; @Package bencode ;;; @Subtitle BitTorrent Bencode Decoding in Scheme ;;; @HomePage http://www.neilvandyke.org/bencode-scheme/ ;;; @Author Neil Van Dyke ;;; @Version 0.2 ;;; @Date 2009-03-03 ;;; @PLaneT neil/bencode:1:1 ;; $Id: bencode.ss,v 1.35 2009/03/03 11:33:13 neilpair Exp $ ;;; @legal ;;; Copyright @copyright{} 2005-2009 Neil Van Dyke. This program is Free ;;; Software; you can redistribute it and/or modify it under the terms of the ;;; GNU Lesser General Public License as published by the Free Software ;;; Foundation; either version 3 of the License (LGPL 3), or (at your option) ;;; any later version. This program is distributed in the hope that it will be ;;; useful, but without any warranty; without even the implied warranty of ;;; merchantability or fitness for a particular purpose. See ;;; @indicateurl{http://www.gnu.org/licenses/} for details. For other licenses ;;; and consulting, please contact the author. ;;; @end legal #lang scheme/base ;;; @section Introduction ;;; The @b{bencode} library is for parsing the @dfn{bencoding} format of the ;;; BitTorrent network protocol into basic Scheme data types (and currently ;;; PLT-specific byte strings). This is useful for inspecting @code{.torrent} ;;; files, and might later be used in the implementation of a BitTorrent client ;;; or protocol analyzer. ;;; ;;; The format interpretation is based on the undated ;;; @uref{http://www.bittorrent.com/protocol.html, BitTorrent protocol ;;; documentation Web page} as viewed on 2005-04-17. The mapping from ;;; those bencoding types to Scheme types is: ;;; ;;; @table @dfn ;;; ;;; @item String ;;; PLT byte string. ;;; ;;; @item Integer ;;; Scheme integer. ;;; ;;; @item List ;;; Scheme list. ;;; ;;; @item Dictionary ;;; Scheme list with the symbol @code{dictionary} as its head, and an ;;; association list as its tail. ;;; ;;; @end table ;;; ;;; @noindent ;;; For example, a parse of a certain real-world @code{.torrent} file: ;;; ;;; @lisp ;;; (unbencode (open-input-file "debian.torrent")) ;;; @result{} ;;; ((dictionary ;;; (#"announce" . #"http://cdimage.debian.org:6969/announce") ;;; (#"comment" . #"Debian CD from cdimage.debian.org") ;;; (#"creation date" . 1105009474) ;;; (#"info" ;;; dictionary ;;; (#"length" . 600158208) ;;; (#"name" . #"debian-30r4-i386-binary-1.iso") ;;; (#"piece length" . 524288) ;;; (#"pieces" . [...large byte string...])))) ;;; @end lisp ;;; ;;; @code{bencode.scm} is currently specific to PLT 299/3xx, due to the need ;;; for byte I/O and some representation for byte sequences. Otherwise, the ;;; code has been written to require only R5RS, SRFI-6, and SRFI-23. ;; Byte Operations Portability: (define-syntax %bencode:peek-byte (syntax-rules () ((_ PORT) (peek-byte PORT)))) (define-syntax %bencode:read-byte (syntax-rules () ((_ PORT) (read-byte PORT)))) (define-syntax %bencode:write-byte (syntax-rules () ((_ BYTE PORT) (write-byte BYTE PORT)))) (define-syntax %bencode:eat-byte (syntax-rules () ((_ PORT) (%bencode:read-byte PORT)))) (define-syntax %bencode:open-output-bytes (syntax-rules () ((_) (open-output-bytes)))) (define-syntax %bencode:get-output-bytes (syntax-rules () ((_ PORT) (get-output-bytes PORT)))) ;; Errors: (define-syntax %bencode:premature-eof-error (syntax-rules () ((_) (error "bencoding premature eof")))) (define-syntax %bencode:invalid-char-error (syntax-rules () ((_ CHAR) (error "bencode invalid char:" CHAR)))) ;;; @section API ;;; @defproc unbencode-single port ;;; ;;; Parses a single bencoding object (and any child objects, in the case of a ;;; list or dictionary) from input port @var{port} and yields the Scheme ;;; representation. (define unbencode-single (letrec ((do-digits (lambda (port term num) (let ((c (%bencode:read-byte port))) ;; TODO: Maybe shift the "case" into the "cond", with range ;; test for the digits. Profile. (cond ((eof-object? c) (%bencode:premature-eof-error)) ((eqv? term c) num) (else (case c ((48) (do-digits port term (* 10 num) )) ((49) (do-digits port term (+ (* 10 num) 1))) ((50) (do-digits port term (+ (* 10 num) 2))) ((51) (do-digits port term (+ (* 10 num) 3))) ((52) (do-digits port term (+ (* 10 num) 4))) ((53) (do-digits port term (+ (* 10 num) 5))) ((54) (do-digits port term (+ (* 10 num) 6))) ((55) (do-digits port term (+ (* 10 num) 7))) ((56) (do-digits port term (+ (* 10 num) 8))) ((57) (do-digits port term (+ (* 10 num) 9))) (else (%bencode:invalid-char-error c)))))))) (do-string (lambda (port num) (let ((os (%bencode:open-output-bytes))) (let loop ((len (do-digits port 58 num))) (if (zero? len) (let ((bytes (%bencode:get-output-bytes os))) (close-output-port os) bytes) (let ((b (%bencode:read-byte port))) (if (eof-object? b) (%bencode:premature-eof-error) (begin (%bencode:write-byte b os) (loop (- len 1))))))))))) (lambda (port) (let ((c (%bencode:read-byte port))) ;; TODO: Maybe turn this if-case into a "cond", with range test for the ;; digits. Profile. (if (eof-object? c) #f (case c ((105) ;; "i" (let ((c (%bencode:peek-byte port))) (if (eqv? 45 c) ;; "-" (begin (%bencode:eat-byte port) (- (do-digits port 101 0))) (do-digits port 101 0)))) ((108) ;; "l" (let loop () (let ((c (%bencode:peek-byte port))) (cond ((eof-object? c) (%bencode:premature-eof-error)) ((eqv? 101 c) (%bencode:eat-byte port) '()) (else (cons (or (unbencode-single port) (%bencode:premature-eof-error)) (loop))))))) ((100) ;; "d" (cons 'dictionary (let loop () (let ((c (%bencode:peek-byte port))) (cond ((eof-object? c) (%bencode:premature-eof-error)) ((eqv? 101 c) (%bencode:eat-byte port) '()) (else (cons (cons (or (unbencode-single port) (%bencode:premature-eof-error)) (or (unbencode-single port) (%bencode:premature-eof-error))) (loop)))))))) ((48) (do-string port 0)) ((49) (do-string port 1)) ((50) (do-string port 2)) ((51) (do-string port 3)) ((52) (do-string port 4)) ((53) (do-string port 5)) ((54) (do-string port 6)) ((55) (do-string port 7)) ((56) (do-string port 8)) ((57) (do-string port 9)) (else (%bencode:invalid-char-error c)))))))) ;;; @defproc unbencode port ;;; ;;; Yields a list of the Scheme representations of all bencoding objects parsed ;;; from input port @var{port}. (define (unbencode port) (let ((obj (unbencode-single port))) (if obj (cons obj (unbencode port)) '()))) ;;; @unnumberedsec History ;;; @table @asis ;;; ;;; @item Version 0.2 --- 2009-03-03 -- PLaneT @code{(1 1)} ;;; Library is now LPGL 3. Converted to author's new Scheme administration ;;; system. ;;; ;;; @item Version 0.1 --- 2005-04-17 -- PLaneT @code{(1 0)} ;;; Initial release. ;;; ;;; @end table (provide unbencode unbencode-single)