private/xml-to-scribble.ss
#lang scheme
(require scribble/manual xml)
(provide xexpr->scribble fix-dl)

(define (a->link a)
  (match a
    [(list 'a (list (list 'href val)) str)
     `(link ,(if (regexp-match #rx"^/" val)
                 (string-append "http://www.flickr.com" val)
                 val)
            ,str)]))

(define (misc? x) (or (comment? x) (p-i? x)))

(define (xexpr-fold elm str sym int cdata misc x)
  (local [(define (recur y)
            (xexpr-fold elm str sym int cdata misc y))]
    
    (cond [(string? x) (str x)]
          [(symbol? x) (sym x)]
          [(exact-nonnegative-integer? x) (int x)]
          [(cdata? x) (cdata x)]
          [(misc? x) (misc x)]
          [(cons? x)         
           (elm (first x)
                (second x)
                (map recur (drop x 2)))])))

;; This fixes the awful DL mess that Flickr emits in its documentation.
(define (fix-dl x)
  (xexpr-fold
   (λ (n as bs)
     (cond [(symbol=? n 'dl)
            (list* 'dl as
                   (map (λ (e)
                          (cond [(and (cons? e)
                                      (eq? 'dl (first e)))
                                 (third e)]
                                [(and (cons? e)
                                      (or (eq? 'dt (first e))
                                          (eq? 'dd (first e))))
                                 e]
                                [else
                                 `(dd () ,e)]))
                        bs))]
           [else (list* n as bs)]))            
   (λ (str) str)
   (λ (sym) sym)
   (λ (int) int)
   (λ (cdata) cdata)
   (λ (misc) misc) 
   x))

(define (xexpr->scribble x)
  (xexpr-fold
   (λ (n as bs)
     (cond [(symbol=? n 'a)
            (a->link (list* n as bs))]
           [(or (symbol=? n 'b)
                (symbol=? n 'strong))
            (list* 'bold bs)]
           [(symbol=? n 'p)
            (list* 'para bs)]
           [(symbol=? n 'code)
            (list* 'tt bs)]
           [(symbol=? n 'ul)
            (list* 'itemlist bs)]
           [(symbol=? n 'dl)
            (list* 'itemlist bs)]
           [(symbol=? n 'li)
            (list* 'item bs)]
           [(symbol=? n 'dt)
            (list* 'item bs)]
           [(symbol=? n 'dd)
            (list* 'item bs)]
           [(symbol=? n 'br)
            ""]
           [(symbol=? n 'i)
            (list* 'italic bs)]
           [(symbol=? n 'em)
            (list* 'italic bs)]
           [(symbol=? n 'span)
            (list* 'elem bs)]
           [(symbol=? n 'q)
            (list* 'elem
                   "\""
                   (append bs (list "\"")))]
           [(symbol=? n 'root)
            bs]
           [else
            (list* 'elem 
                   "[UNKNOWN ELEMENT "
                   (symbol->string n)
                   "] "
                   bs)]))            
   (λ (str) str)
   (λ (sym)
     (cond [(symbol=? sym 'emdash) "---"]
           [(symbol=? sym 'nbsp) ""]
           [else 
            (string-append "&"
                           (symbol->string sym)
                           ";")]))
   (λ (int)
     (cond [(= int 8212) "---"]
           [else 
            (string-append "&"
                           (number->string int)
                           ";")]))
   (λ (cdata) "")
   (λ (misc) "") 
   x))

#;
(fix-dl
 `(root () 
        (code () "foo") 
        (ul () 
            (li () "item " "1")
            (li () "item 2"))
        ;; This is a problematic example.
        (dl ()
            nbsp
            (dt () (b () (code () "friends")))
            (dl () "Only contacts who...")
            (dt () (b () (code () "both")))
            (dl () "Only contacts who..."))
        
        (b () "yeah " "yeah") 
        ,lnk))