xml-pull.ss
(module xml-pull mzscheme
  (require (lib "etc.ss")
           (lib "struct.ss")
           (only (lib "list.ss") first foldl)
           (planet "ssax.ss" ("lizorkin" "ssax.plt" 1 3))
           (planet "generator.ss" ("dyoo" "generator.plt" 2 0)))
  
  
  ;; This is an implementation of a XML pull-style parser.  I'm feeling a little whimsical,
  ;; so the abstractions here are named after food.

  
  ;; An morsel is one of the following:
  (define-struct morsel () #f)
  (define-struct (start-element morsel) (name attributes) #f)
  (define-struct (end-element morsel) (name attributes) #f)
  (define-struct (characters morsel) (s1 s2) #f)
  (define-struct (exhausted morsel) () #f)

  (provide (struct morsel ())
           (struct start-element (name attributes))
           (struct end-element (name attributes))
           (struct characters (s1 s2))
           (struct exhausted ()))
  ;; Add more morsels here as necessary.  I wonder if we'll want a start-document or end-document
  ;; morsel?
  
  
  
  
  
;                                           
;                                           
;   @@@$-                                   
;    @ -$                                   
;    @  @   $@$:  @@-$+  :@@+@  -@@$   @@-$+
;    @ -$     -@   @$ :  @$ -@  $  -$   @$ :
;    @@@-  -$@$@   @     :@@$-  @@@@@   @   
;    @     $*  @   @        *@  $       @   
;    @     @- *@   @     @  :@  +:      @   
;   @@@    -$$-@@ @@@@@  $+@@:   $@@+  @@@@@
;                                           
;                                           
;                                           
;                                           

  
  ;; The parser state is a structure:
  ;;
  ;; (make-pstate c l)
  ;;
  ;; where c is a boolean and l is an sexp.  We set collecting? to true
  ;; whenever we're interested in accumulating children into the pstate
  ;; structure.  l is used to accumulate the s-expressions when we're
  ;; in an interested mood.
  (define-struct pstate (collecting? lst))
  
  
  ;; pstate-extend: pstate sexp -> pstate
  ;; Accumulates a-sexp into the parser state a-state.
  (define (pstate-extend a-pstate a-sexp)
    (copy-struct pstate a-pstate 
                 [pstate-lst (cons a-sexp (pstate-lst a-pstate))]))
  
  
  ;; rev-normalize-string-children: (listof sexp) -> (listof sexp)
  ;; Does a shallow reveral of elements, and concatenates adjacent strings
  ;; together.
  (define rev-normalize-string-children ssax:reverse-collect-str-drop-ws)
  
  
  
  (provide current-namespace-translate)
  ;; current-namespace-translate: (parameterof (symbol -> symbol))
  ;; Used to do additional translation of namespaces to something convenient.
  (define current-namespace-translate 
    (make-parameter (lambda (ns) ns)))

  
  ;; elem-gi->symbol: elem-gi -> symbol
  ;; Translate an elem-gi (which is either itself a symbol or a pair of symbols)
  ;; into a single symbol.
  ;; FIXME: allow user to provide translational map of the namespace
  ;; to something that they like.
  (define (elem-gi->symbol elem-gi)
    (cond
      [(symbol? elem-gi) elem-gi]
      [else
       (string->symbol
        (string-append 
         (symbol->string ((current-namespace-translate) (car elem-gi))) 
         ":" 
         (symbol->string (cdr elem-gi))))]))

  
  ;; normalize-attributes: (listof (elem-gi . string)) -> (listof (list symbol string))
  ;; Forces attribute names to be symbols and restructures
  ;; each attribute name/value pair into a list.
  (define (normalize-attributes attributes)
    (reverse 
     (foldl (lambda (x acc)
              (cons (list (elem-gi->symbol (car x))
                          (cdr x))
                    acc))
            '()
            attributes)))

  
  (define ((new-level-handler yield) elem-gi attributes namespaces expected-content seed)
    (cond
      [(pstate-collecting? seed) 
       (copy-struct pstate seed [pstate-lst '()])]
      [else
       (let ([start-collecting?
              (yield (make-start-element (elem-gi->symbol elem-gi) 
                                         (normalize-attributes attributes)))])
         (cond
           [(eqv? start-collecting? #t)
            (copy-struct pstate seed 
                         [pstate-collecting? #t]
                         [pstate-lst '()])]
           [else seed]))]))

      
  
  (define ((finish-element-handler yield) elem-gi attributes namespaces parent-seed seed)

    (define (combine elem-gi attributes)
      (pstate-extend parent-seed
                     `(,(elem-gi->symbol elem-gi)
                        (@ ,@(normalize-attributes attributes))
                        ,@(rev-normalize-string-children (pstate-lst seed)))))
    (cond
      [(pstate-collecting? parent-seed)
       (combine elem-gi attributes)]
      [(and (not (pstate-collecting? parent-seed))
            (pstate-collecting? seed))
       (yield (first (pstate-lst (combine elem-gi attributes))))
       parent-seed]
      [else
       (yield (make-end-element (elem-gi->symbol elem-gi)
                                (normalize-attributes attributes)))
       parent-seed]))
  
  
  (define ((char-data-handler yield) s1 s2 seed)
    (cond
      [(pstate-collecting? seed) 
       (cond 
         [(string=? s2 "") (pstate-extend seed s1)]
         [else (pstate-extend (pstate-extend seed s1) s2)])]
      [else
       (yield (make-characters s1 s2))
       seed]))
  
  

  
  
;                                    
;                                    
;                  :@@$   :@@$       
;    @             @:     @:         
;   @@@@@   $@$:  @@@@@  @@@@@ @@@ @@@
;    @        -@   @      @     $-  $-
;    @     -$@$@   @      @     -$  $
;    @     $*  @   @      @      $*$:
;    @: :$ @- *@   @      @       $$ 
;    :@@$- -$$-@@ @@@@@  @@@@@    $* 
;                                 $  
;                               @@@@ 
;                                    
;                                    
  
  ;; A taffy is a structure:
  ;;
  ;; (make-taffy g)
  ;;
  ;; where g is a generator.  We use a taffy to pull off morsels
  ;; of chewy XML.
  (define-struct taffy (g last-morsel))
  (provide taffy? taffy)

  (provide start-xml-pull)
  ;; start-xml-pull: input-port -> taffy
  (define (start-xml-pull ip)
    (define-generator (start-parsing ip)
      (let ([parser
             (ssax:make-parser
              NEW-LEVEL-SEED (new-level-handler yield)
              FINISH-ELEMENT (finish-element-handler yield)
              CHAR-DATA-HANDLER (char-data-handler yield))])
              ;; don't know how to properly handle PI's yet...
        (parser ip (make-pstate #f '()))))
    (make-taffy (start-parsing ip) #f))

  
  (provide pull-morsel)
  ;; pull-morsel: taffy -> morsel
  (define (pull-morsel a-taffy)
    (let ([evt
           (generator-next (taffy-g a-taffy) 
                           (lambda (exn) (make-exhausted)) #f)])
      (set-taffy-last-morsel! a-taffy evt)
      evt))

  
  (provide pull-sexp)
  ;; pull-sexp: taffy -> (union sexp exhaused)
  (define (pull-sexp a-taffy)
    (unless (start-element? (taffy-last-morsel a-taffy))
      (error 'pull-sexp 
             "can only pull an sexp if the last morsel was a start-element, but was ~a"
             (taffy-last-morsel a-taffy)))
    (let ([evt
           (generator-next (taffy-g a-taffy)
                           (lambda (exn) (make-exhausted)) #t)])
      (set-taffy-last-morsel! a-taffy evt)
      evt))
  
  
  (provide pull-sexps/g)
  ;; pull-sexps/g: taffy symbol -> (generatorof sexp)
  (define-generator (pull-sexps/g a-taffy a-symbol)
    (let loop ([morsel (pull-morsel a-taffy)])
      (cond
        [(exhausted? morsel)
         (void)]
        [(and (start-element? morsel)
              (symbol=? a-symbol (start-element-name morsel)))
         (yield (pull-sexp a-taffy))
         (loop (pull-morsel a-taffy))]
        [else 
         (loop (pull-morsel a-taffy))]))))