(module xml-pull mzscheme
(require (lib "etc.ss")
(lib "struct.ss")
(only (lib "list.ss") first foldl)
(planet "ssax.ss" ("lizorkin" "ssax.plt" 1 3))
(planet "generator.ss" ("dyoo" "generator.plt" 2 0)))
(define-struct morsel () #f)
(define-struct (start-element morsel) (name attributes) #f)
(define-struct (end-element morsel) (name attributes) #f)
(define-struct (characters morsel) (s1 s2) #f)
(define-struct (exhausted morsel) () #f)
(provide (struct morsel ())
(struct start-element (name attributes))
(struct end-element (name attributes))
(struct characters (s1 s2))
(struct exhausted ()))
(define-struct pstate (collecting? lst))
(define (pstate-extend a-pstate a-sexp)
(copy-struct pstate a-pstate
[pstate-lst (cons a-sexp (pstate-lst a-pstate))]))
(define rev-normalize-string-children ssax:reverse-collect-str-drop-ws)
(provide current-namespace-translate)
(define current-namespace-translate
(make-parameter (lambda (ns) ns)))
(define (elem-gi->symbol elem-gi)
(cond
[(symbol? elem-gi) elem-gi]
[else
(string->symbol
(string-append
(symbol->string ((current-namespace-translate) (car elem-gi)))
":"
(symbol->string (cdr elem-gi))))]))
(define (normalize-attributes attributes)
(reverse
(foldl (lambda (x acc)
(cons (list (elem-gi->symbol (car x))
(cdr x))
acc))
'()
attributes)))
(define ((new-level-handler yield) elem-gi attributes namespaces expected-content seed)
(cond
[(pstate-collecting? seed)
(copy-struct pstate seed [pstate-lst '()])]
[else
(let ([start-collecting?
(yield (make-start-element (elem-gi->symbol elem-gi)
(normalize-attributes attributes)))])
(cond
[(eqv? start-collecting? #t)
(copy-struct pstate seed
[pstate-collecting? #t]
[pstate-lst '()])]
[else seed]))]))
(define ((finish-element-handler yield) elem-gi attributes namespaces parent-seed seed)
(define (combine elem-gi attributes)
(pstate-extend parent-seed
`(,(elem-gi->symbol elem-gi)
(@ ,@(normalize-attributes attributes))
,@(rev-normalize-string-children (pstate-lst seed)))))
(cond
[(pstate-collecting? parent-seed)
(combine elem-gi attributes)]
[(and (not (pstate-collecting? parent-seed))
(pstate-collecting? seed))
(yield (first (pstate-lst (combine elem-gi attributes))))
parent-seed]
[else
(yield (make-end-element (elem-gi->symbol elem-gi)
(normalize-attributes attributes)))
parent-seed]))
(define ((char-data-handler yield) s1 s2 seed)
(cond
[(pstate-collecting? seed)
(cond
[(string=? s2 "") (pstate-extend seed s1)]
[else (pstate-extend (pstate-extend seed s1) s2)])]
[else
(yield (make-characters s1 s2))
seed]))
(define-struct taffy (g last-morsel))
(provide taffy? taffy)
(provide start-xml-pull)
(define (start-xml-pull ip)
(define-generator (start-parsing ip)
(let ([parser
(ssax:make-parser
NEW-LEVEL-SEED (new-level-handler yield)
FINISH-ELEMENT (finish-element-handler yield)
CHAR-DATA-HANDLER (char-data-handler yield))])
(parser ip (make-pstate #f '()))))
(make-taffy (start-parsing ip) #f))
(provide pull-morsel)
(define (pull-morsel a-taffy)
(let ([evt
(generator-next (taffy-g a-taffy)
(lambda (exn) (make-exhausted)) #f)])
(set-taffy-last-morsel! a-taffy evt)
evt))
(provide pull-sexp)
(define (pull-sexp a-taffy)
(unless (start-element? (taffy-last-morsel a-taffy))
(error 'pull-sexp
"can only pull an sexp if the last morsel was a start-element, but was ~a"
(taffy-last-morsel a-taffy)))
(let ([evt
(generator-next (taffy-g a-taffy)
(lambda (exn) (make-exhausted)) #t)])
(set-taffy-last-morsel! a-taffy evt)
evt))
(provide pull-sexps/g)
(define-generator (pull-sexps/g a-taffy a-symbol)
(let loop ([morsel (pull-morsel a-taffy)])
(cond
[(exhausted? morsel)
(void)]
[(and (start-element? morsel)
(symbol=? a-symbol (start-element-name morsel)))
(yield (pull-sexp a-taffy))
(loop (pull-morsel a-taffy))]
[else
(loop (pull-morsel a-taffy))]))))