ssax-code.ss
; Module header is generated automatically
#cs(module ssax-code mzscheme
(require (lib "defmacro.ss"))
(require "common.ss")
(require "myenv.ss")
(require "srfi-13-local.ss")
(require "util.ss")
(require "parse-error.ss")
(require "input-parse.ss")
(require "look-for-str.ss")
(require "char-encoding.ss")

; $Id: SSAX.scm,v 1.8 2003/08/02 02:14:18 oleg Exp $
; DL: let*-values rewritten to call-with-values

(define-macro run-test
  (lambda body
    (define (re-write body)
      (cond
       ((vector? body) (list->vector (re-write (vector->list body))))
       ((not (pair? body)) body)
       ((and (eq? 'quote (car body)) (pair? (cdr body)) (string? (cadr body)))
        (string->symbol (cadr body)))
       (else (cons (re-write (car body)) (re-write (cdr body))))))
    (cons 'begin (re-write body))))
(define (make-xml-token kind head) (cons kind head))
(define xml-token? pair?)
(define-macro xml-token-kind (lambda (token) `(car ,token)))
(define-macro xml-token-head (lambda (token) `(cdr ,token)))
(define (string-whitespace? str)
  (let ((len (string-length str)))
    (cond
     ((zero? len) #t)
     ((= 1 len) (char-whitespace? (string-ref str 0)))
     ((= 2 len)
      (and (char-whitespace? (string-ref str 0))
           (char-whitespace? (string-ref str 1))))
     (else
      (let loop ((i 0))
        (or (>= i len)
            (and (char-whitespace? (string-ref str i)) (loop (++ i)))))))))
(define (assq-values val alist)
  (let loop ((alist alist) (scanned '()))
    (cond
     ((null? alist) (values #f scanned))
     ((equal? val (caar alist))
      (values (car alist) (append scanned (cdr alist))))
     (else (loop (cdr alist) (cons (car alist) scanned))))))
(define (fold-right kons knil lis1)
  (let recur ((lis lis1))
    (if (null? lis)
      knil
      (let ((head (car lis))) (kons head (recur (cdr lis)))))))
(define (fold kons knil lis1)
  (let lp ((lis lis1) (ans knil))
    (if (null? lis) ans (lp (cdr lis) (kons (car lis) ans)))))
(define ssax:S-chars (map ascii->char '(32 10 9 13)))
(define (ssax:skip-S port) (skip-while ssax:S-chars port))
(define (ssax:ncname-starting-char? a-char)
  (and (char? a-char) (or (char-alphabetic? a-char) (char=? #\_ a-char))))
(define (ssax:read-NCName port)
  (let ((first-char (peek-char port)))
    (or (ssax:ncname-starting-char? first-char)
        (parser-error port "XMLNS [4] for '" first-char "'")))
  (string->symbol
    (next-token-of
      (lambda (c)
        (cond
         ((eof-object? c) #f)
         ((char-alphabetic? c) c)
         ((string-index "0123456789.-_" c) c)
         (else #f)))
      port)))
(define (ssax:read-QName port)
  (let ((prefix-or-localpart (ssax:read-NCName port)))
    (case (peek-char port)
      ((#\:)
       (read-char port)
       (cons prefix-or-localpart (ssax:read-NCName port)))
      (else prefix-or-localpart))))
(define ssax:Prefix-XML (string->symbol "xml"))
(define name-compare
  (letrec ((symbol-compare
             (lambda (symb1 symb2)
               (cond
                ((eq? symb1 symb2) '=)
                ((string<? (symbol->string symb1) (symbol->string symb2)) '<)
                (else '>)))))
    (lambda (name1 name2)
      (cond
       ((symbol? name1) (if (symbol? name2) (symbol-compare name1 name2) '<))
       ((symbol? name2) '>)
       ((eq? name2 ssax:largest-unres-name) '<)
       ((eq? name1 ssax:largest-unres-name) '>)
       ((eq? (car name1) (car name2)) (symbol-compare (cdr name1) (cdr name2)))
       (else (symbol-compare (car name1) (car name2)))))))
(define ssax:largest-unres-name
  (cons (string->symbol "#LARGEST-SYMBOL") (string->symbol "#LARGEST-SYMBOL")))
(define ssax:read-markup-token
  (let ()
    (define (skip-comment port)
      (assert-curr-char '(#\-) "XML [15], second dash" port)
      (if (not (find-string-from-port? "-->" port))
        (parser-error port "XML [15], no -->"))
      (make-xml-token 'COMMENT #f))
    (define (read-cdata port)
      (assert (string=? "CDATA[" (read-string 6 port)))
      (make-xml-token 'CDSECT #f))
    (lambda (port)
      (assert-curr-char '(#\<) "start of the token" port)
      (case (peek-char port)
        ((#\/)
         (read-char port)
         (begin0
           (make-xml-token 'END (ssax:read-QName port))
           (ssax:skip-S port)
           (assert-curr-char '(#\>) "XML [42]" port)))
        ((#\?) (read-char port) (make-xml-token 'PI (ssax:read-NCName port)))
        ((#\!)
         (case (peek-next-char port)
           ((#\-) (read-char port) (skip-comment port))
           ((#\[) (read-char port) (read-cdata port))
           (else (make-xml-token 'DECL (ssax:read-NCName port)))))
        (else (make-xml-token 'START (ssax:read-QName port)))))))
(define (ssax:skip-pi port)
  (if (not (find-string-from-port? "?>" port))
    (parser-error port "Failed to find ?> terminating the PI")))
(define (ssax:read-pi-body-as-string port)
  (ssax:skip-S port)
  (string-concatenate/shared
    (let loop ()
      (let ((pi-fragment (next-token '() '(#\?) "reading PI content" port)))
        (if (eqv? #\> (peek-next-char port))
          (begin (read-char port) (cons pi-fragment '()))
          (cons* pi-fragment "?" (loop)))))))
(define (ssax:skip-internal-dtd port)
  (if (not (find-string-from-port? "]>" port))
    (parser-error
      port
      "Failed to find ]> terminating the internal DTD subset")))
(define ssax:read-cdata-body
  (let ((cdata-delimiters (list char-return #\newline #\] #\&)))
    (lambda (port str-handler seed)
      (let loop ((seed seed))
        (let ((fragment
                (next-token '() cdata-delimiters "reading CDATA" port)))
          (case (read-char port)
            ((#\newline) (loop (str-handler fragment nl seed)))
            ((#\])
             (if (not (eqv? (peek-char port) #\]))
               (loop (str-handler fragment "]" seed))
               (let check-after-second-braket ((seed
                                                (if (string-null? fragment)
                                                  seed
                                                  (str-handler
                                                    fragment
                                                    ""
                                                    seed))))
                 (case (peek-next-char port)
                   ((#\>) (read-char port) seed)
                   ((#\])
                    (check-after-second-braket (str-handler "]" "" seed)))
                   (else (loop (str-handler "]]" "" seed)))))))
            ((#\&)
             (let ((ent-ref
                     (next-token-of
                       (lambda (c)
                         (and (not (eof-object? c)) (char-alphabetic? c) c))
                       port)))
               (cond
                ((and (string=? "gt" ent-ref) (eqv? (peek-char port) #\;))
                 (read-char port)
                 (loop (str-handler fragment ">" seed)))
                (else
                 (loop
                  (str-handler ent-ref "" (str-handler fragment "&" seed)))))))
            (else
             (if (eqv? (peek-char port) #\newline) (read-char port))
             (loop (str-handler fragment nl seed)))))))))
(define (ssax:read-char-ref port)
  (let* ((base
          (cond ((eqv? (peek-char port) #\x) (read-char port) 16) (else 10)))
         (name (next-token '() '(#\;) "XML [66]" port))
         (char-code (string->number name base)))
    (read-char port)
    (if (integer? char-code)
      (ucscode->char char-code)
      (parser-error port "[wf-Legalchar] broken for '" name "'"))))
(define ssax:predefined-parsed-entities
  `((,(string->symbol "amp") . "&")
    (,(string->symbol "lt") . "<")
    (,(string->symbol "gt") . ">")
    (,(string->symbol "apos") . "'")
    (,(string->symbol "quot") . "\"")))
(define (ssax:handle-parsed-entity
         port
         name
         entities
         content-handler
         str-handler
         seed)
  (cond
   ((assq name entities)
    =>
    (lambda (decl-entity)
      (let ((ent-body (cdr decl-entity))
            (new-entities (cons (cons name #f) entities)))
        (cond
         ((string? ent-body)
          (call-with-input-string
            ent-body
            (lambda (port) (content-handler port new-entities seed))))
         ((procedure? ent-body)
          (let ((port (ent-body)))
            (begin0
              (content-handler port new-entities seed)
              (close-input-port port))))
         (else (parser-error port "[norecursion] broken for " name))))))
   ((assq name ssax:predefined-parsed-entities)
    =>
    (lambda (decl-entity) (str-handler (cdr decl-entity) "" seed)))
   (else (parser-error port "[wf-entdeclared] broken for " name))))
(define (make-empty-attlist) '())
(define (attlist-add attlist name-value)
  (if (null? attlist)
    (cons name-value attlist)
    (case (name-compare (car name-value) (caar attlist))
      ((=) #f)
      ((<) (cons name-value attlist))
      (else (cons (car attlist) (attlist-add (cdr attlist) name-value))))))
(define attlist-null? null?)
(define (attlist-remove-top attlist) (values (car attlist) (cdr attlist)))
(define (attlist->alist attlist) attlist)
(define attlist-fold fold)
(define ssax:read-attributes
  (let ((value-delimeters (append ssax:S-chars '(#\< #\&))))
    (define (read-attrib-value delimiter port entities prev-fragments)
      (let* ((new-fragments
               (cons
                (next-token
                  '()
                  (cons delimiter value-delimeters)
                  "XML [10]"
                  port)
                prev-fragments))
             (cterm (read-char port)))
        (cond
         ((or (eof-object? cterm) (eqv? cterm delimiter)) new-fragments)
         ((eqv? cterm char-return)
          (if (eqv? (peek-char port) #\newline) (read-char port))
          (read-attrib-value delimiter port entities (cons " " new-fragments)))
         ((memv cterm ssax:S-chars)
          (read-attrib-value delimiter port entities (cons " " new-fragments)))
         ((eqv? cterm #\&)
          (cond
           ((eqv? (peek-char port) #\#)
            (read-char port)
            (read-attrib-value
              delimiter
              port
              entities
              (cons (string (ssax:read-char-ref port)) new-fragments)))
           (else
            (read-attrib-value
              delimiter
              port
              entities
              (read-named-entity port entities new-fragments)))))
         (else (parser-error port "[CleanAttrVals] broken")))))
    (define (read-named-entity port entities fragments)
      (let ((name (ssax:read-NCName port)))
        (assert-curr-char '(#\;) "XML [68]" port)
        (ssax:handle-parsed-entity
          port
          name
          entities
          (lambda (port entities fragments)
            (read-attrib-value '*eof* port entities fragments))
          (lambda (str1 str2 fragments)
            (if (equal? "" str2)
              (cons str1 fragments)
              (cons* str2 str1 fragments)))
          fragments)))
    (lambda (port entities)
      (let loop ((attr-list (make-empty-attlist)))
        (if (not (ssax:ncname-starting-char? (ssax:skip-S port)))
          attr-list
          (let ((name (ssax:read-QName port)))
            (ssax:skip-S port)
            (assert-curr-char '(#\=) "XML [25]" port)
            (ssax:skip-S port)
            (let ((delimiter (assert-curr-char '(#\' #\") "XML [10]" port)))
              (loop
               (or (attlist-add
                     attr-list
                     (cons
                      name
                      (string-concatenate-reverse/shared
                        (read-attrib-value delimiter port entities '()))))
                   (parser-error
                     port
                     "[uniqattspec] broken for "
                     name))))))))))
(define (ssax:resolve-name port unres-name namespaces apply-default-ns?)
  (cond
   ((pair? unres-name)
    (cons
     (cond
      ((assq (car unres-name) namespaces) => cadr)
      ((eq? (car unres-name) ssax:Prefix-XML) ssax:Prefix-XML)
      (else
       (parser-error
         port
         "[nsc-NSDeclared] broken; prefix "
         (car unres-name))))
     (cdr unres-name)))
   (apply-default-ns?
    (let ((default-ns (assq '*DEFAULT* namespaces)))
      (if (and default-ns (cadr default-ns))
        (cons (cadr default-ns) unres-name)
        unres-name)))
   (else unres-name)))
(define (ssax:uri-string->symbol uri-str) (string->symbol uri-str))
(define ssax:complete-start-tag
  (let ((xmlns (string->symbol "xmlns"))
        (largest-dummy-decl-attr (list ssax:largest-unres-name #f #f #f)))
    (define (validate-attrs port attlist decl-attrs)
      (define (add-default-decl decl-attr result)
        (call-with-values
         (lambda () (apply values decl-attr))
         (lambda (attr-name content-type use-type default-value)
           (and (eq? use-type 'REQUIRED)
                (parser-error port "[RequiredAttr] broken for" attr-name))
           (if default-value
               (cons (cons attr-name default-value) result)
               result))))
      (let loop ((attlist attlist) (decl-attrs decl-attrs) (result '()))
        (if (attlist-null? attlist)
          (attlist-fold add-default-decl result decl-attrs)
          (call-with-values
           (lambda () (attlist-remove-top attlist))
           (lambda (attr attr-others)
             (call-with-values
              (lambda ()
                (if (attlist-null? decl-attrs)
                    (values largest-dummy-decl-attr decl-attrs)
                    (attlist-remove-top decl-attrs)))
              (lambda (decl-attr other-decls)
                (case (name-compare (car attr) (car decl-attr))
                  ((<)
                   (if (or (eq? xmlns (car attr))
                           (and (pair? (car attr)) (eq? xmlns (caar attr))))
                       (loop attr-others decl-attrs (cons attr result))
                       (parser-error port "[ValueType] broken for " attr)))
                  ((>)
                   (loop attlist other-decls (add-default-decl decl-attr result)))
                  (else
                   (call-with-values
                    (lambda () (apply values decl-attr))
                    (lambda (attr-name content-type use-type default-value)
                      (cond
                        ((eq? use-type 'FIXED)
                         (or (equal? (cdr attr) default-value)
                             (parser-error
                              port
                              "[FixedAttr] broken for "
                              attr-name)))
                        ((eq? content-type 'CDATA) #t)
                        ((pair? content-type)
                         (or (member (cdr attr) content-type)
                             (parser-error
                              port
                              "[enum] broken for "
                              attr-name
                              "="
                              (cdr attr))))
                        (else
                         (ssax:warn
                          port
                          "declared content type "
                          content-type
                          " not verified yet")))
                      (loop attr-others other-decls
                            (cons attr result)))))))))))))
    (define (add-ns port prefix uri-str namespaces)
      (and (equal? "" uri-str)
           (parser-error port "[dt-NSName] broken for " prefix))
      (let ((uri-symbol (ssax:uri-string->symbol uri-str)))
        (let loop ((nss namespaces))
          (cond
           ((null? nss) (cons (cons* prefix uri-symbol uri-symbol) namespaces))
           ((eq? uri-symbol (cddar nss))
            (cons (cons* prefix (cadar nss) uri-symbol) namespaces))
           (else (loop (cdr nss)))))))
    (define (adjust-namespace-decl port attrs namespaces)
      (let loop ((attrs attrs) (proper-attrs '()) (namespaces namespaces))
        (cond
         ((null? attrs) (values proper-attrs namespaces))
         ((eq? xmlns (caar attrs))
          (loop
           (cdr attrs)
           proper-attrs
           (if (equal? "" (cdar attrs))
             (cons (cons* '*DEFAULT* #f #f) namespaces)
             (add-ns port '*DEFAULT* (cdar attrs) namespaces))))
         ((and (pair? (caar attrs)) (eq? xmlns (caaar attrs)))
          (loop
           (cdr attrs)
           proper-attrs
           (add-ns port (cdaar attrs) (cdar attrs) namespaces)))
         (else
          (loop (cdr attrs) (cons (car attrs) proper-attrs) namespaces)))))
    (lambda (tag-head port elems entities namespaces)
      (let* ((attlist (ssax:read-attributes port entities))
             (empty-el-tag?
              (begin
                (ssax:skip-S port)
                (and (eqv?
                      #\/
                      (assert-curr-char
                       '(#\> #\/)
                       "XML [40], XML [44], no '>'"
                       port))
                     (assert-curr-char
                      '(#\>)
                      "XML [44], no '>'"
                      port)))))
        (call-with-values
         (lambda () (if elems
                        (cond
                          ((assoc tag-head elems)
                           =>
                           (lambda (decl-elem)
                             (values
                              (if empty-el-tag? 'EMPTY-TAG (cadr decl-elem))
                              (caddr decl-elem))))
                          (else
                           (parser-error
                            port
                            "[elementvalid] broken, no decl for "
                            tag-head)))
                        (values (if empty-el-tag? 'EMPTY-TAG 'ANY) #f)))
         (lambda (elem-content decl-attrs)
           (let ((merged-attrs
                  (if decl-attrs
                      (validate-attrs port attlist decl-attrs)
                      (attlist->alist attlist))))
             (call-with-values
              (lambda ()
                (adjust-namespace-decl port merged-attrs namespaces))
              (lambda (proper-attrs namespaces)
                (values
                 (ssax:resolve-name port tag-head namespaces #t)
                 (fold-right
                  (lambda (name-value attlist)
                    (or (attlist-add
                         attlist
                         (cons
                          (ssax:resolve-name port (car name-value)
                                             namespaces #f)
                          (cdr name-value)))
                        (parser-error
                         port
                         "[uniqattspec] after NS expansion broken for "
                         name-value)))
                  (make-empty-attlist)
                  proper-attrs)
                 namespaces
                 elem-content))))))))))
(define (ssax:read-external-id port)
  (let ((discriminator (ssax:read-NCName port)))
    (assert-curr-char ssax:S-chars "space after SYSTEM or PUBLIC" port)
    (ssax:skip-S port)
    (let ((delimiter (assert-curr-char '(#\' #\") "XML [11], XML [12]" port)))
      (cond
       ((eq? discriminator (string->symbol "SYSTEM"))
        (begin0
          (next-token '() (list delimiter) "XML [11]" port)
          (read-char port)))
       ((eq? discriminator (string->symbol "PUBLIC"))
        (skip-until (list delimiter) port)
        (assert-curr-char ssax:S-chars "space after PubidLiteral" port)
        (ssax:skip-S port)
        (let* ((delimiter (assert-curr-char '(#\' #\") "XML [11]" port))
               (systemid (next-token '() (list delimiter) "XML [11]" port)))
          (read-char port)
          systemid))
       (else
        (parser-error
          port
          "XML [75], "
          discriminator
          " rather than SYSTEM or PUBLIC"))))))
(define (ssax:scan-Misc port)
  (let loop ((c (ssax:skip-S port)))
    (cond
     ((eof-object? c) c)
     ((not (char=? c #\<))
      (parser-error port "XML [22], char '" c "' unexpected"))
     (else
      (let ((token (ssax:read-markup-token port)))
        (case (xml-token-kind token)
          ((COMMENT) (loop (ssax:skip-S port)))
          ((PI DECL START) token)
          (else
           (parser-error
             port
             "XML [22], unexpected token of kind "
             (xml-token-kind token)))))))))
(define ssax:read-char-data
  (let ((terminators-usual (list #\< #\& char-return))
        (terminators-usual-eof (list #\< '*eof* #\& char-return))
        (handle-fragment
          (lambda (fragment str-handler seed)
            (if (string-null? fragment) seed (str-handler fragment "" seed)))))
    (lambda (port expect-eof? str-handler seed)
      (if (eqv? #\< (peek-char port))
        (let ((token (ssax:read-markup-token port)))
          (case (xml-token-kind token)
            ((START END) (values seed token))
            ((CDSECT)
             (let ((seed (ssax:read-cdata-body port str-handler seed)))
               (ssax:read-char-data port expect-eof? str-handler seed)))
            ((COMMENT) (ssax:read-char-data port expect-eof? str-handler seed))
            (else (values seed token))))
        (let ((char-data-terminators
                (if expect-eof? terminators-usual-eof terminators-usual)))
          (let loop ((seed seed))
            (let* ((fragment
                     (next-token
                       '()
                       char-data-terminators
                       "reading char data"
                       port))
                   (term-char (peek-char port)))
              (if (eof-object? term-char)
                (values (handle-fragment fragment str-handler seed) term-char)
                (case term-char
                  ((#\<)
                   (let ((token (ssax:read-markup-token port)))
                     (case (xml-token-kind token)
                       ((CDSECT)
                        (loop
                         (ssax:read-cdata-body
                           port
                           str-handler
                           (handle-fragment fragment str-handler seed))))
                       ((COMMENT)
                        (loop (handle-fragment fragment str-handler seed)))
                       (else
                        (values
                          (handle-fragment fragment str-handler seed)
                          token)))))
                  ((#\&)
                   (case (peek-next-char port)
                     ((#\#)
                      (read-char port)
                      (loop
                       (str-handler
                         fragment
                         (string (ssax:read-char-ref port))
                         seed)))
                     (else
                      (let ((name (ssax:read-NCName port)))
                        (assert-curr-char '(#\;) "XML [68]" port)
                        (values
                          (handle-fragment fragment str-handler seed)
                          (make-xml-token 'ENTITY-REF name))))))
                  (else
                   (if (eqv? (peek-next-char port) #\newline) (read-char port))
                   (loop
                    (str-handler fragment (string #\newline) seed))))))))))))
(define (ssax:assert-token token kind gi error-cont)
  (or (and (xml-token? token)
           (eq? kind (xml-token-kind token))
           (equal? gi (xml-token-head token)))
      (error-cont token kind gi)))
(define-macro ssax:make-pi-parser
  (lambda (my-pi-handlers)
    `(lambda (port target seed)
       (case target
         (unquote-splicing
          (let loop ((pi-handlers my-pi-handlers) (default #f))
            (cond
             ((null? pi-handlers)
              (if default
                `((else (,default port target seed)))
                '((else
                   (ssax:warn port "Skipping PI: " target nl)
                   (ssax:skip-pi port)
                   seed))))
             ((eq? '*DEFAULT* (caar pi-handlers))
              (loop (cdr pi-handlers) (cdar pi-handlers)))
             (else
              (cons
               `((,(caar pi-handlers)) (,(cdar pi-handlers) port target seed))
               (loop (cdr pi-handlers) default))))))))))
(define-macro ssax:make-elem-parser
  (lambda (my-new-level-seed
           my-finish-element
           my-char-data-handler
           my-pi-handlers)
    `(lambda (start-tag-head port elems entities namespaces preserve-ws? seed)
       (define xml-space-gi (cons ssax:Prefix-XML (string->symbol "space")))
       (let handle-start-tag ((start-tag-head start-tag-head)
                              (port port)
                              (entities entities)
                              (namespaces namespaces)
                              (preserve-ws? preserve-ws?)
                              (parent-seed seed))
         (call-with-values
          (lambda () (ssax:complete-start-tag
                          start-tag-head
                          port
                          elems
                          entities
                          namespaces))
          (lambda (elem-gi attributes namespaces expected-content)
            (let ((seed (,my-new-level-seed
                         elem-gi
                         attributes
                         namespaces
                         expected-content
                         parent-seed)))
              (case expected-content
                ((EMPTY-TAG)
                 (,my-finish-element
                  elem-gi
                  attributes
                  namespaces
                  parent-seed
                  seed))
                ((EMPTY)
                 (ssax:assert-token
                  (and (eqv? #\< (ssax:skip-S port))
                       (ssax:read-markup-token port))
                  'END
                  start-tag-head
                  (lambda (token exp-kind exp-head)
                    (parser-error
                     port
                     "[elementvalid] broken for "
                     token
                     " while expecting "
                     exp-kind
                     exp-head)))
                 (,my-finish-element
                  elem-gi
                  attributes
                  namespaces
                  parent-seed
                  seed))
                (else
                 (let ((preserve-ws?
                        (cond
                          ((assoc xml-space-gi attributes)
                           =>
                           (lambda (name-value)
                             (equal? "preserve" (cdr name-value))))
                          (else preserve-ws?))))
                   (let loop ((port port)
                              (entities entities)
                              (expect-eof? #f)
                              (seed seed))
                     (call-with-values
                      (lambda () (ssax:read-char-data
                                  port
                                  expect-eof?
                                  ,my-char-data-handler
                                  seed))
                      (lambda (seed term-token)
                        (if (eof-object? term-token)
                            seed
                            (case (xml-token-kind term-token)
                              ((END)
                               (ssax:assert-token
                                term-token
                                'END
                                start-tag-head
                                (lambda (token exp-kind exp-head)
                                  (parser-error
                                   port
                                   "[GIMatch] broken for "
                                   term-token
                                   " while expecting "
                                   exp-kind
                                   exp-head)))
                               (,my-finish-element
                                elem-gi
                                attributes
                                namespaces
                                parent-seed
                                seed))
                              ((PI)
                               (let ((seed
                                      ((ssax:make-pi-parser ,my-pi-handlers)
                                       port
                                       (xml-token-head term-token)
                                       seed)))
                                 (loop port entities expect-eof? seed)))
                              ((ENTITY-REF)
                               (let ((seed
                                      (ssax:handle-parsed-entity
                                       port
                                       (xml-token-head term-token)
                                       entities
                                       (lambda (port entities seed)
                                         (loop port entities #t seed))
                                       ,my-char-data-handler
                                       seed)))
                                 (loop port entities expect-eof? seed)))
                              ((START)
                               (if (eq? expected-content 'PCDATA)
                                   (parser-error
                                    port
                                    "[elementvalid] broken for "
                                    elem-gi
                                    " with char content only; unexpected token "
                                    term-token))
                               (let ((seed
                                      (handle-start-tag
                                       (xml-token-head term-token)
                                       port
                                       entities
                                       namespaces
                                       preserve-ws?
                                       seed)))
                                 (loop port entities expect-eof? seed)))
                              (else
                               (parser-error
                                port
                                "XML [43] broken for "
                                term-token)))))))))))))))))
(define-macro ssax:make-parser
  (lambda user-handlers
    (define all-handlers
      '((DOCTYPE
          lambda
          (port docname systemid internal-subset? seed)
          (when internal-subset?
            (ssax:warn port "Internal DTD subset is not currently handled ")
            (ssax:skip-internal-dtd port))
          (ssax:warn
            port
            "DOCTYPE DECL "
            docname
            " "
            systemid
            " found and skipped")
          (values #f '() '() seed))
        (UNDECL-ROOT lambda (elem-gi seed) (values #f '() '() seed))
        (DECL-ROOT lambda (elem-gi seed) seed)
        (NEW-LEVEL-SEED . REQD)
        (FINISH-ELEMENT . REQD)
        (CHAR-DATA-HANDLER . REQD)
        (PI)))
    (define (delete-assoc alist tag cont)
      (let loop ((alist alist) (scanned '()))
        (cond
         ((null? alist) (error "Unknown user-handler-tag: " tag))
         ((eq? tag (caar alist))
          (cont tag (cdar alist) (append scanned (cdr alist))))
         (else (loop (cdr alist) (cons (car alist) scanned))))))
    (define (merge-handlers declared-handlers given-handlers)
      (cond
       ((null? given-handlers)
        (cond
         ((null? declared-handlers) '())
         ((not (eq? 'REQD (cdar declared-handlers)))
          (cons
           (car declared-handlers)
           (merge-handlers (cdr declared-handlers) given-handlers)))
         (else
          (error
           "The handler for the tag "
           (caar declared-handlers)
           " must be specified"))))
       ((null? (cdr given-handlers))
        (error "Odd number of arguments to ssax:make-parser"))
       (else
        (delete-assoc
          declared-handlers
          (car given-handlers)
          (lambda (tag value alist)
            (cons
             (cons tag (cadr given-handlers))
             (merge-handlers alist (cddr given-handlers))))))))
    (let ((user-handlers (merge-handlers all-handlers user-handlers)))
      (define (get-handler tag)
        (cond
         ((assq tag user-handlers) => cdr)
         (else (error "unknown tag: " tag))))
      `(lambda (port seed)
         (define (handle-decl port token-head seed)
           (or (eq? (string->symbol "DOCTYPE") token-head)
               (parser-error
                 port
                 "XML [22], expected DOCTYPE declaration, found "
                 token-head))
           (assert-curr-char ssax:S-chars "XML [28], space after DOCTYPE" port)
           (ssax:skip-S port)
           (let* ((docname (ssax:read-QName port))
                  (systemid
                    (and (ssax:ncname-starting-char? (ssax:skip-S port))
                         (ssax:read-external-id port)))
                  (internal-subset?
                   (begin
                     (ssax:skip-S port)
                     (eqv?
                      #\[
                      (assert-curr-char
                       '(#\> #\[)
                       "XML [28], end-of-DOCTYPE"
                       port)))))
             (call-with-values
              (lambda () (,(get-handler 'DOCTYPE)
                          port
                          docname
                          systemid
                          internal-subset?
                          seed))
              (lambda (elems entities namespaces seed)
                (scan-for-significant-prolog-token-2
                 port
                 elems
                 entities
                 namespaces
                 seed)))))
         (define (scan-for-significant-prolog-token-1 port seed)
           (let ((token (ssax:scan-Misc port)))
             (if (eof-object? token)
               (parser-error port "XML [22], unexpected EOF")
               (case (xml-token-kind token)
                 ((PI)
                  (let ((seed
                         ((ssax:make-pi-parser ,(get-handler 'PI))
                          port
                          (xml-token-head token)
                          seed)))
                    (scan-for-significant-prolog-token-1 port seed)))
                 ((DECL) (handle-decl port (xml-token-head token) seed))
                 ((START)
                  (call-with-values
                   (lambda () (,(get-handler 'UNDECL-ROOT)
                               (xml-token-head token)
                               seed))
                   (lambda (elems entities namespaces seed)
                     (element-parser
                      (xml-token-head token)
                      port
                      elems
                      entities
                      namespaces
                      #f
                      seed))))
                 (else
                  (parser-error port "XML [22], unexpected markup " token))))))
         (define (scan-for-significant-prolog-token-2
                  port
                  elems
                  entities
                  namespaces
                  seed)
           (let ((token (ssax:scan-Misc port)))
             (if (eof-object? token)
               (parser-error port "XML [22], unexpected EOF")
               (case (xml-token-kind token)
                 ((PI)
                  (let ((seed
                         ((ssax:make-pi-parser ,(get-handler 'PI))
                          port
                          (xml-token-head token)
                          seed)))
                    (scan-for-significant-prolog-token-2
                      port
                      elems
                      entities
                      namespaces
                      seed)))
                 ((START)
                  (element-parser
                    (xml-token-head token)
                    port
                    elems
                    entities
                    namespaces
                    #f
                    (,(get-handler 'DECL-ROOT) (xml-token-head token) seed)))
                 (else
                  (parser-error port "XML [22], unexpected markup " token))))))
         (define element-parser
           (ssax:make-elem-parser
             ,(get-handler 'NEW-LEVEL-SEED)
             ,(get-handler 'FINISH-ELEMENT)
             ,(get-handler 'CHAR-DATA-HANDLER)
             ,(get-handler 'PI)))
         (scan-for-significant-prolog-token-1 port seed)))))
(define (ssax:reverse-collect-str fragments)
  (cond
   ((null? fragments) '())
   ((null? (cdr fragments)) fragments)
   (else
    (let loop ((fragments fragments) (result '()) (strs '()))
      (cond
       ((null? fragments)
        (if (null? strs)
          result
          (cons (string-concatenate/shared strs) result)))
       ((string? (car fragments))
        (loop (cdr fragments) result (cons (car fragments) strs)))
       (else
        (loop
         (cdr fragments)
         (cons
          (car fragments)
          (if (null? strs)
            result
            (cons (string-concatenate/shared strs) result)))
         '())))))))
(define (ssax:reverse-collect-str-drop-ws fragments)
  (cond
   ((null? fragments) '())
   ((null? (cdr fragments))
    (if (and (string? (car fragments)) (string-whitespace? (car fragments)))
      '()
      fragments))
   (else
    (let loop ((fragments fragments)
               (result '())
               (strs '())
               (all-whitespace? #t))
      (cond
       ((null? fragments)
        (if all-whitespace?
          result
          (cons (string-concatenate/shared strs) result)))
       ((string? (car fragments))
        (loop
         (cdr fragments)
         result
         (cons (car fragments) strs)
         (and all-whitespace? (string-whitespace? (car fragments)))))
       (else
        (loop
         (cdr fragments)
         (cons
          (car fragments)
          (if all-whitespace?
            result
            (cons (string-concatenate/shared strs) result)))
         '()
         #t)))))))

; procedure: ssax:xml->sxml PORT NAMESPACE-PREFIX-ASSIG
;
; This is an instance of a SSAX parser that returns an SXML
; representation of the XML document to be read from PORT.
; NAMESPACE-PREFIX-ASSIG is a list of (USER-PREFIX . URI-STRING)
; that assigns USER-PREFIXes to certain namespaces identified by
; particular URI-STRINGs. It may be an empty list.
; The procedure returns an SXML tree. The port points out to the
; first character after the root element.
(define (ssax:xml->sxml port namespace-prefix-assig)
  (letrec ((namespaces
             (map
              (lambda (el)
                (cons* #f (car el) (ssax:uri-string->symbol (cdr el))))
              namespace-prefix-assig))
           (RES-NAME->SXML
             (lambda (res-name)
               (string->symbol
                 (string-append
                   (symbol->string (car res-name))
                   ":"
                   (symbol->string (cdr res-name)))))))
    (let ((result
            (reverse
              ((ssax:make-parser
                 NEW-LEVEL-SEED
                 (lambda (elem-gi attributes namespaces expected-content seed)
                   '())
                 FINISH-ELEMENT
                 (lambda (elem-gi attributes namespaces parent-seed seed)
                   (let ((seed (ssax:reverse-collect-str-drop-ws seed))
                         (attrs
                          (attlist-fold
                            (lambda (attr accum)
                              (cons
                               (list
                                (if (symbol? (car attr))
                                  (car attr)
                                  (RES-NAME->SXML (car attr)))
                                (cdr attr))
                               accum))
                            '()
                            attributes)))
                     (cons
                      (cons
                       (if (symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi))
                       (if (null? attrs) seed (cons (cons '@ attrs) seed)))
                      parent-seed)))
                 CHAR-DATA-HANDLER
                 (lambda (string1 string2 seed)
                   (if (string-null? string2)
                     (cons string1 seed)
                     (cons* string2 string1 seed)))
                 DOCTYPE
                 (lambda (port docname systemid internal-subset? seed)
                   (when internal-subset?
                     (ssax:warn
                       port
                       "Internal DTD subset is not currently handled ")
                     (ssax:skip-internal-dtd port))
                   (ssax:warn
                     port
                     "DOCTYPE DECL "
                     docname
                     " "
                     systemid
                     " found and skipped")
                   (values #f '() namespaces seed))
                 UNDECL-ROOT
                 (lambda (elem-gi seed) (values #f '() namespaces seed))
                 PI
                 ((*DEFAULT*
                    lambda
                    (port pi-tag seed)
                    (cons
                     (list '*PI* pi-tag (ssax:read-pi-body-as-string port))
                     seed))))
               port
               '()))))
      (cons
       '*TOP*
       (if (null? namespace-prefix-assig)
         result
         (cons
          (list
           '@@
           (cons
            '*NAMESPACES*
            (map
             (lambda (ns) (list (car ns) (cdr ns)))
             namespace-prefix-assig)))
          result))))))
(define SSAX:XML->SXML ssax:xml->sxml)

(provide (all-defined)))