#lang racket/base
(require racket/promise
(provide (all-defined-out))

;; A specialized lazy XML->SXML parser
; Is heavily based on continuations

; Preliminary helper functions

; A helper that forces all descendants of a given node or a nodeset
(define (lazy:force-descendants node)
  ; node - a promise, a pair (nodeset / element list), or any other value.
  ; Called for its effect of forcing promises; the result carries no meaning.
  (cond
    ((lazy:promise? node)  ; force it, then walk whatever it produced
     (lazy:force-descendants (force node)))
    ((pair? node)  ; not null: visit every member in turn
     (for-each lazy:force-descendants node))
    (else  ; null or not pair
     #t)))  ; nothing to be done

; Returns a list of all members of the argument list except
; for the last member
(define (lazy:except-last lst)
  ; lst - a proper list.
  ; Returns a newly built list holding every member of lst but the last;
  ; '() when lst is empty or has exactly one member.
  (if (or (null? lst)  ; this shouldn't happen
          (null? (cdr lst)))  ; single member: it is the last one, drop it
      '()
      (cons (car lst) (lazy:except-last (cdr lst)))))

; Returns the common part of the seed
(define (lazy:seed-common seed)
  ; seed - either a short seed (a one-element list holding only the common
  ;  part) or a long seed whose third element is the common part.
  ; Picks the matching accessor first, then applies it to the seed.
  ((if (null? (cdr seed))  ; a short seed
       car
       caddr)  ; long seed: common part is the third member
   seed))

; A monad-like handler
; Replaces the common part of the seed
(define (lazy:replace-common seed new-common)
  ; seed - a short seed (one-element list) or a long four-element seed.
  ; new-common - the value to install as the common part.
  ; Returns a new seed of the same shape as the argument; every member
  ; other than the common part is carried over unchanged.
  (if (null? (cdr seed))  ; a short seed
      (list new-common)
      (list (car seed)
            (cadr seed)
            new-common  ; third slot is the common part (read back via caddr)
            (cadddr seed))))

; Produces a lazy SXML document, which corresponds to reading a source
; document in a stream-wise fashion
(define (lazy:xml->sxml port namespace-prefix-assig)
  (let ((namespaces
         (map (lambda (el)
                (cons* #f (car el) (ssax:uri-string->symbol (cdr el))))
         (lambda (res-name)
             (symbol->string (car res-name))
             (symbol->string (cdr res-name)))))))
    ((lambda (result)
       ; We assume that nobody follows the document element      
       (if (null? namespace-prefix-assig)
           (cons '*TOP* (lazy:except-last result))
             `(@@ (*NAMESPACES*
                      (lambda (ns) (list (car ns) (cdr ns)))
             (lazy:except-last result)))))
     (call-with-current-continuation   ; we grab the continuation to escape from parsing
      (lambda (result-k)
        ; seed ::= (list result-k state-k common-seed level)
        ; result-k - continuation on what to do with the current result portion
        ; state-k - continuation to return to SSAX state on this level of XML
        ;  tree hierarchy
        ; common-seed - general seed information
        ; level - level of a current node in a tree hierarchy
          (lambda (elem-gi attributes namespaces expected-content seed)
            ;(pp (cons elem-gi (cadddr seed)))
             (or (null? (cdr seed))  ; short seed
                 (> (cadddr seed) 3))   ; deep level
             (list '())  ; work like a conventional SSAX parser
             (let ((attrs
                     (lambda (attr accum)
                       (cons (list 
                              (if (symbol? (car attr)) (car attr)
                                  (RES-NAME->SXML (car attr)))
                              (cdr attr)) accum))
                     '() attributes)))
                (lambda (new-level-k)  ; how to parse next
                  ((car seed)  ; return the result
                   (let ((elem-content
                          ; A promise to continue parsing
                          (call-with-current-continuation  ; where to put the result
                           (lambda (elem-k)
                              (list  ; now form a seed
                               elem-k  ; what to do with result
                               new-level-k   ; SSAX state on this level
                               '()  ; common-seed is empty
                               (+ (cadddr seed) 1)  ; increase level
                      ; Previous string content
                      (ssax:reverse-collect-str-drop-ws (caddr seed))
                        (if (symbol? elem-gi) elem-gi
                            (RES-NAME->SXML elem-gi))
                        (if (null? attrs) elem-content
                            (cons (cons '@ attrs) elem-content)))
                       ; The following siblings of this element
                         (call-with-current-continuation  ; where to put the result
                          (lambda (foll-k)
                            ; First we force the parsing of the current element
                            (lazy:force-descendants elem-content)
                            ; Then continue parsing
                            ((cadr seed)  ; recover the parent level of nesting
                              foll-k  ; what to do with result
                              (cadr seed)
                              '()  ; common-seed is empty
                              (cadddr seed)  ; the same level for siblings
	     (lambda (elem-gi attributes namespaces parent-seed seed)
                (null? (cdr seed))  ; a short seed
                (let ((common (ssax:reverse-collect-str-drop-ws
                               (lazy:seed-common seed)))
                        (lambda (attr accum)
                          (cons (list 
                                 (if (symbol? (car attr)) (car attr)
                                     (RES-NAME->SXML (car attr)))
                                 (cdr attr)) accum))
                        '() attributes)))
                     (if (symbol? elem-gi) elem-gi
                         (RES-NAME->SXML elem-gi))
                     (if (null? attrs) common
                         (cons (cons '@ attrs) common)))
                    (lazy:seed-common parent-seed))))
                ; Otherwise - just return the remaining character content
                ((car seed)  ; continuation
                  (lazy:seed-common seed)))))
	     (lambda (string1 string2 seed)
               ;(pp (list string1 string2 seed))
                (if (string-null? string2)
                    (cons string1 (lazy:seed-common seed))
                    (cons* string2 string1 (lazy:seed-common seed)))))

	     (lambda (port docname systemid internal-subset? seed)
	       (when internal-subset?
		     (ssax:warn port
			   "Internal DTD subset is not currently handled ")
		     (ssax:skip-internal-dtd port))
	       (ssax:warn port "DOCTYPE DECL " docname " "
		     systemid " found and skipped")
	       (values #f '() namespaces seed))

	     (lambda (elem-gi seed)
	       (values #f '() namespaces seed))

	     ((*DEFAULT* .
		(lambda (port pi-tag seed)
                    (list '*PI* pi-tag (ssax:read-pi-body-as-string port))
                    (lazy:seed-common seed))))))
            (list  ; form initial seed
             result-k   ; put the result
             (lambda (seed)  ; dummy top-level parser state that produces '()
               ((car seed)  ; where to put the result nodeset
             1  ; level for the document element