port.ss
#lang scheme/base
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; XML.plt - XML related utility
;;
;; Bonzai Lab, LLC.  All rights reserved.
;;
;; Licensed under LGPL.
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; port.ss - bindings for reading html or xml into xexpr or sxml
;; yc 12/22/2009 - first version
;; yc 1/18/2010 - fix the read-sxml and read-xexpr's signature to expect only 1 arg
;; yc 2/9/2010 - remove bzlib/http dependency but keep the ability to handle both xml & html stream
(require (only-in html read-html-as-xml)
         "depend.ss"
         "xml.ss" 
         (rename-in "depend.ss" (srl:sxml->xml-noindent sxml->string))
         )

;; use xml? to test to see whether or not the port contains xml declaration process instruction
(define xml? (tokens #"<?" #"xml" (return #t)))

(define (single-element? contents)
  (= 1 (length (filter xexpr? contents))))

;; html->xexpr
(define (html->xexpr contents)
  (let ((contents (map xml->xexpr contents)))
    (cond ((single-element? contents)
           (car contents))
          (else
           `(*TOP* . ,contents)))))

(define (read-xexpr/html in)
  (html->xexpr (read-html-as-xml in))) 

(define (read-xexpr/xml in)
  (xml->xexpr (document-element (read-xml in)))) 

(define (read-xexpr in (filter identity))
  (define (helper in)
    (let-values (((v IN)
                  (xml? (make-input in))))
      (if (failed? v) 
          read-xexpr/html
          read-xexpr/xml)))
  (filter ((helper in) in)))

(define (read-sxml in (filter identity)) 
  (xexpr->sxml (read-xexpr in filter))) 

(define (write-xexpr xexpr (out (current-output-port))) 
  (write-string (xexpr->string xexpr) out)) 

(define (write-sxml sxml (out (current-output-port))) 
  (write-string (sxml->string sxml) out)) 

(define xexpr/c* any/c)
(define sxml/c any/c)
(define is-xml? (make-reader xml?)) 
(define (normalize-xml-bytes bytes)
  (if (is-xml? bytes)
      bytes
      (bytes-append #"<?xml version=\"1.0\" ?>" bytes)))

(provide/contract
 (read-xexpr (->* (input-port?) 
                  (isa/c) any)) ;; xexpr?
 (read-sxml (->* (input-port?) 
                 (isa/c) any))
 (write-xexpr (->* (xexpr/c*)
                   (output-port?)
                   any))
 (write-sxml (->* (sxml/c)
                  (output-port?)
                  any))
 (is-xml? Reader/c) 
 (normalize-xml-bytes (-> bytes? bytes?))
 )

(provide sxml->string)