libxml2.ss
#lang scheme

(require scheme/foreign)

(unsafe!)

(define xml2lib (ffi-lib "libxml2"))

(provide xml->sxml/file
         xml->sxml/bytes
         bytes->validation-context
         validation-context?)

;; treating this as opaque for now.
(define-cpointer-type _xmlDocPtr)


(define _xmlElementType
  (_enum
   '(XML_ELEMENT_NODE = 1
     XML_ATTRIBUTE_NODE
     XML_TEXT_NODE 
     XML_CDATA_SECTION_NODE
     XML_ENTITY_REF_NODE
     XML_ENTITY_NODE
     XML_PI_NODE
     XML_COMMENT_NODE
     XML_DOCUMENT_NODE
     XML_DOCUMENT_TYPE_NODE
     XML_DOCUMENT_FRAG_NODE 
     XML_NOTATION_NODE
     XML_HTML_DOCUMENT_NODE
     XML_DTD_NODE
     XML_ELEMENT_DECL
     XML_ATTRIBUTE_DECL
     XML_ENTITY_DECL
     XML_NAMESPACE_DECL
     XML_XINCLUDE_START
     XML_XINCLUDE_END
     XML_DOCB_DOCUMENT_NODE)))


(define-cstruct _xmlNs
  ([next     _xmlNs-pointer]
   [type     _xmlElementType]
   [href     _bytes]
   [prefix   _bytes]
   [_private _pointer]
   [context  _xmlDocPtr]))

#|  struct _xmlNs *	next	: next Ns link for this node
    xmlNsType	type	: global or local
    const xmlChar *	href	: URL for the namespace
    const xmlChar *	prefix	: prefix for the namespace
    void *	_private	: application data
    struct _xmlDoc *	context	: normally an xmlDoc
|#


(define-cstruct _xmlAttr
  ([_private _pointer]
   [type     _xmlElementType]
   [name     _bytes]
   [children _xmlAttr-pointer/null]
   [last     _pointer]
   [parent   _pointer] ;; actually a node, but no murec allowed?
   [next     _xmlAttr-pointer/null]
   [prev     _xmlAttr-pointer/null]
   [doc      _xmlDocPtr]
   [ns       _xmlNs-pointer/null]
   [atype    _pointer #;_xmlAttributeType]
   [psvi     _pointer]))


(define-cstruct _xmlNode
  ([_private _pointer]
   [type     _xmlElementType]
   [name     _bytes]
   [children _xmlNode-pointer/null]
   [last     _xmlNode-pointer/null]
   [parent   _xmlNode-pointer/null]
   [next     _xmlNode-pointer/null]
   [prev     _xmlNode-pointer/null]
   [doc      _xmlDocPtr]
   [ns       _xmlNs-pointer/null]
   [content  _bytes] 
   [properties _xmlAttr-pointer/null]
   [nsDef    _xmlNs-pointer/null]
   [psvi     _pointer]
   [line     _ushort]
   [extra    _ushort]))

#|    void *	_private	: application data
    xmlElementType	type	: type number, must be second !
    const xmlChar *	name	: the name of the node, or the entity
    struct _xmlNode *	children	: parent->childs link
    struct _xmlNode *	last	: last child link
    struct _xmlNode *	parent	: child->parent link
    struct _xmlNode *	next	: next sibling link
    struct _xmlNode *	prev	: previous sibling link
    struct _xmlDoc *	doc	: the containing document End of common p
    xmlNs *	ns	: pointer to the associated namespace
    xmlChar *	content	: the content
    struct _xmlAttr *	properties	: properties list
    xmlNs *	nsDef	: namespace definitions on this node
    void *	psvi	: for type/PSVI informations
    unsigned short	line	: line number
    unsigned short	extra	: extra data for XPath/XSLT
|#


#|    void *	_private	: application data
    xmlElementType	type	: XML_ATTRIBUTE_NODE, must be second !
    const xmlChar *	name	: the name of the property
    struct _xmlNode *	children	: the value of the property
    struct _xmlNode *	last	: NULL
    struct _xmlNode *	parent	: child->parent link
    struct _xmlAttr *	next	: next sibling link
    struct _xmlAttr *	prev	: previous sibling link
    struct _xmlDoc *	doc	: the containing document
    xmlNs *	ns	: pointer to the associated namespace
    xmlAttributeType	atype	: the attribute type if validating
    void *	psvi	: for type/PSVI informations
|#



;; xmlParseFile : path -> xmlDocPtr
;; read an XML document from a file:
(define xmlParseFile (get-ffi-obj "xmlParseFile" xml2lib (_fun (path : _path) 
                                                               -> (ptr-or-null : _xmlDocPtr/null)
                                                               -> (or ptr-or-null (error 'xmlParseFile "failed to parse file: ~e" path)))))

;; xmlParseDoc : bytes -> xmlDocPtr
;; read an XML document from memory:
(define xmlParseDoc 
  (get-ffi-obj "xmlParseDoc" xml2lib (_fun (bytes : _bytes) -> (ptr-or-null : _xmlDocPtr/null)
                                           -> (or ptr-or-null (error 'xmlParseDoc "failed to parse bytes: ~e" bytes)))))

;; xmlDocGetRootElement : xmlDocPtr -> xmlNodePtr
(define xmlDocGetRootElement
  (get-ffi-obj "xmlDocGetRootElement" xml2lib (_fun _xmlDocPtr
                                                    -> (ptr-or-null : _xmlNode-pointer/null)
                                                    -> (or ptr-or-null (error 'xmlDocGetRootElement "failed to get pointer to root node")))))


;; xmlFreeDoc : xmlDocPtr ->
;; for safety, I'm betting that this should be linked to collection of the ptr...
(define xmlFreeDoc 
  (get-ffi-obj "xmlFreeDoc" xml2lib (_fun _xmlDocPtr -> _void)))

;; xmlNodePtr->sxml : xmlNode-pointer -> sxml
;; translate an xmlNode-pointer into sxml.
;; ALSO IGNORING NAMESPACES
;; ALSO ASSUMES UTF-8 ENCODING
(define (xmlNode->sxml nodeptr)
  (match (xmlNode-type nodeptr)
    ['XML_ELEMENT_NODE
     `(,(xml-format-name (xmlNode-name nodeptr) (xmlNode-ns nodeptr))
       ,@(if (xmlNode-properties nodeptr)
             (list (cons '@ (xmlAttrs->sxml (xmlNode-properties nodeptr))))
             null)
       ,@(map xmlNode->sxml (xmlNode-sibs (xmlNode-children nodeptr))))]
    ['XML_TEXT_NODE
     (bytes-decode (xmlNode-content nodeptr))]
    [other
     (error 'xmlNode->sxml "expected XML_ELEMENT_NODE as type, got: " other)]))

;; xmlNodePtr-sibs : xmlNode-pointer/null -> (listof xml-Node-pointer)
;; traverse 'next' links to find all the siblings to the "right" of this node.
(define (xmlNode-sibs nodeptr)
  (cond [(false? nodeptr) '()]
        [else 
         (unless (memq (xmlNode-type nodeptr) '(XML_ELEMENT_NODE XML_TEXT_NODE))
           (error 'xmlNodePtr-sibs "expected XML_ELEMENT_NODE as type, got: " (xmlNode-type nodeptr)))
         (cons nodeptr (xmlNode-sibs (xmlNode-next nodeptr)))]))

;; xmlAttrs->sxml : xmlAttr-pointer -> (listof (list/c symbol? string?))
;; translate a set of attributes by following sibling pointers
(define (xmlAttrs->sxml attrs)
  (cond [(false? attrs) empty]
        [else 
         (unless (eq? (xmlAttr-type attrs) 'XML_ATTRIBUTE_NODE)
           (error 'xmlAttrs->sxml "expected XML_ATTRIBUTE_NODE as type, got: ~v with name ~v" (xmlAttr-type attrs) (xmlNode-name attrs)))
         (cons (list (xml-format-name (xmlAttr-name attrs) (xmlAttr-ns attrs)) 
                     ;; looks like this shape is fixed, but who knows...
                     (bytes-decode (xmlNode-content (cast (xmlAttr-children attrs) _xmlAttr-pointer _xmlNode-pointer)))) 
               (xmlAttrs->sxml (xmlAttr-next attrs)))]))

;; xml-format-name : encode a name as a scheme symbol, using the URI for the namespace
(define (xml-format-name name nsptr)
 (let ([prefix (cond [(false? nsptr) ""]
                      [else (string-append (bytes-decode (xmlNs-href nsptr)) ":")])])
    (string->symbol (string-append prefix (bytes-decode name)))))

;; bytes-decode : bytes->string : ASSUMES A UTF-8 ENCODING
(define bytes-decode bytes->string/utf-8)

;; xml->sxml/file : path -> sxml
(define (xml->sxml/file path #:valid [validation-ctxt #f])
  (unless (file-exists? path)
    (error 'xml->sxml/file "file does not exist: ~a" path))
  (let* ([path-string (cond [(path? path) (path->string path)]
                            [(string? path) path]
                            [else (raise-mismatch-error 'xml->sxml/file "expected path or string as argument, got " path)])]
         [docptr (xmlParseFile path-string)])
    (when validation-ctxt
      (doc-validate validation-ctxt docptr))
    (begin0 (list '*TOP* (xmlNode->sxml (xmlDocGetRootElement docptr)))
            (register-finalizer docptr xmlFreeDoc))))

;; xml->sxml/bytes : bytes -> sxml
(define (xml->sxml/bytes b #:valid [validation-ctxt #f])
  (let* ([docptr (xmlParseDoc b)])
    (when validation-ctxt
      (doc-validate validation-ctxt docptr))
    (begin0 (list '*TOP* (xmlNode->sxml (xmlDocGetRootElement docptr)))
            (register-finalizer docptr xmlFreeDoc))))

;; doc-validate : _xmlRelaxNGValidCtxt _xmlDocPtr ->
(define (doc-validate validation-ctxt docptr)
  (let ([validation-result (xmlRelaxNGValidateDoc validation-ctxt docptr)])
    (unless (= validation-result 0)
      (error 'doc-validate "error in validating document: ~e" validation-result))))


;; RELAX NG SUPPORT

(define-cpointer-type _xmlRelaxNGParserCtxtPtr)
(define-cpointer-type _xmlRelaxNGPtr)
(define-cpointer-type _xmlRelaxNGValidCtxtPtr)


;; free an xmlRelaxNGPtr
(define xmlRelaxNGFree
  (get-ffi-obj "xmlRelaxNGFree" xml2lib (_fun _xmlRelaxNGPtr -> _void)))

;; free an xmlRelaxNGParserCtxtPtr
(define xmlRelaxNGFreeParserCtxt
  (get-ffi-obj "xmlRelaxNGFreeParserCtxt" xml2lib (_fun _xmlRelaxNGParserCtxtPtr -> _void)))

;; free an xmlRelaxNGValidCtxtPtr
(define xmlRelaxNGFreeValidCtxt
  (get-ffi-obj "xmlRelaxNGFreeValidCtxt" xml2lib (_fun _xmlRelaxNGValidCtxtPtr -> _void)))

;; ffi fun to translate a byte-string into a parser context
(define xmlRelaxNGNewMemParserCtxt/ffi
  (get-ffi-obj "xmlRelaxNGNewMemParserCtxt" xml2lib (_fun _bytes (size : _int) -> _xmlRelaxNGParserCtxtPtr/null)))

;; ffi fun to translate a parser context into a validator
(define xmlRelaxNGParse
  (get-ffi-obj "xmlRelaxNGParse" xml2lib (_fun _xmlRelaxNGParserCtxtPtr -> _xmlRelaxNGPtr/null)))

;; ffi fun to translate a validator into a validation context
(define xmlRelaxNGNewValidCtxt 
  (get-ffi-obj "xmlRelaxNGNewValidCtxt" xml2lib (_fun _xmlRelaxNGPtr -> _xmlRelaxNGValidCtxtPtr/null)))

;; ffi fun to validate a doc.
(define xmlRelaxNGValidateDoc
  (get-ffi-obj "xmlRelaxNGValidateDoc" xml2lib
               (_fun _xmlRelaxNGValidCtxtPtr _xmlDocPtr -> _int)))

;; bytes->validation-context : produce a validation context from a bytes containing a relax-ng spec
(define (bytes->validation-context b)
  (let* ([parser-ctxt (xmlRelaxNGNewMemParserCtxt/ffi b (bytes-length b))]
         [spec (xmlRelaxNGParse parser-ctxt)]
         [validation-ctxt (xmlRelaxNGNewValidCtxt spec)])
    (register-finalizer validation-ctxt 
                        (lambda (vc)
                          (xmlRelaxNGFree spec)
                          (xmlRelaxNGFreeParserCtxt parser-ctxt)
                          (xmlRelaxNGFreeValidCtxt vc)))
    validation-ctxt))

(define (validation-context? vc)
  (xmlRelaxNGValidCtxtPtr? vc))