xml.ss
#lang scheme/base
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; XML.plt - XML related utility
;;
;; Bonzai Lab, LLC.  All rights reserved.
;;
;; Licensed under LGPL.
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xml.ss - utilities for manipulating xexpr, as well as converting between
;;          xexpr and sxml
;; yc 12/22/2009 - first version
(require scheme/match
         "depend.ss"
         "entity.ss" 
         )
#|
Xexpr = String | Symbol | CData? | Number | [(Tag, [(Symbol,String]) [Xexpr]]

;; sxml 
name = LocalName | ExpName 
LocalName = NCName
ExpName = 'namespace-id:LocalName 
namespace-id = 'URI | user-ns-shortcut 
namespaces = (*NAMESPACES* namesapce-assoc*)
namespace-assoc = (namespace-id URI original-prefix?) 

Top = (*TOP* Anno? PI* Comment* Child)
Element = (Symbol AttrList? Element*)
AttrList = (@ Attr* Annos?) 
Attr = (Symbol String? Annos?)
Child = Element | CData | PI | Comment | Entity 
PI = (*PI* Annos? String) 
Comment = (*COMMENT* String) 
Entity = (*ENTITY* String String) 
Annos = { @ namespace? anno* }
anno = String

;;|#

(define (xexpr-node-name elt)
  (if (pair? elt)
      (car elt)
      #f))

(define (has-xexpr-attrs? xexpr)
  (and (pair? xexpr)
       (pair? (cdr xexpr))
       (list? (cadr xexpr))
       (andmap (lambda (kv)
                 (and (list? kv)
                      (= 2 (length kv))))
               (cadr xexpr))))

(define (xexpr-attrs? attrs (error? #t))
  (define (xexpr-attr-errors!)
    (if error?
        (error 'xexpr-attrs? "invalid xexpr attrs: ~a" attrs)
        #f))
  (define (helper rest)
    (cond ((null? rest) #t)
          ((and (pair? (car rest))
                (symbol? (caar rest))
                (pair? (cdar rest)) 
                (string? (cadar rest))) 
           (helper (cdr rest)))
          (else (xexpr-attr-errors!))))
  (if (pair? attrs)
      (helper attrs)
      (xexpr-attr-errors!)))

;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xexpr-attrs
;; retrieve the attribute of an xexpr
(define (xexpr-attrs xexpr)
  ;; figuring out the xexpr-attr will take running through
  ;; the
  (if (and (pair? xexpr)
           (pair? (cdr xexpr))
           (xexpr-attrs? (cadr xexpr) #f))
      (cadr xexpr)
      '()))

;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xexpr-elements
;; return the list of elements within the xexpr...
(define (xexpr-elements xexpr)
  (cond ((has-xexpr-attrs? xexpr) (cddr xexpr))
        ((pair? xexpr) (cdr xexpr))
        (else '())))

(define (xexpr->sxml xexpr)
  (cond ((string? xexpr) xexpr)
        ((or (number? xexpr)
             (symbol? xexpr)) 
         (entity->string xexpr)) 
        ((cdata? xexpr)
         (cdata-string xexpr)) 
        ((comment? xexpr)
         `(*COMMENT* ,(comment-text xexpr))) 
        ((p-i? xexpr) 
         `(*PI* ,(p-i-instruction xexpr))) 
        ((pair? xexpr) 
         (xexpr-element->sxml xexpr))
        (else (error 'xexpr->sxml "Unknown type ~a" xexpr))))

(define (xexpr-element->sxml xexpr) 
  (define (attr-helper attrs)
    (if (null? attrs) '()
        `((@ . ,attrs))))
  `(,(xexpr-node-name xexpr) ,@(attr-helper (xexpr-attrs xexpr)) 
                       . ,(map xexpr->sxml (xexpr-elements xexpr))))

(define (sxml->xexpr sxml)
  (define (attr-helper sxml)
    (if-it (sxml:attr-list-node sxml)
           (list (cdr it)) 
           '()))
  (cond ((string? sxml) sxml)
        ((pair? sxml)
         (match (car sxml)
           ('*TOP* (sxml->xexpr (last sxml)))
           ('*PI* (make-p-i (last sxml)))
           ('*COMMENT* (make-comment (last sxml)))
           (else 
            `(,(sxml:node-name sxml)
              ,@(attr-helper sxml)
              . ,(map sxml->xexpr (sxml:content-raw sxml))))))
        (else (error 'sxml->xexpr "unknown type: ~a" sxml))))


;; sxml?
;; determine whether a node is sxml?
(define (sxml? node) 
  (and (pair? node) 
       (symbol? (car node)) 
       ))

;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; provides
(provide xexpr-elements
         xexpr-node-name
         xexpr-attrs
         xexpr->sxml
         sxml->xexpr
         sxml?
         )