mediawiki-xml.scm
(module mediawiki-xml mzscheme
        (require (lib "yacc.ss" "parser-tools")
                 (lib "lex.ss" "parser-tools")
                 (prefix : (lib "lex-sre.ss" "parser-tools")))
        (require (planet "roos.scm" ("oesterholt" "roos.plt" 1 4)))
        (require (planet "xml.scm" ("oesterholt" "ho-utils.plt"  1 0)))
        (require "mediawiki-struct-parse.scm")
        (require "bot.scm")
        (provide mediawiki-xml
                 (all-from "mediawiki-struct-parse.scm")
                 (all-from (planet "roos.scm" ("oesterholt" "roos.plt" 1 4))))

        
        (define-syntax shandle
          (syntax-rules ()
            ((_ handler xexprs (before after))
             (begin before (xexpr-sax handler xexprs) after #f))
            ((_ handler xexprs (newhandler before after))
             (begin before (xexpr-sax newhandler xexprs) after #f))
            ))
        
        (define-syntax handle
          (syntax-rules ()
             ((_ element (handler xexprs)
                 (tag stuff) ...)
              (cond ((eq? 'tag element) (shandle handler xexprs stuff))
                    ...))
            ))

        (define (rd port)
          (let ((r (read-string 1024 port)))
            (if (eq? r eof)
                (begin
                  (close-input-port port)
                  "")
                (string-append r (rd port)))))

        (define (read-in path)
          (rd (open-input-file path)))
        
        (def-class
         (roos-doc (sp "This class can be used to parse mediawiki wikitext to and from XML.")
                   (sp "See wikitext.xsd for the XML Schema definition of the generated XML. The XML can be regenerated to wikitext."
                       "Internally it is stored as an XExpr."))
         (this (mediawiki-xml))
         (supers)
         (private
          (define _xml (list))
          )
         (public

          ((define (sp "Parse WikiText to XML (using wikitext.xsd"))
           (parse page)
            (let ((ip (if (path? page)
                          (open-input-string (string-append "\n" (read-in page)))
                          (open-input-string (string-append "\n" page)))))
              (let ((R (mediawiki-parse ip)))
                (close-input-port ip)
                (set! _xml R)
                _xml)))
          
          ((define (sp "Write out the XExpr of the parsed WikiText to XML."))
           (->xml . file-or-port)
            (let ((str (open-output-string)))
              (write-xexpr _xml str)
              (let ((S (get-output-string str)))
                (close-output-port str)
                (if (not (null? file-or-port))
                    (if (port? (car file-or-port))
                        (display S (car file-or-port))
                        (let ((p (open-output-file (car file-or-port) 'replace)))
                          (display S p)
                          (close-output-port p))))
                S)))
          
          ((define (sp "Write out the XExpr of the parsed WikiText back to WikiText."))
           (->wikitext . file-or-port)
            (let ((port (open-output-string)))
              
              (define (out form . args)
                (display (apply format (cons form args)) port)
                )
              
              (define _page          "")
              (define _ns            (list))
              (define _meta          (list))
              (define _text          "")
              (define _item-prefix   "")
              (define _types         (list))
              (define _args          (list))
              
              (define (handle-ilink element attributes value level inode xexprs)
                (handle element (handle-ilink xexprs)
                        (page        ( (set! _page  (xexpr-get-attr attributes 'link)) #t ))
                        (ns          ( (set! _ns    (append _ns (list value))) #t ))
                        (type        ( (set! _types (append _types (list value))) #t ))
                        (arg         ( (set! _args  (append _args (list value))) #t ))
                        (text        ( sax-handler #t #t))
                        ))
              
              (define (handle-elink . args)
                (apply handle-ilink args))
              
              (define (handle-meta element attributes value level inode xexprs)
                  (handle element (handle-meta xexprs)
                          (page      ( (set! _page (xexpr-get-attr attributes 'link)) #t ))
                          (ns        ( (set! _ns   (append _ns (list value))) #t ))
                          (attribute ( (set! _meta (append _meta (list value))) #t ))
                          (text      ( sax-handler #t #t))
                          ))
              
              (define (handle-include element attributes value level inode xexprs)
                (handle element (handle-include xexprs)
                        (ilink ( (begin (set! _page "") (set! _ns (list)) (set! _text ""))
                                 (begin
                                   (for-each (lambda (e)
                                               (if (not (equal? e "Template"))
                                                   (out "~a:" e)))
                                             _ns)
                                     (out _page))
                                 ))
                        (page  ( (set! _page (xexpr-get-attr attributes 'link))
                                 #t))
                        (ns    ( (set! _ns (append _ns (list value)))
                                 #t))
                        (arg   ( sax-handler
                                 (out "\n|~a=" (xexpr-get-attr attributes 'name))
                                 #t))
                        ))
              
              (define (sax-handler element attributes value level inode xexprs)
                (let ((_port        port)
                      (_item-memory _item-prefix))
                  (handle element (sax-handler xexprs)
                          (h1       ( (out "\n=") (out "=")) )
                          (h2       ( (out "\n==") (out "==")) )
                          (h3       ( (out "\n===") (out "===")) )
                          (h4       ( (out "\n====") (out "====")) )
                          (hline    ( (out "\n----") #t ))
                          (nm       ( (out value) #t) )
                          (i        ( (out "''") (out "''")) )
                          (b        ( (out "'''") (out "'''") ))
                          (bi       ( (out "'''''") (out "'''''") ))
                          (table    ( (out "\n{|") (out "\n|}") ))
                          (trow     ( (out "\n|-") #t ))
                          (tcol     ( (out "\n|") #t ))
                          (include  ( handle-include (out "{{") (out "}}") ))
                          (ul       ( (set! _item-prefix (string-append _item-prefix "*"))
                                      (set! _item-prefix _item-memory)))
                          (nl       ( (set! _item-prefix (string-append _item-prefix "#"))
                                      (set! _item-prefix _item-memory)))
                          (toggle   ( (out "__~a__" value) #t ))
                          (item     ( (out "\n~a" _item-prefix)
                                      #t))
                          (meta     ( handle-meta 
                                      (begin
                                        (set! _page "")
                                        (set! _ns   (list))
                                        (set! _meta (list))
                                        (set! _text "")
                                        (set! port (open-output-string)))
                                      (begin
                                        (set! _text (get-output-string port))
                                        (set! port  _port)
                                        (out  "[[")
                                        (for-each (lambda (e) (out "~a:=" e)) _meta)
                                        (for-each (lambda (e) (if (not (equal? e "default"))
                                                                  (out "~a:" e))) _ns)
                                        (out _page)
                                        (if (not (string=? _text ""))
                                            (out "|~a" _text))
                                        (out "]]"))
                                      ))
                          (ilink    ( handle-ilink 
                                      (begin
                                        (set! _page  "")
                                        (set! _types (list))
                                        (set! _ns    (list))
                                        (set! _args  (list))
                                        (set! _text  "")
                                        (set! port   (open-output-string)))
                                      (begin
                                        (set! _text (get-output-string port))
                                        (set! port  _port)
                                        (out  "[[")
                                        (for-each (lambda (e) (out "~a::" e)) _types)
                                        (for-each (lambda (e) (if (not (equal? e "default"))
                                                                  (out "~a:" e))) _ns)
                                        (out _page)
                                        (for-each (lambda (e) (out "|~a" e)) _args)
                                        (if (not (string=? _text ""))
                                            (out "|~a" _text))
                                        (out "]]"))
                                      ))
                          (elink    ( handle-elink
                                      (begin
                                        (set! _page "")
                                        (set! _text "")
                                        (set! port  (open-output-string)))
                                      (begin
                                        (set! _text (get-output-string port))
                                        (set! port  _port)
                                        (out "[~a" _page)
                                        (if (not (string=? _text ""))
                                            (out " ~a" _text))
                                        (out "]"))
                                      ))
                          )
                        ))
              
              (begin
                (xexpr-sax sax-handler _xml)
                (let ((S (substr (get-output-string port) 1)))
                  (close-output-port port)
                  (if (not (null? file-or-port))
                      (if (port? (car file-or-port))
                          (display S (car file-or-port))
                          (let ((p (open-output-file (car file-or-port) 'replace)))
                            (display S p)
                            (close-output-port p))))
                  S))))
          
          ((define (sp "Get the parsed wikitext as XExpr."))
           (xexpr)
             _xml)
          
          )
         (constructor)
         )



        );;;; module-end