mediawiki-struct-parse.scm
(module mediawiki-struct-parse mzscheme
        (require (lib "yacc.ss" "parser-tools")
                 (lib "lex.ss" "parser-tools")
                 (prefix : (lib "lex-sre.ss" "parser-tools")))
        (require (lib "string.ss" "mzlib"))
        (require (lib "string.ss" "srfi" "13"))
        (require (lib "list.ss" "mzlib"))
        (require (lib "pretty.ss" "mzlib"))
        (require (planet "xml.scm" ("oesterholt" "ho-utils.plt" 1 0)))
        (provide mediawiki-parse
                 pp
                 substr
                 )

        (define (pp . args)
          (apply pretty-print args))
        
        (define VERSION "1.0")

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Support
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define dbg #f)
        
        (define (return sym val)
          (if dbg (display (format "~a~%" sym)))
          val
          )

        (define-syntax lreturn
          (syntax-rules ()
            ((_ (tok x))
             (begin
               (if (and dbg (not (or (eq? 'tok 'token-CHAR) (eq? 'tok 'token-WS))))
                   (display (format "~a~%" 'tok)))
               (tok x)))
            ((_ tok)
             (begin
               (if (and dbg (not (or (eq? 'tok 'token-CHAR) (eq? 'tok 'token-WS))))
                   (display (format "~a~%" 'tok)))
               tok))
            ))


        (define (substr s b . e)
          (let ((l (if (null? e)
                       (string-length s)
                       (if (< (car e) 0)
                           (+ (string-length s) (car e))
                           (car e)))))
            (substring s b l)))

        (define (trim s)
          (string-trim-both s))


        (define (%parse-include S def-ns)
          (parse-include (string-append "{{" S) "Template"))


        (define (%get s p)
          (string-trim-both (substr s p)))

        
        (define (get-elems xelem)
          (cddr xelem))
        
        (define (xexpr-elems-append l1 l2)
          (append l1 (cdr l2)))
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Lexical abbreviations
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define-lex-abbrevs
         (:ws (:or #\newline #\space #\tab))
         (:wsr (:: (:* (:or #\space #\tab)) #\newline))
         (:wst (:or #\space #\tab))
         (:spart (:+ (:or (:/ #\a #\z) (:/ #\A #\Z) (:/ #\0 #\9))))
         (:wikispecials (:or #\= #\[ #\] #\{ #\} #\| #\newline))
         (:no-newline   (:* (char-complement #\newline)))
         (:no-newline1  (:+ (char-complement #\newline)))
         (:no-pipe      (:* (char-complement (:or #\| #\newline))))
         (:noendpre     (:or (:: (char-complement #\/)(:* (char-complement #\>)))
                             (:: #\/ (char-complement #\p) (:* (char-complement #\>)))
                             (:: "/p"  (char-complement #\r) (:* (char-complement #\>)))
                             (:: "/pr" (char-complement #\e) (:* (char-complement #\>)))
                             (:: "/pre" (char-complement #\>) (:* (char-complement #\>)))
                             )
                        )
         (:end-pre      (:: (:* (:* (char-complement #\<)) (:: #\< :noendpre  #\>))
                            (:* (char-complement #\<))
                            "</pre>"))
         (:no-end-inc        (:: (:* (:: (:+ (char-complement #\})) #\} (:+ (char-complement #\})))) (:* (char-complement #\}))))
         (:no-italics-end    (:: (:* (:: (:+ (char-complement #\')) #\' (:+ (char-complement #\')))) (:* (char-complement #\'))))
         (:xt            (:* (char-complement #\>)))
         (:nonowiki      (:or (:: (char-complement #\/) :xt)
                              (:: #\/ (char-complement #\n) :xt)
                              (:: "/n" (char-complement #\o) :xt)
                              (:: "/no" (char-complement #\w) :xt)
                              (:: "/now" (char-complement #\i) :xt)
                              (:: "/nowi" (char-complement #\k) :xt)
                              (:: "/nowik" (char-complement #\i) :xt)
                              (:: "/nowiki" (char-complement #\>) :xt)
                              )
                         )
         (:no-nowiki-end (:: (:* (:* (char-complement #\<)) (:: #\< :nonowiki  #\>))
                             (:* (char-complement #\<))
                             "</nowiki>"))
         (:url           (:or "mailto:" (:: :spart #\: #\/ #\/)))
         (:noclosebrack  (:: (:* (:: (:+ (char-complement #\])) #\] (:+ (char-complement #\])))) (:* (char-complement #\]))))
         (:spartws       (:: (:+ (:or :spart :wst #\. #\, #\; #\- #\_ #\# #\$ #\% #\* #\& #\^ #\( #\) #\! #\@ #\< #\> #\? #\/ #\\ #\' #\` #\~ #\"))))
         )

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Tokens
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define-tokens       value-tokens (ILINK ELINK IELINK CHAR WS WORD ITEM INCLUDE H1 H2 H3 H4 TABLE-BEGIN TABLE-ROW TABLE-COL 
                                                 VERBATIM LINE PRE PAGE VAR VAL TEXT ATTRIBUTE LINKTYPE NAMESPACE TOGGLE BTAG ETAG))
        (define-empty-tokens e-tokens     (EOF  TABLE-END EMPTY-LINE NEWLINE VERB-BEGIN VERB-END INC-BEGIN INC-END
                                                E-BOLD B-BOLD E-ITALICS B-ITALICS E-BOLD-ITALICS B-BOLD-ITALICS HLINE))



        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; attribute parser
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define attribute-lex
          (lexer
           ((eof) 'EOF)
           ((:: (:+ (char-complement (:or :wst #\=))) (:* :wst) "=" (:* :wst))    (lreturn (token-VAR  (trim (substr lexeme 0 -1)))))
           ((:: #\" (:* (char-complement #\")) #\")                               (lreturn (token-VAL  (substr lexeme 1 -1))))
           ((:+ (char-complement (:or :wst #\= #\| #\")))                         (lreturn (token-VAL  lexeme)))
           (any-char                                                              (lreturn (token-CHAR lexeme)))
           ))

        (define attribute-parser
          (parser
           (start start)
           (end EOF)
           (tokens value-tokens e-tokens)
           (error (lambda (a b c) (display (format "ERROR! ~a ~a ~a~%" a b c))(void)))

           (grammar
            (start (()                  (list))
                   ((error start)       $2)
                   ((vars)              $1)
                   )

            (vars  ((VAR VAL vars)      (cons (xexpr-attr (string->symbol $1) $2) $3))
                   ((CHAR vars)         $2)
                   ((CHAR)              (list))
                   ((VAL vars)          $2)
                   ((VAL)               (list))
                   ((VAR VAL)           (list (xexpr-attr (string->symbol $1) $2)))
                   )
            )
           ))

        (define (parse-attributes str def-ns)
          (if (string? str)
              (parse-attributes (open-input-string str) def-ns)
              (begin
                (port-count-lines! str)
                (let ((R  (attribute-parser (lambda () (attribute-lex str)))))
                  (display R)(newline)
                  (let ((RR (apply xexpr-attrs R)))
                  (display RR)(newline)
                    RR)))
              ))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Parsing links
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define (parse-elink l def-ns)
          (let ((L (regexp-match "([^ \t]+)[ \t]+(.*)" l)))
            (if (eq? L #f) (set! L (list l l #f)))
            (let ((link (cadr L))
                  (text (if (eq? (caddr L) #f) "" (caddr L))))
            (xexpr-elem 'elink 
                        (xexpr-attrs (xexpr-attr 'link link))
                        (xexpr-elem 'text (parse-text (trim text) def-ns))))))
        
        
        (define ilink-lex
          (lexer
           ((eof) 'EOF)
           ((:: #\| (:* (:~ #\|)))     (lreturn (token-TEXT (substr lexeme 1))))
           ((:: :spartws ":=")         (lreturn (token-ATTRIBUTE (trim (substr lexeme 0 -2)))))
           ((:: :spartws "::")         (lreturn (token-LINKTYPE (trim (substr lexeme 0 -2)))))
           ((:: :spartws ":")          (lreturn (token-NAMESPACE (trim (substr lexeme 0 -1)))))
           ((:: ":")                   (lreturn (token-NAMESPACE "")))
           ((:: :spartws)              (lreturn (token-PAGE (trim lexeme))))
           ))
        
        (define (ilink-parse def-ns)
          (parser
           (start start)
           (end EOF)
           (tokens value-tokens e-tokens)
           (error (lambda (a b c) (display (format "ERROR! ~a ~a ~a~%" a b c))(void)))

           (grammar
            (start ;(()                  (list))
                   ((error start)       $2)
                   ((link)              $1)
                   )

            (link  ((attributes page)               (xexpr-elem 'meta $2
                                                                (xexpr-elems $1)))
                   ((attributes page texts)         (xexpr-elem 'meta $2 (xexpr-elems $1 $3)))
                   ((linktypes page)                (xexpr-elem 'ilink $2 (xexpr-elems $1)))
                   ((linktypes page argtext)        (xexpr-elem 'ilink $2
                                                                (apply xexpr-elems (cons $1 $3))))
                   )
            
            (page  ((PAGE)               (xexpr-elem 'page (xexpr-attrs (xexpr-attr 'link $1)) (xexpr-elems (xexpr-elem 'ns def-ns))))
                   ((namespaces PAGE)    (xexpr-elem 'page (xexpr-attrs (xexpr-attr 'link $2)) (apply xexpr-elems $1) ))
                   )
            
            (namespaces ((namespace)            (list $1))
                        ((namespace namespaces) (cons $1 $2))
                        )
            
            (namespace  ((NAMESPACE)      (xexpr-elem 'ns (if (string=? $1 "") def-ns $1)))
                        )
            
            (texts  ((TEXT texts1)         (xexpr-elem 'text (parse-text (string-append $1 $2) def-ns)))
                    )

            (texts1 ((TEXT texts1)         (string-append "|" $1 $2))
                    (()                    "")
                    )
            
            (attributes ((ATTRIBUTE attrs) (xexpr-elem 'attributes (apply xexpr-elems (cons (xexpr-elem 'attribute $1) $2))))
                        )
            
            (attrs (()                    (list))
                   ((ATTRIBUTE attrs)     (cons (xexpr-elems 'attribute $1) $2))
                   )
            
            (argtext  ((TEXT)             (list (xexpr-elem 'text (parse-text $1 def-ns))))
                      ((TEXT argtext)     (cons (xexpr-elem 'arg $1) $2))
                      )
            
            (linktypes ((ltypes)          (xexpr-elem 'linktypes (apply xexpr-elems $1)))
                       )
            
            (ltypes    (()                 (list))
                       ((LINKTYPE ltypes)  (cons (xexpr-elem 'type $1) $2))
                       )
            )
           ))
        
        (define (parse-ilink str def-ns)
          (if (string? str)
              (parse-ilink (open-input-string str) def-ns)
              (begin
                (port-count-lines! str)
                (let ((R  ((ilink-parse def-ns) (lambda () (ilink-lex str)))))
                  R))
              ))
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Parsing text for links, categories, tags, markup, etc.
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define in-italics  #f)
        (define in-bold     #f)
        (define in-bi       #f)

        (define text-lex
          (lexer
           ((eof) 'EOF)
           ((:: "<nowiki>" :no-nowiki-end "</nowiki>")                            (lreturn (token-CHAR lexeme)))
           ("''"                                                                  (lreturn (begin (set! in-italics (not in-italics))
                                                                                             (if in-italics (token-B-ITALICS) (token-E-ITALICS)))))
           ("'''"                                                                 (lreturn (begin (set! in-bold (not in-bold))
                                                                                             (if in-bold    (token-B-BOLD) (token-E-BOLD)))))
           ("'''''"                                                               (lreturn (begin (set! in-bi (not in-bi))
                                                                                             (if in-bi      (token-B-BOLD-ITALICS) (token-E-BOLD-ITALICS)))))
           ((:: #\[ :url (:+ (char-complement #\])) #\])                          (lreturn (token-ELINK (trim (substr lexeme 1 -1)))))
           ((:: :url (:+ (char-complement :wst)))                                 (lreturn (token-ELINK lexeme)))
           ((:: "[[" :url :noclosebrack "]]")                                     (lreturn (token-IELINK (trim (substr lexeme 2 -2)))))
           ((:: "[[" :noclosebrack "]]")                                          (lreturn (token-ILINK (trim (substr lexeme 2 -2)))))
           ((:: "__" :spart "__")                                                 (lreturn (token-TOGGLE (trim (substr lexeme 2 -2)))))
           ((:: #\< (:+ (:~ #\newline #\>)) #\>)                                  (lreturn (if (char=? (string-ref lexeme 1) #\/)
                                                                                               (token-ETAG (trim (substr lexeme 2 -1)))
                                                                                               (token-BTAG (trim (substr lexeme 1 -1))))))
           (any-char                                                              (lreturn (token-CHAR lexeme)))
           ))

        (define (text-parser def-ns)
          (parser
           (start start)
           (end EOF)
           (tokens value-tokens e-tokens)
           (error (lambda (a b c) (display (format "ERROR! ~a ~a ~a~%" a b c))(void)))

           (grammar
            (start ;(()                  (list))
                   ((error start)       $2)
                   ((text)              (apply xexpr-elems $1))
                   )

            (text  ((markup text)                         (cons $1 $2))
                   ((string text)                         (cons (xexpr-elem 'nm $1) $2))
                   ((ELINK text)                          (cons (parse-elink $1 def-ns) $2))
                   ((ILINK text)                          (cons (parse-ilink $1 def-ns) $2))
                   ((IELINK text)                         (cons (xexpr-elem 'nm "[") 
                                                                (cons (parse-elink $1 def-ns) 
                                                                      (cons (xexpr-elem 'nm "]") $2) ) ) )
                   ((TOGGLE text)                         (cons (xexpr-elem 'toggle $1) $2))
                   ((BTAG text)                           (cons (xexpr-elem 'xml-begin $1) $2))
                   ((ETAG text)                           (cons (xexpr-elem 'xml-end $1) $2))
                   (()                                    (list))
                   )

            (markup ((B-ITALICS text E-ITALICS)           (xexpr-elem 'i (apply xexpr-elems $2)))
                    ((B-BOLD text E-BOLD)                 (xexpr-elem 'b (apply xexpr-elems $2)))
                    ((B-BOLD-ITALICS text E-BOLD-ITALICS) (xexpr-elem 'bi (apply xexpr-elems $2)))
                    )

            (string ((CHAR)                               $1)
                    ((CHAR string)                        (string-append $1 $2))
                    )

            )
           ))

        (define (parse-text str def-ns)
          (if (string? str)
              (begin
                (parse-text (open-input-string str) def-ns))
              (begin
                (port-count-lines! str)
                (let ((R  ((text-parser def-ns) (lambda () (text-lex str)))))
                  R))
              ))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Include parser
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define include-lex
          (lexer
           ((eof) 'EOF)
           ((:: "{{"  (:+ (char-complement #\newline)))                 (lreturn (token-PAGE (trim (substr lexeme 2)))))
           ((:: #\newline "|"   (char-complement #\=) "=")              (lreturn (token-VAR  (substr lexeme 1 -1))))
           (any-char                                                    (lreturn (token-CHAR lexeme)))
           ))

        (define (include-parser def-ns)
          (parser
           (start start)
           (end EOF)
           (tokens value-tokens e-tokens)
           (error (lambda (a b c) (display (format "ERROR! ~a ~a ~a~%" a b c))(void)))

           (grammar
            (start (() (list))
                   ((error start)       $2)
                   ((PAGE vars)         (return 'include (xexpr-elem 'include (parse-ilink $1 def-ns) (apply xexpr-elems $2 ))) )
                   )

            (vars  (()                 (list))
                   ((VAR text vars)    (cons (xexpr-elem 'arg (xexpr-attrs (xexpr-attr 'name (trim (substr $1 1))))
                                                         (apply xexpr-elems (get-elems (mediawiki-parse $2)))) $3))
                   )

            (text ((CHAR)              "")
                  ((CHAR text)         (string-append $1 $2))
                  )
            )
           ))

        (define (parse-include str def-ns)
          (if (string? str)
              (parse-include (open-input-string str) def-ns)
              (begin
                (port-count-lines! str)
                ((include-parser def-ns)(lambda () (include-lex str))))
              ))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Structure parser
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define (parse-item str def-ns)
          (let ((L (regexp-match "([#*]+)(.*)" str)))
            (let ((bullets (cadr L))
                  (text    (parse-text (caddr L) def-ns)))
              (xexpr-elem 'item (xexpr-attrs (xexpr-attr 'bullets bullets)) text))))
        
        (define mediawiki-struct-lex
          (lexer
           ((eof) 'EOF)
           ((:: #\newline "====" :no-newline1 "====")                 (lreturn (token-H4 (trim (substr lexeme 5 -4)))))
           ((:: #\newline "===" :no-newline1 "===")                   (lreturn (token-H3 (trim (substr lexeme 4 -3)))))
           ((:: #\newline "==" :no-newline1 "==")                     (lreturn (token-H2 (trim (substr lexeme 3 -2)))))
           ((:: #\newline "=" :no-newline1 "=")                       (lreturn (token-H1 (trim (substr lexeme 2 -1)))))
           ((:: #\newline "{|" :no-newline)                           (lreturn (token-TABLE-BEGIN (trim (substr lexeme 3)))))
           ((:: #\newline "|}" :no-newline)                           (lreturn (token-TABLE-END)))
           ((:: #\newline "|-" :no-newline)                           (lreturn (token-TABLE-ROW (trim (substr lexeme 3)))))
           ((:: #\newline "|" :no-newline)                            (lreturn (token-TABLE-COL (trim (substr lexeme 2)))))   ;;; order matters!
           ((:: #\newline :wst :no-newline)                           (lreturn (token-VERBATIM (trim lexeme))))
           ((:: #\newline (:+ (:or "*" "#")) :no-newline)             (lreturn (token-ITEM lexeme)))
           ((:: #\newline "----")                                     (lreturn (token-HLINE)))
           ((:: "<pre>"  :end-pre)                                    (lreturn (token-PRE (substr lexeme 5 -5))))
           ((:: "{{" :no-end-inc  "}}")                               (lreturn (token-INCLUDE (substr lexeme 2 -2))))
           (any-char                                                  (lreturn (token-CHAR lexeme)))
           ))

        (define (mediawiki-struct-parse def-ns)
          (parser
           (start start)
           (end EOF)
           (tokens value-tokens e-tokens)
           (error (lambda (a b c) (display (format "ERROR! ~a ~a ~a~%" a b c))(void)))

           (grammar
            (start (() (list))
                   ((error start) $2)
                   ((wikitext)    (return 'wikitext (xexpr-elem 'wikitext 
                                                                (xexpr-attrs (xexpr-attr 'version VERSION))
                                                                (apply xexpr-elems $1))))
                   )

            (wikitext ((elem)          (list $1))
                      ((elem wikitext) (cons $1 $2))
                      )

            (elem     ((H1)                                   (return 'h1 (xexpr-elem 'h1 (parse-text $1 def-ns))))
                      ((H2)                                   (return 'h2 (xexpr-elem 'h2 (parse-text $1 def-ns))))
                      ((H3)                                   (return 'h3 (xexpr-elem 'h3 (parse-text $1 def-ns))))
                      ((H4)                                   (return 'h4 (xexpr-elem 'h4 (parse-text $1 def-ns))))
                      ((HLINE)                                (return 'hline (xexpr-elem 'hline)))
                      ((TABLE-BEGIN table TABLE-END)          (return 'table (xexpr-elem 'table 
                                                                                         (parse-attributes $1 def-ns) (apply xexpr-elems $2))))
                      ((INCLUDE)                              (%parse-include $1 def-ns))
                      ((text)                                 $1)
                      ((verbatim)                             $1)
                      ((PRE)                                  (return 'verbatim (xexpr-elem 'pre (substr $1 5 -6))))
                      ((items)                                (return 'items (xexpr-elem 'items (apply xexpr-elems (item-reformer $1 def-ns)))))
                      )

            (items    ((ITEM items)                           (cons (parse-item $1 def-ns) $2))
                      ((ITEM)                                 (list (parse-item $1 def-ns)))
                      )
            
            (text    ((ttext)                    (return 'text (xexpr-elem 'text (parse-text $1 def-ns)))))

            (ttext   ((CHAR ttext)               (string-append $1 $2))
                     ((CHAR)                     $1))

            (verbatim ((verbs)                   (return 'verbatim (xexpr-elem 'verbatim $1)))
                      )

            (verbs   ((VERBATIM)                 $1)
                     ((VERBATIM verbs)           (string-append $1 $2))
                     )

            (table   ((rows)                     $1)
                     )

            (rows    (()                         (list))
                     ((TABLE-ROW cols rows)      (return 'row (cons (xexpr-elem 'trow (parse-attributes $1 def-ns) (apply xexpr-elems $2))
                                                                    $3)))
                     )

            (cols   (()                          (list))
                    ((TABLE-COL wikitext cols)   (return 'col (cons (xexpr-elem 'tcol (parse-attributes $1 def-ns) (apply xexpr-elems $2))
                                                                    $3)))
                    ((TABLE-COL cols)            (return 'col (cons (xexpr-elem 'tcol (parse-attributes $1 def-ns))
                                                                    $2)))
                    )

            )
           ))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Item reformer
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define (item-bullets item)
          (let* ((attrs (cadr item))
                 (bul   (car attrs))
                 (val   (cadr bul)))
            (map (lambda (b) 
                   (if (eq? b #\*)
                       'ul
                       'nl))
                 (string->list val))))
        
        (define (item-value item)
          (cddr item))
        
        (define (new-branch bullets item)
          (if (null? bullets)
              (list 'item item)
              (list 
               (list (car bullets) (new-branch (cdr bullets) item)))
              ))
        
        (define (item-append struct el)
          (append struct el))
        
        (define (item-adder struct bullets item)
          (if (null? bullets)
              (if (null? struct)
                  (list 'item item)
                  (item-append struct (list (list 'item item))))
              (if (null? struct)
                  (new-branch bullets item)
                  (let ((node (car (reverse struct))))
                    (if (eq? (car node) (car bullets))
                        (begin
                          (set-cdr! node (item-adder (cdr node) (cdr bullets) item))
                          struct)
                        (item-append struct (new-branch bullets item)))))))
        
        (define (walk-items struct)
          (define (wi el)
            (if (eq? (car el) 'ul)
                (xexpr-elem 'ul (apply xexpr-elems (walk-items (cdr el))))
                (if (eq? (car el) 'nl)
                    (xexpr-elem 'nl (apply xexpr-elems (walk-items (cdr el))))
                    (cadr el))))
          (map wi struct))
        
        (define (item-reformer items def-ns)
          (let ((struct '()))
            (for-each (lambda (i)
                        (set! struct (item-adder struct (item-bullets i) i)))
                      items)
            (walk-items struct)))
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;;; Structure parser
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define (mediawiki-parse ip . ns)
          (let ((def-ns (if (null? ns) "default" (car ns))))
            (if (string? ip)
                (mediawiki-parse (open-input-string ip) def-ns)
                (begin
                  (port-count-lines! ip)
                  ((mediawiki-struct-parse def-ns) (lambda () (mediawiki-struct-lex ip))))
                )))



        );;;; module -end