wiki2xml.rkt
#lang racket

;; Converts wiki text to XML, adding headers based on the
;; metadata included within the file.  This is intended mostly as a
;; demonstrator of how the wiki-text library should be used.
;;
;; Usage:
;;     wiki2xml [--output filename] [--[no]structured] wiki-text?
;;
;; If the WIKI-TEXT argument is missing, then text is read from stdin.
;;
;; If --structured is present (the default), then structure is output
;; as <div> elements; if --nostructured is present, it's output as
;; <hn> elements.
;;
;; Pays attention to ::title, ::keywords (space separted words) and ::date
;; (yyyy-mm-dd) keywords in the input.
;;
;; The wiki syntax is basically the WikiCreole syntax (see parser.scm for
;; details), plus '::md value', where the MD value can be 'title', 'subtitle',
;; 'date' (yyyy-mm-dd), or 'keywords'

(require (prefix-in wt: "parser.ss"))
(require xml)
(require racket/cmdline)
(require (prefix-in srfi19: srfi/19))   ;date handling
(require srfi/8)                        ;receive


;; Unit tests -- nothing here, apart form a form to ignore them.  The
;; actual testing is sone in test-module.ss.
(define-syntax expect
  (syntax-rules ()
    ((_ form ...) (void))))
(define-syntax expect-failure
  (syntax-rules ()
    ((_ form) (void))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Utilities

;; WRITE-XML-TO-PORT : wiki-text port boolean -> #void
;; Write the contents of the wiki-text to the specified port, as
;; XML, inside a document element of <top>...</top>, which has each
;; of the metadata keys as attributes.
;;
;; As a flourish, parse and reformat any 'date keywords
(define (write-xml-to-port wiki-text port structured-xml?)
  (write-xml/content
   (xexpr->xml
    `(top ((xmlns "http://ns.nxg.me.uk/squicky/xml")
           ,@(map (lambda (k)
                    (cond ((or (eqv? k 'date)
                               (eqv? k 'updated))
                           (list k (srfi19:date->string (wt:lookup-parsed wiki-text k) "~4")))
                          (else (list k (wt:lookup wiki-text k)))))
                  (wt:lookup-keys wiki-text)))
          ,@(if structured-xml?
                (add-body-structure (wt:body wiki-text))
                (wt:body wiki-text))))
   port)
  (newline port))

;; ADD-BODY-STRUCTURE : sexp -> sexp
;; Spot h<n> elements, and replace them with div elements
(define (add-body-structure body-sexp)
  (let loop ((body body-sexp)
             (stack '((((class "body"))))))
    (define (pop-stack stack)
      (when (null? stack)
            (error "Can't pop null stack"))
      (values (car stack) (cdr stack)))
    (define (push-stack stack val)
      (cons val stack))
    (define (push-onto-stack-top stack val)
      ;; add val to the beginning of the list at the top of the stack
      ;; (thus stack size stays the same)
      (receive (top rest)
          (pop-stack stack)
        (push-stack rest (cons val top))))
    (define (squash-stack stack)
      ;; add the item at the top of the stack to the front of the list at the second-top of the stack
      ;; (thus stack size goes down by one)
      (receive (top rest)
          (pop-stack stack)
        (push-onto-stack-top rest `(div . ,(reverse top)))))
    (define (misc-list->string list-of-string-or-char)
      (apply string-append
             (map (λ (x)
                     (cond ((string? x) x)
                           ((char? x) (string x))
                           ((and (pair? x) ;dig into any elements in the title, and extract the content
                                 (symbol? (car x)))
                            (misc-list->string (cdr x)))
                           (else (error (format "Can't stringize ~s" x)))))
                  list-of-string-or-char)))
    (define (add-section-at-level new-level title old-stack)
      ;; int string stack -> stack
      (let ((diff (- new-level (length old-stack))))
        (cond ((< diff 0)            ;decrease the stack/section level
               (add-section-at-level new-level title (squash-stack old-stack)))
              ((= diff 0)    ;new section at same level as current one
               (push-stack (squash-stack old-stack) `(((title ,title)))))
              ((= diff +1)          ;increase the section level by one
               (push-stack old-stack `(((title ,title)))))
              (else                     ; diff > 1
               (error "Can't go from h2->h4")))))
    (if (null? body)
        (let clear ((s stack)) ;finished input: clear out the stack, returning the final sexp
          (cond ((null? s)     ;shouldn't happen
                 (error "Stack can't be null?"))
                ((null? (cdr s))        ;length=1: finished
                 `((div . ,(reverse (car s))))) ;the result
                (else
                 (clear (squash-stack s)))))
        (case (caar body)
          ((h2)
           (loop (cdr body)
                 (add-section-at-level 2 (misc-list->string (cdar body)) stack)))
          ((h3)
           (loop (cdr body)
                 (add-section-at-level 3 (misc-list->string (cdar body)) stack)))
          ((h4)
           (loop (cdr body)
                 (add-section-at-level 4 (misc-list->string (cdar body)) stack)))
          (else
           (loop (cdr body)
                 (push-onto-stack-top stack (car body))))))))

(provide add-body-structure)            ;for testing
(expect ((div ((class "body")) (p "Hello") (p "there"))) ;simplest case -- no work to do
        (add-body-structure '((p "Hello") (p "there"))))
(expect ((div ((class "body")) (p "Hello") (div ((title "Section")) (p "content") (p "more"))))
        (add-body-structure '((p "Hello") (h2 "Section") (p "content") (p "more"))))
(expect ((div ((class "body"))
              (div ((title "H2.1"))
                   (p "t2.1")
                   (div ((title "H3"))
                        (div ((title "H4"))
                             (p "t4")
                             (p "t4.1"))))
              (div ((title "H2.2"))
                   (p "t2.2.1")
                   (p "t2.2.2"))))
        (add-body-structure '((h2 "H2.1") (p "t2.1") (h3 "H3") (h4 "H4") (p "t4") (p "t4.1") (h2 "H2.2") (p "t2.2.1") (p "t2.2.2"))))
(expect ((div ((class "body")) (div ((title "pre/post€")) (p "title"))))
        (add-body-structure '((h2 "pre" #\/ "post" #\€) (p "title"))))
(expect-failure (add-body-structure '((h2 "H2") (p "text") (h4 "H4"))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; The main event

(define (Usage)
  (display (format "wiki2xml [--output filename] input-file?~%"))
  (exit 1))

(define (define-option default)
  (let ((v default))
    (lambda args
      (if (null? args)
          v
          (set! v (car args))))))

;; MAIN : string ... -> void
(define (main . args)
  (let ((output-file (define-option #f))
        (structured-output (define-option #t)))
    (let ((input-file
           (command-line
            #:program "wiki2xml"
            #:argv args

            #:once-each
            ("--output"
             filename
             "File to receive XML output"
             (output-file filename))
            ("--structured"
             "Produce output with sections in <div>"
             (structured-output #t))
            ("--nostructured"
             "Produce output with sections in <h2>, <h3>, <h4>"
             (structured-output #f))

            #:args filenames

            filenames)))

      ;; Check, and bail out on bad input
      (when (> (length input-file) 1)
            (Usage))

      (let ((input-port (if (null? input-file)
                            (current-input-port)
                            (open-input-file (car input-file))))
            (output-port (cond ((output-file) => open-output-file)
                               (else (current-output-port)))))
        (write-xml-to-port (wt:parse input-port)
                           output-port
                           (structured-output))))))

(provide main)