html-writing.rkt
#lang racket/base
;;; @Package  html-writing
;;; @Subtitle Writing HTML from SXML/xexp in Racket
;;; @HomePage http://www.neilvandyke.org/racket-html-writing/
;;; @Author   Neil Van Dyke
;;; @Version  0.1
;;; @Date     2011-08-21
;;; @PLaneT   neil/html-writing:1:=0

;; $Id: html-writing.rkt,v 1.457 2011/08/22 04:52:47 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2004--2011 Neil Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

;; TODO: !!! ALMOST CERTAINLY GET RID OF "*splice*", JUS SANITY-CHECK BEFORE WE
;; RIP OUT THE NEW "*splice*" CODE.  AND FIND WHERE TO PUT BACK IN THE OLD SXML
;; EXTRANEOUS LISTS CODE.

;; TODO: !!! HAVE AN ITERATOR/TRAVERSE/WALK/FOLD LIBRARY, AND USE THAT?

;; TODO: !!! PASS UNRECOGNIZED LISTS WITH "*" AS FIRST CHAR OF SYMBOL TO
;; FOREIGN FILTER.  THIS CAN BE COMBINED WITH ERROR-CHECKING THAT THEY BEGIN
;; WITH ALPHA.

;; TODO: !!! Decide which "/fixed" procs to expose, based on what
;; "html-template" needs, and if it makes a performance difference.  Maybe
;; rename them to end with "*".

;; TODO: Add "*VERBATIM*".  Or require that it be done with foreign filters.

;; TODO: What about character encoding and bytes?  Have the port determine
;; encoding?

(require (planet neil/xexp:1:0))

;;; @section Introduction

;;; @i{Note: This package is in a state of active development, and some
;;; interface changes, perhaps not backward-compatible, are expected.
;;; Documentation is gravely lacking.}
;;;
;;; The @b{html-writing} package provides support for writing HTML encoded as
;;; @uref{http://www.neilvandyke.org/racket-xexp/, SXML/@i{xexp}} as HTML.
;;;
;;; This can be used for hand-constructed HTML, HTML constructed by program, or
;;; emitting HTML that has been read via
;;; @uref{http://www.neilvandyke.org/racket-html-parsing/, html-parsing}.
;;;
;;; For a different way of writing HTML from chunks of hand-constructed
;;; @i{xexp}, see the @uref{http://www.neilvandyke.org/html-template-scheme/,
;;; html-template} package.

;;; @section Foreign Filters

;;; @defproc error-html-writing-foreign-filter object context
;;;
;;; !!!

(define (error-html-writing-foreign-filter context object)
  (raise-invalid-xexp-error
   'error-html-writing-foreign-filter
   #:expected (string-append "valid foreign object in "
                             (case context
                               ((content)         "content")
                               ((attribute)       "attribute")
                               ((attribute-value) "attribute value")
                               (else (format "~S(?!?!?!)" context)))
                             " context")
   #:invalid-xexp object))

;;; @defparam current-html-writing-foreign-filter
;;;
;;; !!!

(define current-html-writing-foreign-filter
  (make-parameter error-html-writing-foreign-filter))

;;; @section Writing

;;; !!! The two most common procedures in @b{html-writing} for writing
;;; HTML from an @i{xexp} representation are @code{write-html} and
;;; @code{xexp->html}.  These are perhaps most useful for emitting the result
;;; of parsed and transformed input HTML.  They can also be used for emitting
;;; HTML from generated or handwritten @i{xexp}.

;;; @subsection Writing Attributes

;;; @defproc write-html-attribute-value-char chr out
;;;
;;; !!!

(define (write-html-attribute-value-char chr out)
  (case chr
    ((#\") (display """ out))
    ((#\<) (display "&#60;" out))
    ((#\>) (display "&#62;" out))
    ((#\&) (display "&#38;" out))
    (else  (display chr     out))))

;;; @defproc write-html-attribute-value-string str out
;;;
;;; !!!

(define (write-html-attribute-value-string str out)
  (let ((len (string-length str)))
    (let loop ((i 0))
      (if (< i len)
          (begin (write-html-attribute-value-char (string-ref str i) out)
                 (loop (+ 1 i)))
          (void)))))

;;; @defproc write-html-attribute-value-part/fixed val out foreign-filter
;;;
;;; !!!

(define (write-html-attribute-value-part/fixed thing out foreign-filter)
  (cond ((string? thing)
         (write-html-attribute-value-string thing out))
        ((char? thing)
         (write-html-attribute-value-char thing out))
        ((pair? thing)
         (case (car thing)
           ((&)
            (%html-writing:write-html-entity-ref-args (cdr thing) out))
           ((*splice* *SPLICE*)
            (%html-writing:write-html-attribute-value-part-list/fixed
             (cdr thing)
             out
             foreign-filter))
           (else (raise-invalid-xexp-error
                  'write-html-attribute-value-part/fixed
                  #:expected "pair object in xexp attribute value part"
                  #:invalid-xexp thing))))
        (else
         (let ((filtered (foreign-filter 'attribute-value thing)))
           (if (null? filtered)
               (void)
               (write-html-attribute-value-part/fixed filtered
                                                      out
                                                      foreign-filter))))))

;;; @defproc write-html-attribute-value attr [ out [ foreign-filter ] ]
;;;
;;; !!!

(define (%html-writing:write-html-attribute-value/fixed val out foreign-filter)
  ;; TODO: !!! we currently write the "=" in some cases in which there is no
  ;; value, such as if a foreign filter (or chain of foreign filters yields a
  ;; null.  Find an efficient way to test for that case (such as if there is
  ;; only foreign objects, one or more, and we recursively evaluate them).
  (or (null? val)
      (begin
        (display "=\"" out)
        (%html-writing:write-html-attribute-value-part-list/fixed
         val
         out
         foreign-filter)
        (write-char #\" out))))

(define (write-html-attribute-value
         val
         (out (current-output-port))
         (foreign-filter (current-html-writing-foreign-filter)))
  (%html-writing:write-html-attribute-value/fixed val out foreign-filter))

(define (%html-writing:write-html-attribute-value-part-list/fixed
         part-list
         out
         foreign-filter)
  (for-each (lambda (thing)
              (write-html-attribute-value-part/fixed thing
                                                     out
                                                     foreign-filter))
            part-list))

;;; @defproc write-html-attribute attr [ out [ foreign-filter ] ]
;;;
;;; !!!

(define (write-html-attribute
         attr
         (out            (current-output-port))
         (foreign-filter (current-html-writing-foreign-filter)))
  (%html-writing:write-html-attribute/fixed attr out foreign-filter))

(define (%html-writing:write-html-attribute/fixed attr out foreign-filter)
  (cond ((pair? attr)
         (let ((name (car attr)))
           (or (symbol? name)
               (raise-invalid-xexp-error '%html-writing:write-html-attribute/fixed
                                         #:expected "attribute name"
                                         #:invalid-xexp attr))
           ;; TODO: !!! how can name ever be "@"?  probably delete this test,
           ;; and instead error-check for valid attribute name (starts with
           ;; alpha)
           (or (eq? name '@)
               (begin (write-char #\space out)
                      (display    name    out)
                      (let ((val (cdr attr)))
                        ;; TODO: !!! this is an imperfect way to test for
                        ;; boolean attribute with default value, such as if
                        ;; foreign filters are involved (and also if we add
                        ;; back in metadata).
                        (if (null? val)
                            (begin (display    "=\"" out)
                                   (display    name  out)
                                   (write-char #\"   out))
                            (%html-writing:write-html-attribute-value/fixed
                             val
                             out
                             foreign-filter)))))))
        (else
         (let ((v (foreign-filter 'attribute attr)))
           (cond ((and (pair? v) (memq (car v) '(*splice* *SPLICE*)))
                  (%html-writing:write-html-attribute-list/fixed
                   (cdr v)
                   out
                   foreign-filter))
                 ((null? v) (void))
                 (else
                  (%html-writing:write-html-attribute/fixed
                   v
                   out
                   foreign-filter)))))))

;;; @defproc write-html-attribute-list attr-list [out [foreign-filter]
;;;
;;; !!!

(define (write-html-attribute-list
         attr-list
         (out            (current-output-port))
         (foreign-filter (current-html-writing-foreign-filter)))
  (%html-writing:write-html-attribute-list/fixed attr-list
                                                 out
                                                 foreign-filter))

(define (%html-writing:write-html-attribute-list/fixed attr-list
                                                       out
                                                       foreign-filter)
  (for-each (lambda (attr)
              ;; TODO: !!! handle foreign filter here?
              (%html-writing:write-html-attribute/fixed
               attr
               out
               foreign-filter))
            attr-list))

;;; @defproc  write-html-attribute-or-list [attr-or-list [out [foreign-filter]]
;;;
;;; !!!

(define (write-html-attribute-or-list
         attr-or-list
         (out            (current-output-port))
         (foreign-filter (current-html-writing-foreign-filter)))
  (%html-writing:write-html-attribute-or-list/fixed attr-or-list
                                                    out
                                                    foreign-filter))

(define (%html-writing:write-html-attribute-or-list/fixed attr-or-list
                                                          out
                                                          foreign-filter)
  (cond
   ((pair? attr-or-list)
    (if (symbol? (car attr-or-list))
        (%html-writing:write-html-attribute/fixed attr-or-list
                                                  out
                                                  foreign-filter)
        (%html-writing:write-html-attribute-list/fixed attr-or-list
                                                       out
                                                       foreign-filter)))
   ((null? attr-or-list) (void))
   (else
    (%html-writing:write-html-attribute-or-list/fixed
     (foreign-filter 'attribute attr-or-list)
     out
     foreign-filter))))

;;; @subsection Writing Other

;;; @defproc write-html-decl/fixed xexp out
;;;
;;; !!!

(define (write-html-decl/fixed thing out)
  (or (memq (car thing) '(*decl* *DECL*))
      (raise-invalid-xexp-error 'write-html-decl/fixed
                                #:expected "HTML DECL"
                                #:invalid-xexp thing))
  (let ((head (car (cdr thing))))
    (display "<!" out)
    (display (symbol->string head) out)
    (for-each
     (lambda (n)
       (cond ((symbol? n)
              (write-char #\space out)
              (display (symbol->string n) out))
             ((string? n)
              (display " \"" out)
              (write-html-attribute-value-string n out)
              (write-char #\" out))
             (else (raise-invalid-xexp-error 'write-html-decl/fixed
                                             #:expected "HTML DECL"
                                             #:invalid-xexp thing))))
     (cdr (cdr thing)))
    (write-char #\> out)))

;;; @defproc write-html-pi/fixed xexp out
;;;
;;; !!!

(define (write-html-pi/fixed thing out)
  (or (memq (car thing) '(*pi* *PI*))
      (raise-invalid-xexp-error 'write-html-pi/fixed
                                #:expected "HTML PI"
                                #:invalid-xexp thing))
  (display "<?" out)
  ;; TODO: !!! error-check types in here.  Also, clean up the code to not do
  ;; redundant cdr and car.  Maybe with "match".
  (display (car (cdr thing)) out)
  (write-char #\space out)
  (display (car (cdr (cdr thing))) out)
  ;; TODO: Error-check that no more rest of PI.
  (display "?>" out))

;;; @defproc write-html-entity-ref ref [ out ]
;;;
;;; !!!

(define (write-html-entity-ref thing
                               (out (current-output-port)))
  (if (and (pair? thing)
           (eqv? #\& (car thing)))
      (%html-writing:write-html-entity-ref-args (cdr thing) out)
      (raise-invalid-xexp-error 'write-html-entity-ref
                                #:expected "entity reference"
                                #:invalid-xexp thing)))

(define (%html-writing:write-html-entity-ref-args args out)
  (let ((val (car args)))
    (if (symbol? val)
        (if (null? (cdr args))
            (begin (write-char #\& out)
                   (display    val out)
                   (write-char #\; out))
            (raise-invalid-xexp-error
             'write-html
             #:expected "entity reference args (invalid extra args!!!)"
             #:invalid-xexp args))
        (raise-invalid-xexp-error
         'write-html
         #:expected "entity reference args (non-symbol!!!)"
         #:invalid-xexp args))))

;;; @subsection Writing HTML

;;; @defproc write-html xexp [ out [ foreign-filter ] ]
;;;
;;; Writes a conventional HTML transliteration of the @i{xexp} @var{xexp} to
;;; output port @var{out}.  If @var{out} is not specified, the default is the
;;; current output port.  HTML elements of types that are always empty are
;;; written using HTML4-compatible XHTML tag syntax.
;;;
;;; If @var{foreign-filter} is specified, it is a procedure of two argument
;;; that is applied to any non-@i{xexp} (``foreign'') object encountered in
;;; @var{xexp}, and should yield @i{xexp}.  The first argument is the object,
;;; and the second argument is a symbol for the context.  The possible symbols
;;; are: !!!
;;;
;;; No inter-tag whitespace or line breaks not explicit in @var{xexp} is
;;; emitted.  The @var{xexp} should normally include a newline at the end of
;;; the document.  For example:
;;;
;;; @lisp
;;; (write-html
;;;  '((html (head (title "My Title"))
;;;          (body (@@ (bgcolor "white"))
;;;                (h1 "My Heading")
;;;                (p "This is a paragraph.")
;;;                (p "This is another paragraph.")))))
;;; @end lisp
;;;
;;; @noindent
;;; produces the output:
;;;
;;; @example
;;; <html><head><title>My Title</title></head><body bgcolor=white">
;;; <h1>My Heading</h1><p>This is a paragraph.</p><p>This is
;;; another paragraph.</p></body></html>
;;; @end example

(define (write-html/fixed xexp out foreign-filter)
  (letrec
      ((write-xexp-text-string
        (lambda (str out)
          (let ((len (string-length str)))
            (let loop ((i 0))
              (if (< i len)
                  (begin (display (let ((c (string-ref str i)))
                                    (case c
                                      ((#\&) "&amp;")
                                      ((#\<) "&lt;")
                                      ((#\>) "&gt;")
                                      (else c)))
                                  out)
                         (loop (+ 1 i)))
                  (void))))))
       (write-xexp-text-char
        (lambda (chr out)
          (case chr
            ((#\&) (display "&amp;" out))
            ((#\<) (display "&lt;"  out))
            ((#\>) (display "&gt;"  out))
            (else (display "&#"                out)
                  (display (char->integer chr) out)
                  (display ";"                 out)))))
       (do-thing
        (lambda (thing)
          (cond ((string? thing) (write-xexp-text-string thing out))
                ((char?   thing) (write-xexp-text-char   thing out))
                ((pair?   thing) (if (not (null? thing))
                                     (do-list-thing thing)
                                     (void)))
                (else (do-thing (foreign-filter 'content thing))))))
       (do-list-thing
        (lambda (thing)
          (let ((head (car thing)))
            (cond ((symbol? head)
                   ;; Head is a symbol, so...
                   (case head
                     ((*comment* *COMMENT*)
                      ;; TODO: Make sure the comment text doesn't contain
                      ;; a comment end sequence.
                      (display "<!-- " out)
                      (let ((text (car (cdr thing))))
                        (if (string? text)
                            ;; TODO: Enforce whitespace safety without
                            ;; padding unnecessarily.
                            ;;
                            ;; (let ((len (string-length text)))
                            ;; (if (= len 0)
                            ;; (write-char #\space out)
                            ;; (begin (if (not (eqv?
                            ;; (string-ref text 0)
                            ;; #\space))
                            (display text out)
                            (raise-invalid-xexp-error 'write-html
                                                      #:expected "comment text"
                                                      #:invalid-xexp thing)))
                      (or (null? (cdr (cdr thing)))
                          (raise-invalid-xexp-error 'write-html
                                                    #:expected "comment body"
                                                    #:invalid-xexp thing))
                      (display " -->" out))
                     ((*decl* *DECL*)
                      (write-html-decl/fixed thing out))
                     ((*pi* *PI*)
                      (write-html-pi/fixed thing out))
                     ((*top* *TOP*)
                      (for-each do-thing (cdr thing)))
                     ((@)
                      (raise-invalid-xexp-error
                       'write-html
                       #:expected "element position thing (not element attributes)"
                       #:invalid-xexp thing))
                     ((&)
                      (%html-writing:write-html-entity-ref-args (cdr thing)
                                                                out))
                     (else
                      ;; TODO: !!! error-check that it starts with an alpha?
                      (write-char #\< out)
                      (display head out)
                      (let* ((rest   (cdr thing)))
                        (or (null? rest)
                            (let ((second (car rest)))
                              (and (pair? second)
                                   (not (null? second))
                                   (eq? (car second) '@)
                                   (begin
                                     (%html-writing:write-html-attribute-list/fixed
                                      (cdr second)
                                      out
                                      foreign-filter)
                                     (set! rest (cdr rest))))))
                        (if (memq head always-empty-html-elements)
                            ;; TODO: Error-check to make sure the element
                            ;; has no content other than attributes.  We
                            ;; have to test for cases like: (br (@) ()
                            ;; (()))
                            (display " />" out)
                            (begin (write-char #\> out)
                                   (for-each do-thing rest)
                                   (display "</" out)
                                   (display (symbol->string head) out)
                                   (write-char #\> out)))))))
                  (else
                   ;; Head is NOT a symbol, list, or string, so error.
                   ;;
                   ;; Note: At one point for SXML we did this: (for-each
                   ;; do-thing thing)
                   (raise-invalid-xexp-error 'write-html
                                        #:expected "xexp list"
                                        #:invalid-xexp thing)))))))
    (or (null? xexp) (do-thing xexp))
    (void)))

(define (write-html
         xexp
         (out (current-output-port))
         (foreign-filter (current-html-writing-foreign-filter)))
  (write-html/fixed xexp out foreign-filter))

;;; @defproc xexp->html xexp
;;;
;;; Yields an HTML encoding of @i{xexp} @var{xexp} as a string.  For example:
;;;
;;; @lisp
;;; (xexp->html
;;;  (html->xexp
;;;   "<P>This is<br<b<I>bold </foo>italic</ b > text.</p>"))
;;; @result{} "<p>This is<br /><b><i>bold italic</i></b> text.</p>"
;;; @end lisp
;;;
;;; Note that, since this procedure constructs a string, it is normally best
;;; used when the HTML is small.  When encoding HTML documents of conventional
;;; size, @code{write-html} is likely more efficient.

(define (xexp->html xexp)
  (let ((os (open-output-string)))
    (write-html xexp os)
    (get-output-string os)))

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.1 --- 2011-08-21 -- PLaneT @code{(1 0)}
;;; Part of forked development from HtmlPrag, with substantial changes.
;;;
;;; @end table

(provide
 current-html-writing-foreign-filter
 error-html-writing-foreign-filter
 write-html
 write-html-decl/fixed
 write-html-pi/fixed
 write-html/fixed
 write-html-attribute
 write-html-attribute-list
 write-html-attribute-or-list
 write-html-attribute-value
 write-html-attribute-value-string
 write-html-entity-ref
 xexp->html)