html-writing.rkt
#lang racket/base
;; For legal info, see file "info.rkt".

(require (planet neil/mcfly)
         "planet-neil-xexp.rkt")

;; TODO: !!! CHANGE TO USE BYTES?  UTF-8.

;; TODO: !!! CHANGING TO REMOVE ALL FOREIGN-FILTER ARGUMENTS.

;; TODO: !!! MAYBE MAKE ALL OUTPUT-PORT ARGUMENTS MANDATORY, UNLESS PROFILING
;; SAYS NEGLIGIBLE COST.  2:0 IS THE TIME TO MAKE THEM MANDATORY,
;; API-CHANGE-WISE.

;; TODO: !!! FINISH RE-ADDING OLD SXML NESTED LIST SUPPORT.  INCLUDING CHECKING
;; EVERYWHERE WE TEST FOR PAIR? AND CHECKING FOR NULL? TOO.

;; TODO: !!! HAVE AN ITERATOR/TRAVERSE/WALK/FOLD LIBRARY, AND USE THAT?

;; TODO: !!! PASS UNRECOGNIZED LISTS WITH "*" AS FIRST CHAR OF SYMBOL TO
;; FOREIGN FILTER.  THIS CAN BE COMBINED WITH ERROR-CHECKING THAT THEY BEGIN
;; WITH ALPHA.

;; TODO: !!! Decide which "" procs to expose, based on what
;; "html-template" needs, and if it makes a performance difference.  Maybe
;; rename them to end with "*".

;; TODO: Add "*VERBATIM*".  Or require that it be done with foreign filters.

;; TODO: What about character encoding and bytes?  Have the port determine
;; encoding?

(doc (section "Introduction")

     (para (italic "Warning: State of flux.  Documentation under
construction."))
     
     (para "The "
           (bold "html-writing")
           " package provides support for writing HTML encoded as "
           (hyperlink "http://www.neilvandyke.org/racket-xexp/" "SXML/xexp")
           " as HTML.")

     (para "This can be used for hand-constructed HTML, HTML constructed by
            program, or emitting HTML that has been read via the "
           (hyperlink "http://www.neilvandyke.org/racket-html-parsing/"
                      "html-parsing")
           " package.")

     (para "For a complementary way of writing HTML from SXML/xexp, see the "
           (hyperlink "http://www.neilvandyke.org/html-template-scheme/"
                      "html-template")
           " package."))

(doc (section "Foreign Filters"))

(provide error-html-writing-foreign-filter)
(define (error-html-writing-foreign-filter context object)
  (raise-invalid-xexp-exn
   'error-html-writing-foreign-filter
   #:expected (string-append "valid foreign object in "
                             (case context
                               ((content)         "content")
                               ((attribute)       "attribute")
                               ((attribute-value) "attribute-value")
                               (else (format "~S (internal error)" context)))
                             " context")
   #:invalid-xexp object))

(doc (defparam current-html-writing-foreign-filter ff !!!
       "!!!"))
(provide current-html-writing-foreign-filter)
(define current-html-writing-foreign-filter
  (make-parameter error-html-writing-foreign-filter))

(doc (section "Writing Procedures")

     (para "!!! The two most common procedures in "
           (bold "html-writing")
           " for writing HTML from an SXML/xexp representation are "
           (racket write-html)
           " and "
           (racket xexp->html)
           ".  These are perhaps most useful for emitting the result
of parsed and transformed input HTML.  They can also be used for emitting
HTML from generated or handwritten SXML/xexp."))

(doc (subsection "Writing Attributes"))

(define (%write-html-attribute-value-char chr out)
  (case chr
    ((#\") (display """ out))
    ((#\<) (display "&#60;" out))
    ((#\>) (display "&#62;" out))
    ((#\&) (display "&#38;" out))
    (else (let ((n (char->integer chr)))
            (if (or (< n 32) (= n 127))
                (fprintf out "&#~A;" n)
                (display chr out))))))

(provide write-html-attribute-value-part-string)
(define (write-html-attribute-value-part-string str out)
  (let ((len (string-length str)))
    (let loop ((i 0))
      (if (< i len)
          (begin (%write-html-attribute-value-char (string-ref str i) out)
                 (loop (+ 1 i)))
          (void)))))

(provide write-html-attribute-value-part)
(define (write-html-attribute-value-part thing out)
  (cond ((string? thing)
         (write-html-attribute-value-part-string thing out))
        ((char? thing)
         (%write-html-attribute-value-char thing out))
        ((pair? thing)
         (case (car thing)
           ((&)
            (%html-writing:write-html-entity-ref-args (cdr thing) out))
           (else (%html-writing:write-html-attribute-value-part-list thing out))))
        ((null? thing)
         (void))
        (else
         (let ((filtered ((current-html-writing-foreign-filter) 'attribute-value thing)))
           (if (null? filtered)
               (void)
               (write-html-attribute-value-part filtered
                                                out))))))

(provide write-html-attribute-value)
(define (write-html-attribute-value val out)
  ;; TODO: !!! we currently write the "=" in some cases in which there is no
  ;; value, such as if a foreign filter (or chain of foreign filters yields a
  ;; null.  Find an efficient way to test for that case (such as if there is
  ;; only foreign objects, one or more, and we recursively evaluate them).
  (or (null? val)
      (begin
        (display "=\"" out)
        (cond ((string? val)
               (write-html-attribute-value-part-string val out)
               (write-char #\" out))
              ((pair? val)
               (%html-writing:write-html-attribute-value-part-list val out)
               (write-char #\" out))
              (else (raise-type-error 'write-html-attribute-value
                                      "string or list"
                                      0
                                      (list val out)))))))

(define (%html-writing:write-html-attribute-value-part-list part-list out)
  (for-each (lambda (thing)
              (write-html-attribute-value-part thing
                                                      out))
            part-list))

(provide write-html-attribute)
(define (write-html-attribute attr out)
  ;; TODO: !!! Do we even want this procedure to be exposed?
  (cond ((pair? attr)
         (let ((name (car attr)))
           (or (symbol? name)
               (raise-invalid-xexp-exn 'write-html-attribute
                                       #:expected "attribute name"
                                       #:invalid-xexp attr))
           ;; TODO: !!! how can name ever be "@"?  probably delete this test,
           ;; and instead error-check for valid attribute name (starts with
           ;; alpha)
           (or (eq? name '@)
               (begin (write-char #\space out)
                      (display    name    out)
                      (let ((val (cdr attr)))
                        ;; TODO: !!! this is an imperfect way to test for
                        ;; boolean attribute with default value, such as if
                        ;; foreign filters are involved (and also if we add
                        ;; back in metadata).
                        (if (null? val)
                            (begin (display    "=\"" out)
                                   (display    name  out)
                                   (write-char #\"   out))
                            (write-html-attribute-value val out)))))))
        ((null? attr) (void))
        (else
         (let ((v ((current-html-writing-foreign-filter) 'attributes attr)))
           (cond ((null? v) (void))
                 (else
                  (write-html-attribute v out)))))))

(provide write-html-attribute-list)
(define (write-html-attribute-list attr-list out)
  (for-each (lambda (attr)
              (write-html-attributes attr out))
            attr-list))

;; TODO: !!! Maybe make foreign-filter not be an argument to writing
;; procedures!  We'll keep looking it up if we have to.

;; TODO: Rename write-html-attributes to write-html-attributes?

(provide write-html-attributes)
(define (write-html-attributes attr-or-list out)
  (cond
   ((pair? attr-or-list)
    (if (symbol? (car attr-or-list))
        (write-html-attribute      attr-or-list out)
        (write-html-attribute-list attr-or-list out)))
   ((null? attr-or-list)
    (void))
   (else
    (write-html-attributes
     ((current-html-writing-foreign-filter) 'attributes attr-or-list)
     out))))

(doc (subsection "Writing Other"))

(provide write-html-decl)
(define (write-html-decl thing out)
  (or (memq (car thing) '(*decl* *DECL*))
      (raise-invalid-xexp-exn 'write-html-decl
                                #:expected "DECL"
                                #:invalid-xexp thing))
  (let ((head (car (cdr thing))))
    (display "<!" out)
    (display (symbol->string head) out)
    (for-each
     (lambda (n)
       (cond ((symbol? n)
              (write-char #\space out)
              (display (symbol->string n) out))
             ((string? n)
              (display " \"" out)
              (write-html-attribute-value-part-string n out)
              (write-char #\" out))
             (else (raise-invalid-xexp-exn 'write-html-decl
                                             #:expected "DECL"
                                             #:invalid-xexp thing))))
     (cdr (cdr thing)))
    (write-char #\> out)))

(provide write-html-pi)
(define (write-html-pi thing out)
  (or (memq (car thing) '(*pi* *PI*))
      (raise-invalid-xexp-exn 'write-html-pi
                                #:expected "PI"
                                #:invalid-xexp thing))
  (display "<?" out)
  ;; TODO: !!! error-check types in here.  Also, clean up the code to not do
  ;; redundant cdr and car.  Maybe with "match".
  (display (car (cdr thing)) out)
  (write-char #\space out)
  (display (car (cdr (cdr thing))) out)
  ;; TODO: Error-check that no more rest of PI.
  (display "?>" out))

(provide write-html-entity-ref)
(define (write-html-entity-ref thing out)
  (if (and (pair? thing)
           (eqv? #\& (car thing)))
      (%html-writing:write-html-entity-ref-args (cdr thing) out)
      (raise-invalid-xexp-exn 'write-html-entity-ref
                                #:expected "entity reference"
                                #:invalid-xexp thing)))

(define (%html-writing:write-html-entity-ref-args args out)
  (let ((val (car args)))
    (if (symbol? val)
        (if (null? (cdr args))
            (begin (write-char #\& out)
                   (display    val out)
                   (write-char #\; out))
            (raise-invalid-xexp-exn
             'write-html
             #:expected "entity reference args (invalid extra args!!!)"
             #:invalid-xexp args))
        (raise-invalid-xexp-exn
         'write-html
         #:expected "entity reference args (non-symbol!!!)"
         #:invalid-xexp args))))

(doc (subsection "Writing HTML"))

(doc (defproc (write-html (xexp xexp?) (out output-port?)) void?
       
       ;; TODO: !!! don't say transliteration, or clarify that we have escapes and such      
       (para "Writes a conventional HTML transliteration of the SXML/xexp "
             (racket xexp)
             " to
output port "
             (racket out)
             ".  If "
             (racket out)
             " is not specified, the default is the
current output port.  HTML elements of types that are always empty are
written using HTML4-compatible XHTML tag syntax.")

       (para "!!! If "
             (racket foreign-filter) ;; TODO: !!! UPDATE THIS DOCUMENTATION FOR NO FOREIGN-FILTER ARGUMENT.
             " is specified, it is a procedure of two argument
that is applied to any non-SXML/xexp (``foreign'') object encountered in "
             (racket xexp)
             ", and should yield SXML/xexp.  The first argument is the object,
and the second argument is a symbol for the context.  The possible symbols
are: !!!")

       (para "No inter-tag whitespace or line breaks not explicit in "
             (racket xexp)
             " is emitted.  The "
             (racket xexp)
             " should normally include a newline at the end of
the document.  For example:")

       (racketblock
        (write-html
         '((html (head (title "My Title"))
                 (body (@ (bgcolor "white"))
                       (h1 "My Heading")
                       (p "This is a paragraph.")
                       (p "This is another paragraph."))))
         (current-output-port)))

       (para "produces the output:")

       (nested #:style 'inset
               (racketoutput
                "<html><head><title>My Title</title></head><body bgcolor=\"white\"><h1>My Heading</h1><p>This is a paragraph.</p><p>This isanother paragraph.</p></body></html>"))))
(provide write-html)
(define (write-html xexp out)
  (letrec
      ((write-xexp-text-string
        (lambda (str out)
          (let ((len (string-length str)))
            (let loop ((i 0))
              (if (< i len)
                  (begin (display (let ((c (string-ref str i)))
                                    (case c
                                      ((#\&) "&amp;")
                                      ((#\<) "&lt;")
                                      ((#\>) "&gt;")
                                      (else c)))
                                  out)
                         (loop (+ 1 i)))
                  (void))))))
       (write-xexp-text-char
        (lambda (chr out)
          (case chr
            ((#\&) (display "&amp;" out))
            ((#\<) (display "&lt;"  out))
            ((#\>) (display "&gt;"  out))
            (else (display "&#"                out)
                  (display (char->integer chr) out)
                  (display ";"                 out)))))
       (do-thing
        (lambda (thing)
          (cond ((string? thing) (write-xexp-text-string thing out))
                ((char?   thing) (write-xexp-text-char   thing out))
                ((pair?   thing) (if (not (null? thing))
                                     (do-list-thing thing)
                                     (void)))
                (else (do-thing ((current-html-writing-foreign-filter) 'content thing))))))
       (do-list-thing
        (lambda (thing)
          (let ((head (car thing)))
            (cond ((symbol? head)
                   ;; Head is a symbol, so...
                   (case head
                     ((*comment* *COMMENT*)
                      ;; TODO: Make sure the comment text doesn't contain
                      ;; a comment end sequence.
                      (display "<!-- " out)
                      (let ((text (car (cdr thing))))
                        (if (string? text)
                            ;; TODO: Enforce whitespace safety without
                            ;; padding unnecessarily.
                            ;;
                            ;; (let ((len (string-length text)))
                            ;; (if (= len 0)
                            ;; (write-char #\space out)
                            ;; (begin (if (not (eqv?
                            ;; (string-ref text 0)
                            ;; #\space))
                            (display text out)
                            (raise-invalid-xexp-exn 'write-html
                                                      #:expected "comment text"
                                                      #:invalid-xexp thing)))
                      (or (null? (cdr (cdr thing)))
                          (raise-invalid-xexp-exn 'write-html
                                                    #:expected "comment body"
                                                    #:invalid-xexp thing))
                      (display " -->" out))
                     ((*decl* *DECL*)
                      (write-html-decl thing out))
                     ((*pi* *PI*)
                      (write-html-pi thing out))
                     ((*top* *TOP*)
                      (for-each do-thing (cdr thing)))
                     ((@)
                      (raise-invalid-xexp-exn
                       'write-html
                       #:expected "element position thing (not element attributes)"
                       #:invalid-xexp thing))
                     ((&)
                      (%html-writing:write-html-entity-ref-args (cdr thing)
                                                                out))
                     (else
                      ;; TODO: !!! error-check that it starts with an alpha?
                      (write-char #\< out)
                      (display head out)
                      (let* ((rest   (cdr thing)))
                        (or (null? rest)
                            (let ((second (car rest)))
                              (and (pair? second)
                                   (not (null? second))
                                   (eq? (car second) '@)
                                   (begin
                                     (write-html-attribute-list
                                      (cdr second)
                                      out)
                                     (set! rest (cdr rest))))))
                        (if (memq head always-empty-html-elements)
                            ;; TODO: Error-check to make sure the element
                            ;; has no content other than attributes.  We
                            ;; have to test for cases like: (br (@) ()
                            ;; (()))
                            (display ">" out)
                            (begin (write-char #\> out)
                                   (for-each do-thing rest)
                                   (display "</" out)
                                   (display (symbol->string head) out)
                                   (write-char #\> out)))))))
                  (else
                   ;; Head is NOT a symbol.
                   (for-each do-thing thing)))))))
    (or (null? xexp) (do-thing xexp))
    (void)))

(doc (defproc (xexp->html (xexp xexp?)) string?

       (para "Yields an HTML encoding of SXML/xexp "
             (racket xexp)
             " as a string.  For example:")

       (racketinput
        (xexp->html
         (html->xexp
          "<P>This is<br<b<I>bold </foo>italic</ b > text.</p>"))
        #,(racketresult "<p>This is<br><b><i>bold italic</i></b> text.</p>"))

       (para "Note that, since this procedure constructs a string, it is normally best
used when the HTML is small.  When encoding HTML documents of conventional
size, "
             (racket write-html)
             " is likely more efficient.")))

(provide xexp->html)
(define (xexp->html xexp)
  (let ((os (open-output-string)))
    (write-html xexp os)
    (get-output-string os)))

(provide html->bytes)
(define (html->bytes xexp)
  (let ((ob (open-output-bytes)))
    (write-html xexp ob)
    (get-output-bytes ob)))

(provide html-attribute-value->bytes)
(define (html-attribute-value->bytes xexp)
  (let ((ob (open-output-bytes)))
    (write-html-attribute-value xexp ob)
    (get-output-bytes ob)))

(doc history

     (#:planet 2:0 #:date "2012-06-12"
               (itemlist
                (item "Heavy API and implementation changes (although the
                       previous version was not really documented), including
                       the following.")
                (item "All "
                      (racket out)
                      " arguments are now mandatory rather than optional.")
                (item "All "
                      (racket foreign-filter)
                      " arguments have been removed.")
                (item "Foreign filter context value symbol renamed from "
                      (racket 'attribute)
                      " (singular) to "
                      (racket 'attributes)
                      " (plural).")
                (item "The suffix "
                      (code "/fixed")
                      " has been removed from all identifiers, since all
                       procedures now have fixed arguments.")
                (item (racket write-html-attribute-or-list)
                      " is renamed to "
                      (racket write-html-attributes)
                      ".")
                (item (racket write-html-attribute-value-string)
                      " is renamed to "
                      (racket write-html-attribute-value-part-string)
                      ".")
                (item "Added "
                      (racket html->bytes)
                      " and "
                      (racket html-attribute-value->bytes)
                      ".")
                (item "We no longer do backward-compatible XHTML empty element
                       terminators like the string "
                      (racket " />")
                      "; now they're just "
                      (racket ">")
                      ".")
                (item "In attribute values, some additional characters are now
                       written as numeric character references: ASCII 0 through
                       31, and 127.")
                (item "Got rid of the "
                      (code "*splice*")
                      " form that we have experimentally added, and
philosophically switched back to SXML's arbitrarily nested lists for splicing
with generally less allocation.")
                (item "Restored some of the handling of unnecessary list
                       nesting in SXML/xexp, which had been removed after
                       forking from HtmlPrag and a brief experiment with "
                      (code "*splice*")
                      " when trying to unify SXML and PLT xexprs.")
                (item "Converted to McFly and Overeasy.")))

     (#:version "0.1" #:planet 1:0 #:date "2011-08-21"
                "Part of forked development from HtmlPrag, with substantial changes."))