html-template.rkt
#lang racket/base
;;; @Package  html-template
;;; @Subtitle HTML Writing Template Language based on SXML/xexp in Racket
;;; @HomePage http://www.neilvandyke.org/racket-html-template/
;;; @Author   Neil Van Dyke
;;; @Version  0.1
;;; @Date     2011-08-21
;;; @PLaneT   neil/html-template:1:=0

;; $Id: html-template.rkt,v 1.84 2011/08/22 05:01:18 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2011 Neil Van Dyke.  This program is Free Software;
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

;; TODO: !!! Add verbatim, literal cdata, plaintext, etc.

(require (for-syntax racket/base
                     (planet neil/html-writing:1:0)
                     (planet neil/xexp:1:0))
         (planet neil/html-writing:1:0))

;; TODO: Maybe add "%eval/verbatim" as a convenience for approx. "%eval `(*verbatim* ,...)".

;;; @section Introduction

;;; @i{Note: This package is in a state of active development, and some
;;; interface changes, perhaps not backward-compatible, are expected.
;;; Documentation is gravely lacking.}

;;; The @b{html-template} package implements an HTML-writing template language
;;; based on @uref{http://www.neilvandyke.org/racket-xexp/, SXML/@i{xexp}}.
;;; Compared to doing comparable work using the
;;; @uref{http://www.neilvandyke.org/racket-html-writing/, html-writing}
;;; package, @b{html-template} offers improved compile-time error-checking and
;;; improved run-time efficiency.

;;; !!! For example:
;;;
;;; @lisp
;;; (define (write-essay my-title)
;;;   (html-template
;;;    (html (head (title (%eval my-title)))
;;;          (body (h1 (%eval my-title))
;;;                (p "Kittens claw."
;;;                   (br)
;;;                   "Puppies pee.")))))
;;;
;;; (write-essay "All About Kittens & Puppies")
;;; @end lisp
;;;
;;; produces the output:
;;;
;;; @example
;;; <html><head><title>All About Kittens &amp;
;;; Puppies</title></head><body><h1>All About
;;; Kittens &amp; Puppies</h1><p>Kittens
;;; claw.<br />Puppies pee.</p></body></html>
;;; @end example
;;;
;;; !!! Expanding the @code{html-template} macro in this case results in
;;; something like:
;;;
;;; @lisp
;;; (define (write-essay my-title)
;;;   (display "<html><head><title>")
;;;   (write-html my-title)
;;;   (display "</title></head><body><h1>")
;;;   (write-html my-title)
;;;   (display
;;;    "</h1><p>Kittens claw.<br />Puppies pee.</p></body></html>"))
;;; @end lisp
;;;
;;; Note that much of the computation for HTML formatting is done at compile
;;; time rather than at run time.
;;;
;;; !!!

;; TODO: !!! Only *maybe* suggest these.
;;
;; (put 'html-template              'scheme-indent-function 0)
;; (put 'html-template/port         'scheme-indent-function 1)
;; (put 'html-template-debug-expand 'scheme-indent-function 0)
;; (put 'html-template-to-string    'scheme-indent-function 0)

;;; @section Interface

;;; !!!

;; TODO: Have this regular expression permit XML qualifiers, including URIs?
;;
(define-for-syntax %html-template:html-element-name-rx
  (regexp "^[A-Za-z][A-Za-z0-9]*$"))

(define-for-syntax (%html-template:transform error-name entire-stx)
  (let ((literal #f)
        (result  '()))
    (letrec
        (

         ;; Result Collection:

         (start-literal
          (lambda ()
            (or literal
                (set! literal (open-output-string)))))
         (finish-literal
          (lambda ()
            (and literal
                 (let ((literal-copy literal))
                   (set! literal #f)
                   (add-form
                    (datum->syntax
                     entire-stx
                     `(,(quote-syntax display)
                       ,(begin0 (get-output-string literal-copy)
                          (close-output-port literal-copy)))))))))
         (add-form
          (lambda (form)
            (finish-literal)
            (set! result (cons form result))))
         (str-or-stx->string
          (lambda (x)
            (cond ((string? x) x)
                  ((and (syntax? x) (string? (syntax-e x))) (syntax-e x))
                  (else
                   (error error-name
                          "INTERNAL: invalid in str-or-stx->string: ~S"
                          x)))))
         (add-strings/noescape
          (lambda args
            (start-literal)
            (for-each (lambda (x)
                        (display (str-or-stx->string x) literal))
                      args)))
         (add-strings/escape
          (lambda args
            (start-literal)
            (for-each (lambda (x)
                        (write-html/fixed (str-or-stx->string x) literal #f))
                      args)))
         (add-strings/dquote-escape
          (lambda args
            (start-literal)
            (for-each (lambda (x)
                        (write-html-attribute-value-string
                         (str-or-stx->string x)
                         literal))
                      args)))
         (final-result
          (lambda ()
            (finish-literal)
            (reverse result)))

         ;; Utility:

         (lst-arity-1-val-stx
          (lambda (lst-stx rest)
            (and (null? rest)
                 (raise-syntax-error error-name
                                     "expected 1 argument, got 0"
                                     lst-stx))
            (or (null? (cdr rest))
                (raise-syntax-error error-name
                                    "expected 1 argument, got more"
                                    (if (pair? (cdr rest))
                                        (car (cdr rest))
                                        (cdr rest))))
            (car rest)))

         (lst-assert-arity-1+
          (lambda (lst-stx rest)
            (and (null? rest)
                 (raise-syntax-error error-name
                                     "expected 1 or more arguments, got 0"
                                     lst-stx))))

         (obviously-confused-with-name
          (lambda (stx str what)
            (cond ((equal? str "&")
                   (raise-syntax-error error-name
                                       (string-append
                                        "invalid HTML "
                                        what
                                        " (entity not valid here)")
                                       stx))
                  ((equal? str "@")
                   (raise-syntax-error error-name
                                       (string-append
                                        "invalid HTML "
                                        what
                                        " (attributes list not valid here)")
                                       stx))
                  ((and (> (string-length str) 0)
                        (eqv? #\% (string-ref str 0)))
                   (raise-syntax-error error-name
                                       (string-append
                                        "invalid HTML "
                                        what
                                        " (misspelled special form?)")
                                       stx))
                  (else #f))))

         (assert-valid-attr-name
          (let ((rx (regexp "^[A-Za-z][A-Za-z0-9]*$")))
            (lambda (stx str)
              (or (regexp-match-positions rx str)
                  (obviously-confused-with-name stx str "attribute name")
                  (raise-syntax-error error-name
                                      "invalid HTML attribute name"
                                      stx)))))

         ;; Processing Content:

         (do-content-sequence
          (lambda (lst-stx)
            (let ((lst (if (syntax? lst-stx) (syntax-e lst-stx) lst-stx)))
              (or (pair? lst)
                  (error error-name
                         "INTERNAL: lst not list in do-content-sequence: lst=~S"
                         lst))
              (for-each do-thing-in-content lst))))

         (do-thing-in-content
          (lambda (thing-stx)
            (let ((thing (syntax-e thing-stx)))
              (cond ((string? thing) (do-string-in-content thing-stx))
                    ((null?   thing)
                     (raise-syntax-error
                      error-name
                      "empty list is invalid in HTML element content"
                      thing-stx))
                    ((pair?   thing) (do-list-in-content thing-stx))
                    ((symbol? thing)
                     (raise-syntax-error
                      error-name
                      "symbol is invalid in HTML element content (missing parentheses around it?)"
                      thing-stx))
                    (else (raise-syntax-error
                           error-name
                           "invalid object in HTML element content"
                           thing-stx))))))

         (do-string-in-content
          (lambda (str-stx)
            (add-strings/escape (syntax-e str-stx))))

         (do-list-in-content
          (lambda (lst-stx)
            (let* ((lst      (syntax-e lst-stx))
                   (head-stx (car lst))
                   (head     (syntax-e head-stx)))
              (cond
               ((symbol? head)
                ;; Head of the list *is* a symbol.
                (case head

                  ((&)
                   (let* ((val-stx (lst-arity-1-val-stx lst-stx (cdr lst)))
                          (val     (syntax-e val-stx)))
                     (add-strings/noescape "&")
                     (cond ((symbol? val)
                            (let ((name (symbol->string val)))
                              ;; TODO: Verify it's valid entity name.
                              (add-strings/noescape name)))
                           ((and (integer? val) (exact? val) (>= val 0))
                            (add-strings/noescape "#" (number->string val)))
                           (else
                            (raise-syntax-error error-name
                                                "invalid HTML entity value"
                                                val-stx)))
                     (add-strings/noescape ";")))

                  ((%eval)
                   (let ((exp-stx (lst-arity-1-val-stx lst-stx (cdr lst))))
                     (add-form (datum->syntax
                                exp-stx
                                `(,(quote-syntax write-html)
                                  ,exp-stx)))))

                  ((%verbatim) (do-%verbatim lst-stx))

                  ((%eval/effects-only) (do-%eval/effects-only lst-stx))

                  ((*decl* *DECL*)
                   (start-literal)
                   (write-html-decl/fixed (syntax->datum lst-stx) literal))

                  ((*pi* *PI*)
                   (start-literal)
                   (write-html-pi/fixed (syntax->datum lst-stx) literal))

                  (else
                   ;; "lst" is an HTML element or an invalid % form, so ...
                   (let ((elem-name-str (symbol->string head)))
                     (cond
                      ((regexp-match-positions
                        %html-template:html-element-name-rx
                        elem-name-str)
                       (add-strings/noescape "<" elem-name-str)
                       (let ((content
                              (let ((rest (cdr lst)))
                                (if (null? rest)
                                    ;; Element has neither attributes nor
                                    ;; content.
                                    rest
                                    ;; Element has attributes and/or content.
                                    (let* ((first-stx (car rest))
                                           (first     (syntax-e first-stx)))
                                      (if (and (pair? first)
                                               (eq? (syntax-e (car first)) '@))
                                          ;; Attributes, so process them and
                                          ;; then return sublist after them.
                                          (let ((attrs (cdr first)))
                                            (and (null? attrs)
                                                 (raise-syntax-error
                                                  error-name
                                                  "empty HTML attribute list"
                                                  first-stx))
                                            (do-attribute-sequence attrs)
                                            (cdr rest))
                                          ;; No attributes, so just return
                                          ;; "rest"
                                          rest))))))
                         (if (memq head always-empty-html-elements)
                             ;; It's an HTML element type is always be empty.
                             (begin
                               (or (null? content)
                                   (raise-syntax-error
                                    error-name
                                    "this HTML element cannot have content"
                                    lst-stx))
                               (add-strings/noescape " />"))
                             ;; It's an HTML element type is *not* always-empty.
                             (begin
                               (add-strings/noescape ">")
                               (or (null? content)
                                   (do-content-sequence content))
                               (add-strings/noescape "</"
                                                     elem-name-str
                                                     ">")))))
                      (else
                       (or (obviously-confused-with-name
                            head-stx elem-name-str
                            "element name")
                           (raise-syntax-error error-name
                                               "invalid HTML element name"
                                               head-stx))))))))
               (else
                ;; Head of the list is *not* a symbol.
                (raise-syntax-error
                 error-name
                 "invalid head of list syntax in HTML element content"
                 lst-stx
                 head-stx))))))

         ;; Processing Attributes:

         (do-attribute-sequence
          (lambda (lst-stx)
            (let ((lst (if (syntax? lst-stx) (syntax-e lst-stx) lst-stx)))
              (or (pair? lst)
                  (error
                   error-name
                   "INTERNAL: lst not list in do-attribute-sequence: lst=~S"
                   lst))
              (for-each do-thing-in-attributes lst))))

         (do-thing-in-attributes
          (lambda (thing-stx)
            (let ((thing (syntax-e thing-stx)))
              (cond ((pair?   thing) (do-list-in-attributes thing-stx))
                    ((symbol? thing)
                     (raise-syntax-error
                      error-name
                      "symbol is invalid here (missing parentheses around it?)"
                      thing-stx))
                    (else (raise-syntax-error error-name
                                              "invalid object in HTML attribute"
                                              thing-stx))))))

         (do-list-in-attributes
          (lambda (lst-stx)
            (let* ((lst      (syntax-e lst-stx))
                   (head-stx (car lst))
                   (head     (syntax-e head-stx)))
              (cond
               ((symbol? head)
                ;; Head of the list *is* a symbol.
                (case head
                  ((@ &)
                   (raise-syntax-error
                    error-name
                    "invalid inside attributes list"
                    lst-stx))
                  ((%eval/effects-only)
                   ;; (add-strings/noescape " ")
                   (do-%eval/effects-only lst-stx))
                  ((%verbatim)
                   (add-strings/noescape " ")
                   (do-%verbatim lst-stx))
                  ((%eval)
                   (let ((exp-stx (lst-arity-1-val-stx lst-stx (cdr lst))))
                     (add-form (datum->syntax
                                exp-stx
                                `(,(quote-syntax write-html-attribute-or-list)
                                  ,exp-stx)))))
                  (else
                   ;; "lst" is an attribute, so ...
                   (let ((attr-name-str (symbol->string head)))
                     (assert-valid-attr-name head-stx attr-name-str)
                     (add-strings/noescape " " attr-name-str)
                     (let ((rest (cdr lst)))
                       (cond
                        ((null? rest)
                         ;; No value, which is the same as #t value, so
                         ;; we're done.
                         #f)
                        ((not (null? (cdr rest)))
                         (raise-syntax-error error-name
                                             "expected 0 or 1 arguments"
                                             lst-stx))
                        (else
                         (let* ((val-stx (car rest))
                                (val     (syntax-e val-stx)))
                           (cond
                            ((eqv? #t val)
                             ;; No value, so we're done.
                             #f)
                            ((begin (add-strings/noescape "=") #f) #f)
                            ((string? val)
                             (add-strings/noescape "\"")
                             (add-strings/dquote-escape val)
                             (add-strings/noescape "\""))
                            ((pair? val)
                             (let* ((val-head-stx (car val))
                                    (val-head     (syntax-e val-head-stx)))
                               (case val-head
                                 ((@ &)
                                  (raise-syntax-error
                                   error-name
                                   "invalid for HTML attribute value"
                                   val-stx))
                                 ((%eval/effects-only) (do-%eval/effects-only val-stx))
                                 ((%eval)
                                  ;; TODO: !!! Do something specific to
                                  ;; attribute value, not generic HTML.
                                  (let ((exp-stx (lst-arity-1-val-stx
                                                  val-stx (cdr val))))
                                    (add-form (datum->syntax
                                               exp-stx
                                               `(,(quote-syntax write-html)
                                                 ,exp-stx)))))
                                 ((%verbatim) (do-%verbatim val-stx))
                                 (else
                                  (raise-syntax-error
                                   error-name
                                   "expected HTML attribute value"
                                   val-stx)))))
                            (else
                             (raise-syntax-error error-name
                                                 "expected HTML attribute value"
                                                 val-stx)))))))))))))))

         ;; Generic Special Forms:

         (do-%eval/effects-only
          (lambda (lst-stx)
            (let ((lst (syntax-e lst-stx)))
              (lst-assert-arity-1+ lst-stx (cdr lst))
              (for-each (lambda (x-stx)
                          (let ((x (syntax-e x-stx)))
                            (or (pair? x)
                                (raise-syntax-error
                                 error-name
                                 "literals in %eval/effects-only have no effect"
                                 lst-stx
                                 x-stx))
                            (add-form x-stx)))
                        (cdr lst)))))

         (do-%verbatim
          (lambda (lst-stx)
            (let ((lst (syntax-e lst-stx)))
              (lst-assert-arity-1+ lst-stx (cdr lst))
              (for-each (lambda (x-stx)
                          (let ((x (syntax-e x-stx)))
                            (or (string? x)
                                (raise-syntax-error
                                 error-name
                                 "expected string"
                                 lst-stx
                                 x-stx))
                            (add-strings/noescape x)))
                        (cdr lst))))))

      (do-content-sequence entire-stx)
      (datum->syntax entire-stx `(,(quote-syntax begin)
                                  ,@(final-result)
                                  (,(quote-syntax void)))))))

;; Note: For getting errortrace info for debugging
;; "%html-template:transform" itself, temporarily change it
;; from "define-for-syntax" to "define", and call it like:
;;
;; (syntax->datum
;;  (%html-template:transform
;;   'foo
;;   (datum->syntax #f '((html (body (p "test")))))))

;;; @defproc html-template xexp ...
;;;
;;; !!!

(define-syntax (html-template stx)
  (syntax-case stx ()
    ((html-template X ...)
     ;; TODO: Think we shouldn't have to do this "quasisyntax/loc" here.
     ;; Probably doing something wrong with how syntax object is constructed by
     ;; "%html-template:transform".
     (quasisyntax/loc stx
       #,(%html-template:transform 'html-template
                                   (syntax (X ...)))))))

;;; @defproc html-template/port port xexp ...
;;;
;;; Behaves as @code{html-template}, with the addition that the
;;; @code{current-output-port} parameter set to @var{port} by
;;; @code{parameterize} for the duration, including for run-time evaluations as
;;; a result of @code{%eval/effects-only} and @code{%eval} forms.

(define-syntax (html-template/port stx)
  (syntax-case stx ()
    ((html-template-to-string PORT X ...)
     (let ((transform-stx (%html-template:transform 'html-template/port
                                                    (syntax (X ...)))))
       (quasisyntax/loc stx
         (parameterize ((current-output-port PORT))
           #,transform-stx))))))

;;; @defproc html-template-to-string xexp ...
;;;
;;; !!!

(define-syntax (html-template-to-string stx)
  (syntax-case stx ()
    ((html-template-to-string X ...)
     (let ((transform-stx (%html-template:transform 'html-template-to-string
                                                    (syntax (X ...)))))
       (quasisyntax/loc stx
         (let ((os (open-output-string)))
           (parameterize ((current-output-port os))
             #,transform-stx
             (begin0 (get-output-string os)
               (close-output-port os)))))))))

;;; @section Debugging

;;; @defproc html-template-debug-expand xexp ...
;;;
;;; !!!

(define-syntax (html-template-debug-expand stx)
  (syntax-case stx ()
    ((html-template X ...)
     (let ((dump (syntax->datum
                  (%html-template:transform 'html-template-debug-expand
                                            (syntax (X ...))))))
       (quasisyntax/loc stx (quote #,dump))))))

;;; @section History

;;; @table @asis
;;;
;;; @item Version 0.1 --- 2011-08-21 -- PLaneT @code{(1 0)}
;;;
;;; @end table

(provide
 html-template
 html-template-debug-expand
 html-template-to-string
 html-template/port)