html-template-parse.rkt
#lang racket/base
;; For legal info, see file "info.rkt".

(require (for-syntax racket/base
                     "planet-neil-html-writing.rkt")
         (for-template racket/base
                       "planet-neil-html-writing.rkt"
                       "planet-neil-xexp.rkt")
         "planet-neil-html-writing.rkt"
         "planet-neil-xexp.rkt")

;; TODO: Have this regular expression permit XML qualifiers, including URIs?

;; TODO: !!! make all datum->syntax have a srcloc

(define %html-template:html-element-name-rx
  (regexp "^[A-Za-z][A-Za-z0-9]*$"))

(provide parse-html-template)
(define (parse-html-template error-name entire-stx)
  (let ((literal #f)
        (result  '()))
    (letrec
        (

         ;; TODO: !!! FINISH CHANGING THESE "Result Collection" THINGS SO THAT WE CAN
         ;; PRODUCE A LIST OF BYTES AND BYTES-PRODUCING FORMS WHEN "bytes-raw?" IS TRUE.
         ;; MAKE USE things like html->bytes instead of write-html when bytes-raw?

         ;; Result Collection:

         (start-literal
          (lambda ()
            (or literal
                (set! literal (open-output-bytes)))))
         (finish-literal
          (lambda ()
            (and literal
                 (let ((literal-copy literal))
                   (set! literal #f)
                   (add-form
                    (get-output-bytes literal-copy))))))
         (add-form
          (lambda (form)
            (finish-literal)
            (set! result (cons form result))))
         
         ;;         (str-or-stx->string
         ;;          ;; TODO: !!! THIS SHOULD BE BYTES INSTEAD OF STRING
         ;;          (lambda (x)
         ;;            (cond ((string? x) x)
         ;;                  ((and (syntax? x) (string? (syntax-e x))) (syntax-e x))
         ;;                  (else
         ;;                   (error error-name
         ;;                          "INTERNAL: invalid in str-or-stx->string: ~S"
         ;;                          x)))))
         
         (bytes-or-string-or-stx->bytes
          (lambda (x)
            (or (cond ((bytes?  x) x)
                      ((string? x) (string->bytes/utf-8 x))
                      ((syntax? x) (let ((x-e (syntax-e x)))
                                     (cond ((bytes?  x-e) x-e)
                                           ((string? x-e) (string->bytes/utf-8 x-e))
                                           (else #f))))
                      (else #f))
                (error error-name
                       "INTERNAL: invalid in bytes-or-string-or-stx->bytes: ~S"
                       x))))
         (bytes-or-string-or-stx->string
          (lambda (x)
            (or (cond ;; TODO: !!! bytes? clause here
                 ((string? x) x)
                 ((syntax? x) (let ((x-e (syntax-e x)))
                                (cond ((string? x-e) x-e)
                                      (else #f))))
                 (else #f))
                (error error-name
                       "INTERNAL: invalid in bytes-or-string-or-stx->string: ~S"
                       x))))
         (add-to-literal/noescape
          (lambda args
            (start-literal)
            (for-each (lambda (x)
                        ;; TODO: !!! MAKE THIS WRITE BYTES
                        (write-bytes (bytes-or-string-or-stx->bytes x) literal))
                      args)))
         (add-to-literal/escape
          (lambda args
            (start-literal)
            (for-each (lambda (x)
                        ;; TODO: !!! write-html-bytes/fixed
                        (write-html (bytes-or-string-or-stx->string x) literal))
                      args)))
         (add-to-literal/dquote-escape
          (lambda args
            (start-literal)
            (for-each (lambda (x)
                        (write-html-attribute-value-part-string (bytes-or-string-or-stx->string x) literal))
                      args)))
         (final-result
          (lambda ()
            (finish-literal)
            (reverse result)))

         ;; Utility:

         (lst-arity-1-val-stx
          (lambda (lst-stx rest)
            (and (null? rest)
                 (raise-syntax-error error-name
                                     "expected 1 argument, got 0"
                                     lst-stx))
            (or (null? (cdr rest))
                (raise-syntax-error error-name
                                    "expected 1 argument, got more"
                                    (if (pair? (cdr rest))
                                        (car (cdr rest))
                                        (cdr rest))))
            (car rest)))

         (lst-assert-arity-1+
          (lambda (lst-stx rest)
            (and (null? rest)
                 (raise-syntax-error error-name
                                     "expected 1 or more arguments, got 0"
                                     lst-stx))))

         (obviously-confused-with-name
          (lambda (stx str what)
            (cond ((equal? str "&")
                   (raise-syntax-error error-name
                                       (string-append
                                        "invalid HTML "
                                        what
                                        " (entity not valid here)")
                                       stx))
                  ((equal? str "@")
                   (raise-syntax-error error-name
                                       (string-append
                                        "invalid HTML "
                                        what
                                        " (attributes list not valid here)")
                                       stx))
                  ((and (> (string-length str) 0)
                        (eqv? #\% (string-ref str 0)))
                   (raise-syntax-error error-name
                                       (string-append
                                        "invalid HTML "
                                        what
                                        " (misspelled special form?)")
                                       stx))
                  (else #f))))

         (assert-valid-elem-name
          ;; TODO: What about XML namespaces?
          (let ((rx (regexp "^[A-Za-z][-_A-Za-z0-9]*$")))
            (lambda (stx str)
              (or (regexp-match-positions rx str)
                  (obviously-confused-with-name stx str "element name")
                  (raise-syntax-error error-name
                                      "invalid HTML element name"
                                      stx)))))

         (assert-valid-attr-name
          ;; TODO: What about XML namespaces?
          (let ((rx (regexp "^[A-Za-z][-_A-Za-z0-9]*$")))
            (lambda (stx str)
              (or (regexp-match-positions rx str)
                  (obviously-confused-with-name stx str "attribute name")
                  (raise-syntax-error error-name
                                      "invalid HTML attribute name"
                                      stx)))))

         (handle-char-ref
          (lambda (lst-stx lst-e)
            (let* ((val-stx (lst-arity-1-val-stx lst-stx (cdr lst-e)))
                   (val-e   (syntax-e val-stx)))
              (add-to-literal/noescape #"&")
              (cond ((symbol? val-e)
                     (let ((name-bytes (string->bytes/utf-8 (symbol->string val-e))))
                       ;; TODO: Verify it's valid entity name.
                       (add-to-literal/noescape name-bytes)))
                    ((and (integer? val-e) (exact? val-e) (>= val-e 0))
                     (add-to-literal/noescape #"#" (string->bytes/utf-8 (number->string val-e))))
                    (else
                     (raise-syntax-error error-name
                                         "invalid HTML character reference"
                                         val-stx)))
              (add-to-literal/noescape #";"))))

         ;; Percent-Something Handling:

         (handle-possible-percent-something-or-false
          ;; TODO: !!! Change this to do the add itself, not possibly return
          ;; something to be added.  Especially since we are doing side-effects
          ;; anyway now, with the "add-to-literal" for verbatim handling in
          ;; attribute-list context.  And then maybe have it call an else proc
          ;; argument rather than return something that's effectively a boolean
          ;; for whether handled.
          (lambda (context head-e-orig args-e lst-stx)
            (let loop ((head-e head-e-orig))
              ;; TODO: Maybe do error-checking of args-e for each of the below.
              (case head-e
                ((%format)
                 (list* (case context
                          ((content)         'format/content)
                          ((attributes)      (raise-syntax-error
                                              error-name
                                              (format "~A is invalid in attributes context"
                                                      head-e-orig)
                                              lst-stx))
                          ((attribute-value) 'format/attribute-value)
                          (else (error 'handle-possible-percent-something-or-false
                                       "internal error: ~S in context ~S"
                                       head-e-orig
                                       context)))
                        lst-stx
                        args-e))
                ((%xexp)
                 (list* (case context
                          ((content)         'xexp/content)
                          ((attributes)      'xexp/attributes)
                          ((attribute-value) 'xexp/attribute-value)
                          (else (error 'handle-possible-percent-something-or-false
                                       "internal error: ~S in context ~S"
                                       head-e-orig
                                       context)))
                        lst-stx
                        args-e))
                ((%write)      (begin (and (eqv? 'attributes context)
                                           (add-to-literal/noescape #" "))
                                      (list* 'write      lst-stx args-e)))
                ((%write/port) (begin (and (eqv? 'attributes context)
                                           (add-to-literal/noescape #" "))
                                      (list* 'write/port lst-stx args-e)))
                ((%verbatim)   (begin (and (eqv? 'attributes context)
                                           (add-to-literal/noescape #" "))
                                      (list* 'verbatim   lst-stx args-e)))
                ((%void)       (list* 'void lst-stx args-e))
                ((%)           (loop '%format))
                ((%sxml)       (loop '%xexp))
                (else          #f)))))

         ;; Processing Content:

         (do-content-sequence
          (lambda (lst-stx)
            (let ((lst (if (syntax? lst-stx) (syntax-e lst-stx) lst-stx)))
              (or (pair? lst)
                  (error error-name
                         "INTERNAL: lst not list in do-content-sequence: lst=~S"
                         lst))
              (for-each do-thing-in-content lst))))

         (do-thing-in-content
          (lambda (thing-stx)
            (let ((thing-e (syntax-e thing-stx)))
              (cond ((string? thing-e) (do-string-in-content thing-stx))
                    ((bytes?  thing-e) (do-bytes-in-content  thing-stx))
                    ((null?   thing-e)
                     (raise-syntax-error
                      error-name
                      "empty list is invalid in HTML element content"
                      thing-stx))
                    ((pair?   thing-e) (do-pair-in-content thing-stx))
                    ((symbol? thing-e)
                     (raise-syntax-error
                      error-name
                      "symbol is invalid in HTML element content (missing parentheses around it?)"
                      thing-stx))
                    (else (raise-syntax-error
                           error-name
                           "invalid object in HTML element content"
                           thing-stx))))))

         (do-bytes-in-content
          (lambda (bytes-stx)
            (add-to-literal/escape (syntax-e bytes-stx))))

         (do-string-in-content
          (lambda (str-stx)
            (add-to-literal/escape (syntax-e str-stx))))

         (do-pair-in-content
          (lambda (lst-stx)
            (let* ((lst-e    (syntax-e lst-stx))
                   (head-stx (car lst-e))
                   (head-e   (syntax-e head-stx)))
              (cond
               ((not (symbol? head-e))
                ;;(or (obviously-confused-with-name
                ;;     head-stx elem-name-str
                ;;     "element name")
                (raise-syntax-error error-name
                                    "invalid HTML element name"
                                    head-stx))
               ((handle-possible-percent-something-or-false 'content head-e (cdr lst-e) lst-stx)
                => add-form)
               (else
                ;; Head of the list *is* a symbol.
                (case head-e

                  ((&)
                   (handle-char-ref lst-stx lst-e))

                  ((*decl* *DECL*)
                   (start-literal)
                   (write-html-decl (syntax->datum lst-stx) literal))

                  ((*pi* *PI*)
                   (start-literal)
                   (write-html-pi (syntax->datum lst-stx) literal))

                  (else
                   ;; "lst" is an HTML element or an invalid % form, so ...
                   (let ((elem-name-str (symbol->string head-e)))
                     (assert-valid-elem-name head-stx elem-name-str)
                     (add-to-literal/noescape #"<" (string->bytes/utf-8 elem-name-str))
                     (let ((content
                            (let ((rest (cdr lst-e)))
                              (if (null? rest)
                                  ;; Element has neither attributes nor
                                  ;; content.
                                  rest
                                  ;; Element has attributes and/or content.
                                  (let* ((first-stx (car rest))
                                         (first-e     (syntax-e first-stx)))
                                    (if (and (pair? first-e)
                                             (eq? (syntax-e (car first-e)) '@))
                                        ;; Attributes, so process them and
                                        ;; then return sublist after them.
                                        (let ((attrs (cdr first-e)))
                                          (and (null? attrs)
                                               (raise-syntax-error
                                                error-name
                                                "empty HTML attribute list"
                                                first-stx))
                                          (do-attribute-sequence attrs)
                                          (cdr rest))
                                        ;; No attributes, so just return
                                        ;; "rest"
                                        rest))))))
                       (if (memq head-e always-empty-html-elements)
                           ;; It's an HTML element type is always be empty.
                           (begin
                             (or (null? content)
                                 (raise-syntax-error
                                  error-name
                                  "this HTML element cannot have content"
                                  lst-stx))
                             (add-to-literal/noescape #">"
                                                      ;; #" />"
                                                      ))
                           ;; It's an HTML element type is *not* always-empty.
                           (begin
                             (add-to-literal/noescape #">")
                             (or (null? content)
                                 (do-content-sequence content))
                             (add-to-literal/noescape #"</"
                                                      (string->bytes/utf-8 elem-name-str)
                                                      #">"))))))))))))

         ;; Processing Attributes:

         (do-attribute-sequence
          (lambda (lst-stx)
            (let ((lst (if (syntax? lst-stx) (syntax-e lst-stx) lst-stx)))
              (or (pair? lst)
                  (error
                   error-name
                   "INTERNAL: lst not list in do-attribute-sequence: lst=~S"
                   lst))
              (for-each do-thing-in-attributes lst))))

         (do-thing-in-attributes
          (lambda (thing-stx)
            (let ((thing-e (syntax-e thing-stx)))
              (cond ((pair?   thing-e) (do-pair-in-attributes thing-stx))
                    ((symbol? thing-e)
                     (raise-syntax-error
                      error-name
                      "symbol is invalid here (missing parentheses around it?)"
                      thing-stx))
                    (else (raise-syntax-error error-name
                                              "invalid object in HTML attribute"
                                              thing-stx))))))

         (do-pair-in-attributes
          (lambda (lst-stx)
            (let* ((lst-e    (syntax-e lst-stx))
                   (head-stx (car lst-e))
                   (head-e   (syntax-e head-stx)))
              (cond
               ((not (symbol? head-e))
                (raise-syntax-error error-name
                                    "invalid HTML attribute name"
                                    head-stx))
               ((handle-possible-percent-something-or-false 'attributes head-e (cdr lst-e) lst-stx)
                => add-form)
               (else
                ;; Head of the list *is* a symbol.
                (case head-e
                  ((@ &)
                   (raise-syntax-error
                    error-name
                    "invalid inside attributes list"
                    lst-stx))
                  (else
                   ;; "lst" is an attribute, so ...
                   (let ((attr-name-str (symbol->string head-e)))
                     (assert-valid-attr-name head-stx attr-name-str)
                     (add-to-literal/noescape #" "
                                              (string->bytes/utf-8 attr-name-str)
                                              #"=\"")
                     (let loop ((rest           (cdr lst-e))
                                (seen-anything? #f))
                       (if (null? rest)
                           (if seen-anything?
                               (add-to-literal/noescape #"\"")
                               (raise-syntax-error error-name
                                                   "HTML attribute is missing value expression"
                                                   lst-stx))
                           (let* ((val-stx (car rest))
                                  (val-e   (syntax-e val-stx)))
                             (cond
                              ((string? val-e)
                               (add-to-literal/dquote-escape val-e)
                               (loop (cdr rest) #t))
                              ((pair? val-e)
                               (let* ((val-head-stx (car val-e))
                                      (val-head-e   (syntax-e val-head-stx)))
                                 (case val-head-e
                                   ((&)
                                    (handle-char-ref val-stx val-e)
                                    (loop (cdr rest) #t))
                                   (else (cond ((handle-possible-percent-something-or-false 'attribute-value
                                                                                            val-head-e
                                                                                            (cdr val-e)
                                                                                            val-stx)
                                                => (lambda (x)
                                                     (add-form x)
                                                     (loop (cdr rest) #t)))
                                               (else
                                                (raise-syntax-error error-name
                                                                    "expected HTML attribute value"
                                                                    val-stx)))))))
                              (else
                               (raise-syntax-error error-name
                                                   "expected HTML attribute value"
                                                   val-stx))))))))))))))

         ;; Generic Special Forms:

         ;; (do-%eval/effects-only
         ;;  (lambda (lst-stx)
         ;;    (let ((lst-e (syntax-e lst-stx)))
         ;;      (lst-assert-arity-1+ lst-stx (cdr lst-e))
         ;;      (for-each (lambda (x-stx)
         ;;                  (let ((x-e (syntax-e x-stx)))
         ;;                    (or (pair? x-e)
         ;;                        (raise-syntax-error
         ;;                         error-name
         ;;                         "literals in %eval/effects-only have no effect"
         ;;                         lst-stx
         ;;                         x-stx))
         ;;                    (add-form x-stx)))
         ;;                (cdr lst-e)))))

         ;; (do-%verbatim
         ;;  (lambda (lst-stx)
         ;;    (let ((lst-e (syntax-e lst-stx)))
         ;;      (lst-assert-arity-1+ lst-stx (cdr lst-e))
         ;;      (for-each (lambda (x-stx)
         ;;                  (let ((x-e (syntax-e x-stx)))
         ;;                    ;; TODO: !!! Handle both strings and bytes.
         ;;                    (cond ((bytes? x-e)
         ;;                           (add-to-literal/noescape x-e))
         ;;                          ((string? x-e)
         ;;                           (add-to-literal/noescape (string->bytes/utf-8 x-e)))
         ;;                          (else
         ;;                           (raise-syntax-error
         ;;                            error-name
         ;;                            "expected bytes or string"
         ;;                            lst-stx
         ;;                            x-stx)))))
         ;;                (cdr lst-e)))))
         )

      (do-content-sequence entire-stx)
      (final-result))))