html-codec.rkt
#lang racket/base

(require srfi/14)

(define alternates '((#\- . ndash) (#\& . amp)))

(define e-alternates (make-immutable-hash alternates))

(define d-alternates
   (make-immutable-hash
    (map (λ (pair) (cons (cdr pair) (car pair))) alternates)))

(define good-characters
   (char-set-difference
    char-set:printing
    (list->char-set (map car alternates))))

(define (okay? c) (char-set-contains? good-characters c))

(define (encode-number n)
   (let ((enc (number->string n 16)))
     (case (string-length enc)
       ((1) (string-append "00" enc))
       ((2) (string-append "0" enc))
       ((3) enc)
       (else (error "number wouldn't encode" n enc)))))

(define (encode s)
   (apply
    string-append
    (for/list
     ((character (string->list s)))
     (if (okay? character)
       (list->string (list character))
       (let ((alt
              (hash-ref
               e-alternates
               character
               (λ () (char->integer character)))))
         (let ((alt
                (if (symbol? alt) (symbol->string alt) (encode-number alt))))
           (string-append "&" alt ";")))))))

(define (decode-thing s)
   (let ((c (hash-ref d-alternates (string->symbol s) (λ () #f))))
     (if c
       (list->string (list c))
       (let ((num (string->number s 16)))
         (if num
           (list->string (list (integer->char num)))
           (string-append "&" s ";"))))))

(define (decode s)
   (apply
    string-append
    (let loop ((outside? #t) (bits (regexp-split #rx"&" s)))
      (cond
       ((null? bits) null)
       (outside? (cons (car bits) (loop #f (cdr bits))))
       (else
        (let ((enc (car bits)))
          (let ((match (regexp-match #rx"([^;]+);(.*)" enc)))
            (if match
              (list*
               (decode-thing (cadr match))
               (loop #t (cons (caddr match) (cdr bits))))
              (list* "&" enc (loop #f (cdr bits)))))))))))

(provide encode decode good-characters)