html-codec.ss
#lang scheme/base

(require srfi/14)

(define alternates
  '((#\- . ndash)
    (#\& . amp)))

(define e-alternates
  (make-immutable-hash alternates))

(define d-alternates
  (make-immutable-hash 
   (map
    (λ (pair) (cons (cdr pair) (car pair)))
    alternates)))

(define good-characters 
  (char-set-difference char-set:printing (list->char-set (map car alternates))))

(define (okay? c)
  (char-set-contains? good-characters c))

(define (encode-number n)
  (let ((enc (number->string n #x10)))
    (case (string-length enc)
      ((1) (string-append "00" enc))
      ((2) (string-append "0" enc))
      ((3) enc)
      (else (error "number wouldn't encode" n enc)))))

(define (encode s)
  (apply
   string-append
   (for/list ((character (string->list s)))
     (if (okay? character)
         (list->string (list character))
         (let ((alt (hash-ref e-alternates character (λ () (char->integer character)))))
           (let ((alt (if (symbol? alt) (symbol->string alt) (encode-number alt))))
             (string-append "&" alt ";")))))))


(define (decode-thing s)
  (let ((c (hash-ref d-alternates (string->symbol s) (λ () #f))))
    (if c 
        (list->string (list c))
        (let ((num (string->number s #x10)))
          (if num 
              (list->string (list (integer->char num)))
              (string-append "&" s ";"))))))

(define (decode s)
  (apply string-append
         (let loop ((outside? #t) (bits (regexp-split #rx"&" s)))
           (cond
             ((null? bits) null)
             (outside? (cons (car bits) (loop #f (cdr bits))))
             (else
              (let ((enc (car bits)))
                (let ((match (regexp-match #rx"([^;]+);(.*)" enc)))
                  (if match
                      (list* (decode-thing (cadr match)) (loop #t (cons (caddr match) (cdr bits))))
                      (list* "&" enc (loop #f (cdr bits)))))))))))

(provide encode decode good-characters)