main.rkt
#lang racket/base

(require "parser.ss" racket/promise racket/class racket/port)

(define *encode-entities*
   (map
    (λ (pair)
      (cons
       (regexp (string-append "[" (cdr pair) "]"))
       (string-append "\\&" (car pair) ";")))
    (filter (λ (pair) (not (equal? (car pair) "nbsp"))) *default-entities*)))

(define (thunk? p) (and (procedure? p) (procedure-arity-includes? p 0)))

(define (encode item)
   (cond
    ((symbol? item) (encode (symbol->string item)))
    ((number? item) (encode (number->string item)))
    ((thunk? item) (encode (item)))
    ((promise? item) (encode (force item)))
    ((bytes? item) (encode (bytes->string/utf-8 item)))
    ((string? item)
     (foldl
      (λ (pair current) (regexp-replace (car pair) current (cdr pair)))
      item
      *encode-entities*))
    (else (error (format "What is ~s doing in the tree?" item)))))

(define thingy% (class object% (init-field parent) (super-new)))

(define element%
   (class thingy%
     (inspect #f)
     (init-field name attrs contents)
     (super-new)
     (inherit-field parent)
     (define the-parent (and (is-a? parent element%) parent))
     
     (set! contents (reverse (map (λ (content) (content this)) contents)))
     (when
      (not (null? contents))
      (let loop ((before #f) (current contents))
        (if (null? current)
          (void)
          (let ((after (if (null? (cdr current)) #f (cadr current))))
            (begin
              (when (and before (car current) (eq? before (car current)))
                (error "feep" before current))
              (when (and after (car current) (eq? after (car current)))
                (error "feepA" after current))
              (send (car current) set-siblings before after)
              (loop (car current) (cdr current)))))))
     
     (define (calculate-items)
       (reverse
        (cons
         this
         (foldl
          (λ (content daeh)
            (if (is-a? content element%)
                (append (get-field items content) daeh)
                (cons content daeh)))
          '()
          contents))))

     (field
      (items (calculate-items))
      (prev #f)
      (next #f))
     
     (define/public (set-siblings p n) (set! prev p) (set! next n))
     
     (define/public (delete)
       (when prev (set-field! next prev next))
       (when next (set-field! prev next prev))
       (when the-parent (send the-parent remove this)))
     
     (define/public (remove child)
       (set! contents (filter (λ (e) (not (eq? e child))) contents))
       (set! items (calculate-items)))
     
     (define/public
      (find check)
      (car
       (or (memf check items) (raise-parse-error "Could not find a match"))))
     (define/public (find-all check) (filter check items))
     (define/public (mapp check) (map check items))
     (define/public
      (fold proc . init)
      (apply foldl proc (append init (list items))))
     (define/public
      (render-contents)
      (with-output-to-string (λ () (print-contents))))
     (define/public
      (print-contents)
      (for-each (λ (content) (send content print)) contents))
     (define/public
      (print)
      (display "<")
      (display (encode name))
      (hash-for-each
       attrs
       (λ (n v)
         (display " ")
         (display (encode n))
         (when v (display "=\"") (display (encode v)) (display "\""))))
      (if contents
        (begin
          (display ">")
          (print-contents)
          (display "</")
          (display name)
          (display ">"))
        (display " />")))
     (define/public (get-attr name) (hash-ref attrs name (λ () #f)))))

(define text%
   (class thingy%
     (inspect #f)
     (init-field text)
     (field (name #f) (prev #f) (next #f))
     (super-new)
     (define/public (set-siblings p n) (set! prev p) (set! next n))
     (define/public (print) (display (encode text)))))

(define comment%
   (class text%
     (super-new)
     (define/override (print) (display "<!--") (super print) (display "-->"))))

(define-syntax-rule
  (will-be class (attr value) ...)
  (λ (parent) (new class (parent parent) (attr value) ...)))

(define (cons-ify item)
   (cons (car item) (if (null? (cdr item)) #f (cadr item))))

(define (produce-dom)
   (let ((parse
          (make-parser
           'start:
           (lambda (tag attrs seed virtual?) '())
           'end:
           (lambda (tag attrs parent-seed seed virtual?)
             (cons
              (will-be
               element%
               (name tag)
               (attrs (make-immutable-hash (map cons-ify attrs)))
               (contents seed))
              parent-seed))
           'comment:
           (lambda (text seed) (cons (will-be comment% (text text)) seed))
           'text:
           (lambda (text seed) (cons (will-be text% (text text)) seed)))))
     (begin0
       (findf
        (λ (e) (is-a? e element%))
        (reverse (map (λ (p) (p '())) (parse '())))))))

(define (is-element? e (name #f))
   (and (is-a? e element%) (if name (eq? name (get-field name e)) #t)))

(define (is-text? e) (is-a? e text%))

(provide is-element? is-text? produce-dom cons-ify exn:parse?)

(define (example)
   (with-input-from-string
    "<html><a href='http://synx' attatt><p>This is <!-- ssh I'm hunting wabbits --> crunk</p></html><bhtml>ahhh</bhtml"
    produce-dom))

(define (example-2)
   (let ((o (example)))
     (send (get-field parent (send o find (λ (thingy) (and (is-a? thingy element%) (eq? (get-field name thingy) 'p))))) get-attr
       'href)))

(require net/url)

(define (example-3)
   (let ((soup
          (call/input-url
           (string->url "http://browse.deviantart.com/")
           get-pure-port
           (λ (port)
             (parameterize ((current-input-port port)) (produce-dom))))))
     (for-each
      (λ (span)
        (let ((a (send span find (is-element? 'a)))
              (img (send span find (is-element? 'img))))
          (let ((src (send img get-attr 'src)) (href (send a get-attr 'href)))
            (when (not (regexp-match #rx"^http://browse" href))
              (display "Thumbnail: ")
              (display src)
              (display "\nLink: ")
              (display href)
              (display "\n---\n")))))
      (send soup find-all
        (λ
         (e)
         (and ((is-element? 'span) e)
              (equal? (send e get-attr 'class) "tt-w")))))))