main.ss
#lang scheme

(require "parser.ss")
;(require (prefix-in log: (planet synx/log)))

(define *encode-entities*
  (map
   (λ (pair) (cons (regexp (string-append "[" (cdr pair) "]")) (string-append "\\&" (car pair) ";")))
   (filter
    (λ (pair)
      (not (equal? (car pair) "nbsp"))) ; can't reliably tell if a space is a decoded nbsp, or just a space.
        *default-entities*)))

(define (thunk? p)
  (and (procedure? p) (procedure-arity-includes? p 0)))

(define (encode item)
  (cond
    [(symbol? item) (encode (symbol->string item))]
    [(number? item) (encode (number->string item))]
    [(thunk? item) (encode (item))]
    [(promise? item) (encode (force item))]
    [(bytes? item) (encode (bytes->string/utf-8 item))]
    [(string? item)
     (foldl
      (λ (pair current)
        (regexp-replace (car pair) current (cdr pair)))
      item
      *encode-entities*)]
    [else (error (format "What is ~s doing in the tree?" item))]))

(define thingy%
  (class object%
    (init-field parent)
    (super-new)))

; each element has all its children items as a flat list
; for searching through. Each element can always refer
; to their contents and parent just fine though.

(define element%
  (class thingy%
    (inspect #f)
    (init-field name attrs contents)
    (super-new)
    (set! contents (reverse (map (λ (content) (content this)) contents)))
    (when (not (null? contents))
      (let loop ([before #f] [current contents])
        (if (null? current) (void)
            (let ([after (if (null? (cdr current)) #f (cadr current))])
              (begin
                (when (and before (car current) (eq? before (car current)))
                  (error "feep" before current))
                (when (and after (car current) (eq? after (car current)))
                  (error "feepA" after current))
                (send (car current) set-siblings before after)
                (loop (car current) (cdr current)))))))
    (field 
     [items (reverse
             (cons this (foldl (λ (content daeh) 
                               (if (is-a? content element%)
                                   (append (get-field items content) daeh)
                                   (cons content daeh))) '() contents)))]
     [prev #f]
     [next #f])
    
    (define/public (set-siblings p n)
      (set! prev p)
      (set! next n))
    
    (define/public (find check)
      (car
       (or (memf check items)
           (raise-parse-error "Could not find a match"))))
    
    (define/public (find-all check)
      (filter check items))
    (define/public (mapp check)
      (map check items))
    (define/public (fold proc . init)
      (apply foldl proc (append init (list items))))
    
    (define/public (render-contents)
      (with-output-to-string
       (λ () (print-contents))))
    
    (define/public (print-contents)
      (for-each 
       (λ (content) (send content print)) 
       contents))
    
    (define/public (print)
      (display "<")
      (display (encode name))
      (hash-for-each 
       attrs
       (λ (n v)
         (display " ")
         (display (encode n))
         (when v
           (display "=\"")
           (display (encode v))
           (display "\""))))
       (if contents
          (begin
            (display ">")
            (print-contents)
            (display "</")
            (display name)
            (display ">"))
          (display " />")))
            
    (define/public (get-attr name)
      (hash-ref attrs name (λ () #f)))))

(define text%
  (class thingy%
    (inspect #f)
    (init-field text)
    (field [name #f]
           [prev #f]
           [next #f])
    
    (super-new)
    
    ; redundant...
    (define/public (set-siblings p n)
      (set! prev p)
      (set! next n))
    
    (define/public (print)
      (display (encode text)))))

(define comment%
  (class text% 
    (super-new)
    
    (define/override (print)
      (display "<!--")
      (super print)
      (display "-->"))))


(define-syntax-rule (will-be class (attr value) ...)
  (λ (parent)
    (new class [parent parent] [attr value] ...)))

(define (cons-ify item)
  (cons (car item) 
        (if (null? (cdr item))
            #f
            (cadr item))))

(define (produce-dom)
  (let ([parse
         (make-parser
          'start: (lambda (tag attrs seed virtual?) 
;                    (log:log "s<~s>" tag)
                    '())
          'end:   (lambda (tag attrs parent-seed seed virtual?)
 ;                   (log:log "e<~s>" tag)
                    (cons (will-be element% [name tag] [attrs (make-immutable-hash (map cons-ify attrs))] [contents seed])
                          parent-seed))
          'comment: (lambda (text seed) (cons (will-be comment% [text text]) seed))
          'text:  (lambda (text seed) (cons (will-be text% [text text]) seed)))])
    (begin0
      (findf (λ (e) (is-a? e element%)) (reverse (map (λ (p) (p '())) (parse '()))))
;      (log:log "produced DOM\n")
      )))

;(define (is-element? name)
;  (λ (e)
;    (and (is-a? e element%) (eq? (get-field name e) name))))

(define (is-element? e [name #f]) 
  (and
   (is-a? e element%)
   (if name 
       (eq? name (get-field name e))
       #t)))
(define (is-text? e) (is-a? e text%))

(provide is-element? is-text? produce-dom cons-ify exn:parse?)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


(define (example)
  (with-input-from-string 
   "<html><a href='http://synx' attatt><p>This is <!-- ssh I'm hunting wabbits --> crunk</p></html><bhtml>ahhh</bhtml" 
   produce-dom))

(define (example-2)
  (let ([o (example)])
    (send
     (get-field
      parent
      (send o find
            (λ (thingy) 
              (and
               (is-a? thingy element%)
               (eq? (get-field name thingy) 'p)))))
     get-attr 'href)))

(require net/url)
(current-proxy-servers 
 '(("http" "localhost" 3128)))

(define (example-3)
  (let ([soup (call/input-url 
               (string->url "http://browse.deviantart.com/") get-pure-port
               (λ (port) 
                 (parameterize ([current-input-port port])
                   (produce-dom))))])
    (for-each
     (λ (span)
       (let ([a (send span find (is-element? 'a))]
             [img (send span find (is-element? 'img))])
         (let ([src (send img get-attr 'src)]
               [href (send a get-attr 'href)])
           (when (not (regexp-match #rx"^http://browse" href)) 
             (display "Thumbnail: ")
             (display src)
             (display "\nLink: ")
             (display href)
             (display "\n---\n")))))
     (send soup find-all
           (λ (e) (and ((is-element? 'span) e) (equal? (send e get-attr 'class) "tt-w")))))))