main.ss
#lang scheme/base

(require (prefix-in ff3: "ff3.ss")
         (prefix-in log: (planet synx/log))
         (planet synx/util:1/repeatedly)
         net/url
         net/head
         srfi/19

         web-server/http
         
         scheme/promise
         scheme/port
         scheme/contract)

(current-proxy-servers '(("http" "localhost" 3128)))
;(current-proxy-servers null)

(define (decode-date raw)
  (if raw
      (string->date raw "~a, ~d ~b ~Y ~H:~M:~S GMT")
      (current-date)))
(define (encode-date value)
  (date->string value "~a, ~d ~b ~Y ~H:~M:~S GMT"))

(define (get-code header)
  (let ([match (regexp-match #rx"HTTP/1.[01] ([0-9]+) (.*)((?s:.*))" header)])
    (when (not match)
      (error (format "Could not find code in ~s" header)))
    (ff3:add-cookies! header)
    (values (string->number (cadr match)) (caddr match) header)))

(define *default-headers* 
  '(("User-Agent" . "naughtyweasel")))

(define (headers->alist headers)
  (map (λ (header) (cons (header-field header) (header-value header))) headers))

(define (build-headers uri [headers null])
  (λ () ; VERY IMPORTANT for cookie regeneration
    (join-headers
     (let*
         ; steal the cookies from Firefox
         ([cookies (ff3:format/cookie (ff3:get-cookies uri))]
          [headers (append *default-headers* headers)])
       (let ((headers (if (null? cookies)
                          headers
                          (append headers
                                  (headers->alist (list cookies))))))
         headers)))))

(define (encode t)
  (cond
    ((number? t) (number->string t))
    ((bytes? t) (bytes->string/utf-8 t))
    ((string? t) t)
    (else (error "No idea what ~s is" t))))

(define (join-headers headers)
  (map
   (λ (header)
     (cond
       ((header? header) 
        (string-append (header-field header) ": " (header-value header)))
       ((pair? header) 
        (string-append (encode (car header)) ": " (encode (cdr header))))
       ((string? header) header)
       (else (error "Don't know what ~s header is" header))))
   headers))
   
(define (split-header header)
  (filter
   (λ (s) (> (string-length s) 0))
   (regexp-split #rx"\r\n" header)))
  
(define (resolve-redirect next uri action input-header)
  (λ (code type modified header input)
    (if (and (> code 300) (< code 400))
        (begin
          (close-input-port input)
          (raw-action action (string->url (extract-field "Location" header)) input-header next))
        (next code type modified header input))))

(define (handle-errors next uri)
  (λ (code type modified header input)
    (if (>= code 400) (raise 
                       (make-exn:fail:network
                        (format "There was an error fetching ~a ~a ~a" (url->string uri) code header)
                        (current-continuation-marks)))
        (next code type modified header input))))

(define (raw-action action uri input-header next #:redirect? (redirect? #t) #:raise? (raise? #t))
  (let ((next
         (let ((next (if redirect? (resolve-redirect next uri action input-header) next)))
           (if raise? (handle-errors next uri) next))))
    (repeatedly
     10
     (format "Took too long to ~s ~s" action (url->string uri))
     (λ (retry)
       (call/input-url
        uri
        (λ (uri) (action uri (input-header)))
        (λ (input)
          (let ([header (purify-port input)])
            (let-values ([(code message header) (get-code header)])
              (if (= code 502) (retry) ; squid slow site grumble grumble
                  (next code 
                        (extract-field "Content-Type" header) (decode-date (extract-field "Last-Modified" header))
                        header input))))))))))

(define (raw-head uri headers redirect?)
  ; note headers must be a THUNK
  (let/ec return
    (raw-action 
     head-impure-port uri headers #:redirect? redirect? #:raise? #f
     (λ (code type modified header input)
       (return code header)))))

(define (head uri (headers null) #:redirect? (redirect? #f))
  (raw-head uri (build-headers headers)))

(define (get uri process [headers null])
  (log:info "Headers ~s ~s" (url->string uri) headers)
  (raw-action get-impure-port uri (build-headers uri headers) process))

(define (post uri data process (headers null))
  (raw-action
   (λ (uri header)
     (post-impure-port uri data header))
   uri (build-headers uri headers)
   process))

(define progress (make-parameter #f))

(define (progress-copy-port length in out)
  (let ((buffer (make-bytes 4096)))
    (let loop ((total 0))
      (let ((num (read-bytes! buffer in)))
        (if (eof-object? num) num
            (let ((total (+ total num)))
              (write-bytes buffer out 0 num)
              ((progress) total length)
              (loop total)))))))

(define-syntax-rule (aif test id yes no)
  (let ((id test))
    (if id yes no)))

(define (download-basic uri dest input-header [exists 'replace])
  (raw-action 
   get-impure-port uri 
   input-header ; note, is a thunk!
   (λ (code type modified response-headers input)
     (define length (aif (extract-field "Content-Length" response-headers) result (string->number result) 0))
     (when (and
            (eq? exists 'append)
            (not (or (= code 206) (= code 200))))
       (error (format "Ayieee trying to cobble file mutant bloog! ~a ~a ~a" uri code response-headers)))
     (call-with-output-file dest #:exists exists
       (λ (output)
         (if (progress)
             (progress-copy-port length input output)
             (copy-port input output))))
     (file-or-directory-modify-seconds dest (time-second (date->time-utc modified)))
     (values type modified))))

(define (download-partial uri dest header size)
  (download-basic
   uri
   dest
   (λ ()
     (cons
      (cons "Range" (format "bytes=~a-" size))
      (header)))
   'append))

(define (download uri dest [headers null])
  (let ([input-header (build-headers uri headers)])
    (let-values ([(code first-header) (raw-head uri input-header #t)])
      (when (not (= code 200))
        (error "Could not get the head for ~a ~a ~a" (url->string uri) code first-header))
      (let ([ranges (extract-field "Accept-Ranges" first-header)]
            [length (extract-field "Content-Length" first-header)])
        (let ([length (if length (string->number length) #f)])
          (if (and
               (file-exists? dest)
               ranges
               (regexp-match #rx"bytes" ranges))
              (let ([size (file-size dest)])
                (if (and length (< size length))
                    (download-partial uri dest input-header size)
                    (download-basic uri dest input-header)))
              (download-basic uri dest input-header)))))))

(provide/contract
 (progress parameter?)
 (head (->* (url?) ((listof (cons/c string? ; name
                                    string? ; value
                                    ) ; input headers
                            ) #:redirect? boolean?) 
            (values integer? ; code
                    string? ; header
                    )))
 [get (->* (url? (-> integer? ; code
                     string? ; type
                     date? ; modified
                     string? ; header
                     input-port? ; source
                     any) ; input handler
                 ) ((listof (cons/c string? ; name
                                    string? ; value
                                    )) ; input headers
                            ) any)]
 (post (->* (url? bytes? ; data
                  (-> integer? ; code
                      string? ; type
                      date? ; modified
                      string? ; header
                      input-port? ; source
                      any) ; input handler
                  ) ((listof (cons/c string? ; name
                                     string? ; value
                                     )) ; input headers
                     ) any))
 [download (->* (url? path-string?) ((listof (cons/c string? ; name
                                                     string? ; value
                                                     )) ; input headers
                                     ) (values string? ; type
                                               date? ; modified
                                               ))])