main.rkt
#lang racket/base

(require (prefix-in ff3: "ff3.rkt")
         (planet synx/util:2/repeatedly)
	 (prefix-in log: (planet synx/log:1))
	 file/gunzip
         net/url
         net/head
         srfi/19
         web-server/http
	 racket/match
         racket/promise
         racket/port
         racket/contract)

(current-proxy-servers '(("http" "localhost" 8123)))

(define (decode-date raw)
  (if raw (string->date raw "~a, ~d ~b ~Y ~H:~M:~S GMT") (current-date)))

(define (encode-date value) (date->string value "~a, ~d ~b ~Y ~H:~M:~S GMT"))

(define (get-code uri header)
  (let ((match (regexp-match #rx"HTTP/1.[01] ([0-9]+) (.*)((?s:.*))" header)))
    (when (not match) (error (format "Could not find code in ~s" header)))
    (ff3:add-cookies! (url-host uri) header)
    (values (string->number (cadr match)) (caddr match) header)))

(define *default-headers* '(("User-Agent" . "naughtyweasel")))

(define (headers->alist headers)
  (map
   (λ (header) (cons (header-field header) (header-value header)))
   headers))

(define (build-headers uri headers)
  (λ ()
    (join-headers
     (let* ((cookies (ff3:format/cookie (ff3:get-cookies uri)))
            (headers (append *default-headers* headers)))
       (let ((headers
              (if (null? cookies)
                headers
                (append headers (headers->alist (list cookies))))))
         headers)))))

(define (encode t)
  (cond
   ((number? t) (number->string t))
   ((bytes? t) (bytes->string/utf-8 t))
   ((string? t) t)
   (else (error "No idea what ~s is" t))))

(define (join-headers headers)
  (map
   (λ (header)
     (cond
      ((header? header)
       (string-append (header-field header) ": " (header-value header)))
      ((pair? header)
       (string-append (encode (car header)) ": " (encode (cdr header))))
      ((string? header) header)
      (else (error "Don't know what ~s header is" header))))
   headers))

(define (split-header header)
  (filter (λ (s) (> (string-length s) 0)) (regexp-split #rx"\r
" header)))

(define (de-post-ify action)
  (if (or (eq? action get-impure-port) (eq? action head-impure-port))
    action
    get-impure-port))

(define (resolve-redirect next uri action input-header)
  (λ (code message header input)
    (if (and (>= code 300) (< code 304))
      (begin
        (close-input-port input)
        (let ((action (if (= code 303) action (de-post-ify action))))
          (raw-action
           action
           (combine-url/relative uri (extract-field "Location" header))
           input-header
           next)))
      (next code message header input))))

(define (handle-errors next uri)
  (λ (code message header input)
    (if (>= code 400)
      (raise
       (make-exn:fail:network
        (format
         "There was an error fetching ~a ~a ~a"
         (url->string uri)
         code
         header)
        (current-continuation-marks)))
      (next code message header input))))

(define (make-codec op)
  (lambda (input)
    (define-values (wrapped-input output) (make-pipe))
    (thread
     (lambda ()
       (dynamic-wind
	   void
	   (lambda () 
	     (op input output))
	   (lambda () 
	     (close-input-port input)
	     (close-output-port output)))))
    wrapped-input))

(define unzip-input (make-codec gunzip-through-ports))
(define inflate-input (make-codec inflate))

(define (raw-action
         action
         uri
         input-header
         next
         #:redirect?
         (redirect? #t)
         #:raise?
         (raise? #t))
  (let ((next
         (let ((next
                (if redirect?
                  (resolve-redirect next uri action input-header)
                  next)))
           (if raise? (handle-errors next uri) next))))
    (repeatedly
     10
     (format "Took too long to ~s ~s" action (url->string uri))
     (λ (retry)
       (call/input-url
        uri
        (λ (uri) (action uri (input-header)))
        (λ (input)
          (let ((header (purify-port input)))
            (let-values (((code message header) (get-code uri header)))
              (if (= code 502)
                (retry)
		(let* ((encoding (extract-field "Content-Encoding" header))
		       (input (match encoding
				     ("gzip" (unzip-input input))
				     ("deflate" (inflate-input input))
				     (else input))))
		  (next code message header input)))))))))))

(define (raw-head uri headers redirect?)
  (let/ec
   return
   (raw-action
    head-impure-port
    uri
    headers
    #:redirect?
    redirect?
    #:raise?
    #f
    (λ (code message header input) (return code header)))))

(define (head uri (headers null) #:redirect? (redirect? #f))
  (raw-head uri (build-headers uri headers) redirect?))

(define (get uri process #:headers (headers null))
  (raw-action get-impure-port uri (build-headers uri headers) process))

(define (post uri data process (headers null))
  (raw-action
   (λ (uri header) (post-impure-port uri data header))
   uri
   (build-headers uri headers)
   process))

(define progress (make-parameter #f))

(define (progress-copy-port length in out)
  (let ((buffer (make-bytes 4096)))
    (let loop ((total 0))
      (let ((num (read-bytes! buffer in)))
        (if (eof-object? num)
          num
          (let ((total (+ total num)))
            (write-bytes buffer out 0 num)
            ((progress) total length)
            (loop total)))))))

(define-syntax-rule (aif test id yes no) (let ((id test)) (if id yes no)))

(define (download-basic uri dest input-header (exists 'replace))
  (raw-action
   get-impure-port
   uri
   input-header
   (λ (code message header input)
     (define length
       (aif
        (extract-field "Content-Length" header)
        result
        (string->number result)
        0))
     (when (and (eq? exists 'append) (not (or (= code 206) (= code 200))))
       (error
        (format
         "Ayieee trying to cobble file mutant bloog! ~a ~a ~a"
         uri
         code
         header)))
     (call-with-output-file
      dest
      #:exists
      exists
      (λ (output)
        (if (progress)
          (progress-copy-port length input output)
          (copy-port input output))))
     (let ((modified (decode-date (extract-field "Last-Modified" header))))
       (file-or-directory-modify-seconds
        dest
        (time-second (date->time-utc modified)))
       (values (extract-field "Content-Type" header) modified)))))

(define (download-partial uri dest header size)
  (download-basic
   uri
   dest
   (λ () (cons (string-append "Range: " (format "bytes=~a-" size)) (header)))
   'append))

(define (download uri dest (headers null))
  (let ((input-header (build-headers uri headers)))
    (let-values (((code first-header) (raw-head uri input-header #t)))
      (when (not (= code 200))
        (error
         "Could not get the head for ~a ~a ~a"
         (url->string uri)
         code
         first-header))
      (let ((ranges (extract-field "Accept-Ranges" first-header))
            (length (extract-field "Content-Length" first-header)))
        (let ((length (if length (string->number length) #f)))
          (if (and (file-exists? dest) ranges (regexp-match #rx"bytes" ranges))
            (let ((size (file-size dest)))
              (if (and length (< size length))
                (download-partial uri dest input-header size)
                (download-basic uri dest input-header)))
            (download-basic uri dest input-header)))))))

(define handler/c (-> integer? string? string? input-port? any))

(define input-header/c (listof (cons/c string? string?)))

(define prepared-header/c (-> (listof string?)))

(provide/contract
 (handler/c contract?)
 (input-header/c contract?)
 (decode-date (-> (or/c bytes? false/c) date?))
 (encode-date (-> date? string?))
 (build-headers (-> url? input-header/c prepared-header/c))
 (raw-action
  (->*
   ((-> url? prepared-header/c input-port?) url? prepared-header/c handler/c)
   (#:redirect? boolean? #:raise? boolean?)
   any))
 (progress parameter?)
 (head
  (->* (url?) (input-header/c #:redirect? boolean?) (values integer? string?)))
 (get (->* (url? handler/c) (#:headers input-header/c) any))
 (post (->* (url? bytes? handler/c) (input-header/c) any))
 (download (->* (url? path-string?) (input-header/c) (values string? date?))))