#lang racket/base
(require (prefix-in ff3: "ff3.rkt")
(prefix-in ssl: "ssl-url.rkt")
(planet synx/util:2/repeatedly)
(prefix-in log: (planet synx/log:1))
file/gunzip
net/url
net/head
srfi/19
web-server/http
racket/match
racket/promise
racket/port
racket/contract)
(current-proxy-servers '(("http" "localhost" 8123)))
(define (decode-date raw)
(if raw (string->date raw "~a, ~d ~b ~Y ~H:~M:~S GMT") (current-date)))
(define (encode-date value) (date->string value "~a, ~d ~b ~Y ~H:~M:~S GMT"))
(define (get-code uri header)
(let ((match (regexp-match #rx"HTTP/1.[01] ([0-9]+) (.*)((?s:.*))" header)))
(when (not match) (error (format "Could not find code in ~s" header)))
(ff3:add-cookies! (url-host uri) header)
(values (string->number (cadr match)) (caddr match) header)))
(define *default-headers* '(("User-Agent" . "naughtyweasel")))
(define (headers->alist headers)
(map
(λ (header) (cons (header-field header) (header-value header)))
headers))
(define (build-headers uri headers)
(λ ()
(join-headers
(let* ((cookies (ff3:format/cookie (ff3:get-cookies uri)))
(headers (append *default-headers* headers)))
(let ((headers
(if (null? cookies)
headers
(append headers (headers->alist (list cookies))))))
headers)))))
(define (encode t)
(cond
((number? t) (number->string t))
((bytes? t) (bytes->string/utf-8 t))
((string? t) t)
(else (error "No idea what ~s is" t))))
(define (join-headers headers)
(map
(λ (header)
(cond
((header? header)
(string-append (header-field header) ": " (header-value header)))
((pair? header)
(string-append (encode (car header)) ": " (encode (cdr header))))
((string? header) header)
(else (error "Don't know what ~s header is" header))))
headers))
(define (split-header header)
(filter (λ (s) (> (string-length s) 0)) (regexp-split #rx"\r
" header)))
(define (choose-get-impure-port ssl?)
(if ssl? ssl:get-impure-port get-impure-port))
(define (choose-head-impure-port ssl?)
(if ssl? ssl:head-impure-port head-impure-port))
(define (choose-post-impure-port next)
(lambda (ssl?)
(lambda (uri header)
(next uri header (if ssl? ssl:post-impure-port post-impure-port)))))
(define (de-post-ify choose-action)
(if (or (eq? choose-action choose-get-impure-port)
(eq? choose-action choose-head-impure-port))
choose-action
choose-get-impure-port))
(define follow-redirects? (make-parameter #t))
(define (resolve-redirect next uri choose-action input-header)
(if (follow-redirects?)
(λ (code message header input)
(if (and (>= code 300) (< code 304))
(begin
(close-input-port input)
(let ((choose-action (if (= code 303)
choose-action
(de-post-ify choose-action))))
(raw-action
choose-action
(combine-url/relative uri (extract-field "Location" header))
input-header
next)))
(next code message header input)))
next))
(define raise-errors? (make-parameter #t))
(define (handle-errors next uri)
(if (raise-errors?)
(λ (code message header input)
(if (>= code 400)
(raise
(make-exn:fail:network
(format
"There was an error fetching ~a ~a ~a"
(url->string uri)
code
header)
(current-continuation-marks)))
(next code message header input)))
next))
(define (make-codec op)
(lambda (input)
(define-values (wrapped-input output) (make-pipe))
(thread
(lambda ()
(dynamic-wind
void
(lambda ()
(op input output))
(lambda ()
(close-input-port input)
(close-output-port output)))))
wrapped-input))
(define unzip-input (make-codec gunzip-through-ports))
(define inflate-input (make-codec inflate))
(define (raw-action
choose-action
uri
input-header
next)
(let ((next (handle-errors
(resolve-redirect next uri choose-action input-header)
uri)))
(repeatedly
10
(format "Took too long to ~s ~s" choose-action (url->string uri))
(λ (retry)
(call/input-url
uri
(λ (uri) ((choose-action (regexp-match #rx"^https://"
(url->string uri)))
uri (input-header)))
(λ (input)
(let ((header (purify-port input)))
(let-values (((code message header) (get-code uri header)))
(if (= code 502)
(retry)
(let* ((encoding (extract-field "Content-Encoding" header))
(input (match encoding
("gzip" (unzip-input input))
("deflate" (inflate-input input))
(else input))))
(next code message header input)))))))))))
(define (head-with-built-headers uri headers)
(parameterize
((raise-errors? #f))
(let/ec
return
(raw-action
choose-head-impure-port
uri
headers
(λ (code message header input) (return code header))))))
(define (head uri (headers null))
(head-with-built-headers uri (build-headers uri headers)))
(define (get uri process #:headers (headers null))
(raw-action choose-get-impure-port uri (build-headers uri headers) process))
(define (post uri data process (headers null))
(raw-action
(choose-post-impure-port (lambda (uri header post) (post uri data header)))
uri
(build-headers uri headers)
process))
(define progress (make-parameter #f))
(define (progress-copy-port length in out)
(let ((buffer (make-bytes 4096)))
(let loop ((total 0))
(let ((num (read-bytes! buffer in)))
(if (eof-object? num)
num
(let ((total (+ total num)))
(write-bytes buffer out 0 num)
((progress) total length)
(loop total)))))))
(define-syntax-rule (aif test id yes no) (let ((id test)) (if id yes no)))
(define (download-basic uri dest input-header (exists 'replace))
(raw-action
choose-get-impure-port
uri
input-header
(λ (code message header input)
(define length
(aif
(extract-field "Content-Length" header)
result
(string->number result)
0))
(when (and (eq? exists 'append) (not (or (= code 206) (= code 200))))
(error
(format
"Ayieee trying to cobble file mutant bloog! ~a ~a ~a"
uri
code
header)))
(call-with-output-file
dest
#:exists
exists
(λ (output)
(if (progress)
(progress-copy-port length input output)
(copy-port input output))))
(let ((modified (decode-date (extract-field "Last-Modified" header))))
(file-or-directory-modify-seconds
dest
(time-second (date->time-utc modified)))
(values (extract-field "Content-Type" header) modified)))))
(define (download-partial uri dest header size)
(download-basic
uri
dest
(λ () (cons (string-append "Range: " (format "bytes=~a-" size)) (header)))
'append))
(define (download uri dest (headers null))
(let ((input-header (build-headers uri headers)))
(let-values (((code first-header)
(parameterize
((raise-errors? #f))
(head-with-built-headers uri input-header))))
(when (not (= code 200))
(error
"Could not get the head for ~a ~a ~a"
(url->string uri)
code
first-header))
(let ((ranges (extract-field "Accept-Ranges" first-header))
(length (extract-field "Content-Length" first-header)))
(let ((length (if length (string->number length) #f)))
(if (and (file-exists? dest) ranges (regexp-match #rx"bytes" ranges))
(let ((size (file-size dest)))
(if (and length (< size length))
(download-partial uri dest input-header size)
(download-basic uri dest input-header)))
(download-basic uri dest input-header)))))))
(define handler/c (-> integer? string? string? input-port? any))
(define input-header/c (listof (cons/c string? string?)))
(define prepared-header/c (-> (listof string?)))
(define action-chooser/c (-> boolean? (-> url? prepared-header/c input-port?)))
(provide/contract
(raise-errors? parameter?)
(follow-redirects? parameter?)
(handler/c contract?)
(input-header/c contract?)
(decode-date (-> (or/c bytes? false/c) date?))
(encode-date (-> date? string?))
(build-headers (-> url? input-header/c prepared-header/c))
(raw-action
(->*
(action-chooser/c url? prepared-header/c handler/c)
(#:redirect? boolean? #:raise? boolean?)
any))
(progress parameter?)
(head
(->* (url?) (input-header/c #:redirect? boolean?) (values integer? string?)))
(get (->* (url? handler/c) (#:headers input-header/c) any))
(post (->* (url? bytes? handler/c) (input-header/c) any))
(download (->* (url? path-string?) (input-header/c) (values string? date?))))