#lang scheme
(require (prefix-in ff3: "../ff3.ss")
(prefix-in log: (planet synx/log))
"../repeatedly.ss"
net/url
net/head
srfi/19
web-server/http
scheme/promise)
(current-proxy-servers '(("http" "localhost" 3128)))
(define (decode-date raw)
(if raw
(string->date raw "~a, ~d ~b ~Y ~H:~M:~S GMT")
(current-date)))
(define (encode-date value)
(date->string value "~a, ~d ~b ~Y ~H:~M:~S GMT"))
(define (get-code header)
(let ([match (regexp-match #rx"HTTP/1.[01] ([0-9]+) (.*)((?s:.*))" header)])
(if match (values (string->number (cadr match)) (caddr match) header)
(error (format "Could not find code in ~s" header)))))
(define *default-headers*
'(("User-Agent" . "naughtyweasel")))
(define (headers->alist headers)
(map (λ (header) (cons (header-field header) (header-value header))) headers))
(define (build-headers uri [headers null])
(let*
([cookies (ff3:format/cookie (ff3:get-cookies uri))]
[headers (append *default-headers* headers)])
(if (null? cookies)
headers
(append headers
(headers->alist (list cookies))))))
(define (resolve-redirect uri input-header)
(let-values ([(redirect? uri header)
(repeatedly
10
"Took too long to redirect"
(λ (retry)
(call/input-url
uri
(λ (uri)
(head-impure-port uri (join-headers input-header)))
(λ (input)
(let-values ([(code message header) (get-code (purify-port input))])
(cond
((= code 502) (retry)) [(< code 300) (values #f uri header)]
[(>= code 400) (raise
(make-exn:fail:network
(format "There was an error fetching ~a ~a ~a" (url->string uri) code header)
(current-continuation-marks)))]
[else (values #t (string->url (extract-field "Location" header)) #f)]))))))])
(if redirect?
(resolve-redirect uri input-header)
(values uri header))))
(define (resolve-redirect-2 uri input-header)
(call-with-values (λ () (resolve-redirect uri input-header))
(λ (uri header) uri)))
(define (encode t)
(cond
((number? t) (number->string t))
((bytes? t) (bytes->string/utf-8 t))
((string? t) t)
(else (error "No idea what ~s is" t))))
(define (join-headers headers)
(map
(λ (header)
(cond
((header? header)
(string-append (header-field header) ": " (header-value header)))
((pair? header)
(string-append (encode (car header)) ": " (encode (cdr header))))
((string? header) header)
(else (error "Don't know what ~s header is" header))))
headers))
(define (split-header header)
(filter
(λ (s) (> (string-length s) 0))
(regexp-split #rx"\r\n" header)))
(define (head uri (headers null) #:redirect? (redirect? #f))
(let* ((input-header (join-headers (build-headers uri headers)))
(uri (if redirect? (resolve-redirect-2 uri input-header) uri)))
(repeatedly
10
"Took too long to get head"
(λ (retry)
(let ((header
(call/input-url
uri
(λ (uri) (get-impure-port uri input-header))
(λ (input)
(purify-port input)))))
(let-values (((code message header) (get-code header)))
(if (eq? code 502) (retry)
(values code header))))))))
(define (raw-action action uri header err-message process)
(let* ((input-header (join-headers header))
[uri (resolve-redirect-2 uri input-header)])
(repeatedly
20
(delay (list err-message (string->url uri)))
(λ (retry)
(call/input-url
uri
(λ (uri) (action uri input-header))
(λ (input)
(let ([header (purify-port input)])
(let-values ([(code message header) (get-code header)])
(when (= code 502) (retry)) (when (>= code 300) (log:error (format "Um... huh. ~a ~a ~a~n" code message header)))
(process code
(extract-field "Content-Type" header) (decode-date (extract-field "Last-Modified" header))
header input)))))))))
(define (get uri process [headers null] (err-message "Took too long to get!"))
(log:info "Headers ~s ~s" (url->string uri) headers)
(raw-action
get-impure-port
uri (build-headers uri headers)
err-message
(λ (code type modified header input)
(process code type modified header input))))
(define (post uri data process (headers null))
(raw-action
(λ (uri header)
(post-impure-port uri data header))
uri (join-headers (build-headers uri headers))
"Took too long to post!"
(λ (code type modified header input)
(process code header input))))
(define progress (make-parameter #f))
(define (progress-copy-port length in out)
(let ((buffer (make-bytes 4096)))
(let loop ((total 0))
(let ((num (read-bytes! buffer in)))
(if (eof-object? num) num
(let ((total (+ total num)))
(write-bytes buffer out 0 num)
((progress) total length)
(loop total)))))))
(define-syntax-rule (aif test id yes no)
(let ((id test))
(if id yes no)))
(define (download-basic uri dest headers [exists 'replace])
(get uri
(λ (code type modified response-headers input)
(define length (aif (extract-field "Content-Length" response-headers) result (string->number result) 0))
(when (and
(eq? exists 'append)
(not (or (= code 206) (= code 200))))
(error (format "Ayieee trying to cobble file mutant bloog! ~a ~a ~a" uri code response-headers)))
(call-with-output-file dest #:exists exists
(λ (output)
(if (progress)
(progress-copy-port length input output)
(copy-port input output))))
(file-or-directory-modify-seconds dest (time-second (date->time-utc modified)))
(values type modified))
headers
"Took too long to download!"))
(define (download-partial uri dest header size)
(download-basic
uri
dest
(cons
(cons "Range" (format "bytes=~a-" size))
header)
'append))
(define (download uri dest [headers null])
(let ([input-header (build-headers uri headers)])
(let-values ([(uri first-header) (resolve-redirect uri input-header)])
(let-values ([(code message first-header) (get-code first-header)])
(cond
((= code 502)
(download uri dest headers))
((not (= code 200)) (error "Could not download ~a ~a" code first-header)))
(let ([ranges (extract-field "Accept-Ranges" first-header)]
[length (extract-field "Content-Length" first-header)])
(let ([length (if length (string->number length) #f)])
(if (and
(file-exists? dest)
ranges
(regexp-match #rx"bytes" ranges))
(let ([size (file-size dest)])
(if (and length (< size length))
(download-partial uri dest input-header size)
(download-basic uri dest input-header)))
(download-basic uri dest input-header))))))))
(provide/contract
(progress parameter?)
(head (->* (url?) ((listof (cons/c string? string?)) #:redirect? boolean?) (values integer? string?)))
[get (->* (url? (-> integer? string? date? string? input-port? any)) ((listof (cons/c string? string?)) any/c) any)]
(post (->* (url? bytes? (-> integer? string? input-port? any)) ((listof (cons/c string? string?))) any))
[download (->* (url? path-string?) ((listof (cons/c string? string?))) (values string? date?))])