#lang scheme
(require (prefix-in ff3: "ff3.ss")
(prefix-in log: (planet synx/log))
(planet synx/util:1/repeatedly)
net/url
net/head
srfi/19
web-server/http
scheme/promise)
(current-proxy-servers '(("http" "localhost" 3128)))
(define (decode-date raw)
(if raw
(string->date raw "~a, ~d ~b ~Y ~H:~M:~S GMT")
(current-date)))
(define (encode-date value)
(date->string value "~a, ~d ~b ~Y ~H:~M:~S GMT"))
(define (get-code header)
(let ([match (regexp-match #rx"HTTP/1.[01] ([0-9]+) (.*)((?s:.*))" header)])
(when (not match)
(error (format "Could not find code in ~s" header)))
(ff3:add-cookies! header)
(values (string->number (cadr match)) (caddr match) header)))
(define *default-headers*
'(("User-Agent" . "naughtyweasel")))
(define (headers->alist headers)
(map (λ (header) (cons (header-field header) (header-value header))) headers))
(define (build-headers uri [headers null])
(λ () (join-headers
(let*
([cookies (ff3:format/cookie (ff3:get-cookies uri))]
[headers (append *default-headers* headers)])
(let ((headers (if (null? cookies)
headers
(append headers
(headers->alist (list cookies))))))
headers)))))
(define (encode t)
(cond
((number? t) (number->string t))
((bytes? t) (bytes->string/utf-8 t))
((string? t) t)
(else (error "No idea what ~s is" t))))
(define (join-headers headers)
(map
(λ (header)
(cond
((header? header)
(string-append (header-field header) ": " (header-value header)))
((pair? header)
(string-append (encode (car header)) ": " (encode (cdr header))))
((string? header) header)
(else (error "Don't know what ~s header is" header))))
headers))
(define (split-header header)
(filter
(λ (s) (> (string-length s) 0))
(regexp-split #rx"\r\n" header)))
(define (resolve-redirect next uri action input-header)
(λ (code type modified header input)
(if (and (> code 300) (< code 400))
(begin
(close-input-port input)
(raw-action action (string->url (extract-field "Location" header)) input-header next))
(next code type modified header input))))
(define (handle-errors next uri)
(λ (code type modified header input)
(if (>= code 400) (raise
(make-exn:fail:network
(format "There was an error fetching ~a ~a ~a" (url->string uri) code header)
(current-continuation-marks)))
(next code type modified header input))))
(define (raw-action action uri input-header next #:redirect? (redirect? #t) #:raise? (raise? #t))
(let ((next
(let ((next (if redirect? (resolve-redirect next uri action input-header) next)))
(if raise? (handle-errors next uri) next))))
(repeatedly
10
(format "Took too long to ~s ~s" action (url->string uri))
(λ (retry)
(call/input-url
uri
(λ (uri) (action uri (input-header)))
(λ (input)
(let ([header (purify-port input)])
(let-values ([(code message header) (get-code header)])
(if (= code 502) (retry) (next code
(extract-field "Content-Type" header) (decode-date (extract-field "Last-Modified" header))
header input))))))))))
(define (head uri (headers null) #:redirect? (redirect? #f))
(let/ec return
(raw-action head-impure-port uri (build-headers headers) #:redirect? redirect? #:raise? #f
(λ (code type modifie header input)
(return code header)))))
(define (get uri process [headers null])
(log:info "Headers ~s ~s" (url->string uri) headers)
(raw-action get-impure-port uri (build-headers uri headers) process))
(define (post uri data process (headers null))
(raw-action
(λ (uri header)
(post-impure-port uri data header))
uri (build-headers uri headers)
process))
(define progress (make-parameter #f))
(define (progress-copy-port length in out)
(let ((buffer (make-bytes 4096)))
(let loop ((total 0))
(let ((num (read-bytes! buffer in)))
(if (eof-object? num) num
(let ((total (+ total num)))
(write-bytes buffer out 0 num)
((progress) total length)
(loop total)))))))
(define-syntax-rule (aif test id yes no)
(let ((id test))
(if id yes no)))
(define (download-basic uri dest headers [exists 'replace])
(get uri
(λ (code type modified response-headers input)
(define length (aif (extract-field "Content-Length" response-headers) result (string->number result) 0))
(when (and
(eq? exists 'append)
(not (or (= code 206) (= code 200))))
(error (format "Ayieee trying to cobble file mutant bloog! ~a ~a ~a" uri code response-headers)))
(call-with-output-file dest #:exists exists
(λ (output)
(if (progress)
(progress-copy-port length input output)
(copy-port input output))))
(file-or-directory-modify-seconds dest (time-second (date->time-utc modified)))
(values type modified))
headers))
(define (download-partial uri dest header size)
(download-basic
uri
dest
(cons
(cons "Range" (format "bytes=~a-" size))
header)
'append))
(define (download uri dest [headers null])
(let ([input-header (build-headers uri headers)])
(let-values ([(uri first-header) (resolve-redirect uri input-header)])
(let-values ([(code message first-header) (get-code first-header)])
(cond
((= code 502)
(download uri dest headers))
((not (= code 200)) (error "Could not download ~a ~a" code first-header)))
(let ([ranges (extract-field "Accept-Ranges" first-header)]
[length (extract-field "Content-Length" first-header)])
(let ([length (if length (string->number length) #f)])
(if (and
(file-exists? dest)
ranges
(regexp-match #rx"bytes" ranges))
(let ([size (file-size dest)])
(if (and length (< size length))
(download-partial uri dest input-header size)
(download-basic uri dest input-header)))
(download-basic uri dest input-header))))))))
(provide/contract
(progress parameter?)
(head (->* (url?) ((listof (cons/c string? string? ) ) #:redirect? boolean?)
(values integer? string? )))
[get (->* (url? (-> integer? string? date? string? input-port? any) ) ((listof (cons/c string? string? )) ) any)]
(post (->* (url? bytes? (-> integer? string? date? string? input-port? any) ) ((listof (cons/c string? string? )) ) any))
[download (->* (url? path-string?) ((listof (cons/c string? string? )) ) (values string? date? ))])