#lang racket/base (require (prefix-in ff3: "ff3.rkt") (planet synx/util:2/repeatedly) (prefix-in log: (planet synx/log:1)) file/gunzip net/url net/head srfi/19 web-server/http racket/match racket/promise racket/port racket/contract) (current-proxy-servers '(("http" "localhost" 8123))) (define (decode-date raw) (if raw (string->date raw "~a, ~d ~b ~Y ~H:~M:~S GMT") (current-date))) (define (encode-date value) (date->string value "~a, ~d ~b ~Y ~H:~M:~S GMT")) (define (get-code uri header) (let ((match (regexp-match #rx"HTTP/1.[01] ([0-9]+) (.*)((?s:.*))" header))) (when (not match) (error (format "Could not find code in ~s" header))) (ff3:add-cookies! (url-host uri) header) (values (string->number (cadr match)) (caddr match) header))) (define *default-headers* '(("User-Agent" . "naughtyweasel"))) (define (headers->alist headers) (map (λ (header) (cons (header-field header) (header-value header))) headers)) (define (build-headers uri headers) (λ () (join-headers (let* ((cookies (ff3:format/cookie (ff3:get-cookies uri))) (headers (append *default-headers* headers))) (let ((headers (if (null? cookies) headers (append headers (headers->alist (list cookies)))))) headers))))) (define (encode t) (cond ((number? t) (number->string t)) ((bytes? t) (bytes->string/utf-8 t)) ((string? t) t) (else (error "No idea what ~s is" t)))) (define (join-headers headers) (map (λ (header) (cond ((header? header) (string-append (header-field header) ": " (header-value header))) ((pair? header) (string-append (encode (car header)) ": " (encode (cdr header)))) ((string? header) header) (else (error "Don't know what ~s header is" header)))) headers)) (define (split-header header) (filter (λ (s) (> (string-length s) 0)) (regexp-split #rx"\r " header))) (define (de-post-ify action) (if (or (eq? action get-impure-port) (eq? action head-impure-port)) action get-impure-port)) (define (resolve-redirect next uri action input-header) (λ (code message header input) (if (and (>= code 300) (< code 304)) (begin (close-input-port input) (let ((action (if (= code 303) action (de-post-ify action)))) (raw-action action (combine-url/relative uri (extract-field "Location" header)) input-header next))) (next code message header input)))) (define (handle-errors next uri) (λ (code message header input) (if (>= code 400) (raise (make-exn:fail:network (format "There was an error fetching ~a ~a ~a" (url->string uri) code header) (current-continuation-marks))) (next code message header input)))) (define (make-codec op) (lambda (input) (define-values (wrapped-input output) (make-pipe)) (thread (lambda () (dynamic-wind void (lambda () (op input output)) (lambda () (close-input-port input) (close-output-port output))))) wrapped-input)) (define unzip-input (make-codec gunzip-through-ports)) (define inflate-input (make-codec inflate)) (define (raw-action action uri input-header next #:redirect? (redirect? #t) #:raise? (raise? #t)) (let ((next (let ((next (if redirect? (resolve-redirect next uri action input-header) next))) (if raise? (handle-errors next uri) next)))) (repeatedly 10 (format "Took too long to ~s ~s" action (url->string uri)) (λ (retry) (call/input-url uri (λ (uri) (action uri (input-header))) (λ (input) (let ((header (purify-port input))) (let-values (((code message header) (get-code uri header))) (if (= code 502) (retry) (let* ((encoding (extract-field "Content-Encoding" header)) (input (match encoding ("gzip" (unzip-input input)) ("deflate" (inflate-input input)) (else input)))) (next code message header input))))))))))) (define (raw-head uri headers redirect?) (let/ec return (raw-action head-impure-port uri headers #:redirect? redirect? #:raise? #f (λ (code message header input) (return code header))))) (define (head uri (headers null) #:redirect? (redirect? #f)) (raw-head uri (build-headers uri headers) redirect?)) (define (get uri process #:headers (headers null)) (raw-action get-impure-port uri (build-headers uri headers) process)) (define (post uri data process (headers null)) (raw-action (λ (uri header) (post-impure-port uri data header)) uri (build-headers uri headers) process)) (define progress (make-parameter #f)) (define (progress-copy-port length in out) (let ((buffer (make-bytes 4096))) (let loop ((total 0)) (let ((num (read-bytes! buffer in))) (if (eof-object? num) num (let ((total (+ total num))) (write-bytes buffer out 0 num) ((progress) total length) (loop total))))))) (define-syntax-rule (aif test id yes no) (let ((id test)) (if id yes no))) (define (download-basic uri dest input-header (exists 'replace)) (raw-action get-impure-port uri input-header (λ (code message header input) (define length (aif (extract-field "Content-Length" header) result (string->number result) 0)) (when (and (eq? exists 'append) (not (or (= code 206) (= code 200)))) (error (format "Ayieee trying to cobble file mutant bloog! ~a ~a ~a" uri code header))) (call-with-output-file dest #:exists exists (λ (output) (if (progress) (progress-copy-port length input output) (copy-port input output)))) (let ((modified (decode-date (extract-field "Last-Modified" header)))) (file-or-directory-modify-seconds dest (time-second (date->time-utc modified))) (values (extract-field "Content-Type" header) modified))))) (define (download-partial uri dest header size) (download-basic uri dest (λ () (cons (string-append "Range: " (format "bytes=~a-" size)) (header))) 'append)) (define (download uri dest (headers null)) (let ((input-header (build-headers uri headers))) (let-values (((code first-header) (raw-head uri input-header #t))) (when (not (= code 200)) (error "Could not get the head for ~a ~a ~a" (url->string uri) code first-header)) (let ((ranges (extract-field "Accept-Ranges" first-header)) (length (extract-field "Content-Length" first-header))) (let ((length (if length (string->number length) #f))) (if (and (file-exists? dest) ranges (regexp-match #rx"bytes" ranges)) (let ((size (file-size dest))) (if (and length (< size length)) (download-partial uri dest input-header size) (download-basic uri dest input-header))) (download-basic uri dest input-header))))))) (define handler/c (-> integer? string? string? input-port? any)) (define input-header/c (listof (cons/c string? string?))) (define prepared-header/c (-> (listof string?))) (provide/contract (handler/c contract?) (input-header/c contract?) (decode-date (-> (or/c bytes? false/c) date?)) (encode-date (-> date? string?)) (build-headers (-> url? input-header/c prepared-header/c)) (raw-action (->* ((-> url? prepared-header/c input-port?) url? prepared-header/c handler/c) (#:redirect? boolean? #:raise? boolean?) any)) (progress parameter?) (head (->* (url?) (input-header/c #:redirect? boolean?) (values integer? string?))) (get (->* (url? handler/c) (#:headers input-header/c) any)) (post (->* (url? bytes? handler/c) (input-header/c) any)) (download (->* (url? path-string?) (input-header/c) (values string? date?))))