#lang racket/base ;; Copyright Neil Van Dyke. See file "info.rkt". ;; TODO: This code is some old code that is still evolving.... ;; TODO: !!! document this. regarding test failures, according to ;; http://tools.ietf.org/html/rfc3986#section-3.5 , fragment identifiers can ;; contain "/" and "?", but that might break some other code, so we will encode ;; those. ;; TODO: !!! update for IETF @uref{STD66, ;; ftp://ftp.rfc-editor.org/in-notes/std/std66.txt} (also RFC3986) (require (planet neil/mcfly)) (module+ test (require (planet neil/overeasy:3:0))) (define-logger uri) (doc (section "Introduction") (para (italic "Note: This package is in the middle of an API overhaul, but is useful in its current form. A later version will have some API changes, including use of optional keyword arguments.")) (para "This Racket " (bold "uri") " package implements parsing, representation, and transforming of Web Uniform Resource Identifiers (URI), which includes Uniform Resource Locators (URL) and Uniform Resource Names (URN). It supports absolute and relative URIs and URI references. This library does " (italic "not") " implement features for using URI to retrieve objects from the Web, such as making HTTP requests.") (para (hyperlink "http://www.ietf.org/rfc/rfc3305.txt" "RFC 2396") " was the principal reference used for this implementation. Earlier versions were informed by other RFCs, including " (hyperlink "http://www.ietf.org/rfc/rfc2396.txt" "RFC 2396") " and " (hyperlink "http://www.ietf.org/rfc/rfc2732.txt" "RFC 2732") ".") (para "Goals of this package are correctness, efficiency, and power.")) ;; Character Portability and Utilities: (define (%hex-char->integer c) ;; TODO: Could change to hash or arithmetic (case c ((#\0) 0) ((#\1) 1) ((#\2) 2) ((#\3) 3) ((#\4) 4) ((#\5) 5) ((#\6) 6) ((#\7) 7) ((#\8) 8) ((#\9) 9) ((#\A #\a) 10) ((#\B #\b) 11) ((#\C #\c) 12) ((#\D #\d) 13) ((#\E #\e) 14) ((#\F #\f) 15) (else #f))) (module+ test (test-section '%hex-char->integer (test (%hex-char->integer #\/) #f) (test (%hex-char->integer #\0) 0) (test (%hex-char->integer #\1) 1) (test (%hex-char->integer #\2) 2) (test (%hex-char->integer #\3) 3) (test (%hex-char->integer #\4) 4) (test (%hex-char->integer #\5) 5) (test (%hex-char->integer #\6) 6) (test (%hex-char->integer #\7) 7) (test (%hex-char->integer #\8) 8) (test (%hex-char->integer #\9) 9) (test (%hex-char->integer #\:) #f) (test (%hex-char->integer #\@) #f) (test (%hex-char->integer #\A) 10) (test (%hex-char->integer #\B) 11) (test (%hex-char->integer #\C) 12) (test (%hex-char->integer #\D) 13) (test (%hex-char->integer #\E) 14) (test (%hex-char->integer #\F) 15) (test (%hex-char->integer #\G) #f) (test (%hex-char->integer #\`) #f) (test (%hex-char->integer #\a) 10) (test (%hex-char->integer #\b) 11) (test (%hex-char->integer #\c) 12) (test (%hex-char->integer #\d) 13) (test (%hex-char->integer #\e) 14) (test (%hex-char->integer #\f) 15) (test (%hex-char->integer #\g) #f))) (define (%two-hex-char->ascii-char str k) ;; TODO: We could use a hash... (integer->char (+ (* 16 (%hex-char->integer (string-ref str k))) (%hex-char->integer (string-ref str (+ 1 k)))))) ;; Immutability Portability: ;; TODO: The immutability stuff is old, and most of it is probably overkill for ;; Racket. (define (%take-i lst i) (if (< i 1) '() (let loop ((lst lst) (i i)) (if (null? lst) '() (cons (car lst) (if (< i 2) '() (loop (cdr lst) (- i 1)))))))) ;; TODO: We had this commented-out before moving the antiresolution code back ;; in... (define (%take-i/append list-a copy-len tail-list) (if (< copy-len 1) tail-list (let loop ((copy-len (- copy-len 1)) (list-a list-a)) (if (null? list-a) tail-list (cons (car list-a) (if (< copy-len 1) tail-list (loop (- copy-len 1) (cdr list-a)))))))) (define (%list->list-i lst) ;; Note: currently only works with proper, non-cyclic lists. (let loop ((rest lst)) (cond ((null? rest) lst) ((pair? rest) (if (immutable? rest) (loop (cdr rest)) (let loop ((rest lst)) (if (null? rest) '() (cons (car rest) (loop (cdr rest))))))) (else (error "not a proper list" lst))))) (define %map-i map) ;; (define (%map-i proc . lsts) ;; (%list->list-i (apply map proc lsts))) (define (%reverse-i lst) (let loop ((new-head '()) (old-rest lst)) (if (null? old-rest) new-head (loop (cons (car old-rest) new-head) (cdr old-rest))))) (define %string->string-i string->immutable-string) (define (%append-i . args) (if (null? args) '() ;; TODO: make sure final arg is immutable? (let loop ((head (car args)) (rest (cdr args))) (cond ((null? rest) head) ((null? head) (loop (car rest) (cdr rest))) (else (let loop2 ((head2 (car head)) (rest2 (cdr head))) (cons head2 (if (null? rest2) (loop (car rest) (cdr rest)) (loop2 (car rest2) (cdr rest2)))))))))) (define (%string-append-i . args) (%string->string-i (apply string-append args))) (define (%string-i . args) (%string->string-i (apply string args))) (define (%substring-i str start-k end-k) (%string->string-i (if (and (= start-k 0) (or (not end-k) (= end-k (string-length str)))) str (if end-k (substring str start-k end-k) (substring str start-k))))) (define (%get-output-string-i . args) (%string->string-i (apply get-output-string args))) ;; Regexp Portability and Utilities: (define (%rxpos-str pos str) (and pos (substring str (car pos) (cdr pos)))) (define (%rxpos-lowstr pos str) (let ((s (%rxpos-str pos str))) (if s (string-downcase s) #f))) (define (%rxpos-lowsym pos str) (let ((s (%rxpos-lowstr pos str))) (if s (string->symbol s) s))) ;; (define (%rxpos-num pos str) ;; (let ((s (%rxpos-str pos str))) ;; (if s (string->number s) #f))) (define %rxpos-unescape-num (let ((rx "^[0-9]+$")) (lambda (pos str) (if pos (let ((s (uri-unescape str (car pos) (cdr pos)))) (if (regexp-match rx s) (string->number s) #f)) #f)))) ;; (define (%rxpos-str-i pos str) ;; (if pos (%substring-i str (car pos) (cdr pos)) #f)) (define (%rxpos-unescape-str-i pos str) (if pos (uri-unescape-i str (car pos) (cdr pos)) #f)) ;; (define (%rxpos-lowstr-i pos str) ;; (if pos ;; (%string->string-i ;; (string-downcase (substring str (car pos) (cdr pos)))) ;; #f)) (define (%rxpos-unescape-lowstr-i pos str) (if pos (%string->string-i (string-downcase (%uri-unescape/shared-ok str (car pos) (cdr pos)))) #f)) (define (%rxpos-subtract pos offset) (if (and pos offset (not (zero? offset))) (cons (- (car pos) offset) (- (cdr pos) offset)) pos)) ;; (define (%rxpos->rxpos-i pos) ;; (if pos ;; (cons (car pos) (cdr pos)) ;; #f)) (define (%make-rx-replace-map-posns empty-string os-result-proc substring-result-proc orig-result-proc) (let ((fixed (lambda (rx str proc start end) (let ((end (let ((len (string-length str))) (if (and end (<= end len)) end len)))) (if (>= start end) empty-string (let ((os #f)) (let loop ((beg start)) (let ((rxmatch (regexp-match-positions rx str beg end))) (if rxmatch (begin (or os (set! os (open-output-string))) (let ((skip-end (car (car rxmatch)))) (and (> skip-end beg) (display (substring str beg skip-end) os))) (display (apply proc rxmatch) os) (let ((new-beg (cdr (car rxmatch)))) (and (< new-beg end) (loop new-beg)))) (and os (display (substring str beg end) os))))) (cond (os => os-result-proc) ((or (not (zero? start)) end) (substring-result-proc str start end)) (else (orig-result-proc str))))))))) (case-lambda ((rx str proc start end) (fixed rx str proc start end)) ((rx str proc start) (fixed rx str proc start #f)) ((rx str proc) (fixed rx str proc 0 #f))))) (define %rx-replace-map-posns-nm (%make-rx-replace-map-posns "" get-output-string substring string-copy)) (define %rx-replace-map-posns-i (%make-rx-replace-map-posns "" %get-output-string-i %substring-i %string->string-i)) (define %rx-replace-map-posns/shared-ok (%make-rx-replace-map-posns "" get-output-string substring (lambda (n) n))) (define (%make-rx-replacers/nm-i-so rx proc) (let ((make-proc (lambda (rx-replace-map-posns) (let ((fixed (lambda (str start end) (rx-replace-map-posns rx str (lambda args (apply proc str args)) start end)))) (case-lambda ((str start end) (fixed str start end)) ((str start) (fixed str start #f)) ((str) (fixed str 0 #f))))))) (values (make-proc %rx-replace-map-posns-i) (make-proc %rx-replace-map-posns-nm) (make-proc %rx-replace-map-posns/shared-ok)))) (define-syntax %with-rx-match-positions (syntax-rules (else) ((_ RMP MATCHED) (%with-rx-match-positions RMP MATCHED (else (error "regexp match failed")))) ((_ (RMP-ARGS ...) ((VARS ...) MATCHED-EXPS ...) (else ELSE-EXPS ...)) (let ((rxmatch (regexp-match-positions RMP-ARGS ...))) (if rxmatch (apply (lambda (VARS ...) MATCHED-EXPS ...) rxmatch) (begin ELSE-EXPS ...)))))) (define %rxpos-tag (string->symbol "*%rxpos*")) (define (%make-tagged-rxpos rxpos) (cons %rxpos-tag rxpos)) (define (%tagged-rxpos? v) (and (pair? v) (eq? (car v) %rxpos-tag))) ;; (define (%tagged-rxpos-values-or-value v proc) ;; (if (%tagged-rxpos? v) ;; (proc (cadr v) (cddr v)) ;; v)) (define (%rxpos-subtract-and-tag pos offset) (%make-tagged-rxpos (%rxpos-subtract pos offset))) (define-syntax %uri-struct-rxpos-field-proc (syntax-rules () ((_ GET-PROC PARSE-PROC SET-PROC) (lambda (uri) ;; Note: Tempting to add a uri-string->uri-struct here, but ;; probably doesn't belong here, in case any of the procs want to ;; to work with a possible string form. (let ((v (GET-PROC uri))) (if (%tagged-rxpos? v) (let ((pos (cdr v))) ;; TODO: This is kinda weird to be setting the value to #f. We ;; have to make sure that all the parsed value representations ;; are OK with that. We should probably have a "/pos" form of ;; the procedures instead. (let ((o (if pos (PARSE-PROC (uri->string uri) (car pos) (cdr pos)) #f))) (SET-PROC uri o) o)) v)))))) ;; Utilities: (doc (section "Escaping and Unescaping") (para "Several procedures to support escaping and unescaping of URI component strings, as described in [RFC2396 sec. 2.4], are provided. Also provided are escaping and unescaping procedures that also support " (tt "+") " as an encoding of a space character, as is used in some HTTP encodings of HTML forms.") (para "These procedures have multiple variants, concerning mutability and sharing of the strings they yield, with following the naming convention:") (itemlist (item (tt (italic "foo")) " --- If the output would be equal to the input, might yield the identical input string rather than a copy. Output might or might not be mutable.") (item (nonbreaking (tt (italic "foo") "-i")) " --- Always yields an immutable string.") (item (nonbreaking (tt (italic "foo") "-nm")) " --- Always yields a new, mutable string.")) (para "Many applications will not call these procedures directly, since most of this library's interface automatically escapes and unescapes strings as appropriate.")) (doc (subsection "Escaping")) (doc (defproc* (((uri-escape (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ((uri-escape-i (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) immutable-string?) ((uri-escape-nm (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ) (para "Yields a URI-escaped encoding of string " (racket str) ". If " (racket start) " and " (racket end) " are given, then they designate the substring of " (racket str) " to use. All characters are escaped, except alphanumerics, minus, underscore, period, and tilde. For example.") (racketinput (uri-escape "a = b/c + d") #,(racketresult "a%20%3D%20b%2Fc%20%2B%20d")))) (provide uri-escape-i uri-escape-nm uri-escape) (define-values (uri-escape-i uri-escape-nm uri-escape) (%make-rx-replacers/nm-i-so #rx"[^-_.~a-zA-Z0-9]" (lambda (str pos) (char->uri-escaped-string (string-ref str (car pos)))))) (module+ test (test-section 'uri-escape (test (uri-escape "") "") (test (uri-escape "a b") "a%20b") (test (uri-escape "a b c") "a%20b%20c") (test (uri-escape "aaa bbb ccc") "aaa%20bbb%20ccc") (test (uri-escape " a ") "%20a%20") (test (uri-escape "a/b") "a%2Fb") (test (uri-escape "a%b") "a%25b"))) (define %uri-escape/shared-ok uri-escape) (doc (defproc* (((uri-plusescape (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ((uri-plusescape-i (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) immutable-string?) ((uri-plusescape-nm (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ) (para "Like " (racket uri-escape) ", except encodes space characters as " (tt "+") " instead of " (racket "%20") ". This should generally only be used to mimic the encoding some Web browsers do of HTML form values. For example:") (racketinput (uri-plusescape "a = b/c + d") #,(racketresult "a+%3D+b%2Fc+%2B+d")))) (provide uri-plusescape-i uri-plusescape-nm uri-plusescape) (define-values (uri-plusescape-i uri-plusescape-nm uri-plusescape) (%make-rx-replacers/nm-i-so #rx"( )|[^-_.~a-zA-Z0-9 ]" (lambda (str pos space-pos) (if space-pos "+" (char->uri-escaped-string (string-ref str (car pos))))))) (module+ test (test-section 'uri-plusescape (test (uri-plusescape "") "") (test (uri-plusescape "a b") "a+b") (test (uri-plusescape "a b c") "a+b+c") (test (uri-plusescape "aaa bbb ccc") "aaa+bbb+ccc") (test (uri-plusescape " a ") "+a+") (test (uri-plusescape "a/b") "a%2Fb") (test (uri-plusescape "a%b") "a%25b"))) (doc (subsection "Unescaping")) (doc (defproc* (((uri-unescape (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ((uri-unescape-i (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) immutable-string?) ((uri-unescape-nm (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ) (para "Yields an URI-unescaped string from the encoding in string " (racket str) ". If " (racket start) " and " (racket end) " are given, then they designate the substring of " (racket str) " to use. For example:") (racketinput (uri-unescape "a%20b+c%20d") #,(racketresult "a b+c d")))) (provide uri-unescape-i uri-unescape-nm uri-unescape) (define-values (uri-unescape-i uri-unescape-nm uri-unescape) (%make-rx-replacers/nm-i-so #rx"%([0-9a-fA-F][0-9a-fA-F])?" (lambda (str pos hex-pos) (if hex-pos (%two-hex-char->ascii-char str (car hex-pos)) ;; TODO: Benchmark above against: ;; (integer->char ;; (string->number (substring str (car hex-pos) (cdr hex-pos)) 16)) "%")))) (module+ test (test-section 'uri-unescape (test (uri-unescape "") "") (test (uri-unescape "%") "%") (test (uri-unescape "%0") "%0") (test (uri-unescape "%1") "%1") (test (uri-unescape "a%20b") "a b") (test (uri-unescape "a%20b%20c") "a b c") (test (uri-unescape "aaa%20bbb%20ccc") "aaa bbb ccc") (test (uri-unescape "%20a%20") " a ") (test (uri-unescape "a%2Fb") "a/b") (test (uri-unescape "a%2fb") "a/b") (test (uri-unescape "a%25b") "a%b"))) (define %uri-unescape/shared-ok uri-unescape) (doc (defproc* (((uri-unplusescape (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?) ((uri-unplusescape-i (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) immutable-string?) ((uri-unplusescape-nm (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) string?)) (para "Like " (racket uri-unescape) ", but also decodes the plus (" (tt "+") ") character to space character. For example:") (racketinput (uri-unplusescape "a%20b+c%20d") #,(racketresult "a b c d")))) (provide uri-unplusescape-i uri-unplusescape-nm uri-unplusescape) (define-values (uri-unplusescape-i uri-unplusescape-nm uri-unplusescape) (%make-rx-replacers/nm-i-so #rx"(\\+)|%([0-9a-fA-F][0-9a-fA-F])?" (lambda (str pos plus-pos hex-pos) (cond (plus-pos " ") (hex-pos (%two-hex-char->ascii-char str (car hex-pos))) ;; TODO: Benchmark above against: ;; (hex-pos ;; (integer->char ;; (string->number (substring str (car hex-pos) (cdr hex-pos)) 16) ;; 16)) (else "%"))))) ;; TODO: !!! TESTS (doc (defproc* (((char->uri-escaped-string (chr character?)) string?) ((char->uri-escaped-string-i (chr character?)) string?)) (para "Yields a URI-escaped string of character " (racket chr) ". For example:") (racketinput (char->uri-escaped-string #\/) #,(racketresult "%2F")))) (provide char->uri-escaped-string char->uri-escaped-string-i) (define-values (char->uri-escaped-string char->uri-escaped-string-i) (let* ((hex #(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\A #\B #\C #\D #\E #\F)) (make-proc (lambda (string) (lambda (chr) (let ((n (char->integer chr))) (string #\% (vector-ref hex (quotient n 16)) (vector-ref hex (remainder n 16)))))))) (values (make-proc string) (make-proc %string-i)))) (doc (section "URI Representation") (para "This package can deal with URI expressed in one of two forms: a special " (racket uri) " object, and a string. In this way, it is similar to the Racket " (racket path) " object and operations. The support for strings is as a convenience for programming. In general, procedures that return a URI will do so as a " (racket uri) " object, even if provided strings as arguments.")) (define (%uri-struct-write-proc record port write?) (if write? (begin (write-string "#uri" port) (write (uri->string record) port)) (begin (write-string (uri->string record) port) (void)))) (define-struct uri-struct ( (string #:mutable) (scheme #:mutable) (opaque-k #:mutable) ;; TODO: !!! get rid of opaque-k and pound-k and make it immutable? or have we already covered that with accessors? (pound-k #:mutable) (authority #:mutable) (path #:mutable) (query #:mutable) ) #:methods gen:custom-write ((define write-proc %uri-struct-write-proc))) ;; @subsection Predicate ;; @defproc uri? v ;; ;; !!! ;; !!! define uri? predicate (doc (defproc (uri? (x any/c)) boolean? (para "Predicate for whether " (racket x) " is a " (racket uri) " object."))) (provide (rename-out (uri-struct? uri?))) (doc (defproc (uri-string? (x any/c)) boolean? (para "Predicate for whether " (racket x) " is either a " (racket uri) " object or a string."))) (provide uri-string?) (define (uri-string? x) (or (uri-struct? x) (string? x))) (provide uri->string) (define (uri->string uri) (uri-struct-string uri)) (doc (subsection "Converting Strings to URI Objects")) ;; TODO: Add URI extraction from strings, and distinguish from string ;; conversion. The normal *string*->uri* procedures maybe shouldn't strip ;; leading and trailing garbage. Have "extract-uri" and "extract-uri" ;; procedures? (doc (defproc* (((string->uri (str string?)) uri?) ((string/base->uri (str string?) (base uri-string?)) uri?)) (para "!!!") (para "Note that the value of " (racket (uri->string (string->uri #,(italic "S")))) " will " (italic "not") " always be equal to " (racket #,(italic "S")) "."))) (provide string->uri) (define (string->uri str) (substring->uri str 0 #f)) (provide string/base->uri) (define (string/base->uri str base-uri) (substring/base->uri str 0 #f (if base-uri (uri-string->uri base-uri) #f))) (doc (defproc* ( ((substring->uri (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?)) uri?) ((substring/base->uri (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?)) uri?) ) (para "!!!"))) (provide substring->uri) (define substring->uri (let ((rx (regexp (string-append "^[ \t<]*" "(" ; <1 used "(?:" ; <: scheme-colon "([a-zA-Z][-+.a-zA-Z0-9]*)" ; =2 scheme ":)?" ; >: scheme-colon "(?://" ; <: slashslash-authority "([^<>?/#]*)" ; =3 authority ")?" ; >: slashslash-authority "([^<>?#]*)" ; =4 path "(?:\\?" ; <: question-query "([^<>#]*)" ; =5 query ")?" ; >: question-query "(#[^>]*)?" ; =6 pound-fragment ")" ; >1 used )))) (lambda (str start end) (%with-rx-match-positions (rx str start end) ((whole used scheme authority path query pound-fragment) (let ((used-start (car used))) (make-uri-struct ;; string (%substring-i str used-start (cdr used)) ;; scheme (%rxpos-lowsym scheme str) ;; opaque-k (if scheme (+ 1 (cdr (%rxpos-subtract scheme used-start))) 0) ;; pound-k (and pound-fragment (car (%rxpos-subtract pound-fragment used-start))) ;; auth, path, query (%rxpos-subtract-and-tag authority used-start) (%rxpos-subtract-and-tag path used-start) (%rxpos-subtract-and-tag query used-start)))))))) (provide substring/base->uri) (define (substring/base->uri str start end base-uri) ;; TODO: Make a version of this that doesn't call substring->uri, ;; which will save some allocations in a potentially much-called ;; procedure (for example, parsing an HTML Web page with 1000 URLs while ;; resolving against a known base URI). (let ((uri (substring->uri str start end))) (if base-uri (resolve-uri uri base-uri) uri))) (define (%uri-string+error-name->uri uri error-name) (cond ((uri-struct? uri) uri) ((string? uri) (string->uri uri)) (else (raise-type-error error-name "uri-string?" uri)))) ;; TODO: Convert internal uses of uri-string->uri to uri-string+error-name->uri (doc (defproc (uri-string->uri (uri uri-string?)) uri? (para "!!!"))) (provide uri-string->uri) (define (uri-string->uri uri) (%uri-string+error-name->uri uri 'uri-string->uri)) (doc (subsection "Writing URIs to Ports and Converting URIs to Strings")) (doc (defproc* (((display-uri (uri uri-string?) (port output-port? (current-output-port))) void?) ((display-uri/nofragment (uri uri-string?) (port output-port? (current-output-port))) void?)) (para "Displays " (racket uri) " to output port " (racket port) ". For example:") (racketinput (display-uri "http://s/foo#bar")) (nested #:style 'inset (racketoutput "http://s/foo#bar")) (racketinput (display-uri/nofragment "http://s/foo#bar")) (nested #:style 'inset (racketoutput "http://s/foo")))) (provide display-uri) (define (display-uri uri (port (current-output-port))) (display uri port)) (provide display-uri/nofragment) (define (display-uri/nofragment uri (port (current-output-port))) ;; TODO: Do a faster and simpler version, using pound-k and substring? (display (uri->string/nofragment uri) port)) (doc (defproc (uri->string (uri uri-string?)) uri? (para "Yields the full string representation of URI " (racket uri) ". Of course this is not needed when using only the string representation of URI, but using this procedure in libraries permits the " (racket uri) " to also be used. For example:") (racketinput (define my-uri (string->uri "http://www/"))) (racketinput my-uri #,(racketresult #,(elem "#uri\"http://www/\""))) (racketinput (uri->string my-uri) #,(racketresult "http://www/")))) (provide uri->string/nofragment) (define (uri->string/nofragment uri) (let ((pound-k (uri-struct-pound-k uri))) (if pound-k (%substring-i (uri->string uri) 0 pound-k) (uri->string uri)))) (doc (section "URI Schemes") (para "URI schemes are currently represented as lowercase Racket symbols and associated data.")) (doc (defproc (urischeme? (x any/c)) boolean? (para "Predicate for whether or not " (racket x) " is a " (racket urischeme) "."))) (provide urischeme?) (define (urischeme? x) (symbol? x)) (doc (deftogether ((defthing ftp-urischeme urischeme?) (defthing gopher-urischeme urischeme?) (defthing http-urischeme urischeme?) (defthing https-urischeme urischeme?) (defthing imap-urischeme urischeme?) (defthing ipp-urischeme urischeme?) (defthing news-urischeme urischeme?) (defthing nfs-urischeme urischeme?) (defthing telnet-urischeme urischeme?)) (para "Some common URI scheme symbols. (These are not so useful; they were in an earlier version of this library that needed to work with Scheme implementations with case-insensitive readers.)")) (racketinput ftp-urischeme #,(racketresult ftp))) (provide ftp-urischeme) (define ftp-urischeme (string->symbol "ftp")) (provide gopher-urischeme) (define gopher-urischeme (string->symbol "gopher")) (provide http-urischeme) (define http-urischeme (string->symbol "http")) (provide https-urischeme) (define https-urischeme (string->symbol "https")) (provide imap-urischeme) (define imap-urischeme (string->symbol "imap")) (provide ipp-urischeme) (define ipp-urischeme (string->symbol "ipp")) (provide news-urischeme) (define news-urischeme (string->symbol "news")) (provide nfs-urischeme) (define nfs-urischeme (string->symbol "nfs")) (provide telnet-urischeme) (define telnet-urischeme (string->symbol "telnet")) (doc (defproc (uri-scheme (uri uri-string?)) (or/c urischeme? #f) (para "Yields the URI scheme of " (racket uri) ", or " (racket #f) " if none can be determined. For example:") (racketinput (uri-scheme "Http://www") #,(racketresult http)))) (provide uri-scheme) (define (uri-scheme uri) (uri-struct-scheme uri)) (define %hierarchical-schemes (list ftp-urischeme gopher-urischeme http-urischeme https-urischeme imap-urischeme ipp-urischeme nfs-urischeme)) ;; ;; @defproc uri-with-scheme uri urischeme @result{} string ;; ;; ;; ;; TODO: in the current version oo this library, just does a string ;; ;; replacement of the uri scheme. no sensitivity to default port numbers ;; ;; of uri schemes (partly because we are getting into territory where we ;; ;; need to know whether an authority is a server authority. ;; ;; (define (uri-with-scheme uri urischeme) ;; ;; TODO: Just do a string operation here, if it's a string for which we ;; ;; don't have a uri. ;; ;; ;; ;; TODO: Maybe force urischeme to a valid one, so that they can use quoted ;; ;; symbols in a case-insensitive Racket implementation safely. ;; (uri->string (uri-with-scheme (uri-string->uri uri) urischeme))) ;; (test ;; (uri-with-scheme "http://w/" https-uri-scheme) ;; "https://w/") ;; ;; (test ;; (uri-with-scheme "http://w:80/" https-uri-scheme) ;; "https://w/") ;; ;; (test ;; (uri-with-scheme "http://w:8080/" https-uri-scheme) ;; "https://w:8080/") ;; "imap://minbari.org/gray-council;UIDVALIDITY=385759045/;UID=20" ;; "imap://michael@minbari.org/users.*;type=list" ;; "imap://psicorp.org/~peter/%E6%97%A5%E6%9C%AC%E8%AA%9E/%E5%8F%B0%E5%8C%97" ;; "imap://;AUTH=KERBEROS_V4@minbari.org/gray-council/;uid=20/;section=1.2" ;; "imap://;AUTH=*@minbari.org/gray%20council?SUBJECT%20shadows" (define %urischeme-to-default-portnum-alist (list (cons ftp-urischeme 21) (cons gopher-urischeme 70) (cons http-urischeme 80) (cons https-urischeme 443) (cons imap-urischeme 143) (cons ipp-urischeme 631) (cons news-urischeme 119) (cons nfs-urischeme 2049) (cons telnet-urischeme 23))) ;; TODO: Introduce a parameter for an ADT for default port numbers ;; @defproc register-urischeme-default-portnum sym portnum ;; ;; Registers integer @var{portnum} as the default port number for the server ;; authority component of URI scheme @var{sym}. ;; ;; @lisp ;; (define x-foo-urischeme (string->symbol "x-foo")) ;; (register-urischeme-default-portnum x-foo-urischeme 007) ;; (register-urischeme-default-portnum x-foo-urischeme 666) ;; @error{} cannot change uri scheme default portnum: x-foo 7 666 ;; @end lisp ;; ;; (provide register-urischeme-default-portnum) ;; (define (register-urischeme-default-portnum urischeme portnum) ;; ;; TODO: Ideally, we would have a mutex lock around the list. ;; ;; But wait til we make it a hashtable with other urischeme data. ;; (let ((old-portnum (urischeme-default-portnum urischeme))) ;; (if old-portnum ;; (and (not (equal? portnum old-portnum)) ;; (error "cannot change uri scheme default portnum:" ;; urischeme ;; old-portnum ;; portnum)) ;; (set! %urischeme-to-default-portnum-alist ;; (cons (cons urischeme portnum) ;; %urischeme-to-default-portnum-alist))))) ;; @defproc register-urischeme-hierarchical sym ;; ;; Registers URI scheme @var{sym} as having a ``hierarchical'' form as ;; described in [RFC2396 sec. 3]. ;; (provide register-urischeme-hierarchical) ;; (define (register-urischeme-hierarchical urischeme) ;; ;; TODO: Mutex it, although pretty benign if we don't. ;; (if (memq urischeme %hierarchical-schemes) ;; (void) ;; (begin (set! %hierarchical-schemes ;; (cons urischeme %hierarchical-schemes)) ;; (void)))) (doc (defproc (uri-with-scheme (uri uri-string?) (urischeme urischeme?)) uri? (para "!!!"))) (provide uri-with-scheme) (define (uri-with-scheme uri urischeme) ;; TODO: what about port numbers on URIs with server authorities? we should ;; at least double check that if we have the default port number under old ;; scheme, we have the default under the new scheme (we currently do, but ;; errors, or a default scheme registered after parsing might break that). ;; we should also decide what it means when the server authority has a ;; non-default port number for the old scheme -- just keep it, probably. ;; Actually, this procedure mainly makes sense only for URIs that don't ;; already have schemes. (if (eq? urischeme (uri-struct-scheme uri)) uri ;; TODO: !!! we need to remove default port when we start with "//www:80", which does not have a scheme, ;; and go to "http://www", which does have a scheme. ;; But this is only for the string representation. So maybe we shouldn't ;; do the dumb string operations below if we are going to a hierarchical scheme. !!! (string->uri (let ((old-str (uri->string uri)) (opaque-k (uri-struct-opaque-k uri))) (if (zero? opaque-k) (%string-append-i (urischeme->string urischeme) ":" old-str) (%string-append-i (urischeme->string urischeme) (substring old-str (- opaque-k 1) (string-length old-str)))))))) (doc (defproc* (((string->urischeme (str string?)) urischeme?) ((symbol->urischeme (sym symbol?)) urischeme?)) (para "!!!"))) (provide string->urischeme) (define (string->urischeme str) (string->symbol (string-downcase str))) (provide symbol->urischeme) (define (symbol->urischeme sym) (string->symbol (string-downcase (symbol->string sym)))) (doc (defproc (urischeme->string (urischeme urischeme?)) string? (para "!!!"))) (provide urischeme->string) (define (urischeme->string urischeme) (symbol->string urischeme)) (doc (defproc (urischeme-hierarchical? (urischeme urischeme?)) urischeme? (para "!!!"))) (provide urischeme-hierarchical?) (define (urischeme-hierarchical? urischeme) (and (memq urischeme %hierarchical-schemes) #true)) (doc (defproc (urischeme-default-portnum (urischeme urischeme)) exact-nonnegative-integer? (para "!!!"))) (provide urischeme-default-portnum) (define (urischeme-default-portnum urischeme) (let ((pair (assq urischeme %urischeme-to-default-portnum-alist))) (if pair (cdr pair) #f))) (doc (subsection "URI Reference Fragment Identifiers")) (doc (defproc* (((uri-fragment (uri uri-string?)) (or/c #f string?)) ((uri-fragment/escaped (uri uri-string?)) (or/c #f string?))) (para "Yields the fragment identifier component of URI (or URI reference) " (racket uri) " as a string, or " (racket #f) " if there is no fragment. " (racket uri-fragment) " yields the fragment in unescaped form, and " (racket uri-fragment/escaped) " yields an escaped form in the unusual case that is desired. For example:") (racketinput (uri-fragment "foo#a%20b") #,(racketresult "a b")) (racketinput (uri-fragment/escaped "foo#a%20b") #,(racketresult "a%20b")))) (provide uri-fragment) (define (uri-fragment uri) ;; Note: We make this always immutable because some strings we yield will be ;; immutable, so we should be consistent. (let* ((uri (uri-string->uri uri)) (pound-k (uri-struct-pound-k uri))) (if pound-k (uri-unescape-i (uri->string uri) (+ 1 pound-k)) #f))) (provide uri-fragment/escaped) (define (uri-fragment/escaped uri) ;; TODO: Do we want to escape better if the original fragment wasn't? (let* ((uri (uri-string->uri uri)) (pound-k (uri-struct-pound-k uri))) (if pound-k (%substring-i (uri->string uri) (+ 1 pound-k) #f) #f))) (doc (defproc (uri-without-fragment (uri uri-string?)) uri? (para "Yields " (racket uri) " without the fragment component. For example:") (racketinput (uri-without-fragment "http://w/#bar") #,(racketresult #,(elem "#uri\"http://w/\""))))) (provide uri-without-fragment) (define (uri-without-fragment uri) (uri-with-fragment uri #f)) (doc (defproc* (((uri-with-fragment (uri uri-string?) (fragment (or/c #f string?))) uri?) ((uri-with-fragment/escaped (uri uri-string?) (fragment (or/c #f string?))) uri?)) (para "Yields a URI that is like " (racket uri) " except with the fragment " (racket fragment) " (or no fragment, if " (racket fragment) " is " (racket #f) "). For example:") (racketinput (uri-with-fragment "http://w/" "foo") #,(racketresult "http://w/#foo")) (racketinput (uri-with-fragment "http://w/#foo" "bar") #,(racketresult "http://w/#bar")) (racketinput (uri-with-fragment "http://w/#bar" #f) #,(racketresult "http://w/")) (para "The " (racket uri-with-fragment/escaped) " variant can be used when the desired fragment string is already in uri-escaped form:") (racketinput (uri-with-fragment "foo" "a b") #,(racketresult "foo#a%20b")) (racketinput (uri-with-fragment/escaped "foo" "a%20b") #,(racketresult "foo#a%20b")))) (provide uri-with-fragment) (define (uri-with-fragment uri fragment) (uri-with-fragment/escaped uri (if fragment (uri-escape-i fragment) #f))) (module+ test (test-section 'uri-with-fragment (test (uri->string (uri-with-fragment (string->uri "http://www/foo#bar") "x y z")) "http://www/foo#x%20y%20z"))) (provide uri-with-fragment/escaped) (define (uri-with-fragment/escaped uri fragment) ;; TODO: Don't use string->uri and friends here. Or at least copy over ;; any parsed server, path, and query info. (let ((old-pound-k (uri-struct-pound-k uri))) (if fragment (if old-pound-k (if (equal? (uri-fragment/escaped uri) fragment) uri (string->uri (%string-append-i (uri->string/nofragment uri) "#" fragment))) (string->uri (%string-append-i (uri->string uri) "#" fragment))) (if old-pound-k (string->uri (uri->string/nofragment uri)) uri)))) (module+ test (test-section 'uri-with-fragment/escaped (test (uri->string (uri-with-fragment/escaped (string->uri "http://www/foo#bar") "x y z")) "http://www/foo#x y z"))) (doc (section "Hierarchical URIs #1") (para "This and some of the following subsections concern ``hierarchical'' generic URI syntax as described in " (hyperlink "http://www.ietf.org/rfc/rfc3305.txt" "RFC2396") ", sec. 3.")) (doc (defproc (uri-hierarchical? (uri uri-string?)) boolean? (para "Yields a Boolean value for whether or not the URI scheme of URI " (racket uri) " is known to have a ``hierarchical'' generic URI layout. For example:") (racketinput (uri-hierarchical? "http://www/") #,(racketresult #t)) (racketinput (uri-hierarchical? "mailto://www/") #,(racketresult #f)) (racketinput (uri-hierarchical? "//www/") #,(racketresult #f)))) (provide uri-hierarchical?) (define (uri-hierarchical? uri) (let ((uri (uri-string->uri uri))) (urischeme-hierarchical? (uri-struct-scheme uri)))) (doc (subsection "Server-Based Naming Authorities") (para "Several procedures extract the server authority values from URIs [RFC2396 sec. 3.2.2].")) (doc (defproc (uri-server-userinfo+host+portnum (uri uri-string?)) (values (or/c #f string?) (or/c #f string?) (or/c #f exact-nonnegative-integer?)) (para "Yields three values for the server authority of URI " (racket uri) ": the userinfo as a string (or " (racket #f) "), the host as a string (or " (racket #f) "), and the effective port number as an integer (or " (racket #f) "). The effective port number of a server authority defaults to the default of the URI scheme unless overridden. For example (note the effective port number is 21, the default for the " (tt "ftp") " scheme):") (racketinput (uri-server-userinfo+host+portnum "ftp://anon@@ftp.foo.bar/") #,(racketresult "anon" "ftp.foo.bar" 21)))) (provide uri-server-userinfo+host+portnum) (define (uri-server-userinfo+host+portnum uri) (uri-userinfo+host+portnum (uri-string->uri uri))) (doc (defproc* (((uri-server-userinfo (uri uri-string?)) (or/c #f string?)) ((uri-server-host (uri uri-string?)) (or/c #f string?)) ((uri-server-portnum (uri uri-string?)) (or/c #f exact-nonnegative-integer?))) (para "Yield the respective part of the server authority of " (racket uri) ". See the discussion of " (racket uri-server-userinfo+host+portnum) "."))) (provide uri-server-userinfo) (define (uri-server-userinfo uri) (uriserver-userinfo (uri-uriserver (uri-string->uri uri)))) (provide uri-server-host) (define (uri-server-host uri) (uriserver-host (uri-uriserver (uri-string->uri uri)))) (provide uri-server-portnum) (define (uri-server-portnum uri) (uri-portnum (uri-string->uri uri))) (module+ test (test-section 'uri-server-portnum (test (uri-server-portnum (string->uri "http:")) 80) (test (uri-server-portnum (string->uri "http://www/")) 80) (test (uri-server-portnum (string->uri "http://www:80/")) 80) (test (uri-server-portnum (string->uri "http://www:8080/")) 8080))) (doc (subsection "Hierarchical Paths") (para "A parsed hierarchical path [RFC2396 sec. 3] is represented in this package as a tuple of a list of path segments and an " (deftech "upcount") ". The list of path segments does not contain any ``" (tt ".") "'' or ``" (tt "..") "'' relative components, as those are removed during parsing. The upcount is either " (racket #f) ", meaning an absolute path, or an integer 0 or greater, meaning a relative path of that many levels ``up.'' A path segment without any parameters is represented as either a string or, if empty, " (racket #f) ". For example:") (racketinput (uri-path-upcount+segments "/a/b/") #,(racketresult #f ("a" "b" #f))) (racketinput (uri-path-upcount+segments "/a/b/c") #,(racketresult #f ("a" "b" "c"))) (racketinput (uri-path-upcount+segments "/a/../../../b/c") #,(racketresult 2 ("b" "c"))) (para "and:") (racketinput (uri-path-upcount+segments "/.") #,(racketresult #f ())) (racketinput (uri-path-upcount+segments "/") #,(racketresult #f (#f))) (racketinput (uri-path-upcount+segments ".") #,(racketresult 0 (#f))) (racketinput (uri-path-upcount+segments "") #,(racketresult 0 ())) (racketinput (uri-path-upcount+segments "./") #,(racketresult 0 (#f))) (racketinput (uri-path-upcount+segments "..") #,(racketresult 1 ())) (racketinput (uri-path-upcount+segments "/..") #,(racketresult 1 ())) (racketinput (uri-path-upcount+segments "../") #,(racketresult 1 (#f))) (para "A path segment with parameters is represented as a list, with the first element a string or " (racket #f) " for the path name, and the remaining elements strings for the parameters. For example:") (racketinput (uri-path-segments "../../a/b;p1/c/d;p2;p3/;p4") #,(racketresult ("a" ("b" "p1") "c" ("d" "p2" "p3") (#f "p4")))) (para "In the current version of this package, parsed paths are actually represented in reverse, which simplifies path resolution and permits list tails to be shared among potentially large numbers of long paths. For example:") (racketinput (let ((base (string->uripath "/a/b/c/index.html"))) (map (lambda (n) (resolve-uripath (string->uripath n) base)) '("x.html" "y/y.html" "../z/z.html")))) ;; TODO: Make this have racketresult look, while keeping print-graph info: (nested #:style 'inset (verbatim "((\"x.html\" . #0=(\"c\" . #1=(\"b\" \"a\")))\n" " (\"y.html\" \"y\" . #0#)\n" " (\"z.html\" \"z\" . #1#)))))\n"))) (doc (defproc* (((uri-path-upcount+segments (uri uri-string?)) (values (or/c #f exact-nonnegative-integer?) (listof (or/c #f string?)))) ((uri-path-upcount+segments/reverse (uri uri-string?)) (values (or/c #f exact-nonnegative-integer?) (listof (or/c #f string?))))) (para "Yields the path upcount and the segments of " (racket uri) " as two values. The segments list should be considered immutable, as it might be shared elsewhere. " (racket uri-path-upcount+segments/reverse) " yields the segments list in reverse order, and is the more efficient of the two procedures.") (racketinput (uri-path-upcount+segments/reverse "../a/../../b/./c") #,(racketresult 2 ("c" "b"))) (racketinput (uri-path-upcount+segments "../a/../../b/./c") #,(racketresult 2 ("b" "c"))))) (provide uri-path-upcount+segments) (define (uri-path-upcount+segments uri) (uripath-upcount+segments (uri-uripath (uri-string->uri uri)))) (provide uri-path-upcount+segments/reverse) (define (uri-path-upcount+segments/reverse uri) (uripath-upcount+segments/reverse (uri-uripath (uri-string->uri uri)))) (doc (defproc* (((uri-path-upcount (uri uri-string?)) (or/c #f exact-nonnegative-integer?)) ((uri-path-segments (uri uri-string?)) (listof (or/c #f string?))) ((uri-path-segments/reverse (uri uri-string?)) (listof (or/c #f string?)))) (para "See the documentation for " (racket uri-path-upcount+segments) ".") (racketinput (uri-path-upcount "../a/../../b/./c") #,(racketresult 2)) (racketinput (uri-path-segments "../a/../../b/./c") #,(racketresult ("b" "c"))) (racketinput (uri-path-segments/reverse "../a/../../b/./c") #,(racketresult ("c" "b"))))) (provide uri-path-upcount) (define (uri-path-upcount uri) (uripath-upcount (uri-uripath (%uri-string+error-name->uri uri 'uri-path-upcount)))) (provide uri-path-segments) (define (uri-path-segments uri) (uripath-segments (uri-uripath (%uri-string+error-name->uri uri 'uri-path-segments)))) (provide uri-path-segments/reverse) (define (uri-path-segments/reverse uri) (uripath-segments/reverse (uri-uripath (%uri-string+error-name->uri uri 'uri-path-segments/reverse)))) (doc (defproc* (((urisegment-name (urisegment urisegment?)) (or/c #f string?)) ((urisegment-params (urisegment urisegment?)) (listof (or/c #f string?))) ((urisegment-name+params (urisegment urisegment?)) (values (or/c #f string?) (listof (or/c #f string?)))) ((urisegment-has-params? (urisegment urisegment?)) boolean?)) (para "Yield the components of a parsed URI segment. The values should be considered immutable. For example:") (racketinput (urisegment-name+params "foo") #,(racketresult "foo" ())) (racketinput (urisegment-name+params #f) #,(racketresult #f ())) (racketinput (urisegment-name+params '("foo" "p1" "p2")) #,(racketresult "foo" ("p1" "p2"))) (racketinput (urisegment-name+params '(#f "p1" "p2")) #,(racketresult #f ("p1" "p2"))))) (provide urisegment-name) (define (urisegment-name segment) (if (pair? segment) (car segment) segment)) (provide urisegment-params) (define (urisegment-params segment) (if (pair? segment) (cdr segment) '())) (provide urisegment-name+params) (define (urisegment-name+params segment) (if (pair? segment) (values (car segment) (cdr segment)) (values segment '()))) (provide urisegment-has-params?) (define (urisegment-has-params? segment) (pair? segment)) (define (%parse-uri-path-params str start end) (%with-rx-match-positions (#rx"^([^;]+)?(;)?" str start end) ((whole param semi) (cons (if param (uri-unescape-i str (car param) (cdr param)) #f) (if semi (%parse-uri-path-params str (cdr whole) end) '()))))) (doc (subsection "Attribute-Value Queries") (para "This library provides support for parsing the URI query component " "[" (hyperlink "http://www.ietf.org/rfc/rfc2396.txt" "RFC 2396") " sec. 3.4], as attribute-value lists in the manner of " (racket 'http) " URI scheme queries. Parsed queries are represented as association lists, in which the " (italic "car") " of each pair is the attribute name as a string, and the " (italic "cdr") " is either the attribute value as a string or " (racket #t) " if no value given. All strings are uri-unescaped. For example:") (racketinput (uri-query "?q=fiendish+scheme&case&x=&y=1%2B2")) (racketresultblock (("q" . "fiendish scheme") ("case" . #t) ("x" . "") ("y" . "1+2")))) ;; TODO: not so good for @code{imap} URI scheme [RFC2192]. (doc (defproc (uri-query (uri uri-string?)) uriquery? (para "Yields the parsed attribute-value query of " (racket uri) ", or " (racket #f) " if no query. For example:") (racketinput (uri-query "?x=42&y=1%2B2") #,(racketresult (("x" . "42") ("y" . "1+2")))))) (provide uri-query) (define (uri-query uri) (uri-uriquery (uri-string->uri uri))) (doc (defproc (uri-query-value (uri uri-string?) (attr string?)) (or/c #f #t string?) (para "Yields the value of attribute " (racket attr) " in " (racket uri) "'s query, or " (racket #f) " if " (racket uri) " has no query component or no " (racket attr) " attribute. If the attribute appears multiple times in the query, the value of the first occurrence is used. For example:") (racketinput (uri-query-value "?x=42&y=1%2B2" "y") #,(racketresult "1+2")))) (provide uri-query-value) (define (uri-query-value uri attr) (let ((query (uri-query uri))) (if query (uriquery-value query attr) #f))) (doc (defproc (uriquery-value (uriquery uriquery?) (attr string?)) (or/c #f #t string?) (para "Yields the value of attribute " (racket attr) " in " (racket uriquery) ", or " (racket #f) " if there is no such attribute. If the attribute appears multiple times in the query, the value of the first occurrence is used."))) (provide uriquery-value) (define (uriquery-value uriquery attr) (let ((pair (assoc attr uriquery))) (and pair (cdr pair)))) (doc (section "Hierarchical URIs #2")) (define (%make-hierarchical-uri urischeme uriserver uripath uriquery fragment) (let-values (((string opaque-k pound-k) (%build-hierarchical-uri-string+opaque+pound urischeme uriserver uripath uriquery fragment))) (make-uri-struct string urischeme opaque-k pound-k uriserver uripath uriquery))) (define (%build-hierarchical-uri-string+opaque+pound urischeme uriserver uripath uriquery fragment) ;; Note: this procedure is currently only used when the uri scheme is known, ;; but will probably evolve into a procedure that works with unknown URI ;; schemes, so some code has been written to support that. ;; ;; TODO: fragment should already be *un*escaped (let ((str (let ((os (open-output-string))) (and urischeme (begin (display urischeme os) (write-char #\: os))) (if uriserver (begin (display "//" os) (write-uriserver uriserver os) (and uripath (write-uripath/leading-slash uripath os))) (and uripath (write-uripath uripath os))) (and uriquery (not (null? uriquery)) (begin (write-char #\? os) (write-uriquery uriquery os))) (and fragment (begin (write-char #\# os) (display (%uri-escape/shared-ok fragment) os))) (%get-output-string-i os)))) (values str (if urischeme (+ 1 (string-length (symbol->string urischeme))) #f) (if fragment (- (string-length str) (string-length fragment) 1) #f)))) ;; TODO: maybe make the hierarchical-uri parsing functions document that they ;; might only be meaningful on hierarchical uri. maybe refuse to work with ;; known urischemes not known to be hierarchical. (doc (defproc (uri-uriserver (uri uri-string?)) uriserver? (para "!!!"))) (provide uri-uriserver) (define (uri-uriserver uri-string) (let ((uri-struct (%uri-string+error-name->uri uri-string 'uri-uriserver))) ;; TODO: !!! WHAT WAS I DOING WITH THIS RXPOS TAG STUFF?! (let ((substring->uriserver (lambda (str start end) (substring/default-portnum->uriserver str start end (urischeme-default-portnum (uri-struct-scheme uri-struct)))))) ((%uri-struct-rxpos-field-proc uri-struct-authority substring->uriserver set-uri-struct-authority!) uri-struct)))) (module+ test (test-section 'uri-uriserver (test (uri-uriserver (string->uri "//www:80/")) '(#f "www" 80)) (test (uri-uriserver (resolve-uri (string->uri "//www:80/") (string->uri "http:"))) "www"))) (doc (defproc (uri-uriserver+path+query (uri uristring?)) !!! (para "!!!"))) (provide uri-uriserver+path+query) (define (uri-uriserver+path+query uri) (values (uri-uriserver uri) (uri-uripath uri) (uri-uriquery uri))) (doc (defproc (uri-uriserver+uripath+uriquery (uri uri-string?)) (values uriserver? uripath? uriquery?) (para "!!!"))) (provide uri-uriserver+uripath+uriquery) (define (uri-uriserver+uripath+uriquery uri) (uri-uriserver+path+query (uri-string->uri uri))) (doc (defproc (uri-userinfo+host+portnum (uri uri-string?)) !!! (para "!!!"))) (provide uri-userinfo+host+portnum) (define (uri-userinfo+host+portnum uri) (let-values (((userinfo host portnum) (uriserver-userinfo+host+portnum (uri-uriserver uri)))) (values userinfo host (or portnum (uri-portnum uri))))) (doc (defproc (uri-portnum (uri uri-string?)) (or/c #f exact-nonnegative-integer?) (para "!!!"))) (provide uri-portnum) (define (uri-portnum uri) (or (uriserver-portnum (uri-uriserver uri)) (let ((urischeme (uri-struct-scheme uri))) (and urischeme (urischeme-default-portnum urischeme))))) (define (%string-or-f->string-i-or-f str) (and str (%string->string-i str))) (doc (defproc* (((make-uriserver (userinfo (or/c #f string?)) (host (or/c #f string?)) (portnum (or/c #f exact-nonnegative-integer?))) uriserver?) ((make-uriserver/default-portnum (userinfo (or/c #f string?)) (host (or/c #f string?)) (portnum (or/c #f exact-nonnegative-integer?)) (default-portnum (or/c #f exact-nonnegative-integer?))) uriserver?)) (para "!!!"))) (provide make-uriserver/default-portnum) (define (make-uriserver/default-portnum userinfo host portnum default-portnum) (let ((portnum (if (equal? portnum default-portnum) #f portnum))) (cond ((or userinfo portnum) (list (%string-or-f->string-i-or-f userinfo) (%string-or-f->string-i-or-f host) portnum)) (host (%string-or-f->string-i-or-f host)) (else #f)))) (provide make-uriserver) (define (make-uriserver userinfo host portnum) (make-uriserver/default-portnum userinfo host portnum #f)) ;; TODO: if uriserver is to have the default port number for a uri scheme, then ;; portnum should be #f. (consider sharing the same uriserver for http and ;; https uri, for example. also consider normal http uri do not show default ;; port number.) (define (%make-or-reuse-uriserver userinfo host portnum base-uriserver) (if (or userinfo host portnum) (if base-uriserver (let-values (((base-u base-h base-p) (uriserver-userinfo+host+portnum base-uriserver))) (if (and (equal? userinfo base-u) (equal? host base-h) (equal? portnum base-p)) base-uriserver (make-uriserver (or userinfo base-u) (or host base-h) (or portnum base-p)))) (make-uriserver userinfo host portnum)) base-uriserver)) (define (%make-or-reuse-uriserver/default-portnum userinfo host portnum base-uriserver default-portnum) (%make-or-reuse-uriserver userinfo host (if (equal? portnum default-portnum) #f portnum) base-uriserver)) (doc (defproc* ( ((string->uriserver (str string?)) uriserver?) ((string/base->uriserver (str string?) (base-uriserver uriserver?)) uriserver?) ((string/default-portnum->uriserver (str string?) (default-portnum (or/c #f exact-nonnegative-integer?))) uriserver?) ((string/base/default-portnum->uriserver (str string?) (base-uriserver uriserver?) (default-portnum (or/c #f exact-nonnegative-integer?))) uriserver?) ((substring->uriserver (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?)) uriserver?) ((substring/base->uriserver (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?) (base-uriserver uriserver?)) uriserver?) ((substring/default-portnum->uriserver (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?) (default-portnum (or/c #f exact-nonnegative-integer?))) uriserver?) ((substring/base/default-portnum->uriserver (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?) (base-uriserver uriserver?) (default-portnum (or/c #f exact-nonnegative-integer?))) uriserver?) ) (para "!!!"))) ;; TODO: !!! We sure do have a lot of these procedures... Replace with bigger ;; procedures with keyword arguments. (provide string->uriserver) (define (string->uriserver str) (substring/base/default-portnum->uriserver str 0 #f #f #f)) (module+ test (test-section 'string->uriserver (test (string->uriserver "") #f) (test (string->uriserver "myhost") "myhost") (test (string->uriserver "myhost:80") '(#f "myhost" 80)) (test (string->uriserver "myuser@myhost") '("myuser" "myhost" #f)) (test (string->uriserver "myuser@myhost:80") '("myuser" "myhost" 80)) (test (string->uriserver "john%20smith@f%6F%4F:8%30") '("john smith" "foo" 80)))) (provide string/base->uriserver) (define (string/base->uriserver str base-uriserver) (substring/base/default-portnum->uriserver str 0 #f base-uriserver #f)) (provide string/base/default-portnum->uriserver) (define (string/base/default-portnum->uriserver str base-uriserver default-portnum) (substring/base/default-portnum->uriserver str 0 #f base-uriserver default-portnum)) (provide string/default-portnum->uriserver) (define (string/default-portnum->uriserver str default-portnum) (substring/base/default-portnum->uriserver str 0 #f #f default-portnum)) (module+ test (test-section 'string/default-portnum->uriserver (test (string/default-portnum->uriserver "www.foo:80" 80) "www.foo") (test (string/default-portnum->uriserver "www.foo:8080" 80) '(#f "www.foo" 8080)) (test (string/default-portnum->uriserver "u@www.foo:80" 80) '("u" "www.foo" #f)) (test (string/default-portnum->uriserver "u@www.foo:8080" 80) '("u" "www.foo" 8080)) (test (string/default-portnum->uriserver ":80" 80) #f) (test (string/default-portnum->uriserver ":8080" 80) '(#f #f 8080)))) (provide substring->uriserver) (define (substring->uriserver str start end) (substring/base/default-portnum->uriserver str start end #f #f)) (provide substring/base->uriserver) (define (substring/base->uriserver str start end base-uriserver) (substring/base/default-portnum->uriserver str start end base-uriserver #f)) (provide substring/default-portnum->uriserver) (define (substring/default-portnum->uriserver str start end default-portnum) (substring/base/default-portnum->uriserver str start end #f default-portnum)) (provide substring/base/default-portnum->uriserver) (define substring/base/default-portnum->uriserver (let ((rx (regexp "^(?:([^@:]+)@)?([^@:]+)?(?::([0-9%a-fA-F]+))?"))) (lambda (str start end base-uriserver default-portnum) (%with-rx-match-positions (rx str start (or end (string-length str))) ((whole userinfo host portnum) (if (or userinfo host portnum) (%make-or-reuse-uriserver/default-portnum (%rxpos-unescape-str-i userinfo str) (%rxpos-unescape-lowstr-i host str) (%rxpos-unescape-num portnum str) base-uriserver default-portnum) #f)))))) (doc (defproc* (((uriserver-userinfo (uriserver uriserver?)) (or/c #f string?)) ((uriserver-host (uriserver uriserver?)) (or/c #f string?)) ((uriserver-portnum (uriserver uriserver?)) (or/c #f exact-nonnegative-integer?)) ((uriserver-userinfo+host+portnum (uriserver uriserver?)) (values (or/c #f string?) (or/c #f string?) (or/c #f exact-nonnegative-integer?)))) (para "!!!"))) (provide uriserver-userinfo) (define (uriserver-userinfo uriserver) (if (pair? uriserver) (list-ref uriserver 0) #f)) (provide uriserver-host) (define (uriserver-host uriserver) (cond ((not uriserver) #f) ((string? uriserver) uriserver) (else (list-ref uriserver 1)))) (provide uriserver-portnum) (define (uriserver-portnum uriserver) (if (pair? uriserver) (list-ref uriserver 2) #f)) (provide uriserver-userinfo+host+portnum) (define (uriserver-userinfo+host+portnum uriserver) (cond ((not uriserver) (values #f #f #f)) ((string? uriserver) (values #f uriserver #f)) (else (apply values uriserver)))) (doc (defproc (write-uriserver (uriserver uriserver?) (port output-port?)) void? (para "!!!"))) (provide write-uriserver) (define (write-uriserver uriserver port) (let-values (((userinfo host portnum) (uriserver-userinfo+host+portnum uriserver))) (and userinfo (begin (display (%uri-escape/shared-ok userinfo) port) (write-char #\@ port))) (and host (display (%uri-escape/shared-ok host) port)) (and portnum (begin (write-char #\: port) (display portnum port))))) (doc (defproc (uriserver-with-default-portnum (uriserver uriserver?) (default-portnum exact-nonnegative-integer?)) uriserver? (para "!!!"))) (provide uriserver-with-default-portnum) (define (uriserver-with-default-portnum uriserver default-portnum) (if (and default-portnum (equal? (uriserver-portnum uriserver) default-portnum)) (let-values (((userinfo host portnum) (uriserver-userinfo+host+portnum uriserver))) (make-uriserver userinfo host #f)) uriserver)) (doc (defproc* (((resolve-uriserver (uriserver uriserver?) (base-uriserver uriserver?)) uriserver?) ((resolve-uriserver/default-portnum (uriserver uriserver?) (base-uriserver uriserver?) (default-portnum exact-nonnegative-integer?)) uriserver?)) (para "!!!"))) ;; Note: base-uriserver must be from uri of the same scheme as uriserver (provide resolve-uriserver) (define (resolve-uriserver uriserver base-uriserver) (or uriserver base-uriserver)) (provide resolve-uriserver/default-portnum) (define (resolve-uriserver/default-portnum uriserver base-uriserver default-portnum) (uriserver-with-default-portnum (or uriserver base-uriserver) default-portnum)) (doc (subsection "Hierarchical Paths")) (doc (defproc* (((uri-path (uri uri-string?)) uripath?) ((uri-path/noparams (uri uri-string?)) uripath?) ((uri-uripath (uri uri-string?)) uripath?) ((uri-uripath/noparams (uri uri-string?)) uripath?)) (para "!!!"))) ;; TODO: "path/noparams" might be be an unusual case. Maybe better to have the ;; "/noparams" be on "uripath-segments/noparams" and ;; "uripath-upcount+segments/noparams". Or maybe we should add ;; "uri-path-upcount", "uri-path-segments", "uri-path-segments/noparams", etc. (provide uri-path) (define (uri-path uri) (uri-uripath (uri-string->uri uri))) (module+ test (test-section 'uri-path (test (uri-path "../../a/b;p1/c/d;p2;p3/;p5") '(2 (#f "p5") ("d" "p2" "p3") "c" ("b" "p1") "a")))) (provide uri-path/noparams) (define (uri-path/noparams uri) (uri-uripath/noparams (uri-string->uri uri))) (module+ test (test-section 'uri-path/noparams (test (uri-path/noparams "../../a/b;p1/c/d;p2;p3/e/;p5") '(2 #f "e" "d" "c" "b" "a")))) (provide uri-uripath) (define uri-uripath (lambda (uri-string) ((%uri-struct-rxpos-field-proc uri-struct-path substring->uripath set-uri-struct-path!) (%uri-string+error-name->uri uri-string 'uri-uripath)))) (provide uri-uripath/noparams) (define (uri-uripath/noparams uri) (let*-values (((uripath) (uri-uripath uri)) ((ups segs) (uripath-upcount+segments/reverse uripath))) (let loop ((rest segs)) (cond ((null? rest) uripath) ((urisegment-has-params? (car rest)) (make-uripath/reverse/shared-ok ups (%map-i urisegment-name segs))) (else (loop (cdr rest))))))) (doc (defproc* (((make-uripath (upcount uriupcount? (segments urisegments?))) uripath?) ((make-uripath/reverse (upcount uriupcount? (segments urisegments?))) uripath?) ((make-uripath/reverse/shared-ok (upcount uriupcount? (segments urisegments?))) uripath?)) (para "!!!"))) (provide make-uripath/reverse/shared-ok) (define (make-uripath/reverse/shared-ok upcount segments) (if upcount ;; (and upcount (not (null? segments))) (cons upcount segments) segments)) (provide make-uripath/reverse) (define (make-uripath/reverse upcount segments) (make-uripath/reverse/shared-ok upcount (%list->list-i segments))) (provide make-uripath) (define (make-uripath upcount segments) (make-uripath/reverse/shared-ok upcount (%reverse-i segments))) (doc (defproc (uripath-with-upcount (uripath uripath?) (upcount uriupcount?)) (para "!!!"))) (provide uripath-with-upcount) (define (uripath-with-upcount uripath upcount) (let-values (((old-uc old-segs) (uripath-upcount+segments/reverse uripath))) (if (equal? upcount old-uc) uripath (make-uripath/reverse/shared-ok upcount old-segs)))) (doc (defproc* (((string->uripath (str string?)) uripath?) ((string/base->uripath (str string?) (base-uripath uripath?)) uripath?) ((substring->uripath (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?)) uripath?) ((substring/base->uripath (str string?) (start exact-nonnegative-integer?) (end exact-nonnegative-integer?) (base-uripath uripath?)) uripath?)) (para "!!!") (para "Note: Contrary to [RFC2396], we don't require base to be absolute."))) (provide string->uripath) (define (string->uripath str) (substring->uripath str 0 #f)) (module+ test (test-section 'string->uripath (test (string->uripath "") '(0)) (test (string->uripath ".") '(0 #f)) (test (string->uripath "..") '(1)) (test (string->uripath "./") '(0 #f)) (test (string->uripath "../") '(1 #f)) (test (string->uripath "/") '(#f)) (test (string->uripath "/.") '(#f)) (test (string->uripath "/..") '(1)) (test (string->uripath "a/b") '(0 "b" "a")) (test (string->uripath "/a/b") '("b" "a")) (test (string->uripath "./a/b") '(0 "b" "a")) (test (string->uripath "/./a/b") '("b" "a")) (test (string->uripath "/../a/b") '(1 "b" "a")) (test (string->uripath "/../../a/b") '(2 "b" "a")) (test (string->uripath "/../../../a/b") '(3 "b" "a")) (test (string->uripath "/a/../b") '("b")) (test (string->uripath "/a/../../b") '(1 "b")) (test (string->uripath "/a/../../../b") '(2 "b")) (test (string->uripath "a/../b") '(0 "b")) (test (string->uripath "a/../../b") '(1 "b")) (test (string->uripath "a/../../../b") '(2 "b")) (test (string->uripath "/") '(#f)) (test (string->uripath "//") '(#f #f)) (test (string->uripath "///") '(#f #f #f)) (test (string->uripath "/a/") '(#f "a")) (test (string->uripath "//a") '("a" #f)) (test (string->uripath "/;p/a") '("a" (#f "p"))) (test (string->uripath "///a") '("a" #f #f)) (test 'empty-path-segment-at-start-of-absolute-path (string->uripath "//a/b") '("b" "a" #f)) (test 'empty-path-segment-at-start-of-relative-path (string->uripath ".//a/b") '(0 "b" "a" #f)) (test 'empty-path-segment-with-param-at-start-of-absolute-path (string->uripath "/;p/a/b") '("b" "a" (#f "p"))) (test 'empty-path-segment-with-param-at-start-of-relative-path (string->uripath ";p/a/b") '(0 "b" "a" (#f "p"))) (test 'empty-path-segment-in-middle-of-path (string->uripath "/a//b/") '(#f "b" #f "a")) (test 'empty-path-segment-with-parameter-in-middle-of-path (string->uripath "/a/;p/b/") '(#f "b" (#f "p") "a")) (test 'empty-path-segment-with-parameter-at-end-of-path (string->uripath "/a/b/;p") '((#f "p") "b" "a")) (test 'empty-path-parameter (string->uripath "/a/;/b") '("b" (#f #f) "a")) (test 'multiple-empty-path-parameters (string->uripath "/a/;;;/b") '("b" (#f #f #f #f) "a")) (test 'path-segment-beginning-with-dot (string->uripath "/a/.b/c") '("c" ".b" "a")) (test 'path-segment-beginning-with-double-dot (string->uripath "/a/..b/c") '("c" "..b" "a")))) (provide string/base->uripath) (define (string/base->uripath str base-uripath) (substring/base->uripath str 0 #f base-uripath)) ;; TODO: URI parsing code should ignore whitespace when parsing a ;; "<([Uu][Rr][Ll]:)?[^>]*>" one. Distinguish "extract-uri" or "parse-uri" ;; from "string->uri". (provide substring->uripath) (define substring->uripath ;; TODO: We can drop the second match-position from "path-rx". (let ((path-rx #rx"^(/)?(.+)?$") (segment-rx #rx"^([^/;]+)?(;[^/]*)?(/)?")) (lambda (str start end) (let ((end (or end (string-length str)))) (%with-rx-match-positions (path-rx str start end) ((whole leading-slash path-pos) (if path-pos (let loop ((start (car path-pos)) (uc (if leading-slash #f 0)) (segs '())) (%with-rx-match-positions (segment-rx str start end) ((whole name-pos params slash) (let ((name (if name-pos (uri-unescape-i str (car name-pos) (cdr name-pos)) #f))) (cond ((and name (string=? name "..")) (let-values (((new-uc new-segs) (if (null? segs) (values (+ 1 (or uc 0)) segs) (values uc (cdr segs))))) (if slash (loop (cdr whole) new-uc new-segs) (make-uripath/reverse/shared-ok new-uc new-segs)))) ((and name (string=? name ".")) (if slash (loop (cdr whole) uc segs) (make-uripath/reverse/shared-ok uc (cons #f segs)))) (else (let ((new-segs (cons (if params (cons name (%parse-uri-path-params str (+ 1 (car params)) (cdr params))) name) segs))) (if slash (loop (cdr whole) uc new-segs) (make-uripath/reverse/shared-ok uc new-segs))))))))) (if leading-slash (make-uripath/reverse/shared-ok #f (cons #f '())) (make-uripath/reverse/shared-ok 0 '()))))))))) (provide substring/base->uripath) (define (substring/base->uripath str start end base-uripath) (let ((uripath (substring->uripath str start end))) (if base-uripath (resolve-uripath uripath base-uripath) uripath))) ;; TODO: maybe add /noparams variants of uripath accessors, and *maybe* remove ;; the uri-path/noparams. Update: Make optional keyword arguments. (doc (defproc* ( ((uripath-upcount (uripath uripath?)) uriupcount?) ((uripath-segments (uripath uripath?)) urisegments?) ((uripath-segments/reverse (uripath uripath?)) urisegments?) ((uripath-upcount+segments (uripath uripath?)) (values uriupcount? urisegments?)) ((uripath-upcount+segments/reverse (uripath uripath?)) (values uriupcount? urisegments?)) ) (para "!!!"))) (provide uripath-upcount+segments/reverse) (define (uripath-upcount+segments/reverse path) ;;!!! shouldn't the null path be (values 0 '()) ? (cond ((null? path) (values #f '())) ((integer? (car path)) (values (car path) (cdr path))) (else (values #f path)))) (provide uripath-upcount+segments) (define (uripath-upcount+segments uripath) (let-values (((uc segs) (uripath-upcount+segments/reverse uripath))) (values uc (%reverse-i segs)))) (provide uripath-upcount) (define (uripath-upcount uripath) (and (not (null? uripath)) (integer? (car uripath)) (car uripath))) (provide uripath-segments/reverse) (define (uripath-segments/reverse path) (cond ((null? path) '()) ((integer? (car path)) (cdr path)) (else path))) (provide uripath-segments) (define (uripath-segments uripath) (%reverse-i (uripath-segments/reverse uripath))) (doc (defproc (uripath-has-params? (uripath uripath?)) boolean? (para "!!!"))) (provide uripath-has-params?) (define (uripath-has-params? uripath) (let ((segs (uripath-segments/reverse uripath))) (let loop ((segs segs)) (cond ((null? segs) #f) ((urisegment-has-params? (car segs)) #t) (else (loop (cdr segs))))))) (doc (defproc* (((write-uripath (uripath uripath?) (port output-port?)) void?) ((write-uripath/leading-slash (uripath uripath?) (port output-port?)) void?)) (para "!!!"))) (define (%write-uripath/leading-slash-arg uripath port leading-slash?) ;; TODO: !!! Why do we need "leading-slash?"? is that different than #f for the upcount? (let-values (((uc segs) (uripath-upcount+segments uripath))) ;; First, handle any upcount, which is leading slashes, single dots, and double dots. (if uc ;; We have upcount, so !!! (begin (and leading-slash? ;; (if (zero? uc) ;; TODO: This isn't right. Look at what the parsing ;; procedure produces, and special-case if need be. ;; Update: This TODO is from before we added the handling of #f in the segs to get "." properly, ;; so revisit this code in light of that. ;; (write-string "/." port) (write-char #\/ port)) (let loop ((i uc)) (and (> i 0) (begin (write-string "../" port) (loop (- i 1)))))) ;; We don't have upcount, meaning an absolute path, so simply write leading slash. (write-char #\/ port)) ;; TODO: Make this use segments/reverse, use non-tail recursion on ;; segments, and eliminate use of %for-each/between. (and segs (not (null? segs)) (let loop ((first-seg? #true) (head-seg (car segs)) (tail-segs (cdr segs))) ;; TODO: make/use a urisegment-name+params (let ((name (urisegment-name head-seg))) (if name ;; This segment has a name, so write it. (write-string (%uri-escape/shared-ok name) port) ;; This segment doesn't have a name, so, if upcount is 0if the only segment (!!!) ;; ;; TODO: !!! Only write this dot if uripath we're the first segment and either upcount is 0 or there are segments after us. (and first-seg? (or (equal? 0 uc) (not (null? tail-segs))) (write-char #\. port))) (for-each (lambda (param) (write-char #\; port) (write-string (%uri-escape/shared-ok param) port)) (urisegment-params head-seg))) (or (null? tail-segs) (begin (write-char #\/ port) (loop #false (car tail-segs) (cdr tail-segs)))))))) (define (%for-each/between each-proc between-proc lst) ;; TODO: Lose this procedure. (and (not (null? lst)) (let loop ((head (car lst)) (rest (cdr lst))) (each-proc head) (if (null? rest) (void) (begin (between-proc) (loop (car rest) (cdr rest))))))) (provide write-uripath) (define (write-uripath uripath port) (%write-uripath/leading-slash-arg uripath port #f)) (provide write-uripath/leading-slash) (define (write-uripath/leading-slash uripath port) (%write-uripath/leading-slash-arg uripath port #t)) (doc (defproc* (((uripath->string (uripath uripath?)) string?) ((uripath->string/leading-slash (uripath uripath?)) string?)) (para "!!!") (racketinput (uri-path-segments "//a/b") #,(racketresult ("b"))) (racketinput (uri-path-segments "/.//a/b") #,(racketresult (#f "a" "b"))) (para "!!!") (racketinput (uripath->string (string->uripath "//b")) #,(racketresult "//b")) (racketinput (uripath->string/leading-slash (string->uripath "//b")) #,(racketresult "/.//b")) (racketinput (uripath->string/leading-slash (string->uripath "/a/b")) #,(racketresult "/a/b")) (racketinput (uripath->string/leading-slash (string->uripath "/;p1/b")) #,(racketresult "/;p1/b")))) (provide uripath->string) (define (uripath->string uripath) (let ((os (open-output-string))) (write-uripath uripath os) (get-output-string os))) (module+ test (test-section 'uripath->string (test-section 'from-string (test (uripath->string '(#f)) "/") (test (uripath->string (string->uripath "//a")) "/./a") (test 'empty (uripath->string (string->uripath "")) "") (test 'dir-then-dot (uripath->string (string->uripath "d/.")) "d/") (test 'dot (uripath->string (string->uripath ".")) ".") (test 'dotdot (uripath->string (string->uripath "..")) "../")) (test-section 'from-uripath (test (uripath->string '(0 #f)) ".")) ;; TODO: !!! LOTS MORE TESTS FOR THIS! IT WAS BREAKING SOME OF THE ANTIRESOLVE TESTS. FOR EXAMPLE, ;; (uripath->string '(0 #f)) WAS GIVING "" INSTEAD OF "." )) (provide uripath->string/leading-slash) (define (uripath->string/leading-slash uripath) (let ((os (open-output-string))) (write-uripath/leading-slash uripath os) (get-output-string os))) (module+ test (test-section 'uripath->string/leading-slash (test (uripath->string/leading-slash '(#f)) "/") (test (uripath->string/leading-slash (string->uripath "//a")) "/./a"))) (doc (defproc (resolve-uripath (uripath uripath?) (base-uripath uripath?)) uripath? (para "!!!"))) (provide resolve-uripath) (define (resolve-uripath uripath base-uripath) (let-values (((old-upcount old-segs) (uripath-upcount+segments/reverse uripath))) (cond ((and (not old-upcount) (null? old-segs)) (let-values (((base-upcount base-segs) (uripath-upcount+segments/reverse base-uripath))) ;; TODO: unify this with below. ;; ;; TODO: check for base-upcount #f below. (make-uripath/reverse/shared-ok (if old-upcount (max old-upcount (or base-upcount 0)) base-upcount) base-segs))) (old-upcount (let-values (((base-upcount base-segs) (uripath-upcount+segments/reverse base-uripath))) (cond ((not (null? base-segs)) ;; "base-segs" is not null, so... ;; ;; TODO: We can simplify these "usable-base-segs" things once we've ;; tested all the oddball cases. (let-values (((usable-base-segs usable-base-segs-len) (let ((base-segs-len (length base-segs))) ;; (if (urisegment-name (car base-segs)) (if (null? old-segs) (values base-segs base-segs-len) (values (cdr base-segs) (- base-segs-len 1)))))) (let ((base-segs-to-skip (min old-upcount usable-base-segs-len))) (if (< base-segs-to-skip 0) (make-uripath/reverse/shared-ok (if base-upcount (max (+ old-upcount base-segs-to-skip) base-upcount) (+ old-upcount base-segs-to-skip)) (%append-i old-segs usable-base-segs)) (make-uripath/reverse/shared-ok base-upcount (%append-i old-segs (list-tail usable-base-segs base-segs-to-skip))))))) (base-upcount ;; "base-segs" is null, but "base-upcount" is not #f, so... ;; ;; TODO: do we take #f in base-segs into account? (if (> base-upcount old-upcount) (uripath-with-upcount uripath base-upcount) uripath)) (else uripath)))) (else uripath)))) (module+ test (test-section 'resolve-uripath (test (resolve-uripath '(2 "c" "b" "a") '(#f "z" "y" "x")) '("c" "b" "a" "x")) (test (uripath->string (resolve-uripath (string->uripath "../../a/b/c") (string->uripath "/x/y/z/"))) "/x/a/b/c"))) (doc (defproc (absolute-uripath (uripath uripath?)) uripath? (para "!!!"))) (provide absolute-uripath) (define (absolute-uripath uripath) (let-values (((upc segs) (uripath-upcount+segments/reverse uripath))) (cond ((null? segs) (make-uripath/reverse/shared-ok #f '(#f))) (upc (make-uripath/reverse/shared-ok #f segs)) (else uripath)))) (doc (subsection "Attribute-Value Queries")) (doc (defproc (uri-uriquery (uri uri-string?)) uriquery? (para "!!!"))) (provide uri-uriquery) (define (uri-uriquery uri) ((%uri-struct-rxpos-field-proc uri-struct-query substring->uriquery set-uri-struct-query!) (%uri-string+error-name->uri uri 'uri-uriquery))) (doc (defproc* (((string->uriquery (str string?)) uriquery?) ((substring->uriquery (str string?) (start exact-nonnegative-integer? 0) (end exact-nonnegative-integer? (string-length str))) uriquery?)) (para "!!!"))) (provide substring->uriquery) (define (substring->uriquery str (start 0) (end #f)) (let loop ((start start) (end (or end (string-length str)))) (%with-rx-match-positions (#rx"^([^&=]+)?(?:=([^&]*))?(&)?" str start end) ((whole name-pos val-pos amp-pos) (if name-pos (cons (cons (uri-unescape-i str (car name-pos) (cdr name-pos)) (if val-pos (uri-unplusescape-i str (car val-pos) (cdr val-pos)) #t)) (if amp-pos (loop (cdr amp-pos) end) '())) (if amp-pos (loop (cdr amp-pos) end) '())))))) (provide string->uriquery) (define (string->uriquery str) (substring->uriquery str 0 #f)) (module+ test (test-section 'string->uriquery (test (string->uriquery "q=fiendish+scheme&case&foo=&x=1%2B2") '(("q" . "fiendish scheme") ("case" . #t) ("foo" . "") ("x" . "1+2"))) (test (string->uriquery "") '()) (test (string->uriquery "&") '()) (test (string->uriquery "&&") '()) (test (string->uriquery "x&&") '(("x" . #t))) (test (string->uriquery "&&x") '(("x" . #t))) (test (uriquery-value (string->uriquery "x=a%20b") "x") "a b") (test (uriquery-value (string->uriquery "x=a%20b") "y") #f) (test (uriquery-value (string->uriquery "x=") "x") "") (test (uriquery-value (string->uriquery "x=&") "x") "") (test (uriquery-value (string->uriquery "x") "x") #t) (test (uriquery-value (string->uriquery "x&") "x") #t))) (doc (defproc (write-uriquery (uriquery uriquery?) (port output-port? (current-output-port))) void? (para "!!!"))) (provide write-uriquery) (define (write-uriquery uriquery (port (current-output-port))) (or (null? uriquery) (let loop ((head (car uriquery)) (rest (cdr uriquery))) (let ((attr (car head)) (val (cdr head))) (and attr (display (%uri-escape/shared-ok attr) port)) (and (string? val) (begin (write-char #\= port) (display (%uri-escape/shared-ok val) port))) (if (null? rest) (void) (begin (write-char #\& port) (loop (car rest) (cdr rest)))))))) ;; TODO: add uriquery-string (doc (section "Resolution") (para "This subsection concerns resolving relative URI to absolute URI.")) (doc (defproc (absolute-uri? (uri uri-string?)) boolean? (para "Yields a Boolean value for whether or not URI " (racket uri) " is " (italic "known") " by the library's criteria to be absolute."))) (provide absolute-uri?) (define (absolute-uri? uri) (let* ((uri (uri-string->uri uri)) (urischeme (uri-struct-scheme uri))) (if urischeme (if (urischeme-hierarchical? urischeme) (let-values (((server path query) (uri-uriserver+path+query uri))) (and server path (not (uripath-upcount path)))) #f) #f))) (doc (defproc (resolve-uri (uri uri-string?) (base-uri uri-string)) uri? (para "Yields a URI that is URI " (racket uri) " possibly resolved with respect to URI " (racket base-uri) ", but not necessarily absolute. As an extension to [RFC2396] rules for resolution, " (racket base-uri) " may be a relative URI.") (racketinput (resolve-uri "x.html" "http://w/a/b/c.html") #,(racketresult #,(elem "#uri\"http://w/a/b/x.html\""))) (racketinput (resolve-uri "//www:80/" "http:") #,(racketresult #,(elem "#uri\"http://www/\""))))) (provide resolve-uri) (define (resolve-uri uri base-uri) (cond ((not base-uri) uri) ((uri-struct? uri) (resolve-uri/base-uri uri (uri-string->uri base-uri))) ((equal? "" uri) base-uri) (else ;; TODO: If "uri" is a string URI for which we don't yet have a uri, we ;; could use "string/base->uri", which in the future might be more ;; efficient. (resolve-uri/base-uri (uri-string->uri uri) (uri-string->uri base-uri))))) (module+ test (test-section 'resolve-uri (test (uri->string (resolve-uri (string->uri ".././.././././foo.html") (string->uri "http://www/aaa/bbb/ccc/index.html"))) "http://www/aaa/foo.html") (test (uri->string (resolve-uri (string->uri ".") (string->uri "http://www"))) ;; TODO: There might not be any good answer to this one. Maybe ;; just leave off the "/.", so long as that doesn't make ;; anything in the rest of the URI ambiguous? "http://www/.") (test (uri->string (resolve-uri (string->uri "x") (string->uri "http://www/a/b/c/"))) "http://www/a/b/c/x") (test (uri->string (resolve-uri (string->uri "../x") (string->uri "http://www/a/b/c/"))) "http://www/a/b/x") (test (uri->string (resolve-uri (string->uri "../../x") (string->uri "http://www/a/b/c/"))) "http://www/a/x") (test (uri->string (resolve-uri (string->uri "../../") (string->uri "http://www/a/b/c/"))) "http://www/a/") (test (uri->string (resolve-uri (string->uri "../..") (string->uri "http://www/a/b/c/"))) ;; TODO: This seems probably right, but we should check with an ;; authoritative source. "http://www/a/b") (test (uri->string (resolve-uri (string->uri "mailto:foo@bar") (string->uri "http://www/a/index.html"))) "mailto:foo@bar") (test (uri->string (resolve-uri (string->uri "www/a/index.html") (string->uri "mailto:foo@bar"))) "mailto:www/a/index.html") (test (uri->string (resolve-uri (string->uri "//www:80/") (string->uri "http:"))) "http://www/" #:fail "!!! it's leaving the port number in") (test (uri->string (resolve-uri (string->uri "/foo?x=1&y=a%20b&z") (string->uri "http:"))) "http:/foo?x=1&y=a%20b&z") ;; New representation and resolver tests to fix "" and "." path semantics: (test (uri->string (resolve-uri (string->uri "") (string->uri "http://www/foo/bar/index.html"))) "http://www/foo/bar/index.html") (test (uri->string (resolve-uri (string->uri ".") (string->uri "http://www/foo/bar/index.html"))) "http://www/foo/bar/") (test (string->uripath "") '(0)) (test (string->uripath ".") '(0 #f)) (test (string->uripath "/") '(#f)) (test (uri->string (resolve-uri (string->uri "//www/") (string->uri "http:"))) "http://www/") (test (uri-path (string->uri "//www:80/")) '(#f)) (test-section 'rfc9386-sec-5-4-1 (define abcdpq (string->uri "http://a/b/c/d;p?q")) (test-section 'part-1 (test (uri->string (resolve-uri (string->uri "g:h") abcdpq)) "g:h") (test (uri->string (resolve-uri (string->uri "g") abcdpq)) "http://a/b/c/g") (test (uri->string (resolve-uri (string->uri "./g") abcdpq)) "http://a/b/c/g") (test (uri->string (resolve-uri (string->uri "g/") abcdpq)) "http://a/b/c/g/") (test (uri->string (resolve-uri (string->uri "/g") abcdpq)) "http://a/g") (test (uri->string (resolve-uri (string->uri "//g") abcdpq)) "http://g") (test (uri->string (resolve-uri (string->uri "?y") abcdpq)) "http://a/b/c/d;p?y") (test (uri->string (resolve-uri (string->uri "g?y") abcdpq)) "http://a/b/c/g?y") (test (uri->string (resolve-uri (string->uri "#s") abcdpq)) "http://a/b/c/d;p?q#s" #:fail "fragment alone should be easy fix") (test (uri->string (resolve-uri (string->uri "g#s") abcdpq)) "http://a/b/c/g#s") (test (uri->string (resolve-uri (string->uri "g?y#s") abcdpq)) "http://a/b/c/g?y#s") (test (uri->string (resolve-uri (string->uri ";x") abcdpq)) "http://a/b/c/;x") (test (uri->string (resolve-uri (string->uri "g;x") abcdpq)) "http://a/b/c/g;x") (test (uri->string (resolve-uri (string->uri "g;x?y#s") abcdpq)) "http://a/b/c/g;x?y#s") (test (uri->string (resolve-uri (string->uri "") abcdpq)) "http://a/b/c/d;p?q") (test (uri->string (resolve-uri (string->uri ".") abcdpq)) "http://a/b/c/") (test (uri->string (resolve-uri (string->uri "./") abcdpq)) "http://a/b/c/") (test (uri->string (resolve-uri (string->uri "..") abcdpq)) "http://a/b/" #:fail "it has http://a/b/c") (test (uri->string (resolve-uri (string->uri "../") abcdpq)) "http://a/b/") (test (uri->string (resolve-uri (string->uri "../g") abcdpq)) "http://a/b/g") (test (uri->string (resolve-uri (string->uri "../..") abcdpq)) "http://a/" #:fail "it does http://a/b") (test (uri->string (resolve-uri (string->uri "../../") abcdpq)) "http://a/") (test (uri->string (resolve-uri (string->uri "../../g") abcdpq)) "http://a/g")) (test-section 'part-2 (test (uri->string (resolve-uri (string->uri "../../../g") abcdpq)) "http://a/g") (test (uri->string (resolve-uri (string->uri "../../../../g") abcdpq)) "http://a/g") (test (uri->string (resolve-uri (string->uri "/./g") abcdpq)) "http://a/g") (test (uri->string (resolve-uri (string->uri "/../g") abcdpq)) "http://a/g" #:fail "it does http://a/b/g") (test (uri->string (resolve-uri (string->uri "g.") abcdpq)) "http://a/b/c/g.") (test (uri->string (resolve-uri (string->uri ".g") abcdpq)) "http://a/b/c/.g") (test (uri->string (resolve-uri (string->uri "g..") abcdpq)) "http://a/b/c/g..") (test (uri->string (resolve-uri (string->uri "..g") abcdpq)) "http://a/b/c/..g") (test (uri->string (resolve-uri (string->uri "./../g") abcdpq)) "http://a/b/g") (test (uri->string (resolve-uri (string->uri "./g/.") abcdpq)) "http://a/b/c/g/") (test (uri->string (resolve-uri (string->uri "g/./h") abcdpq)) "http://a/b/c/g/h") (test (uri->string (resolve-uri (string->uri "g/../h") abcdpq)) "http://a/b/c/h") (test (uri->string (resolve-uri (string->uri "g;x=1/./y") abcdpq)) "http://a/b/c/g;x=1/y" #:fail "it %-encodes = in parameter") (test (uri->string (resolve-uri (string->uri "g;x=1/../y") abcdpq)) "http://a/b/c/y") (test (uri->string (resolve-uri (string->uri "g?/./x") abcdpq)) "http://a/b/c/g?y/./x" #:fail "it %-encodes / in fragment") (test (uri->string (resolve-uri (string->uri "g?y/../x") abcdpq)) "http://a/b/c/g?y/../x" #:fail "it %-encodes / in fragment") (test (uri->string (resolve-uri (string->uri "g#s/./x") abcdpq)) "http://a/b/c/g#s/./x" #:fail "it %-encodes / in fragment") (test (uri->string (resolve-uri (string->uri "g#s/../x") abcdpq)) "http://a/b/c/g#s/../x" #:fail "it %-encodes / in fragment"))))) (define (%resolved-hierarchical-uri/base-uri/scheme/authority old-uri base-uri new-urischeme new-authority) (log-uri-debug "(%resolved-hierarchical-uri/base-uri/scheme/authority :old-uri ~S :base-uri ~S :new-urischeme ~S :new-authority ~S)" old-uri base-uri new-urischeme new-authority) (%make-hierarchical-uri new-urischeme new-authority ;; (resolve-uriserver/default-portnum ;; (uri-uriserver old-uri) ;; (uri-uriserver base-uri) ;; (urischeme-default-portnum new-urischeme)) (resolve-uripath (uri-uripath old-uri) (uri-uripath base-uri)) ;; TODO: is this right, that query not inherited? (uri-uriquery old-uri) (uri-fragment old-uri))) (define (%resolved-hierarchical-uri/base-uri/scheme old-uri base-uri new-urischeme) (log-uri-debug "(%resolved-hierarchical-uri/base-uri/scheme ~S ~S ~S)" old-uri base-uri new-urischeme) (let ((old-uriserver (uri-uriserver old-uri)) (base-uriserver (uri-uriserver base-uri))) (log-uri-debug "%resolved-hierarchical-uri/base-uri/scheme :old-uriserver ~S :base-uriserver ~S" old-uriserver base-uriserver) (if old-uriserver ;; old-uri has an authority (if base-uriserver ;; both old-uri and base-uri have authorities (if (equal? old-uriserver base-uriserver) ;; both have authorities, and they are equal (%resolved-hierarchical-uri/base-uri/scheme/authority old-uri base-uri new-urischeme old-uriserver) ;; both have authorities, but they are not equal old-uri) ;; old-uri has an authority, but base does not old-uri) ;; old-uri does not have an authority (if base-uriserver ;; old-uri does not have an authority, but base-uri does (%resolved-hierarchical-uri/base-uri/scheme/authority old-uri base-uri new-urischeme base-uriserver) ;; neither old-uri nor base-uri has an authority (%resolved-hierarchical-uri/base-uri/scheme/authority old-uri base-uri new-urischeme old-uriserver))))) (provide resolve-uri/base-uri) (define (resolve-uri/base-uri uri base-uri) (log-uri-debug "(resolve-uri/base-uri ~S ~S)" uri base-uri) (cond ((equal? (uri->string uri) "") base-uri) ((absolute-uri? uri) uri) (else (let ((old-urischeme (uri-struct-scheme uri)) (base-urischeme (uri-struct-scheme base-uri))) (if old-urischeme ;; old-uri *has* a scheme (if base-urischeme ;; both old-uri and base-uri *have* schemes (if (eq? old-urischeme base-urischeme) ;; old-uri's base-uri's schemes are *equal* (if (urischeme-hierarchical? base-urischeme) ;; scheme *is* hierarchical (%resolved-hierarchical-uri/base-uri/scheme uri base-uri old-urischeme) ;; scheme is *not* hierarchical uri) ;; old-uri's base-uri's schemes are *not* equal uri) ;; old-uri has a scheme, but base-uri does not uri) ;; old-uri does *not* have a scheme (if base-urischeme ;; old-uri does not have a scheme, but base-uri does, so use base's. (if (urischeme-hierarchical? base-urischeme) ;; scheme *is* hierarchical (%resolved-hierarchical-uri/base-uri/scheme (uri-with-scheme uri base-urischeme) base-uri base-urischeme) ;; scheme is *not* hierarchical (uri-with-scheme uri base-urischeme)) ;; neither old-uri nor base-uri have a scheme uri)))))) (doc (defproc (absolute-uri (uri uri-string)) uri? (para "Yields a URI that may be a variation on " (racket uri) " that has been forced to absolute (by, e.g., dropping relative path components, or supplying a missing path). The result might not be an absolute URI, however, due to limitations of the library or insufficient information in the URI. For example:") (racketinput (absolute-uri "http://w/../a") #,(racketresult "http://w/a")) (racketinput (absolute-uri "http://w") #,(racketresult "http://w/")))) (provide absolute-uri) (define (absolute-uri uri) (if (uri-hierarchical? uri) (let* ((old-uripath (uri-uripath uri)) (new-uripath (absolute-uripath old-uripath))) (if (eq? new-uripath old-uripath) uri ;; TODO: Constructing a new uri like this is potentially a ;; problem, if uri is later made extensible and other fields can ;; be added to it. (%make-hierarchical-uri (uri-struct-scheme uri) (uri-uriserver uri) new-uripath (uri-uriquery uri) (uri-fragment uri)))) uri)) (module+ test (test-section 'absolute-uri (test (uri->string (absolute-uri (string->uri "http:foo"))) "http:/foo") (test (uri->string (absolute-uri (string->uri "http:?xxx"))) "http:/?xxx") (test (uri->string (absolute-uri (string->uri "http:../foo"))) "http:/foo") (test (uri->string (absolute-uri (string->uri "http:"))) "http:/") (test (uri->string (absolute-uri (string->uri "mailto:foo"))) "mailto:foo"))) ;; TODO: normalized-uri ;; ;; @defproc normalized-uri uri @result{} string ;; ;; Yields a possibly ``normalized'' variation on URI @var{uri}, such as by ;; consistent use of escaping, and by resolving relative elements in any path. ;; The exact behavior of this procedure will change in future versions of the ;; library. (doc (section "Antiresolution") (para "!!!")) (doc (defproc (antiresolve-uripath (uripath uripath?) (base-uripath uripath?)) uripath? (para "Antiresolves " (racket uripath) " with respect to " (racket base-uripath) ". For example:") (para "!!!"))) (provide antiresolve-uripath) (define (antiresolve-uripath a-uripath b-uripath) (let-values (((a-u a-s) (uripath-upcount+segments/reverse a-uripath)) ((b-u b-s) (uripath-upcount+segments/reverse b-uripath))) (if (or a-u b-u) (error 'antiresolve-uripath "cannot handle relative uripaths in ~S and/or ~S" a-uripath b-uripath) (let-values (((u s) (%antiresolve-uripath/u+s a-s b-s))) (make-uripath/reverse/shared-ok u s))))) ;; TODO: Offer alternative modes for anti-resolve. The following would be ;; "always-relative" mode. Other likely modes include "minimize-string-length" ;; and "(max-upcount N)". Maybe "minimize-segment-count". (define (%antiresolve-uripath/u+s a-segs b-segs) ;; TODO: We could do one loop while no differences found, and a separate loop ;; for once differences found. That might be a mess, though. (let loop ((depth 0) (a-segs-tail a-segs) (b-segs-tail b-segs) (differ-upcount #f) ;;(differ-b-tail #f) ) (if (null? a-segs-tail) ;; No more a-segs, so check if we're also out of b-segs... (if (null? b-segs-tail) ;; We simultaneously reached the ends of a-segs and b-segs, which ;; means they're the same length, so see when we had our last ;; difference... (cond ((not differ-upcount) ;; No differences were found, so the two are equivalent. (values 0 '())) ((equal? (+ 1 differ-upcount) depth) ;; Same lengths, everything was different, so... (values differ-upcount a-segs)) (else ;; Same lengths, differences were found, but not everything ;; was different, so... (values differ-upcount (%take-i a-segs (+ 1 differ-upcount))))) ;; a-segs is null, but b-segs is not, so... (let ((extra-b-len (length b-segs-tail))) (let-values (((ups segs) (%antiresolve-uripath/u+s a-segs (list-tail b-segs extra-b-len)))) ;; TODO: See if (and (zero? ups) (null? segs)) affects the ups. (values (+ extra-b-len (or ups 0)) segs)))) ;; a-segs is *not* null, so see if b-segs is... (if (null? b-segs-tail) ;; a-segs is *not* null, but b-segs *is* null, so... (let loop-a-segs-longer ((len 0) (extra-a-segs a-segs-tail) (a-segs-tail a-segs)) (if (null? extra-a-segs) (let*-values (((ups segs) (%antiresolve-uripath/u+s a-segs-tail b-segs)) ((null-uripath?) (and (zero? ups) (null? segs)))) ;; Note: We could cons the front a-segs as we go through ;; this loop, but that makes us not be tail-recursive, and ;; there's the problem of consing to the result uripath ;; segments without the upcount. (values ups (%take-i/append a-segs (if null-uripath? (+ 1 len) len) segs))) (loop-a-segs-longer (+ 1 len) (cdr extra-a-segs) (cdr a-segs-tail)))) ;; Neither a-segs nor b-segs is null, so... (if (eq? a-segs-tail b-segs-tail) ;; The remainder of a-segs-tail and b-segs-tail are ;; identical, so... (values differ-upcount (%take-i a-segs (+ 1 differ-upcount))) ;; The remainder of a-segs-tail and b-segs-tail are *not* ;; identical, so... (if (equal? (car a-segs-tail) (car b-segs-tail)) ;; The heads *are* equal, so recurse *without* updating the ;; differs. (loop (+ 1 depth) (cdr a-segs-tail) (cdr b-segs-tail) differ-upcount) ;; The heads are *not* equal, so recurse *while* updating ;; the differs. (loop (+ 1 depth) (cdr a-segs-tail) (cdr b-segs-tail) depth))))))) (module+ test (test-section 'antiresolve-uripath (test 'equivalent (antiresolve-uripath '("c" "b" "a") '("c" "b" "a")) '(0)) (test 'same-length-only-top-segment-different (antiresolve-uripath '("x" "c" "b" "a") '("p" "c" "b" "a")) '(0 "x")) (test 'same-length-everything-different (antiresolve-uripath '("z" "y" "x") '("c" "b" "a")) '(2 "z" "y" "x")) (test 'same-length-shared-base (antiresolve-uripath '("z" "y" "x" "c" "b" "a") '("r" "q" "p" "c" "b" "a")) '(2 "z" "y" "x")) (test 'same-length-identical-base (let ((tail '("b" "a"))) (antiresolve-uripath (cons "z" (cons "y" (cons "x" (cons "c" tail)))) (cons "r" (cons "q" (cons "p" (cons "c" tail)))))) '(2 "z" "y" "x")) (test 'equivalent-2 (antiresolve-uripath '("c" "b" "a") '("c" "b" "a")) '(0)) (test 'longer-entirely-shared-base-1 (antiresolve-uripath '("d" "c" "b" "a") '("c" "b" "a")) '(0 "d" "c")) (test 'longer-entirely-shared-base-2 (antiresolve-uripath '("e" "d" "c" "b" "a") '("c" "b" "a")) '(0 "e" "d" "c")) (test 'longer-entirely-shared-base-3 (antiresolve-uripath '("f" "e" "d" "c" "b" "a") '("c" "b" "a")) '(0 "f" "e" "d" "c")) (test 'longer-entirely-shared-base-4 (antiresolve-uripath '("g" "f" "e" "d" "c" "b" "a") '("c" "b" "a")) '(0 "g" "f" "e" "d" "c")) (test 'longer-partially-shared-base-1 (antiresolve-uripath '("e" "d" "c" "b" "a") '("y" "x" "c" "b" "a")) '(1 "e" "d")) (test 'longer-partially-shared-base-2 (antiresolve-uripath '("f" "e" "d" "c" "b" "a") '("y" "x" "c" "b" "a")) '(1 "f" "e" "d")) (test 'longer-partially-shared-base-3 (antiresolve-uripath '("g" "f" "e" "d" "c" "b" "a") '("y" "x" "c" "b" "a")) '(1 "g" "f" "e" "d")) (test 'longer-partially-shared-base-4 (antiresolve-uripath '("g" "f" "e" "d" "c" "b" "a") '("z" "y" "x" "c" "b" "a")) '(2 "g" "f" "e" "d")) (test 'b-longer-partially-shared-base (antiresolve-uripath '("d" "c" "b" "a") '("z" "y" "x" "c" "b" "a")) '(2 "d")) (test 'b-longer-all-different (antiresolve-uripath '("z" "y" "x") '("f" "e" "d" "c" "b" "a")) '(5 "z" "y" "x")) (test-section 'from-uri-tests ;; TODO: !!! fix the expected values for these (test 'link-to-sibling-html-file (antiresolve-uripath (uri-uripath "http://foo/next-page.html") (uri-uripath "http://foo/this-page.html")) '(0 "next-page.html")) (test 'link-to-directory-of-html-file (antiresolve-uripath (uri-uripath "http://foo/") (uri-uripath "http://foo/somepage.html")) '(0 #f)) ; "." (test 'link-to-parent-of-directory-of-html-file (antiresolve-uripath (uri-uripath "http://foo/") (uri-uripath "http://foo/bar/somepage.html")) '(1 #f)) ; ".." (test 'link-to-root-path (antiresolve-uripath (uri-uripath "http://foo/") (uri-uripath "http://foo/bar/products/12398734")) '(2 #f)) ; "../.." (test 'link-to-immediate-subpath (antiresolve-uripath (uri-uripath "http://foo/products/") (uri-uripath "http://foo/products/12398734")) '(0 #f)) ; ".." ))) (doc (defproc (antiresolve-uriserver (uriserver uriserver?) (base-uriserver uriserver?)) uriserver? (para "Antiresolves " (racket uriserver) " with respect to " (racket base-uriserver) ". For example:") (para "!!!"))) (provide antiresolve-uriserver) (define (antiresolve-uriserver uriserver base-uriserver) (if (equal? uriserver base-uriserver) #f uriserver)) (module+ test (test-section 'antiresolve-uriserver (test (uri-uriserver "") #f) (test (uri-uriserver "http://bar/") "bar") (test (uri-uriserver "http://bar:80/") "bar") (test (antiresolve-uriserver (uri-uriserver "") (uri-uriserver "http://foo/")) #f) (test (antiresolve-uriserver (uri-uriserver "http://bar/") (uri-uriserver "http://foo/")) "bar") (test (antiresolve-uriserver (uri-uriserver "http://bar/") (uri-uriserver "")) "bar") (test (antiresolve-uriserver (uri-uriserver "") (uri-uriserver "")) #f) (test (antiresolve-uriserver (uri-uriserver "http://foo/") (uri-uriserver "http://foo/")) #f) (test (antiresolve-uriserver (uri-uriserver "http://foo:80/") (uri-uriserver "http://foo/")) #f) (test (antiresolve-uriserver (uri-uriserver "http://foo/") (uri-uriserver "http://foo:80/")) #f) ;; TODO: Have an antiresolve-uriserver/default-portnum that handles ;; uriserver antiresolution with specified portnum (not like above tests, ;; where the default portnum of 80 has been removed already from the ;; uriserver representation). )) (doc (defproc (antiresolve-uri (uri uri-string?) (base-uri uri-string?)) uri? (para "Antiresolves " (racket uri) " with respect to " (racket base-uri) ". For example:") (para "!!!"))) (provide antiresolve-uri) (define (antiresolve-uri a-uri-string base-uri-string) (log-uri-debug "(antiresolve-uri ~S ~S)" a-uri-string base-uri-string) (let* ((a-uri (%uri-string+error-name->uri a-uri-string 'antiresolve-uri)) (base-uri (%uri-string+error-name->uri base-uri-string 'antiresolve-uri)) (a-urischeme (uri-scheme a-uri)) (base-urischeme (uri-scheme base-uri))) (if (or (not a-urischeme) (eq? a-urischeme base-urischeme)) ;; The urischemes agree, so see whether hierarchical. (if (urischeme-hierarchical? base-urischeme) ;; The urischemes agree and are hierarchical, so compare uriservers. (if (antiresolve-uriserver (uri-uriserver a-uri) (uri-uriserver base-uri)) ;; uriservers are *not* equivalent, so don't antiresolve the URI. a-uri ;; uriservers *are* equivalent, so antiresolve paths. (let ((new-path (antiresolve-uripath (uri-uripath a-uri) (uri-uripath base-uri)))) ;; TODO: !!! don't forget query parts. ;; TODO: !!! check whether anything changed before constructing a new URI. (%make-hierarchical-uri #f ; urischeme #f ; uriserver new-path ; uripath (uri-uriquery a-uri) ; uriquery (uri-fragment a-uri) ; fragment ))) ;; The urischemes agree but are not hierarchical, so don't antiresolve the URI. a-uri) ;; The urischemes don't agree, so don't antiresolve the URI a-uri))) (module+ test (test-section 'antiresolve-uri (test 'initial (uri->string (antiresolve-uri "http://foo/bar" "http://foo/")) "bar") (test 'link-to-sibling-html-file (uri->string (antiresolve-uri "http://foo/next-page.html" "http://foo/this-page.html")) "next-page.html") (test 'link-to-directory-of-html-file (uri->string (antiresolve-uri "http://foo/" "http://foo/somepage.html")) ".") (test 'link-to-parent-of-directory-of-html-file (uri->string (antiresolve-uri "http://foo/" "http://foo/bar/somepage.html")) "../") (test 'link-to-root-path (uri->string (antiresolve-uri "http://foo/" "http://foo/products/12398734")) "../") (test 'link-to-immediate-subpath (uri->string (antiresolve-uri "http://foo/products/" "http://foo/products/12398734")) ".") ;; TODO: !!! add more tests )) (doc (section "Known Issues") (itemlist (item "This library started out as old Scheme code a decade ago, and is currently receiving overhaul. In some ways, would be easier to redo from scratch.") (item "Combine some of the variants of procedures with " (tt "/") " in the names into single procedures with optional keyword arguments.") (item "Add ability to register new URI schemes (their names, whether they're hierarchical, and if so the default port number for server authority."))) (doc history (#:planet 2:0 #:date "2013-09-05" (itemlist (item "In the process of turning some old code into new URI library, breaking the API without remorse. Also now using McFly and Overeasy."))) (#:version "0.2" #:planet 1:0 #:date "2011-08-23" (itemlist (item "This is a release of some code-in-progress that has been sitting around unreleased for years. It has been changed heavily since the 2004, non-PLaneT release, including getting rid of the " (racket uriobj) "-specific operations, so that all operations work on both string and object forms. A few tests fail. Non-backward-compatible API changes are expected."))) (#:version "0.1" #:date "2004-08-18" (itemlist (item " Initial release. Incorporates some code from UriFrame."))))