url-rewriting.scm
(module url-rewriting mzscheme
        (require (lib "url.ss" "net"))
        (require (lib "contract.ss"))
        (require (lib "dispatchers/dispatch.ss" "web-server"))
        (require (lib "configuration.ss" "web-server"))
        (require (lib "request-structs.ss" "web-server"))
        (require (lib "pregexp.ss"))
        (require (lib "list.ss"))
        (require (planet "spod.scm" ("oesterholt" "roos.plt" 1 0)))
        (provide/contract
         (interface-version dispatcher-interface-version?))
        (provide gen-dispatcher
                 url-rewrite
                 url-rewriting-debug
                 url-rewriting-documentation)


        (spod-module-def)
        (spod-module-add (s= "url-rewriting - rewrite urls with regular expression matching"))

        (spod-module-add
         (sp "This module provides a url rewriting for the PLT Web Server."
             "It will match a host and a url, calling a rewrite function if matched.")
         (s== "Configuring")
         (sp "This module does " (s% "(dynamic-require \"rewrite-rules.scm\" 'rewrite-rules)") " and "
             (s% "(dynamic-require \"rewrite-rules.scm\" 'rewrite-rules-debug)") " if the file " (s% "rewrite-rules.scm")
             " or the file " (s% "rewrite-rules.ss") " is found in the current directory.")
         (sp "This file must implement a module " (s% "rewrite-rules") " and provide two functions: "
             (s% "(rewrite-rules)") " and " (s% "(rewrite-rules-debug)"))
         (s=== (s% "(rewrite-rules) --> (list of rule)"))
         (sp "A rule is a list of two regular expressions or boolean #t and a rewriter function. If " (s% "#t") " is used "
             "as a rule, the rule will allways match the given host or url. The match will be " (s% "(list <host>)")
             " or " (s% "(list <url>)") ":"
             (s% "(list ") (s% "host:pregexp-string | #t") (s% "url:pregexp-string | #t")
             (s% "rewriter:(mhost:pregexp-match murl:pregexp-match host:string url:string) --> new-url:string")
             (s% ")"))
         (s=== (s% "(rewrite-rules-debug) --> boolean"))
         (sp "Must return #t to turn on rewrite debugging; #f otherwise.")
         )

        (spod-module-add
         (s== "Provided functions")
         )

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; support
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define-struct rewrite-t (pregexp-host pregexp-url rewriter id s-pregexp-host s-pregexp-url matcher))

        (define (get-host req)
          (letrec ((f (lambda (H)
                        (if (null? H)
                            #f
                            (let ((h (car H)))
                              (let ((field (bytes->string/utf-8 (header-field h))))
                                (if (string-ci=? field "host")
                                    (bytes->string/utf-8 (header-value h))
                                    (f (cdr H)))))))))
            (f (request-headers/raw req))))


        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; matching rules
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define RULES       '())
        (define ID            0)
        (define DEBUG        #f)
        (define MANUAL-DEBUG #f)

        (define URL-CONFIG #f)
        (define URL-DEBUG  #f)

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; matching and rewriting
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define (register-url-rewriter _pregexp-host _pregexp-url rewriter)
          (set! ID (+ ID 1))
          (set! RULES (append RULES (list
                                     (make-rewrite-t (if (eq? _pregexp-host #t)
                                                         #t
                                                         (pregexp _pregexp-host))
                                                     (if (eq? _pregexp-url #t)
                                                         #t
                                                         (pregexp _pregexp-url))
                                                     rewriter
                                                     ID
                                                     _pregexp-host
                                                     _pregexp-url
                                                     #f))))
          ID)
        
        (define (register-matcher-rewriter _matcher _rewriter)
          (set! ID (+ ID 1))
          (set! RULES (append RULES (list
                                     (make-rewrite-t #f #f _rewriter ID "" "" _matcher))))
          ID)

        (define (rewrite host url)
          (letrec ((find (lambda (L)
                           (if (null? L)
                               url
                               (let ((exp (car L)))
                                 (cond 
                                  ((eq? (rewrite-t-matcher exp) #f)
                                   (let ((H (if (eq? (rewrite-t-pregexp-host exp) #t)
                                                (list host)
                                                (pregexp-match (rewrite-t-pregexp-host exp) host))))
                                     (if DEBUG (display (format "url-rewriting: matching host ~a with ~a. match: ~a~%"
                                                                host (rewrite-t-s-pregexp-host exp) H)))
                                     (if (eq? H #f)
                                              (find (cdr L))
                                         (let ((M (if (eq? (rewrite-t-pregexp-url exp) #t)
                                                      (list url)
                                                      (pregexp-match (rewrite-t-pregexp-url exp) url))))
                                           (if DEBUG (display (format "url-rewriting: matching url ~a with ~a. match: ~a~%"
                                                                      url (rewrite-t-s-pregexp-url exp) M)))
                                           (if (eq? M #f)
                                               (find (cdr L))
                                               ((rewrite-t-rewriter exp) H M host url))))))
                                  ((procedure? (rewrite-t-matcher exp))
                                   (call-with-values 
                                    (lambda () ((rewrite-t-matcher exp) host url))
                                    (lambda (host-match . url-match)
                                      (if (eq? host-match #f)
                                          (find (cdr L))
                                          ((rewrite-t-rewriter exp) host-match (car url-match) host url)))))))))))
            (let ((new-url (find RULES)))
              (if DEBUG (display (format "url-rewriting: result: ~a~%" new-url)))
              (string->url new-url))))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; exported interface
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (spod-module-add
         (s=== (s% "(url-rewrite url:url?|string?) --> string?"))
         (sp "Matches a given url with all registered RULES and rewrites for the first match."
             "Returns the rewritten URL as a string or the original url, if no rule matches.")
         (sp "This function can be used to test your url rewriting system."))
        (define (url-rewrite host url)
          (url->string (if (string? url)
                           (rewrite host (string->url url))
                           (rewrite host url))))

        (spod-module-add
         (s=== (s% "(url-rewriting-debug boolean?) --> undefined"))
         (sp "Turns on or off debugging for the url-rewriting. Works with the url-rewrite function."
             "To turn on debugging in the web server context, see 'Configuring'"))
        (define (url-rewriting-debug yn)
          (set! MANUAL-DEBUG yn))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; interface version
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define interface-version 'v1)

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; make dispatcher
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define (gen-dispatcher)

          (if (eq? URL-CONFIG #f)
              (begin

                (cond
                 ((file-exists? "rewrite-rules.scm")
                  (begin
                    (set! URL-CONFIG (dynamic-require "rewrite-rules.scm" 'rewrite-rules))
                    (set! URL-DEBUG  (dynamic-require "rewrite-rules.scm" 'rewrite-rules-debug))))
                 ((file-exists? "rewrite-rules.ss")
                  (begin
                    (set! URL-CONFIG (dynamic-require "rewrite-rules.ss" 'rewrite-rules))
                    (set! URL-DEBUG  (dynamic-require "rewrite-rules.ss" 'rewrite-rules-debug))))
                 (else
                  (begin
                    (set! URL-CONFIG (lambda () '()))
                    (set! URL-DEBUG  (lambda () #f)))))

                (for-each (lambda (rule) 
                            (if (procedure? (car rule))
                                (apply register-matcher-rewriter rule) 
                                (apply register-url-rewriter rule))) (URL-CONFIG))))

          (lambda (conn req)
            (set! DEBUG (or MANUAL-DEBUG (URL-DEBUG)))
            (let ((_url  (url->string (request-uri req)))
                  (_host (get-host req)))
              (if DEBUG (display (format "rules:~s~%" (map (lambda (r)
                                                             (list
                                                              (rewrite-t-s-pregexp-host r)
                                                              (rewrite-t-s-pregexp-url  r)))
                                                           RULES))))
              (if DEBUG (display (format "url-rewriting url:~a, host:~a ~%" _url _host)))
              (set-request-uri! req (rewrite _host _url))
              (next-dispatcher))))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;;; url rewriting documentation
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        (define %module-doc (spod-module-doc))
        (define (url-rewriting-documentation)
          %module-doc)

        )