scgi.ss
#lang scheme/base
;;; @Package     scgi
;;; @Subtitle    Web HTTP SCGI and CGI in PLT Racket/Scheme
;;; @HomePage    http://www.neilvandyke.org/scgi-scheme/
;;; @Author      Neil Van Dyke
;;; @Version     0.3
;;; @Date        2010-11-14
;;; @PLaneT      neil/scgi:1:2

;; $Id: scgi.ss,v 1.50 2010/11/14 20:33:25 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2010 Neil Van Dyke.  This program is Free Software;
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

(require (only-in scheme/port make-limited-input-port)
         (only-in scheme/tcp  tcp-accept tcp-listen))

;;; @section Introduction
;;;
;;; @i{Note: This library is currently being used successfully in a large
;;; system.  As noted in the API documentation, one feature has not yet been
;;; implemented fully, and some documentation remains to be done.}
;;;
;;; The @b{scgi} library implements fast Web CGI using the SCGI protocol.  This
;;; library is used in conjunction with an HTTP server supporting SCGI, such as
;;; Apache Server with the @code{mod_scgi} module.
;;;
;;; The @b{scgi} library also supports running as normal Web CGI without any
;;; change to source code, such as during development of an application intended
;;; to be deployed using SCGI.
;;;
;;; SCGI was specified by Neil Schemenauer in ``SCGI: A Simple Common Gateway
;;; Interface alternative,'' dated 2008-06-23
;;; (@indicateurl{http://python.ca/scgi/protocol.txt}).
;;;
;;; An example usage of this library:
;;;
;;; @example
;;; #! /usr/bin/mzscheme
;;; #lang scheme/base
;;; @end example
;;; @lisp
;;; (require (planet neil/scgi))
;;;
;;; (let ((my-foo (my-open-foo)))
;;;
;;;   (cgi (lambda ()
;;;          (and (eq? (cgi-type) 'scgi)
;;;               (my-do-some-init-only-for-scgi)))
;;;        (lambda ()
;;;          (display "Content-type: text/html\r\n\r\n")
;;;          (display "<p>Hello, world</p>"))
;;;        (lambda ()
;;;          (my-close-foo my-foo))))
;;; @end lisp
;;;
;;; @subsection Apache mod_scgi
;;;
;;; Note that your Apache Server installation might not have @code{mod_scgi}
;;; module installed or enabled by default.  If you happen to be running Debian
;;; Lenny (@code{stable}) or similar, this module can be installed via the
;;; Debian package @code{libapache2-mod-scgi}.

;; TODO: !!! Document the Apache SCGI configuration.
;;
;; @example
;; SCGIMount /dynamic 127.0.0.1:4000
;; SCGIHandler on
;; SCGIServer 127.0.0.1:4000
;; SCGIServerTimeout 60
;; @end example

(define-syntax %debug
  (syntax-rules ()
    ((_ FMT ARG ...)
     (log-debug (format (string-append "~A " FMT)
                        (current-milliseconds)
                        ARG ...)))))

(define %cur-cgi-request-id               (make-parameter #f))
(define %cur-cgi-type                     (make-parameter #f))
(define %cur-end-cgi-request-ec           (make-parameter #f))
(define %cur-scgi-content-length          (make-parameter #f))
(define %cur-scgi-variables               (make-parameter #f))
(define %cur-stop-cgi-service-immediately (make-parameter #f))

(define (%invalid-cgi-type-error sym)
  (error sym "Invalid CGI type: ~S" (%cur-cgi-type)))

(define (%cgi-context-only-error sym)
  (error sym "Only works in CGI context"))

(define (%cgi-request-context-only-error sym)
  (error sym "Only works in CGI request context"))

(define (%assert-cgi-request-context sym)
  (or (%cur-cgi-request-id)
      (%cgi-request-context-only-error sym)))

(define (%bytes->number x)
  ;; TODO: Maybe make this more efficient.
  (string->number (bytes->string/latin-1 x)))

(define (%read-scgi-variables in)
  (begin0
      (cond
       ((regexp-match #rx"^[0-9]+" in)
        => (lambda (m)
             (or (eqv? 58 (read-byte in)) ; colon
                 (error '%read-scgi-variables
                        "Expected colon after ~S."
                        (car m)))
             ;; Note: We do the read non-tail-recursively, so that we don't
             ;; have to reverse the result.
             (let loop ((size-left (%bytes->number (car m))))
               (if (zero? size-left)
                   '()
                   (cond
                    ((regexp-match #rx"^([A-Za-z0-9_]+)\000([^\000]*)\000"
                                   in
                                   0
                                   size-left)
                     =>
                     (lambda (m)
                       (cons (cons (list-ref m 1)
                                   (list-ref m 2))
                             (loop (- size-left
                                      (bytes-length (list-ref m 0)))))))
                    (else
                     (error
                      '%read-scgi-variables
                      "Could not read SCGI header with ~S bytes remaining."
                      size-left)))))))
       (else (error '%read-scgi-variables
                    "Did not read size number of SCGI header.")))
    (or (eqv? 44 (read-byte in))
        (error '%read-scgi-variables
               "Could not read comma in SCGI header."))))

(define (%handle-scgi-accept in out proc request-id)
  (%debug "%handle-scgi-accept request-id=~S begin"
          request-id)
  ;; TODO: Catch any error in here, log it to stderr (with flush), and
  ;; kill thread without exiting app.
  (let ((request-exit-status 0))
    (let* ((variables (%read-scgi-variables in))
           (content-length
            (%bytes->number
             (cond ((assoc #"CONTENT_LENGTH" variables) => cdr)
                   (error '%handle-scgi-accept
                          "~S missing CONTENT_LENGTH in ~S"
                          request-id
                          variables))))
           (content-in (make-limited-input-port in content-length)))
      (%debug "%handle-scgi-accept request-id=~S variables=~S"
              request-id
              variables)
      (parameterize ((current-input-port       content-in)
                     (current-output-port      out)
                     (%cur-scgi-variables      variables)
                     (%cur-scgi-content-length content-length)
                     (%cur-cgi-request-id      request-id))
        (%debug "%handle-scgi-accept request-id=~S proc" request-id)
        (proc)
        (%debug "%handle-scgi-accept request-id=~S flushing" request-id)
        (flush-output out)
        (%debug "%handle-scgi-accept request-id=~S end" request-id)))))

;;; @section Interface

;;; @defproc cgi startup-proc request-proc shutdown-proc
;;;
;;; Implement CGI.  Normal CGI is used if the @code{REQUEST_URI} environment
;;; variable is defined; otherwise, SCGI is used.
;;;
;;; @var{startup-proc} is a thunk that is evaluated once (before listener
;;; starts).  @var{request-proc} is evaluated once for each request (which, in
;;; normal CGI, is once).  @var{shutdown-proc} is evaluated once, as processing
;;; of all CGI requests has finished.
;;;
;;; For evaluation of @var{request-proc}, the default input and output ports
;;; are as with normal CGI, regardless of whether normal CGI or SCGI is in use.

(define (cgi startup-proc
             request-proc
             shutdown-proc
             #:scgi-hostname       (scgi-hostname       "127.0.0.1")
             #:scgi-max-allow-wait (scgi-max-allow-wait 4)
             #:scgi-portnum        (scgi-portnum        4000))
  (let ((type (if (getenv "REQUEST_URI")
                  'normal
                  'scgi)))
    (parameterize ((%cur-cgi-type type))
      (startup-proc)

      (dynamic-wind

        (lambda () #f)

        (lambda ()
          (let/ec stop-cgi-service-immediately-ec
            (parameterize ((%cur-stop-cgi-service-immediately
                            stop-cgi-service-immediately-ec))

              (case type

                ((scgi)
                 (let ((listener-cust (make-custodian)))
                   (parameterize ((current-custodian listener-cust))

                     (let ((listener (tcp-listen scgi-portnum
                                                 scgi-max-allow-wait
                                                 #f
                                                 scgi-hostname)))
                       (dynamic-wind
                         (lambda () #f)
                         (lambda ()
                           (let loop ((request-id 1))
                             (let ((request-cust (make-custodian listener-cust)))
                               (parameterize ((current-custodian request-cust))
                                 (let-values (((in out) (tcp-accept listener)))
                                   (%debug "cgi accepted request-id=~S" request-id)
                                   (thread (lambda ()
                                             (dynamic-wind
                                               (lambda () #f)
                                               (lambda ()
                                                 (%handle-scgi-accept in
                                                                      out
                                                                      request-proc
                                                                      request-id))
                                               (lambda ()
                                                 (custodian-shutdown-all request-cust)))))))
                               (loop (+ 1 request-id)))))
                         (lambda ()
                           (custodian-shutdown-all listener-cust)
                           ;; TODO: !!! are we shutting down the tcp-listen?
                           (sleep 1)))))))

                ((normal)
                 ;; TODO: !!! set up end-cgi-request
                 (parameterize ((%cur-cgi-request-id 0))
                   ;; !!!
                   (request-proc)
                   ;; !!!
                   ))

                (else (%invalid-cgi-type-error 'cgi))))))

        (lambda ()
          (parameterize-break #t
            (shutdown-proc)))))))

;;; @defproc cgi-content-length
;;;
;;; In a CGI request context, returns the CGI content length -- the number of
;;; bytes that can be read from the default input port -- as integer.

(define (cgi-content-length)
  (case (or (%cur-cgi-type)
            ;; Note: Error message here complains about CGI request context,
            ;; even though the test is only for any CGI context.
            (%cgi-request-context-only-error 'cgi-content-length))

    ((scgi)
     (%cur-scgi-content-length))

    ((normal)
     (%assert-cgi-request-context 'cgi-content-length)
     (string->number (getenv "CONTENT_LENGTH")))

    (else (%invalid-cgi-type-error 'cgi-content-length))))

;;; @defproc  cgi-content-type
;;; @defprocx cgi-http-cookie
;;; @defprocx cgi-http-host
;;; @defprocx cgi-https
;;; @defprocx cgi-path-info
;;; @defprocx cgi-query-string
;;; @defprocx cgi-remote-user
;;; @defprocx cgi-request-method
;;; @defprocx cgi-request-uri
;;; @defprocx cgi-script-name
;;; @defprocx cgi-server-name
;;; @defprocx cgi-server-port
;;; @defprocx cgi-http-user-agent
;;;
;;; In a CGI request context, returns the corresponding CGI value as a string.

(define (%make-cgi-variable-proc sym name-bytes)
  (let ((name-string (bytes->string/latin-1 name-bytes)))
    (lambda ()
      (case (%cur-cgi-type)

        ((scgi)
         (cond ((assoc name-bytes (or (%cur-scgi-variables)
                                      (%cgi-request-context-only-error sym)))
                => (lambda (pair)
                     (bytes->string/latin-1 (cdr pair))))
               (else #f)))

        ((normal)
         (%assert-cgi-request-context sym)
         (getenv name-string))

        (else (%invalid-cgi-type-error sym))))))

(define cgi-content-type    (%make-cgi-variable-proc 'cgi-content-type
                                                     #"CONTENT_TYPE"))
(define cgi-http-cookie     (%make-cgi-variable-proc 'cgi-http-cookie
                                                     #"HTTP_COOKIE"))
(define cgi-http-host       (%make-cgi-variable-proc 'cgi-http-host
                                                     #"HTTP_HOST"))
(define cgi-http-user-agent (%make-cgi-variable-proc 'cgi-http-user-agent
                                                     #"HTTP_USER_AGENT"))
(define cgi-https           (%make-cgi-variable-proc 'cgi-https
                                                     #"HTTPS"))
(define cgi-path-info       (%make-cgi-variable-proc 'cgi-path-info
                                                     #"PATH_INFO"))
(define cgi-query-string    (%make-cgi-variable-proc 'cgi-query-string
                                                     #"QUERY_STRING"))
(define cgi-remote-user     (%make-cgi-variable-proc 'cgi-remote-user
                                                     #"REMOTE_USER"))
(define cgi-request-method  (%make-cgi-variable-proc 'cgi-request-method
                                                     #"REQUEST_METHOD"))
(define cgi-request-uri     (%make-cgi-variable-proc 'cgi-request-uri
                                                     #"REQUEST_URI"))
(define cgi-script-name     (%make-cgi-variable-proc 'cgi-script-name
                                                     #"SCRIPT_NAME"))
(define cgi-server-name     (%make-cgi-variable-proc 'cgi-server-name
                                                     #"SERVER_NAME"))
(define cgi-server-port     (%make-cgi-variable-proc 'cgi-server-port
                                                     #"SERVER_PORT"))

;;; @defproc scgi-variables
;;;
;;; When called in SCGI mode, this procedure yields an alist of SCGI variables
;;; with both the key and value of each pair being byte strings.  Calling this
;;; procedure in normal CGI mode is an error.
;;;
;;; Note that normally you will not need to use this procedure, and will
;;; instead use procedures like @code{cgi-request-uri}, which work in both SCGI
;;; and normal CGI modes.

(define (scgi-variables)
  (or (%cur-scgi-variables)
      (if (eq? (%cur-cgi-type) 'scgi)
          (%cgi-request-context-only-error 'scgi-variables)
          (error 'scgi-variables
                 "Only works in SCGI type CGI request context. Type: ~S"
                 (%cur-cgi-type)))))

;;; @defproc cgi-type
;;;
;;; Returns a symbol indicating the CGI type: @code{normal} or @code{scgi}.
;;; Behavior outside of the @code{cgi} form is undefined.

(define (cgi-type)
  (or (%cur-cgi-type)
      (%cgi-context-only-error 'cgi-type)))

;;; @defproc cgi-request-id
;;;
;;; In CGI request context, yields a printable identifying object for the
;;; current request that is unique at least for the current requests being
;;; handled.  This identifying object is intended to be used in debugging
;;; messages.

(define (cgi-request-id)
  (or (%cur-cgi-request-id)
      (%cgi-request-context-only-error 'cgi-request-id)))

;; TODO: !!! finish this

;;; @defproc  end-cgi-request
;;; @defprocx end-cgi-request/error
;;;
;;; (NOTE: THIS IS NOT YET FULLY IMPLEMENTED AND TESTED.)

(define (%end-cgi-request/sym/status sym status)
  ((or (%cur-end-cgi-request-ec)
       (%cgi-request-context-only-error sym))
   status))

(define (end-cgi-request)
  (%end-cgi-request/sym/status 'end-cgi-request 0))

(define (end-cgi-request/error)
  (%end-cgi-request/sym/status 'end-cgi-request/error 1))

;;; @defproc stop-cgi-service-immediately
;;;
;;; Stops processing all CGI requests.  This works only within the
;;; @var{request-proc} of the @code{cgi} form.

(define (stop-cgi-service-immediately)
  ((or (%cur-stop-cgi-service-immediately)
       (%cgi-context-only-error 'stop-cgi-service-immediately))))

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.3 --- 2010-11-14 --- PLaneT @code{(1 2)}
;;; Added @code{cgi-http-user-agent}.
;;;
;;; @item Version 0.2 --- 2010-10-11 --- PLaneT @code{(1 1)}
;;; Documentation changes to reflect that it is successfully in use in a real
;;; system, and some work remains.
;;;
;;; @item Version 0.1 --- 2010-05-25 --- PLaneT @code{(1 0)}
;;; Initial release.  Preliminary.
;;;
;;; @end table

(provide
 end-cgi-request
 end-cgi-request/error
 cgi
 cgi-content-length
 cgi-content-type
 cgi-http-cookie
 cgi-http-host
 cgi-http-user-agent
 cgi-https
 cgi-path-info
 cgi-query-string
 cgi-remote-user
 cgi-request-method
 cgi-request-id
 cgi-request-uri
 cgi-script-name
 cgi-server-name
 cgi-server-port
 cgi-type
 scgi-variables
 stop-cgi-service-immediately)