#lang scheme/base ;;; @Package scgi ;;; @Subtitle Web Server HTTP SCGI and CGI in Racket ;;; @HomePage http://www.neilvandyke.org/racket-scgi/ ;;; @Author Neil Van Dyke ;;; @Version 0.6 ;;; @Date 2011-08-22 ;;; @PLaneT neil/scgi:1:=5 ;; $Id: scgi.rkt,v 1.69 2011/08/22 11:06:20 neilpair Exp $ ;;; @legal ;;; Copyright @copyright{} 2010--2011 Neil Van Dyke. This program is Free ;;; Software; Software; you can redistribute it and/or modify it under the ;;; terms of the GNU Lesser General Public License as published by the Free ;;; Software Foundation; either version 3 of the License (LGPL 3), or (at your ;;; option) any later version. This program is distributed in the hope that it ;;; will be useful, but without any warranty; without even the implied warranty ;;; of merchantability or fitness for a particular purpose. See ;;; @indicateurl{http://www.gnu.org/licenses/} for details. For other licenses ;;; and consulting, please contact the author. ;;; @end legal (require (only-in scheme/port make-limited-input-port) (only-in scheme/tcp tcp-accept/enable-break tcp-close tcp-listen)) ;;; @section Introduction ;;; ;;; The @b{scgi} library implements fast Web CGI using the SCGI protocol. This ;;; library is used in conjunction with an HTTP server supporting SCGI, such as ;;; Apache Server with the @code{mod_scgi} module. ;;; ;;; The @b{scgi} library also supports running as normal Web CGI without any ;;; change to the source code of the app, such as during development of an ;;; application intended to be deployed using SCGI. This also gives ;;; flexibility in deployment, allowing a system administrator to switch ;;; between either mode just by editing the HTTP server configuration. ;;; ;;; The SCGI protocol was specified by Neil Schemenauer in ``SCGI: A Simple ;;; Common Gateway Interface alternative,'' dated 2008-06-23 ;;; (@uref{http://python.ca/scgi/protocol.txt}). ;;; ;;; An example usage of this library: ;;; ;;; @lisp ;;; (require (planet neil/scgi)) ;;; ;;; (cgi (lambda () ;;; (and (eq? (cgi-type) 'scgi) ;;; (my-do-some-init-only-for-scgi))) ;;; ;;; (lambda () ;;; (display "Content-type: text/html\r\n\r\n") ;;; (display "<p>Hello, world</p>")) ;;; ;;; (lambda () ;;; (my-shutdown-stuff))) ;;; @end lisp ;;; ;;; @subsection Apache mod_scgi ;;; ;;; Apache Server is one way to run SCGI, though not the only way. Note that ;;; your Apache installation might not have the @code{mod_scgi} module ;;; installed or enabled by default. If you happen to be running Debian Lenny ;;; (@code{stable}) or similar, this module can be installed via the Debian ;;; package @code{libapache2-mod-scgi}. ;;; ;;; Once you've installed @code{mod_scgi}, you need some standard SCGI ;;; directives to end up in your Apache config files, whether you accomplish ;;; that by editing config files manually, making symbolic links in a ;;; @code{mods-enabled} directory, or clicking in a GUI. ;;; ;;; For example, the following loads @code{mod_scgi}, maps URL paths under ;;; @code{/mypath} to the SCGI server on the local machine at the standard SCGI ;;; TCP port, and sets a 60-second timeout for the SCGI server to respond to a ;;; request before Apache drops the connection: ;;; ;;; @example ;;; LoadModule scgi_module /usr/lib/apache2/modules/mod_scgi.so ;;; SCGIMount /mypath 127.0.0.1:4000 ;;; SCGIServerTimeout 60 ;;; @end example ;;; ;;; There are additional @code{mod_scgi} Apache config directives, including ;;; @code{SCGIHandler} and @code{SCGIServer}. (define-syntax %debug (syntax-rules () ((_ FMT ARG ...) (log-debug (format (string-append "scgi: ~A " FMT) (current-milliseconds) ARG ...))))) (define %cur-cgi-request-id (make-parameter #f)) (define %cur-cgi-type (make-parameter #f)) (define %cur-end-cgi-request-ec (make-parameter #f)) (define %cur-scgi-content-length (make-parameter #f)) (define %cur-scgi-variables (make-parameter #f)) (define %cur-stop-cgi-service-immediately (make-parameter #f)) (define (%invalid-cgi-type-error sym) (error sym "Invalid CGI type: ~S" (%cur-cgi-type))) (define (%cgi-context-only-error sym) (error sym "Only works in CGI context")) (define (%cgi-request-context-only-error sym) (error sym "Only works in CGI request context")) (define (%assert-cgi-request-context sym) (or (%cur-cgi-request-id) (%cgi-request-context-only-error sym))) (define (%bytes->number x) ;; TODO: Maybe make this more efficient. (string->number (bytes->string/latin-1 x))) (define (%read-scgi-variables in) (begin0 (cond ;; TODO: See whether these regexp operations on the port are sucking in ;; our entire input, including POST data. If we limit this regexp-match ;; on input port to 5 chars or so, and also address the regexp below. ;; UPDATE: In stress-testing with doing an HTTP POST of approx 100MB ;; form file attachment, hundreds of times, under Racket 5.1.1 and PLT ;; Scheme 4.2.5, process size did not grow. Additionally, at least ;; under 4.2.5, it appears that the regexps alone did *not* cause all ;; the POST data to be read. We probably still want to limit or get rid ;; of these regexps, but the successful test results are reassuring for ;; now. ((regexp-match #rx"^[0-9]+" in) => (lambda (m) (or (eqv? 58 (read-byte in)) ; colon (error '%read-scgi-variables "Expected colon after ~S." (car m))) ;; Note: We do the read non-tail-recursively, so that we don't ;; have to reverse the result. (let loop ((size-left (%bytes->number (car m)))) (if (zero? size-left) '() (cond ;; TODO: Should we limit this regexp as well? Or rewrite ;; it to do char-by-char I/O and not use regexp? ((regexp-match #rx"^([A-Za-z0-9_]+)\000([^\000]*)\000" in 0 size-left) => (lambda (m) (cons (cons (list-ref m 1) (list-ref m 2)) (loop (- size-left (bytes-length (list-ref m 0))))))) (else (error '%read-scgi-variables "Could not read SCGI header with ~S bytes remaining." size-left))))))) (else (error '%read-scgi-variables "Did not read size number of SCGI header."))) (or (eqv? 44 (read-byte in)) (error '%read-scgi-variables "Could not read comma in SCGI header.")))) (define (%handle-scgi-accept in out proc request-id) (%debug "%handle-scgi-accept request-id=~S begin" request-id) (let ((request-exit-status 0)) (let* ((variables (%read-scgi-variables in)) (content-length (%bytes->number (cond ((assoc #"CONTENT_LENGTH" variables) => cdr) (error '%handle-scgi-accept "~S missing CONTENT_LENGTH in ~S" request-id variables)))) (content-in (make-limited-input-port in content-length))) (%debug "%handle-scgi-accept request-id=~S variables=~S" request-id variables) (parameterize ((current-input-port content-in) (current-output-port out) (%cur-scgi-variables variables) (%cur-scgi-content-length content-length) (%cur-cgi-request-id request-id)) (%debug "%handle-scgi-accept request-id=~S proc" request-id) (proc) (%debug "%handle-scgi-accept request-id=~S flushing" request-id) (flush-output out) (%debug "%handle-scgi-accept request-id=~S end" request-id))))) ;;; @section Interface ;;; @defproc cgi startup-proc request-proc shutdown-proc ;;; ;;; Implement CGI. Normal CGI is used if the @code{REQUEST_URI} environment ;;; variable is defined (which suggests that the code is being called in a CGI ;;; context); otherwise, SCGI is used. ;;; ;;; @var{startup-proc} is a thunk that is evaluated once (before listener ;;; starts). @var{request-proc} is evaluated once for each request (which, in ;;; normal CGI, is once). @var{shutdown-proc} is evaluated once, as processing ;;; of all CGI requests has finished. ;;; ;;; For evaluation of @var{request-proc}, the default input and output ports ;;; are as with normal CGI, regardless of whether normal CGI or SCGI is in use. ;;; ;;; This procedure also accepts a few optional keyword arguments, all of which ;;; apply to SCGI mode only: ;;; ;;; @lisp ;;; #:scgi-hostname (scgi-hostname "127.0.0.1") ;;; #:scgi-portnum (scgi-portnum 4000) ;;; #:scgi-max-allow-wait (scgi-max-allow-wait 4) ;;; #:reuse-scgi-port? (reuse-scgi-port? #t) ;;; @end lisp ;;; ;;; @code{scgi-hostname} is the hostname or IP address (as a string) for the ;;; interface on which to listen. @code{scgi-portnum} is the TCP port number ;;; on that interface. @code{scgi-max-allow-wait} is the maximum number of ;;; unaccepted connections to permit waiting. @code{reuse-scgi-port?} is ;;; whether or not to reuse the TCP port number, such as if a previous server ;;; exited and the port is in a @code{TIME_WAIT} state. (define (cgi startup-proc request-proc shutdown-proc #:scgi-hostname (scgi-hostname "127.0.0.1") #:scgi-max-allow-wait (scgi-max-allow-wait 4) #:scgi-portnum (scgi-portnum 4000) #:reuse-scgi-port? (reuse-scgi-port? #t)) (let ((type (if (getenv "REQUEST_URI") 'normal 'scgi))) (parameterize ((%cur-cgi-type type)) (startup-proc) (dynamic-wind (lambda () #f) (lambda () (let/ec stop-cgi-service-immediately-ec (parameterize ((%cur-stop-cgi-service-immediately stop-cgi-service-immediately-ec)) (case type ((scgi) (let ((listener-cust (make-custodian))) (parameterize ((current-custodian listener-cust)) (let ((listener (tcp-listen scgi-portnum scgi-max-allow-wait reuse-scgi-port? scgi-hostname))) (dynamic-wind (lambda () #f) (lambda () (let loop ((request-id 1)) (let ((request-cust (make-custodian listener-cust))) (parameterize ((current-custodian request-cust)) (let-values (((in out) (tcp-accept/enable-break listener))) (%debug "cgi accepted request-id=~S" request-id) (thread (lambda () (dynamic-wind (lambda () #f) (lambda () (%handle-scgi-accept in out request-proc request-id)) (lambda () (custodian-shutdown-all request-cust))))))) (loop (+ 1 request-id))))) (lambda () (%debug "scgi listener custodian shutdown") (custodian-shutdown-all listener-cust) ;; Note: We have verified under Racket 5.1.1 that ;; the custodian already closes the listener, so ;; this code is not necessary. ;; ;; (%debug "scgi listener tcp-close") ;; (with-handlers ;; ((exn:fail? ;; (lambda (e) ;; (%debug ;; "scgi-listener tcp-close failed: ~S" ;; (exn-message e))))) ;; (tcp-close listener)) (%debug "scgi listener shutdown sleep") (sleep 1))))))) ((normal) ;; TODO: !!! Set up end-cgi-request (parameterize ((%cur-cgi-request-id 0)) (request-proc))) (else (%invalid-cgi-type-error 'cgi)))))) (lambda () (parameterize-break #t (shutdown-proc))))))) ;;; @defproc cgi-content-length ;;; ;;; In a CGI request context, returns the CGI content length -- the number of ;;; bytes that can be read from the default input port -- as integer. (define (cgi-content-length) (case (or (%cur-cgi-type) ;; Note: Error message here complains about CGI request context, ;; even though the test is only for any CGI context. (%cgi-request-context-only-error 'cgi-content-length)) ((scgi) (%cur-scgi-content-length)) ((normal) (%assert-cgi-request-context 'cgi-content-length) (string->number (getenv "CONTENT_LENGTH"))) (else (%invalid-cgi-type-error 'cgi-content-length)))) ;;; @defproc make-cgi-variable-proc sym name-bytes ;;; ;;; Produces a procedure for getting a CGI environment variable value as a ;;; string. Works whether in normal CGI or SCGI. This is useful for accessing ;;; non-standard variables, such as might be provided by an unusual Apache ;;; module. Argument @var{sym} is a symbol for the name of the procedure, ;;; which is used in error reporting. Argument @var{name-bytes} is the name of ;;; the environment variable, as a byte string. ;;; ;;; For example, the @code{cgi-remote-user} procedure could be defined as: ;;; ;;; @lisp ;;; (define cgi-remote-user ;;; (make-cgi-variable-proc 'cgi-remote-user #"REMOTE_USER")) ;;; @end lisp (define (make-cgi-variable-proc sym name-bytes) (let ((name-string (bytes->string/latin-1 name-bytes))) (lambda () (case (%cur-cgi-type) ((scgi) (cond ((assoc name-bytes (or (%cur-scgi-variables) (%cgi-request-context-only-error sym))) => (lambda (pair) (bytes->string/latin-1 (cdr pair)))) (else #f))) ((normal) (%assert-cgi-request-context sym) (getenv name-string)) (else (%invalid-cgi-type-error sym)))))) ;;; @defproc cgi-content-type ;;; @defprocx cgi-document-root ;;; @defprocx cgi-http-cookie ;;; @defprocx cgi-http-host ;;; @defprocx cgi-http-referer ;;; @defprocx cgi-http-user-agent ;;; @defprocx cgi-https ;;; @defprocx cgi-path-info ;;; @defprocx cgi-path-translated ;;; @defprocx cgi-query-string ;;; @defprocx cgi-remote-addr ;;; @defprocx cgi-remote-host ;;; @defprocx cgi-remote-user ;;; @defprocx cgi-request-method ;;; @defprocx cgi-request-uri ;;; @defprocx cgi-script-name ;;; @defprocx cgi-server-name ;;; @defprocx cgi-server-port ;;; ;;; In a CGI request context, returns the corresponding CGI value as a string. ;;; Note that @code{cgi-content-length} is @emph{not} in this list, and it ;;; returns a number rather than a string. (define-syntax %define-cgi-variable-proc (syntax-rules () ((_ ID BYTES) (define ID (make-cgi-variable-proc (quote ID) BYTES))))) (%define-cgi-variable-proc cgi-content-type #"CONTENT_TYPE") (%define-cgi-variable-proc cgi-document-root #"DOCUMENT_ROOT") (%define-cgi-variable-proc cgi-http-cookie #"HTTP_COOKIE") (%define-cgi-variable-proc cgi-http-host #"HTTP_HOST") (%define-cgi-variable-proc cgi-http-referer #"HTTP_REFERER") (%define-cgi-variable-proc cgi-http-user-agent #"HTTP_USER_AGENT") (%define-cgi-variable-proc cgi-https #"HTTPS") (%define-cgi-variable-proc cgi-path-info #"PATH_INFO") (%define-cgi-variable-proc cgi-path-translated #"PATH_TRANSLATED") (%define-cgi-variable-proc cgi-query-string #"QUERY_STRING") (%define-cgi-variable-proc cgi-remote-addr #"REMOTE_ADDR") (%define-cgi-variable-proc cgi-remote-host #"REMOTE_HOST") (%define-cgi-variable-proc cgi-remote-user #"REMOTE_USER") (%define-cgi-variable-proc cgi-request-method #"REQUEST_METHOD") (%define-cgi-variable-proc cgi-request-uri #"REQUEST_URI") (%define-cgi-variable-proc cgi-script-name #"SCRIPT_NAME") (%define-cgi-variable-proc cgi-server-name #"SERVER_NAME") (%define-cgi-variable-proc cgi-server-port #"SERVER_PORT") ;;; @defproc scgi-variables ;;; ;;; When called in SCGI mode, this procedure yields an alist of SCGI variables ;;; with both the key and value of each pair being byte strings. Calling this ;;; procedure in normal CGI mode is an error. ;;; ;;; Note that normally you will not need to use this procedure, and will ;;; instead use procedures like @code{cgi-request-uri}, which work in both SCGI ;;; and normal CGI modes. (define (scgi-variables) (or (%cur-scgi-variables) (if (eq? (%cur-cgi-type) 'scgi) (%cgi-request-context-only-error 'scgi-variables) (error 'scgi-variables "Only works in SCGI type CGI request context. Type: ~S" (%cur-cgi-type))))) ;;; @defproc cgi-type ;;; ;;; Returns a symbol indicating the CGI type: @code{normal} or @code{scgi}. ;;; Behavior outside of the @code{cgi} form is undefined. (define (cgi-type) (or (%cur-cgi-type) (%cgi-context-only-error 'cgi-type))) ;;; @defproc cgi-request-id ;;; ;;; In CGI request context, yields a printable identifying object for the ;;; current request that is unique at least for the current requests being ;;; handled. This identifying object is intended to be used in debugging ;;; messages. (define (cgi-request-id) (or (%cur-cgi-request-id) (%cgi-request-context-only-error 'cgi-request-id))) ;; TODO: !!! finish implementing end-cgi-request ;; @defproc end-cgi-request ;; @defprocx end-cgi-request/error ;; ;; (NOTE: THIS IS NOT YET FULLY IMPLEMENTED AND TESTED.) (define (%end-cgi-request/sym/status sym status) ((or (%cur-end-cgi-request-ec) (%cgi-request-context-only-error sym)) status)) (define (end-cgi-request) (%end-cgi-request/sym/status 'end-cgi-request 0)) (define (end-cgi-request/error) (%end-cgi-request/sym/status 'end-cgi-request/error 1)) ;;; @defproc stop-cgi-service-immediately ;;; ;;; Stops processing all CGI requests. This works only within the ;;; @var{request-proc} of the @code{cgi} form. (define (stop-cgi-service-immediately) ((or (%cur-stop-cgi-service-immediately) (%cgi-context-only-error 'stop-cgi-service-immediately)))) ;;; @section Troubleshooting ;;; This section has some troubleshooting tips. Currently, these come from use ;;; with @code{mod_scgi} atop Apache 2.2.9 atop Debian GNU/Linux. ;;; ;;; @itemize ;;; ;;; @item ;;; Racket error ``tcp-write: error writing (Broken pipe; errno=32)'' or ;;; ``tcp-write: error writing (Connection reset by peer; errno=104)'' is ;;; likely due to the HTTP request having been dropped by the HTTP client ;;; (e.g., user stops a page load in their browser before page finishes ;;; loading) or by Apache hitting @code{SCGIServerTimeout} for the request. ;;; Note that buffered I/O means that you won't necessarily get this error even ;;; if the request is aborted this way. ;;; ;;; @item ;;; Apache error log entry ``Premature end of script headers: @i{PATH}'' ;;; followed by ``(500)Unknown error 500: scgi: Unknown error 500: error ;;; reading response headers'' can mean that @code{SCGIServerTimeout} was hit ;;; before any HTTP headers from the SCGI request handler were started or ;;; completed. Note that buffered I/O can mean that some of the Racket code of ;;; the handler wrote some text, but it was not yet flushed to the SCGI client. ;;; ;;; @item ;;; Apache error log entry ``(70007)The timeout specified has expired: ;;; ap_content_length_filter: apr_bucket_read() failed'' followed by ;;; ``(70007)The timeout specified has expired: scgi: The timeout specified has ;;; expired: ap_pass_brigade()'' can mean that @code{SCGIServerTimeout} was hit ;;; after HTTP headers from the request handler had been received by the SCGI ;;; client. ;;; ;;; @item ;;; Apache error log entry ``(32)Broken pipe: scgi: Broken pipe: error sending ;;; request body'' might be due to a request handler finished without consuming ;;; all of the HTTP @code{POST} data. ;;; ;;; @item ;;; Apache error log entry ``(103)Software caused connection abort: scgi: ;;; Software caused connection abort: ap_pass_brigade()'' is another one that ;;; can be caused by the HTTP client dropping the connection before the handler ;;; finishes. ;;; ;;; @end itemize ;;; @unnumberedsec History ;;; @table @asis ;;; ;;; @item Version 0.6 --- 2011-08-22 --- PLaneT @code{(1 5)} ;;; Small documentation fixes. ;;; ;;; @item Version 0.5 --- 2011-05-16 --- PLaneT @code{(1 4)} ;;; For closer to backward compatibility with PLT Scheme 4.x, changed ;;; references to @code{racket} modules to @code{scheme}. ;;; More documentation. ;;; ;;; @item Version 0.4 --- 2011-05-16 --- PLaneT @code{(1 3)} ;;; Added @code{#:reuse-scgi-port?} argument to procedure @code{cgi}. ;;; Added several new CGI environment variable procedures. ;;; Added @code{make-cgi-variable-proc}. ;;; The @code{cgi} procedure now uses @code{tcp-accept/enable-break} rather than ;;; @code{tcp-accept}. ;;; Added more @code{log-debug}. ;;; Added documentation about @code{mod_scgi} configuration, troubleshooting, ;;; and the keyword arguments to the @code{cgi} procedure. ;;; Removed documentation placeholders for @code{end-cgi-request} for now. ;;; Various additional quality assurance testing has been done, and more is the ;;; works. ;;; ;;; @item Version 0.3 --- 2010-11-14 --- PLaneT @code{(1 2)} ;;; Added @code{cgi-http-user-agent}. ;;; ;;; @item Version 0.2 --- 2010-10-11 --- PLaneT @code{(1 1)} ;;; Documentation changes to reflect that it is successfully in use in a real ;;; system, and some work remains. ;;; ;;; @item Version 0.1 --- 2010-05-25 --- PLaneT @code{(1 0)} ;;; Initial release. Preliminary. ;;; ;;; @end table (provide end-cgi-request end-cgi-request/error cgi cgi-content-length cgi-content-type cgi-document-root cgi-http-cookie cgi-http-host cgi-http-referer cgi-http-user-agent cgi-https cgi-path-info cgi-path-translated cgi-query-string cgi-remote-addr cgi-remote-host cgi-remote-user cgi-request-method cgi-request-id cgi-request-uri cgi-script-name cgi-server-name cgi-server-port cgi-type make-cgi-variable-proc scgi-variables stop-cgi-service-immediately)