#lang racket/base
;; For legal info, see file "info.rkt".

(require (for-syntax racket/base
         (planet neil/mcfly))

(doc (section "Introduction")

     (para "The "
           (bold "scgi")
           " library implements fast Web CGI using the SCGI protocol.  This
library is used in conjunction with an HTTP server supporting SCGI, such as
Apache HTTP Server with the "
           (code "mod_scgi")
           " module.")

     (para "The "
           (bold "scgi")
           " library also supports running as normal Web CGI without any change
to the source code of the app, such as during development of an application
intended to be deployed using SCGI.  This also gives flexibility in deployment,
allowing a system administrator to switch between either mode just by editing
the HTTP server configuration.")

     (para "The SCGI protocol was specified by Neil Schemenauer in ``"
           (hyperlink ""
                      "SCGI: A Simple Common Gateway Interface alternative")
           ",'' dated 2008-06-23.")

     (para "An example use of this library:")

      (require (planet neil/scgi:2))

      (cgi #:startup  (lambda ()
                        (printf "Startup for cgi-type ~S..."

           #:request  (lambda ()
                        (display "Content-type: text/html\r\n\r\n")
                        (display "<p>Hello, world!</p>")
                        (printf "<p>Your IP address is: ~S</p>"

           #:shutdown (lambda ()
                        (printf "Shutdown down for cgi-type ~S..."

     (para "The procedure provided to the "
           (racket #:request)
           " is the one that gets called for each request.  Within that procedure, is the "
           (deftech "CGI request context")
           ", in which procedures like "
           (racket cgi-remote-addr)
           " may be called to get information about that particular request.  All three procedures are called within "
           (deftech "CGI context")
           ", in which procedures concerning the CGI mechanism separate from individual requests, such as "
           (racket cgi-type)
           " may be called.  Many programs will need to provide only the "
           (racket #:request)
           " procedure."))

(define-syntax (%scgi:debug stx)
  (syntax-case stx
    ((_ FMT ARG ...)
     (let ((fmt (syntax->datum #'FMT)))
       (if (string? fmt)
           (quasisyntax/loc stx
             (log-debug (format #,(datum->syntax
                                   (string-append "scgi: ~A "
                                ARG ...)))
           (raise-syntax-error '%scgi:debug
                               (format "expected string; got ~S"
                                       (syntax->datum #'FMT))

(define %scgi:cur-cgi-request-id               (make-parameter #f))
(define %scgi:cur-cgi-type                     (make-parameter #f))
(define %scgi:cur-end-cgi-request-ec           (make-parameter #f))
(define %scgi:cur-scgi-content-length          (make-parameter #f))
(define %scgi:cur-scgi-variables               (make-parameter #f))
(define %scgi:cur-stop-cgi-service-immediately (make-parameter #f))

(define (%scgi:invalid-cgi-type-error sym)
  (error sym "Invalid CGI type: ~S" (%scgi:cur-cgi-type)))

(define (%scgi:cgi-context-only-error sym)
  (error sym "Only works in CGI context"))

(define (%scgi:cgi-request-context-only-error sym)
  (error sym "Only works in CGI request context"))

(define (%scgi:assert-cgi-request-context sym)
  (or (%scgi:cur-cgi-request-id)
      (%scgi:cgi-request-context-only-error sym)))

(define (%scgi:bytes->number x)
  ;; TODO: Make this more efficient.
  (string->number (bytes->string/latin-1 x)))

(define (%scgi:read-scgi-variables in)
       ;; TODO: Compare performance of "regexp-match" and "regexp-try-match".
       ;; Also compare performance to byte-by-byte I/O without regexps.
       ((regexp-match #rx"^[0-9]+" in)
        => (lambda (m)
             (or (eqv? 58 (read-byte in)) ; colon
                 (error '%scgi:read-scgi-variables
                        "Expected colon after ~S."
                        (car m)))
             ;; Note: We do the read non-tail-recursively, so that we don't
             ;; have to reverse the result.
             (let loop ((size-left (%scgi:bytes->number (car m))))
               (if (zero? size-left)
                    ;; TODO: Should we limit this regexp as well?  Or rewrite
                    ;; it to do char-by-char I/O and not use regexp?
                    ((regexp-match #rx"^([A-Za-z0-9_]+)\000([^\000]*)\000"
                     (lambda (m)
                       (cons (cons (list-ref m 1)
                                   (list-ref m 2))
                             (loop (- size-left
                                      (bytes-length (list-ref m 0)))))))
                      "Could not read SCGI header with ~S bytes remaining."
       (else (error '%scgi:read-scgi-variables
                    "Did not read size number of SCGI header.")))
    (or (eqv? 44 (read-byte in))
        (error '%scgi:read-scgi-variables
               "Could not read comma in SCGI header."))))

(define (%scgi:handle-scgi-accept in out proc request-id)
  (%scgi:debug "%scgi:handle-scgi-accept request-id=~S begin"
  (let ((request-exit-status 0))
    (let* ((variables (%scgi:read-scgi-variables in))
             (cond ((assoc #"CONTENT_LENGTH" variables) => cdr)
                   (error '%scgi:handle-scgi-accept
                          "~S missing CONTENT_LENGTH in ~S"
           (content-in (make-limited-input-port in content-length)))
      (%scgi:debug "%scgi:handle-scgi-accept request-id=~S variables=~S"
      (parameterize ((current-input-port       content-in)
                     (current-output-port      out)
                     (%scgi:cur-scgi-variables      variables)
                     (%scgi:cur-scgi-content-length content-length)
                     (%scgi:cur-cgi-request-id      request-id))
        (%scgi:debug "%scgi:handle-scgi-accept request-id=~S proc" request-id)
        (%scgi:debug "%scgi:handle-scgi-accept request-id=~S flushing" request-id)
        (flush-output out)
        (%scgi:debug "%scgi:handle-scgi-accept request-id=~S end" request-id)))))

(doc (section "Interface"))

(doc (defproc
          (#:startup             startup-proc        (-> any)          void)
          (#:request             request-proc        (-> any))
          (#:shutdown            shutdown-proc       (-> any)          void)
          (#:scgi-hostname       scgi-hostname       (or/c string? #f) "")
          (#:scgi-max-allow-wait scgi-max-allow-wait exact-nonnegative-integer? 4)
          (#:scgi-portnum        scgi-portnum        (and/c exact-nonnegative-integer?
                                                            (integer-in 0 65535)) 4000)
          (#:reuse-scgi-port?    reuse-scgi-port?    boolean?          #t))

       (para "Implement CGI.  Normal CGI is used if the "
             (code "REQUEST_URI")
             " environment variable is defined (which suggests that the code is
being called in a CGI context); otherwise, SCGI is used.")

       (para (racket startup-proc)
             " is a thunk that is evaluated once (before listener starts).  "
             (racket request-proc)
             " is evaluated once for each request (which, in normal CGI, is once). "
             (racket shutdown-proc)
             " is evaluated once, as processing of all CGI requests has finished.")

       (para "For evaluation of "
             (racket request-proc)
             ", the default input and output ports are as with normal CGI, regardless
of whether normal CGI or SCGI is in use.")

       (para "This procedure also accepts a few optional keyword arguments, all of
which apply to SCGI mode only.  "
             (racket scgi-hostname)
             " is the hostname or IP address (as a string) for the interface on which
to listen.  "
             (racket scgi-portnum)
             " is the TCP port number on that interface.  "
             (racket scgi-max-allow-wait)
             " is the maximum number of unaccepted connections to permit waiting.  "
             (racket reuse-scgi-port?)
             " is whether or not to reuse the TCP port number, such as if a previous
server exited and the port is in a "
             (racket TIME_WAIT)
             " state.")))
(provide cgi)
(define (cgi #:startup             (startup-proc        void)
             #:request              request-proc
             #:shutdown            (shutdown-proc       void)
             #:scgi-hostname       (scgi-hostname       "")
             #:scgi-max-allow-wait (scgi-max-allow-wait 4)
             #:scgi-portnum        (scgi-portnum        4000)
             #:reuse-scgi-port?    (reuse-scgi-port?    #t))
  (let ((type (if (getenv "REQUEST_URI")
    (parameterize ((%scgi:cur-cgi-type type))


        (lambda () #f)

        (lambda ()
          (let/ec stop-cgi-service-immediately-ec
            (parameterize ((%scgi:cur-stop-cgi-service-immediately
              (case type

                 (let ((listener-cust (make-custodian)))
                   (parameterize ((current-custodian listener-cust))
                     (let ((listener (tcp-listen scgi-portnum
                         (lambda () #f)
                         (lambda ()
                           (let loop ((request-id 1))
                             (let ((request-cust (make-custodian
                               (parameterize ((current-custodian request-cust))
                                 (let-values (((in out) (tcp-accept/enable-break
                                   (%scgi:debug "cgi accepted request-id=~S"
                                   (thread (lambda ()
                                               (lambda () #f)
                                               (lambda ()
                                               (lambda ()
                               (loop (+ 1 request-id)))))
                         (lambda ()
                           (%scgi:debug "scgi listener custodian shutdown")
                           (custodian-shutdown-all listener-cust)
                           ;; Note: We have verified under Racket 5.1.1 that
                           ;; the custodian already closes the listener, so
                           ;; this code is not necessary.
                           ;; (%scgi:debug "scgi listener tcp-close")
                           ;; (with-handlers
                           ;;     ((exn:fail?
                           ;;       (lambda (e)
                           ;;         (%scgi:debug
                           ;;          "scgi-listener tcp-close failed: ~S"
                           ;;          (exn-message e)))))
                           ;;   (tcp-close listener))
                           (%scgi:debug "scgi listener shutdown sleep")
                           (sleep 1)))))))

                 ;; TODO: Set up end-cgi-request?
                 (parameterize ((%scgi:cur-cgi-request-id 0))

                (else (%scgi:invalid-cgi-type-error 'cgi))))))

        (lambda ()
          (parameterize-break #t

(doc (defproc (cgi-content-length)
       (para "In a CGI request context, returns the CGI content length -- the
number of bytes that can be read from the default input port -- as integer.")))
(provide cgi-content-length)
(define (cgi-content-length)
  (case (or (%scgi:cur-cgi-type)
            ;; Note: Error message here complains about CGI request context,
            ;; even though the test is only for any CGI context.
            (%scgi:cgi-request-context-only-error 'cgi-content-length))


     (%scgi:assert-cgi-request-context 'cgi-content-length)
     (string->number (getenv "CONTENT_LENGTH")))

    (else (%scgi:invalid-cgi-type-error 'cgi-content-length))))

(doc (defproc (make-cgi-variable-proc
               (proc-name-sym symbol?)
               (name-bytes    bytes?))
         (-> string?)
       (para "Produces a procedure for getting a CGI environment variable value
as a string.  Works whether in normal CGI or SCGI.  This is useful for
accessing non-standard variables, such as might be provided by an unusual
Apache module.  Argument "
             (racket proc-name-sym)
             " is a symbol for the name of the procedure, which is used in
error reporting.  Argument "
             (racket name-bytes)
             " is the name of the environment variable, as a byte string.")
       (para "For example, were the "
             (racket cgi-remote-user)
             " procedure not already defined, it could be defined as:")
        (define cgi-remote-user
          (make-cgi-variable-proc 'cgi-remote-user
(provide make-cgi-variable-proc)
(define (make-cgi-variable-proc proc-name-sym name-bytes)
  (let ((name-string (bytes->string/latin-1 name-bytes)))
    (lambda ()
      (case (%scgi:cur-cgi-type)

         (cond ((assoc name-bytes (or (%scgi:cur-scgi-variables)
                                      (%scgi:cgi-request-context-only-error proc-name-sym)))
                => (lambda (pair)
                     (bytes->string/latin-1 (cdr pair))))
               (else #f)))

         (%scgi:assert-cgi-request-context proc-name-sym)
         (getenv name-string))

        (else (%scgi:invalid-cgi-type-error proc-name-sym))))))

(doc (defproc* (((cgi-content-type) string?)
                ((cgi-document-root) string?)
                ((cgi-http-cookie) string?)
                ((cgi-http-host) string?)
                ((cgi-http-referer) string?)
                ((cgi-http-user-agent) string?)
                ((cgi-https) string?)
                ((cgi-path-info) string?)
                ((cgi-path-translated) string?)
                ((cgi-query-string) string?)
                ((cgi-remote-addr) string?)
                ((cgi-remote-host) string?)
                ((cgi-remote-user) string?)
                ((cgi-request-method) string?)
                ((cgi-request-uri) string?)
                ((cgi-script-name) string?)
                ((cgi-server-name) string?)
                ((cgi-server-port) string?))
       (para "In a CGI request context, returns the corresponding CGI value as
a string.  Note that "
             (racket cgi-content-length)
             " is "
             (italic "not")
             " in this list, and it returns a number rather than a string.")))

(define-syntax %scgi:define-cgi-variable-proc
  (syntax-rules ()
    ((_ ID BYTES)
     (define ID (make-cgi-variable-proc (quote ID) BYTES)))))

(provide                        cgi-content-type)
(%scgi:define-cgi-variable-proc cgi-content-type    #"CONTENT_TYPE")

(provide                        cgi-document-root)
(%scgi:define-cgi-variable-proc cgi-document-root   #"DOCUMENT_ROOT")

(provide                        cgi-http-cookie)
(%scgi:define-cgi-variable-proc cgi-http-cookie     #"HTTP_COOKIE")

(provide                        cgi-http-host)
(%scgi:define-cgi-variable-proc cgi-http-host       #"HTTP_HOST")

(provide                        cgi-http-referer)
(%scgi:define-cgi-variable-proc cgi-http-referer    #"HTTP_REFERER")

(provide                        cgi-http-user-agent)
(%scgi:define-cgi-variable-proc cgi-http-user-agent #"HTTP_USER_AGENT")

(provide                        cgi-https)
(%scgi:define-cgi-variable-proc cgi-https           #"HTTPS")

(provide                        cgi-path-info)
(%scgi:define-cgi-variable-proc cgi-path-info       #"PATH_INFO")

(provide                        cgi-path-translated)
(%scgi:define-cgi-variable-proc cgi-path-translated #"PATH_TRANSLATED")

(provide                        cgi-query-string)
(%scgi:define-cgi-variable-proc cgi-query-string    #"QUERY_STRING")

(provide                        cgi-remote-addr)
(%scgi:define-cgi-variable-proc cgi-remote-addr     #"REMOTE_ADDR")

(provide                        cgi-remote-host)
(%scgi:define-cgi-variable-proc cgi-remote-host     #"REMOTE_HOST")

(provide                        cgi-remote-user)
(%scgi:define-cgi-variable-proc cgi-remote-user     #"REMOTE_USER")

(provide                        cgi-request-method)
(%scgi:define-cgi-variable-proc cgi-request-method  #"REQUEST_METHOD")

(provide                        cgi-request-uri)
(%scgi:define-cgi-variable-proc cgi-request-uri     #"REQUEST_URI")

(provide                        cgi-script-name)
(%scgi:define-cgi-variable-proc cgi-script-name     #"SCRIPT_NAME")

(provide                        cgi-server-name)
(%scgi:define-cgi-variable-proc cgi-server-name     #"SERVER_NAME")

(provide                        cgi-server-port)
(%scgi:define-cgi-variable-proc cgi-server-port     #"SERVER_PORT")

(doc (defproc (scgi-variables)
         (list/c (cons/c bytes? bytes?))
       (para "When called in SCGI mode, this procedure yields an alist of SCGI
variables with both the key and value of each pair being byte strings.  Calling
this procedure in normal CGI mode is an error.")
       (para "Note that normally you will not need to use this procedure, and will
instead use procedures like "
             (racket cgi-request-uri)
             ", which work in both SCGI and normal CGI modes.")))
(provide scgi-variables)
(define (scgi-variables)
  (or (%scgi:cur-scgi-variables)
      (if (eq? (%scgi:cur-cgi-type) 'scgi)
          (%scgi:cgi-request-context-only-error 'scgi-variables)
          (error 'scgi-variables
                 "Only works in SCGI type CGI request context. Type: ~S"

(doc (defproc (cgi-type)
         (or/c 'normal 'scgi)
       (para "Returns a symbol indicating the CGI type: "
             (racket 'normal)
             " or "
             (racket 'scgi)
             ". Behavior outside of the procedures of the "
             (racket cgi)
             " procedure is undefined.")))
(provide cgi-type)
(define (cgi-type)
  (or (%scgi:cur-cgi-type)
      (%scgi:cgi-context-only-error 'cgi-type)))

(doc (defproc (cgi-request-id)
       (para "In CGI request context, yields a printable identifying object for
the current request that is unique at least for the current requests being
handled.  This identifying object is intended to be used in debugging
(provide cgi-request-id)
(define (cgi-request-id)
  (or (%scgi:cur-cgi-request-id)
      (%scgi:cgi-request-context-only-error 'cgi-request-id)))

;; TODO: end-cgi-request and end-cgi-request/error are not yet fully
;; implemented and tested.  or perhaps we should leave that up to the
;; application programmer, for performance reasons in case they don't actually
;; need it.

(define (%scgi:end-cgi-request/sym/status sym status)
  ((or (%scgi:cur-end-cgi-request-ec)
       (%scgi:cgi-request-context-only-error sym))

;; (provide end-cgi-request)
(define (end-cgi-request)
  (%scgi:end-cgi-request/sym/status 'end-cgi-request 0))

;; (provide end-cgi-request/error)
(define (end-cgi-request/error)
  (%scgi:end-cgi-request/sym/status 'end-cgi-request/error 1))

(doc (defproc (stop-cgi-service-immediately)
       (para "Stops processing all CGI requests.  This works only within the "
             (racket #:request)
             " procedure of the "
             (racket cgi)
             " procedure.")))
(provide stop-cgi-service-immediately)
(define (stop-cgi-service-immediately)
  (void ((or (%scgi:cur-stop-cgi-service-immediately)
             (%scgi:cgi-context-only-error 'stop-cgi-service-immediately)))))

(doc (section "Apache mod_scgi"))

(doc (subsection "Apache mod_scgi Configuration")

     (para "Apache HTTP Server is one way to run SCGI, though not the only way.
Note that your Apache installation might not have the "
           (code "mod_scgi")
           " module installed or enabled by default.  If you happen to be
running Debian GNU/Linux, this module can be installed via the Debian package "
           (code "libapache2-mod-scgi")

     (para "Once you've installed "
           (code "mod_scgi")
           ", you need some standard SCGI directives to end up in your Apache
config files, whether you accomplish that by editing config files manually,
making symbolic links in a "
           (filepath "mods-enabled")
           " directory, or clicking in a GUI.")

     (para "For example, the following loads "
           (code "mod_scgi")
           ", maps URL paths under "
           (filepath "/mypath")
           " to the SCGI server on the local machine at the standard SCGI TCP
port, and sets a 60-second timeout for the SCGI server to respond to a request
before Apache drops the connection:")

      "LoadModule scgi_module /usr/lib/apache2/modules/\n"
      "SCGIMount /mypath\n"
      "SCGIServerTimeout 60\n")

     (para "There are additional "
           (code "mod_scgi")
           " Apache config directives, including "
           (code "SCGIHandler")
           " and "
           (code "SCGIServer")

(doc (subsection "Apache mod_scgi Troubleshooting")

     (para "This section has some troubleshooting tips.  Currently, these come from
use with "
           (code "mod_scgi")
           " atop Apache 2.2.9 atop Debian GNU/Linux.")


      (item "Racket error ``tcp-write: error writing (Broken pipe; errno=32)''
or ``tcp-write: error writing (Connection reset by peer; errno=104)'' is likely
due to the HTTP request having been dropped by the HTTP client (e.g., user
stops a page load in their browser before page finishes loading) or by Apache
hitting "
            (code "SCGIServerTimeout")
            " for the request.  Note that buffered I/O means that you won't
necessarily get this error even if the request is aborted this way.")

      (item "Apache error log entry ``Premature end of script headers: "
            (italic "PATH")
            "'' followed by ``(500)Unknown error 500: scgi: Unknown error 500:
error reading response headers'' can mean that "
            (code "SCGIServerTimeout")
            " was hit before any HTTP headers from the SCGI request handler
were started or completed.  Note that buffered I/O can mean that some of the
Racket code of the handler wrote some text, but it was not yet flushed to the
SCGI client.")

      (item "Apache error log entry ``(70007)The timeout specified has expired:
ap_content_length_filter: apr_bucket_read() failed'' followed by ``(70007)The
timeout specified has expired: scgi: The timeout specified has expired:
ap_pass_brigade()'' can mean that "
            (code "SCGIServerTimeout")
            " was hit after HTTP headers from the request handler had been
received by the SCGI client.")

      (item "Apache error log entry ``(32)Broken pipe: scgi: Broken pipe: error
sending request body'' might be due to a request handler finished without
consuming all of the HTTP "
            (code "POST")
            " data.")

      (item "Apache error log entry ``(103)Software caused connection abort:
scgi: Software caused connection abort: ap_pass_brigade()'' is another one that
can be caused by the HTTP client dropping the connection before the handler

(doc history

     (#:planet 2:0 #:date "2012-06-12"

                (item "The three main procedures arguments to the "
                      (racket cgi)
                      " procedure now have keywords.  What are now the "
                      (racket #:startup)
                      " and "
                      (racket #:shutdown)
                      " arguments are now also optional.")

                (item "Converted to "
                      (hyperlink "" "McFly")

                (item "Changed internal-only identifiers from having a "
                      (racketfont "%")
                      " prefix to a "
                      (racketfont "%scgi:")
                      " prefix, to help disambiguate when identifiers appear in
error messages in large systems.")

                (item "Documentation changes.")

                (item "Improvement to the internal debugging macro.")))

     (#:version "0.6" #:planet 1:5 #:date "2011-08-22"

                 (item "Small documentation fixes.")))

     (#:version "0.5" #:planet 1:4 #:date "2011-05-16"

                 (item "For closer to backward compatibility with PLT Scheme
4.x, changed references to "
                       (racket racket)
                       " modules to "
                       (racket scheme)

                 (item "More documentation.")))

     (#:version "0.4" #:planet 1:3 #:date "2011-05-16"

                 (item "Added "
                       (racket #:reuse-scgi-port?)
                       " argument to procedure "
                       (racket cgi)
                       ". Added several new CGI environment variable procedures.")

                 (item "Added "
                       (racket make-cgi-variable-proc)

                 (item "The "
                       (racket cgi)
                       " procedure now uses "
                       (racket tcp-accept/enable-break)
                       " rather than "
                       (racket tcp-accept)

                 (item "Added more "
                       (racket log-debug)

                 (item "Added documentation about "
                       (code "mod_scgi")
                       " configuration, troubleshooting,and the keyword
arguments to the "
                       (racket cgi)
                       " procedure.")

                 (item "Removed documentation placeholders for "
                       (racket end-cgi-request)
                       " for now.")

                 (item "Various additional quality assurance testing has been
done, and more is the works.")))

     (#:version "0.3" #:planet 1:2 #:date "2010-11-14"

                 (item "Added "
                       (racket cgi-http-user-agent)

     (#:version "0.2" #:planet 1:1 #:date "2010-10-11"

                 (item "Documentation changes to reflect that it is successfully in use in a real
system, and some work remains.")))

     (#:version "0.1" #:planet 1:0 #:date "2010-05-25"

                 (item "Initial release.  Preliminary."))))