gdbdump.rkt
#lang racket/base
;; For legal info, see file "info.rkt".

(require mzlib/os
         racket/port
         (planet neil/mcfly))

(doc (section "Introduction")

     (para "The "
           (code "gdbdump")
           " package is a possible aid in debugging potential system problems
in a Racket-based production application.  It is designed to be used by the
application program itself, perhaps when an error condition is detected, or
when invoked by a system administrator from a management console user
interface.")

     (para "Specifically, invoking the "
           (racket gdbdump)
           " procedure attempts to use the "
           (hyperlink "http://en.wikipedia.org/wiki/GNU_Debugger"
                      "GNU Debugger")
           " (GDB) to attach temporarily to the process, and dump full native
code backtraces of all native threads to a file.  This file can then be given
to programmers for debugging.  This might be helpful, for example, if problems
with low-level system calls or native code libraries invoked via the FFI.")

     (para "The "
           (code "gdbdump")
           " package is currently only for GNU/Linux and similar Unix-like
systems, with GDB installed."))

(doc (section "Interface")

     (para "The main interface is the "
           (racket gdbdump)
           " procedure.  Options are set via parameters, rather than as
arguments to "
           (racket gdbdump)
           ", so that they can be type-checked during normal operation of the
program, since "
           (racket gdbdump)
           " might be applied only rarely and in already exceptional
circumstances."))

(doc (defparam current-gdbdump-gdb-program filename string?
       (para "String for the complete path to the GDB executable.  The default is to try "
             (racket (find-executable-path "gdb"))
             ", as evaluated at time the "
             (code "gdbdump")
             " package is loaded, and to falls back to "
             (racket "/usr/bin/gdb")
             ".  So, if installing GDB for use by "
             (racket gdbdump)
             " in a process that is already running, you will want to have it
accessible as "
             (filepath "/usr/bin/gdb")
             ", via symbolic link if not by file.")))
(provide current-gdbdump-gdb-program)
(define  current-gdbdump-gdb-program
  (make-parameter (cond ((find-executable-path "gdb") => path->string)
                        (else "/usr/bin/gdb"))
                  (lambda (v)
                    (cond ((string? v) v)
                          (else (raise-type-error 'current-gdbdump-gdb-program
                                                  "string?"
                                                  v))))))

(doc (defparam current-gdbdump-file path complete-path?
       (para "Complete path to the file to which the dump should be written.  By default, it is "
             (racket (build-path (find-system-path 'temp-dir) "gdbdump"))
             ", for example, "
             (racket "/var/tmp/gdbdump")
             ".")))
(provide current-gdbdump-file)
(define  current-gdbdump-file
  (make-parameter (build-path (find-system-path 'temp-dir) "gdbdump")
                  (lambda (v)
                    (let ((path (cleanse-path v)))
                      (if (complete-path? path)
                          (raise-type-error 'current-gdbdump-file
                                            "complete-path?"
                                            v)
                          path)))))

(doc (defparam current-gdbdump-local-sleep seconds (>=/c 0)
       (para "Number of seconds to "
             (racket sleep)
             " between spawning the subprocess that will invoke GDB, and
checking the results or killing the process.  The default is "
             (racket 10)
             ".")))
(provide current-gdbdump-local-sleep)
(define current-gdbdump-local-sleep
  (make-parameter 10
                  (lambda (v)
                    (if (and (number? v)
                             (<= 0 v))
                        v
                        (raise-type-error 'current-gdbdump-local-sleep
                                          "number? >= 0"
                                          v)))))

(define %gdbdump:dev-null-path (string->path "/dev/null"))

(define (%gdbdump:write-command-line-arg str out)
  (if (regexp-match? #rx"[^-_.:/a-z0-9]" str)
      (begin (write-char #\" out)
             (for ((c (in-string str)))
               (let ((n (char->integer c)))
                 (if (<= 32 n 126)
                     (if (memv c '(#\" #\$ #\\))
                         (begin (write-char #\\ out)
                                (write-char c   out))
                         (write-char c out))
                     (error '%gdbdump:write-command-line-arg
                            "non-printable-ASCII char ~S in ~S"
                            c
                            str))))
             (write-char #\" out))
      (write-string str out)))

(define (%gdbdump:make-a-big-argument gdb-program this-program-or-false pid path)
  (call-with-output-string
   (lambda (out)
     (write-string "sleep 1 ; " out)
     (%gdbdump:write-command-line-arg gdb-program out)
     (write-string " -n -silent -batch -nw -e /dev/null " out)
     (and this-program-or-false
          (%gdbdump:write-command-line-arg this-program-or-false out))
     (write-string " -p " out)
     (%gdbdump:write-command-line-arg (number->string pid) out)
     (write-string " -ex \"set history save off\" -ex \"set pagination off\" -ex \"set editing off\" -ex \"info inferiors\" -ex \"info threads\" -ex \"thread apply all backtrace full\" -ex detach -ex quit < /dev/null >> " out)
     (%gdbdump:write-command-line-arg (path->string path) out)
     (write-string " 2>&1" out))))

(doc (defproc (gdbdump)
         void?
       (para "Attempts to use GDB to get native backtraces of all native threads of the native process hosting the current Racket evaluation, writing them to the file specified by "
             (racket current-gdbdump-file)
             ".")
       (para "The GDB executable from "
             (racket current-gdbdump-gdb-program)
             " is used.  GDB is invoked under a "
             (filepath "/bin/sh")
             " process, after a brief sleep in the "
             (filepath "sh")
             " process, to possibly give the Racket thread applying "
             (racket gdbdump)
             " a chance to "
             (racket sleep)
             ".  All three basic "
             (code "stdio")
             " ports of the "
             (filepath "sh")
             " and GDB processes should be files, and not pass through the
Racket process.  (Note: It is conceivable that GDB will run before the Racket
thread sleeps, such as if a long GC cycle is triggered immediately after
creating the subprocess.)")
       (para "If the GDB process is still running after "
             (racket current-gdbdump-local-sleep)
             " seconds have elapsed, it is killed.")
       (para "After attempting to use GDB, an event is logged to the current logger via "
             (racket log-info)
             " or "
             (racket log-error)
             ".")))
(provide gdbdump)
(define (gdbdump)
  (let ((path                  (current-gdbdump-file))
        (pid                   (or (getpid)
                                   (error 'gdbdump
                                          "could not get PID")))
        (this-program-or-false (let* ((exec-path (find-system-path 'exec-file))
                                      (exec-path (if (complete-path? exec-path)
                                                    exec-path
                                                    (find-executable-path exec-path))))
                                 (and exec-path
                                      (path->string exec-path)))))
    (call-with-output-file* %gdbdump:dev-null-path
      (lambda (null-out)
        (call-with-input-file* %gdbdump:dev-null-path
          (lambda (null-in)
            (call-with-output-file* path
              (lambda (out)
                ;; TODO: Maybe we shouldn't use "write", in case the writing
                ;; options are borked.
                (write-string "PID: " out)
                (write pid out)
                (newline out)
                (write-string "Command-Line: " out)
                (write (current-command-line-arguments) out)
                (newline out)
                (write-string "Executable: " out)
                (write this-program-or-false out)
                (newline out)
                (newline out))
              #:exists 'truncate/replace)
            (let-values (((sub stdout stdin stderr)
                          (parameterize ((subprocess-group-enabled #t))
                            (subprocess null-out  ; stdout
                                        null-in   ; stdin
                                        null-out  ; stderr
                                        "/bin/sh" ; command
                                        ; arg ...
                                        "-c"
                                        (%gdbdump:make-a-big-argument
                                         (current-gdbdump-gdb-program)
                                         this-program-or-false
                                         pid
                                         path)))))
              (sleep (current-gdbdump-local-sleep))
              ;; TODO: Instead of this fixed sleep, we might instead sleep
              ;; until subprocess exits.
              (let ((status (subprocess-status sub)))
                (cond ((eq? 'running status)
                       (log-error "gdbdump: gdb process still running")
                       ;; TODO: Is this the right argument for "force?"?
                       (with-handlers ((exn:fail?
                                        (lambda (exn)
                                          (log-error (string-append
                                                      "gdbdump: could not kill gdb: "
                                                      (exn-message exn))))))
                         (subprocess-kill sub #t)))
                      ((equal? 0 status)
                       (log-info (format "gdbdump: wrote to ~S"
                                         (path->string path))))
                      (else
                       (log-error (format "gdbdump: gdb exit status ~S"
                                          status)))))))))
      #:exists 'append)))

(doc history

     (#:planet 1:0 #:date "2012-06-25"
               (itemlist
                (item "Initial release.  Tested lightly."))))