progedit.rkt
#lang racket/base
;; For legal info, see file "info.rkt".

(require racket/contract/base
         racket/file
         racket/port
         syntax/srcloc
         (planet neil/mcfly))

(doc (section "Introduction")

     (para "The "
           (bold "progedit")
           " package is for programmatic editing of files via Racket programs.
For an example of programmatic editing, "
           (bold "progedit")
           " was originally written so that the "
           (hyperlink "http://www.neilvandyke.org/mcfly-tools/" "McFly Tools")
           " package could modify a user's "
           (filepath "info.rkt")
           ", such as by adding "
           (racket define)
           " forms that were missing, without modifying anything else in the file.")

     (para "We expect that "
           (bold "progedit")
           " will usually be used mostly with syntax objects, in this pattern:")

     (itemlist #:style 'ordered

               (item "Parse file into syntax objects.")
               (item "Identify desired changes to the file, in terms of "
                     (italic "deletes")
                     ", "
                     (italic "inserts")
                     ", and "
                     (italic "replaces")
                     ".")
               (item "Apply all the desired changes to the file in a single read-write pass."))

     (para "The "
           (racket progedit-file)
           " procedure provides a framework for the reading and writing
 (retaining or restoring the original file contents on an error, and leaving a
backup file on success).  The "
           (racket progedit)
           " procedure accepts a language for the changes, encoded as
dynamically generated lists, and applies the changes to the file.")

     (para "For example, let's say we have a file in a language like in file "
           (filepath "myfile")
           " below.")

     (filebox "myfile"
              (racketblock
               (assign honorific "Dr.")
               (assign name "John")
               (code:comment "(perpetually)")
               (assign age 29)))

     (para "And let's say we want to write a program that accepts a file in this language, and makes sure that any "
           (racket name)
           " variable in the file is set to "
           (racket "Jane")
           ".  Specifically, if it finds a "
           (racket (assign name #,(italic "value")))
           " form in the file, it replaces "
           (italic "value")
           " with "
           (racket "Jane")
           "; and if it doesn't find that form in the file, it adds a new "
           (racket (assign name "Jane"))
           " form to the file.  Here's such a program, using "
           (racket progedit-file)
           " and "
           (racket progedit)
           ":")

     (RACKETBLOCK
      (UNSYNTAX (code "#lang racket"))
      (require (planet neil/progedit:1)
               syntax/parse)
      
      (progedit-file
       "myfile"
       #:read
       (lambda (in)
         (let loop ((name-stx #f))
           (let ((stx (read-syntax #f in)))
             (if (eof-object? stx)
                 (if name-stx
                     (values '()
                             `((,name-stx ,#'"Jane")))
                     (values `((#f #\newline
                                   ,#'(assign name "Jane")
                                   #\newline))
                             '()))
                 (syntax-parse stx
                   (((~datum assign) (~datum name) VAL)
                    (if name-stx
                        (raise-syntax-error
                         'foo
                         "name assigned multiple times"
                         stx
                         #f
                         (list name-stx))
                        (loop #'VAL)))
                   (_ (loop name-stx)))))))
       #:write
       (lambda (in out inserts replaces)
         (progedit in
                   out
                   #:inserts  inserts
                   #:replaces replaces))))

     (para "This program will edit the above "
           (filepath "myfile")
           " to change "
           (racket "John")
           " to "
           (racket "Jane")
           ", so the file becomes:")

     (filebox "myfile"
              (racketblock
               (assign honorific "Dr.")
               (assign name (code:hilite "Jane"))
               (code:comment "(perpetually)")
               (assign age 29)))

     (para "Now, if we manually edit "
           (filepath "myfile")
           " to remove the "
           (racket (assign name #,(italic "value")))
           " form altogether, it looks like:")

     (filebox "myfile"
              (racketblock
               (assign honorific "Dr.")
               (code:comment "(perpetually)")
               (assign age 29)))

     (para "If we run our program again, it adds a new form to the end:")

     (filebox "myfile"
              (racketblock
               (assign honorific "Dr.")
               (code:comment "(perpetually)")
               (assign age 29)

               (code:hilite (assign name "Jane"))))

     (para "Notice that, although this particular program parses the file using "
           (racket read-syntax)
           ", which doesn't even see the comment in the file, the comment remains intact. "
           (racket progedit)
           " changes only the parts of the file it's told to, and leaves
every other character in the file intact."))

(doc (section "Interface")

     (para "The main engine for "
           (bold "progedit")
           " is the "
           (racket progedit)
           " procedure.  "
           (racket progedit)
           " will often be used in conjunction with the "
           (racket progedit-file)
           " procedure."))

(doc procedure default-progedit-write-stx
     "Used as the default for the optional "
     (racket #:write-stx)
     " argument of "
     (racket progedit)
     ", this procedure writes "
     (racket stx)
     " as if it were in the Racket programming language.")
(provide/contract (default-progedit-write-stx (-> syntax? output-port? void)))
(define (default-progedit-write-stx stx out)
  (parameterize ((print-box                  #t)
                 (print-boolean-long-form    #f)
                 (print-graph                #f)
                 (print-hash-table           #t)
                 (print-mpair-curly-braces   #t)
                 (print-pair-curly-braces    #f)
                 (print-reader-abbreviations #t)
                 (print-struct               #t)
                 ;; Note: "print-syntax-width" cannot be +inf.0 due to bug in
                 ;; latest Racket release version at time of this writing,
                 ;; 5.2.1-patch1.  A fix should be in 5.4.  So... Old MacDonald
                 ;; had a farm...
                 (print-syntax-width         #e1e10)
                 (print-unreadable           #f)
                 (print-vector-length        #f)
                 (read-accept-bar-quote      #t)
                 (read-case-sensitive        #t))
    (write (syntax->datum stx) out)))

(define (%progedit:resolve-position position)
  (log-debug (format "~S" `(%progedit:resolve-position, position)))
  (let-syntax ((error-invalid-position
                (syntax-rules ()
                  ((_) (error '%progedit:resolve-position
                              "invalid position ~S"
                              position)))))
    (cond ((exact-positive-integer? position) position)
          ((not position) position)
          ((pair? position)
           (let ((doit (lambda (position proc)
                         (let* ((stx (cdr position))
                                (stx (if (syntax? stx)
                                         stx
                                         (if (syntax? (car stx))
                                             (if (null? (cdr stx))
                                                 (car stx)
                                                 (error '%progedit:resolve-position
                                                        "position ~S invalid (junk after syntax object)"
                                                        position
                                                        (car stx)))
                                             (error '%progedit:resolve-position
                                                    "position ~S invalid (~S is not a syntax object)"
                                                    position
                                                    (car stx))))))
                           (or (with-handlers ((exn:fail?
                                                (lambda (e)
                                                  (error '%progedit:resolve-position
                                                         "could not get location from position ~S (~A)"
                                                         position
                                                         (exn-message e)))))
                                 (proc stx))
                               (error '%progedit:resolve-position
                                      "got #f location from position ~S"
                                      position))))))
             (case (car position)
               ((before) (doit position source-location-position))
               ((after)  (doit position source-location-end))
               (else     (error-invalid-position)))))
          (else (error-invalid-position)))))

(define (%progedit:resolve-delete delete)
  ;; TODO: Error-check that start <= end.
  ;;
  ;; TODO: Error-check that start isn't the "end" symbol.
  ;;
  ;; TODO: !!! Do we handle the "end" symbol here?  Should it be #f anyway?
  (if (pair? delete)
      (cons (%progedit:resolve-position (car delete))
            (%progedit:resolve-position (cdr delete)))
      (with-handlers ((exn:fail? (lambda (e)
                                   (error '%progedit:resolve-delete
                                          "could not get non-#f location from delete ~S"
                                          delete))))
        (let ((start (or (source-location-position delete) (error "false")))
              (span  (or (source-location-span     delete) (error "false"))))
          (cons start (+ start span))))))

(define (%progedit:resolve-insert insert)
  (if (pair? insert)
      ;; Note: We don't let the CDR be a number or #f, so that we can use the
      ;; resolve-delete (NUMBER . NUMBER-OR-FALSE) pair to represent delete and then
      ;; distinguish inserts by not being that format.
      (let ((content (cdr insert)))
        (if (or (not content) (number? content))
            (error '%progedit:resolve-insert
                   "invalid insert ~S (content cannot be #f or a number)"
                   insert)
            (cons (%progedit:resolve-position (car insert))
                  content)))
      (error '%progedit:resolve-insert
             "invalid insert ~S (must be a pair)"
             insert)))

(define (%progedit:resolve-orders #:deletes  deletes
                                  #:inserts  inserts
                                  #:replaces replaces)
  (let ((resolved-replace-deletes (map (lambda (replace)
                                         (%progedit:resolve-delete (car replace)))
                                       replaces)))
    (values (append (map %progedit:resolve-delete deletes)
                    resolved-replace-deletes)
            (append (map %progedit:resolve-insert inserts)
                    (map (lambda (replace-delete replace)
                           (cons (car replace-delete)
                                 (cdr replace)))
                         resolved-replace-deletes
                         replaces)))))

(define (%progedit:sorted-orders->instructions-debug-dump
         #:sorted-orders sorted-orders)
  (reverse
   (%progedit:fold-instructions/sorted-orders
    #:sorted-orders sorted-orders
    #:seed          '()
    #:copy-proc     (lambda (seed arg) `((copy  ,arg) ,@seed))
    #:skip-proc     (lambda (seed arg) `((skip  ,arg) ,@seed))
    #:write-proc    (lambda (seed arg) `((write ,arg) ,@seed)))))

(define (%progedit:sort-resolved-orders #:resolved-deletes resolved-deletes
                                        #:resolved-inserts resolved-inserts)
  ;; TODO: !!! Sort "after" and "before" differently, as noted elsewhere in this code?
  (let ((resolved-deletes
         (sort resolved-deletes
               (lambda (a b)
                 ;; Sort so that deletes that have the same start are sorted so
                 ;; that the larger extent comes first, to reduce skip
                 ;; instructions.
                 (let ((a-start (car a))
                       (b-start (car b)))
                   (cond ((< a-start b-start) #t)
                         ((> a-start b-start) #f)
                         (else (let ((a-arg (cdr a))
                                     (b-arg (cdr b)))
                                 (if a-arg
                                     ;; A is *not* a delete to end of file.
                                     (if b-arg
                                         ;; Neither A nor B is a delete to end of file.
                                         (if (> a-arg b-arg)
                                             ;; A's extent is larger, so we say A precedes.
                                             #t
                                             ;; A's extent is not larger, so we don't say A precedes.
                                             #f)
                                         ;; A is not a delete to end of file, but B is, so we don't say A precedes.
                                         #f)
                                     ;; A *is* a delete to end of file.
                                     (if b-arg
                                         ;; A is a delete to end of file, but B isn't, so we say A precedes.
                                         #t
                                         ;; Both A and B are deletes to end of file, so we don't say A precedes.
                                         #f)))))))))
        (resolved-inserts (sort resolved-inserts
                                (lambda (a b)
                                  (let ((a-start (car a))
                                        (b-start (car b)))
                                    (if a-start
                                        ;; A is not end-of-file.
                                        (if b-start
                                            ;; Neither A nor B is end-of-file.
                                            (< (car a) (car b))
                                            ;; B is end-of-file and A isn't, so say A precedes.
                                            #t)
                                        ;; A is end-of-file, so regardless of B, don't say A precedes.
                                        #f))))))
    (let loop ((resolved-deletes resolved-deletes)
               (resolved-inserts resolved-inserts))
      (cond ((null? resolved-deletes) resolved-inserts)
            ((null? resolved-inserts) resolved-deletes)
            (else (let ((delete-start (caar resolved-deletes))
                        (insert-start (caar resolved-inserts)))
                    (if (< delete-start insert-start)
                        (cons (car resolved-deletes)
                              (loop (cdr resolved-deletes)
                                    resolved-inserts))
                        (cons (car resolved-inserts)
                              (loop resolved-deletes
                                    (cdr resolved-inserts))))))))))

(define (%progedit:fold-instructions/sorted-orders
         #:sorted-orders sorted-orders
         #:seed          seed
         #:copy-proc     copy-proc
         #:skip-proc     skip-proc
         #:write-proc    write-proc)
  (let loop ((seed     seed)
             (orders   sorted-orders)
             (position 1))
    (if (null? orders)
        (if position
            (copy-proc seed #f)
            seed)
        (let*-values (((order)         (car orders))
                      ((order-start)   (car order))
                      ((order-arg)     (cdr order))
                      ((seed position) (if position
                                           (if order-start
                                               (let ((to-copy (- order-start position)))
                                                 (if (> to-copy 0)
                                                     (values (copy-proc seed (- order-start position))
                                                             order-start)
                                                     (values seed
                                                             position)))
                                               (values (copy-proc seed #f)
                                                       #f))
                                           (values seed
                                                   position))))
          (cond ((exact-positive-integer? order-arg)
                 ;; Order is a delete to a position (not to end of file).
                 (if (and position (> order-arg position))
                     (loop (skip-proc seed (- order-arg position))
                           (cdr orders)
                           order-arg)
                     (loop seed
                           (cdr orders)
                           position)))
                ((not order-arg)
                 ;; Order is a delete to end of file.
                 (loop seed
                       (cdr orders)
                       #f))
                (else
                 ;; Order is an insert.
                 (loop (write-proc seed order-arg)
                       (cdr orders)
                       position)))))))

(define (%progedit:fold-instructions #:deletes    deletes
                                     #:inserts    inserts
                                     #:replaces   replaces
                                     #:seed       seed
                                     #:copy-proc  copy-proc
                                     #:skip-proc  skip-proc
                                     #:write-proc write-proc)
  (let*-values (((resolved-deletes resolved-inserts)
                 (%progedit:resolve-orders #:deletes  deletes
                                           #:inserts  inserts
                                           #:replaces replaces))
                ((sorted-orders)
                 (%progedit:sort-resolved-orders #:resolved-deletes resolved-deletes
                                                 #:resolved-inserts resolved-inserts)))
    (%progedit:fold-instructions/sorted-orders #:sorted-orders sorted-orders
                                               #:seed             seed
                                               #:copy-proc        copy-proc
                                               #:skip-proc        skip-proc
                                               #:write-proc       write-proc)))

(define (%progedit:copy-port-characters in-port
                                        out-port
                                        #:limit orig-limit
                                        #:must-fill-limit? (must-fill-limit? #t))
  (let* ((buffer-size 4096)
         (buffer      (make-string buffer-size)))
    (let loop ((limit  orig-limit)
               (copied 0))
      (if (zero? limit)
          copied
          (let* ((request-size (min buffer-size limit))
                 (read-count   (read-string! buffer in-port 0 request-size)))
            (if (eof-object? read-count)
                (if must-fill-limit?
                    (error '%progedit:copy-port/limit
                           "expected to copy ~S characters from ~S to ~S, but only read ~S before eof"
                           orig-limit
                           in-port
                           out-port
                           copied)
                    copied)
                (begin (write-string buffer out-port 0 read-count)
                       (loop (- limit read-count)
                             (+ copied read-count)))))))))

(define (%progedit:skip-port-characters in-port
                                        orig-count
                                        #:must? (must? #t))
  (let* ((buffer-size 4096)
         (buffer      (make-string buffer-size)))
    (let loop ((count  orig-count)
               (copied 0))
      (if (zero? count)
          copied
          (let* ((request-size (min buffer-size count))
                 (read-count   (read-string! buffer in-port 0 request-size)))
            (if (eof-object? read-count)
                (if must?
                    (error '%progedit:copy-port/count
                           "expected to skip ~S characters from ~S, but only read ~S before eof"
                           orig-count
                           in-port
                           copied)
                    copied)
                (loop (- count read-count)
                      (+ copied read-count))))))))

(define (%progedit:write-content content out write-stx)
  ;; TODO: Set write parameters for writing syntax.
  (void
   (let loop ((thing content))
     (cond ((string? thing)     (write-string thing out))
           ((bytes?  thing)     (write-bytes  thing out))
           ((char?   thing)     (write-char   thing out))
           ((syntax? thing)     (default-progedit-write-stx thing out))
           ((pair?   thing)     (loop (car thing)) (loop (cdr thing)))
           ((null?   thing)     #f)
           ((procedure? thing)  (thing out))
           ((input-port? thing) (copy-port thing out))
           (else (error '%progedit:write-thing
                        "do not know how to write ~S in content ~S to port ~S"
                        thing
                        content
                        out))))))

(define (%progedit:debug-dump-from-orders #:deletes  deletes
                                          #:inserts  inserts
                                          #:replaces replaces)

  (let*-values (((resolved-deletes resolved-inserts)
                 (%progedit:resolve-orders #:deletes  deletes
                                           #:inserts  inserts
                                           #:replaces replaces))
                ((sorted-orders)
                 (%progedit:sort-resolved-orders
                  #:resolved-deletes resolved-deletes
                  #:resolved-inserts resolved-inserts))
                ((instructions)
                 (%progedit:sorted-orders->instructions-debug-dump
                  #:sorted-orders sorted-orders)))
    `((*debug-dump-from-orders*
       :resolved-deletes ,resolved-deletes
       :resolved-inserts ,resolved-inserts
       :sorted-orders    ,sorted-orders
       :instructions     ,instructions))))

(doc (defproc (progedit
               (in                  input-port?)
               (out                 output-port?)
               (#:deletes  deletes  list?        '())
               (#:inserts  inserts  list?        '())
               (#:replaces replaces list?        '())
               (#:write-stx write-stx (-> syntax? output-port? any) default-progedit-write-stx))
       void?

       (para "Performs a programmatic editing of the input read from "
             (racket in)
             ", writing the edited result to "
             (racket out)
             ".  This is usually used in conjunction with "
             (racket progedit-file)
             ", which supplies the input and output ports.")

       (para "The edits are specified by the language of the "
             (racket deletes)
             ", "
             (racket inserts)
             ", and "
             (racket replacements)
             " arguments.  A BNF-like grammar for this language is:")

       (racketgrammar* #:literals (after before)

                       (deletes (delete ...))

                       (inserts (insert ...))

                       (replaces (replace ...))

                       (delete syntax
                               (position . position))

                       (insert (position . content))

                       (replace (delete . content))

                       (position exact-positive-integer
                                 #f
                                 (before . syntax)
                                 (after  . syntax)
                                 (before syntax)
                                 (after  syntax))

                       (content syntax
                                string
                                byte-string
                                character
                                input-port
                                procedure
                                (content . content)
                                ()))

       (para "In general, you usually want to specify "
             (italic "position")
             " either by a syntax object taken from a parse of the input, or by "
             (racket #f)
             ", meaning the end of the input.  You can also use a number for
the character position, with the characters being numbered starting with 1.")

       (para "For "
             (italic "content")
             ", syntax objects are written as Racket code.  Strings, byte strings, and characters are written verbatim.  Input ports are written by copying their content to the output.  Procedures are written by applying the procedure with "
             (racket out)
             " as an argument.  Pairs are written by recursively writing their
CAR and CDR.")))
;; TODO: Get rid of "defproc" once McFly handles "->*" contract combinators.
(provide progedit)
(define (progedit in
                  out
                  #:deletes   (deletes  '())
                  #:inserts   (inserts  '())
                  #:replaces  (replaces '())
                  #:write-stx (write-stx default-progedit-write-stx))
  (void
   (%progedit:fold-instructions
    #:deletes    deletes
    #:inserts    inserts
    #:replaces   replaces
    #:seed       #f
    #:copy-proc  (lambda (seed size)
                   (if size
                       (begin0 #f
                         (%progedit:copy-port-characters in out #:limit size))
                       (begin0 #f
                         (copy-port in out))))
    #:skip-proc  (lambda (seed size)
                   (if size
                       (begin0 #f
                         (%progedit:skip-port-characters in size))
                       ;; TODO: Seek to end of input, in case they try to use
                       ;; this port afterwards?
                       #f))
    #:write-proc (lambda (seed content)
                   (begin0 #f
                     (%progedit:write-content content out write-stx))))))

(doc procedure default-progedit-file-backup
     "This procedure is used as the default for the optional "
     (racket #:backup)
     " argument of "
     (racket progedit-file)
     ".  It returns "
     (racket (path-add-suffix path ".bak"))
     " after deleting any such existing file.")
(provide/contract (default-progedit-file-backup
                    (-> path-string? path?)))
(define (default-progedit-file-backup path)
  (let ((backup-path (path-add-suffix path ".bak")))
    (and (file-exists? backup-path)
         (delete-file backup-path))
    backup-path))

(doc (defproc (progedit-file
               (         filename    path-string?)
               (#:read   read-proc   (-> input-port? any))
               (#:write  write-proc  procedure?)
               (#:backup backup-proc (-> path-string? path-string?)
                         default-progedit-file-backup))
       any

       (para "Applies "
             (racket read-proc)
             " and "
             (racket write-proc)
             " to read and then write file "
             (racket filename)
             ", creating a backup file named by calling the "
             (racket backup)
             " procedure argument with an argument of "
             (racket filename)
             ".  Any error results in the file's contents either being left
unchanged or being restored.  Changes to the file during "
             (racket write-proc)
             ", such as another program modifying the file, results in an
error.")

       (para "Symbolic links are followed, so the actual file is edited, and
any symbolic link remains unmodified.")

       (para (racket read-proc)
             " is called with an argument of an input port on the contents of
the file.  This is not necessarily on the file itself; it might be on a
copy of the file.  The value or values returned by "
             (racket read-proc)
             " are appended to the arguments when "
             (racket write-proc)
             " is called.")

       (para (racket write-proc)
             " is called with two arguments --- an input port and an output-port -- with an additional argument for each value returned by "
             (racket read-proc)
             ".  Normally this will be one additional argument, unless a multiple-value return is used by "
             (racket write-proc)
             ".  This provides a functional way to communicate information from "
             (racket read-proc)
             " to "
             (racket write-proc)
             ".  The input port will be on the contents of the file before editing, and the write port will be for the contents of the file after editing.  Normally, "
             (racket write-proc)
             " will use procedure "
             (racket progedit)
             " to handle these two ports.")))
;; TODO: Get rid of "defproc" once McFly handles "->*" contract combinators.
(provide/contract (progedit-file
                   (->* (path-string?
                         #:read  (-> input-port? any)
                         #:write procedure?)
                        (#:backup (-> path-string? path-string?))
                        any)))
(define (progedit-file filename
                       #:read   read-proc
                       #:write  write-proc
                       #:backup (backup-proc
                                 default-progedit-file-backup))
  (let* ((actual-path     (resolve-path (cleanse-path filename)))
         (orig-file-size  (file-size actual-path))
         (orig-file-mtime (file-or-directory-modify-seconds actual-path))
         (backup-path     (backup-proc filename)))
    ;; Note: We use "copy-file" instead of "rename-file-or-directory"
    ;; because "actual-path" and "backup-path" might be on
    ;; different filesystems.
    (copy-file actual-path backup-path #f)
    (with-handlers ((exn:fail? (lambda (e)
                                 (copy-file backup-path actual-path #t)
                                 (raise e #t))))
      (let ((read-vals
             (call-with-input-file*
              backup-path
              (lambda (in)
                (begin0 (call-with-values (lambda ()
                                            (read-proc in))
                          list)
                  (or (and (equal? orig-file-size
                                   (file-size actual-path))
                           (equal? orig-file-mtime
                                   (file-or-directory-modify-seconds actual-path)))
                      (error 'progedit-file
                             "file ~S changed while reading"
                             (path->string actual-path))))))))
        (call-with-input-file*
         backup-path
         (lambda (in)
           (call-with-output-file* actual-path
                                   (lambda (out)
                                     (apply write-proc in out read-vals))
                                   #:exists 'replace)))))))

;; TODO: Maybe add a feature to make dealing with "#lang" and "(module ...)"
;; easier.

(doc history

     (#:planet 1:2 #:date "2012-06-11"

               "Documentation fixes.  (Thanks to Laurent Orseau.)")
     
     (#:planet 1:1 #:date "2012-06-11"

               "Documentation fixes.")
     
     (#:planet 1:0 #:date "2012-06-11"
               
               "Initial release."))