path-misc.rkt
#lang racket/base
;; Copyright Neil Van Dyke. See file "info.rkt".

(require racket/file
         (planet neil/mcfly))

(module+ test
  (require (planet neil/overeasy:2)))

(doc (section "Introduction")

     (para "The "
           (tt "path-misc")
           " package provides some utilities for dealing with "
           (racket path)
           " objects.")

     (para "The current version of this package has only two procedures."))

(doc (section "Interface"))

(define-logger path-misc)

(define %path-misc:slash-tmp-path (string->path "/tmp"))

(doc (defproc (temp-directory)
         path?
       (para "Like "
             (racket (find-system-path 'temp-dir))
             ", except, on Unix-like systems, "
             (filepath "/tmp")
             " is preferred over "
             (filepath "/var/tmp")
             " and "
             (filepath "/usr/tmp")
             ".")))
(provide temp-directory)
(define (temp-directory)
  (case (system-type 'os)
    ((unix macosx)
     (if (directory-exists? %path-misc:slash-tmp-path )
         %path-misc:slash-tmp-path
         (find-system-path 'temp-dir)))
    (else (find-system-path 'temp-dir))))

(doc (defproc (canonicalize-path (path path-string?))
         path?
       (para "Returns a canonical path to the same file or directory that "
             (racket path)
             " would reach.  The canonical path is cleansed, simplified,
case-normalized, and complete, and all symbolic links have been followed.")
       (para "For example, say that you have a Unix filesystem
directory structure like (where ``"
             (tt "->")
             "'' denotes a symbolic link):")
       (verbatim #:indent 4
                 "/tmp/\n"
                 "|-- a/\n"
                 "|   |-- b1/\n"
                 "|   |   `-- s -> ../b2/c/f\n"
                 "|   `-- b2/\n"
                 "|       `-- c/\n"
                 "|           `-- f\n"
                 "`-- x/\n"
                 "    `-- y -> ../a")
       (para "If your current directory is then "
             (filepath "/tmp/x")
             ", you get the following behavior:")
       (racketinput
        (canonicalize-path "y/b1/s"))
       (racketresultblock
        #,(tt "#<path:/tmp/a/b2/c/f>"))
       (para "If a cycle is detected while following symbolic links, an
exception is raised.")))
(provide canonicalize-path)
(define (canonicalize-path path
                           #:complete? (complete? #true)
                           #:base      (base      (current-directory)))
  (let*-values
      (((old-path) (simplify-path (cleanse-path path) #true))
       ((old-path) (normal-case-path
                    (if complete?
                        (simplify-path (path->complete-path old-path base)
                                       #true)
                        old-path)))
       ((result ignored-link-identity-hash)
        (let loop ((new-path     old-path)
                   (link-identity-hash (make-immutable-hash)))
          (log-path-misc-debug "canonicalize-path: loop :new-path ~S :link-identity-hash ~S"
                               new-path
                               link-identity-hash)
          (let-values (((link-identity-hash) (hash-set link-identity-hash new-path #true))
                       ((base name must-be-dir?)     (split-path new-path)))
            (log-path-misc-debug "canonicalize-path: :base ~S :name ~S :must-be-dir? ~S"
                                 base
                                 name
                                 must-be-dir?)
            (cond ((not base) (values new-path link-identity-hash))
                  ((path? base)
                   ;; TODO: Have to look at "name" before we do this based on
                   ;; "base"?
                   (let-values (((new-base link-identity-hash) (loop base link-identity-hash)))
                     ;; Note: We always do a new build-path, even if "new-base" is
                     ;; the same as "base", since this lets us strip the trailing
                     ;; "/", which lets us then test "link-exists?" on the actual
                     ;; link file, rather than what it refers to.
                     (let ((new-path (build-path new-base name)))
                       (log-path-misc-debug "canonicalize-path: :new-base ~S :new-path ~S"
                                            new-base
                                            new-path)
                       (cond ((link-exists? new-path)
                              (let ((link-identity (file-or-directory-identity new-path #true)))
                                (if (hash-ref link-identity-hash link-identity #false)
                                    (error 'canonicalize-path
                                           "path ~S has infinite loop (detected at ~S)"
                                           path
                                           new-path)
                                    (let ((link-resolution (resolve-path new-path)))
                                      (log-path-misc-debug "canonicalize-path: :link-resolution ~S"
                                                           link-resolution)
                                      (loop (normal-case-path
                                             (simplify-path
                                              (path->complete-path link-resolution
                                                                   new-base)))
                                            (hash-set link-identity-hash link-identity #true))))))
                             ((or (file-exists?      new-path)
                                  (directory-exists? new-path))
                              (values new-path link-identity-hash))
                             (else (error 'canonicalize-path
                                          "path ~S does not exist (due to ~S)"
                                          path
                                          new-path))))))
                  (else (error 'canonicalize-path
                               "internal error: invalid base ~S from split-path for path ~S (original ~S)"
                               base
                               new-path
                               path)))))))
    result))

(module+ test

  (let ((windows? (eq? (system-type 'os) 'windows))
        (test-dir (make-temporary-file "test-path-misc-~A"
                                       'directory
                                       (temp-directory))))
    (and windows?
         (log-path-misc-warning "some tests disabled due to running on Microsoft Windows"))
    (dynamic-wind
      void
      (lambda ()

        (or windows?
            (test 'file-identity-1
                  (let* ((d0 (build-path test-dir "file-identity-1"))
                         (d1 (build-path d0 "d1"))
                         (s1 (build-path d0 "s1")))
                    (make-directory d0)
                    (make-directory d1)
                    (make-file-or-directory-link "d1" s1)
                    (equal? (file-or-directory-identity d1 #true)
                            (file-or-directory-identity s1 #true)))
                  #false))

        (or windows?
            (test 'file-identity-2
                  (let* ((d0 (build-path test-dir "file-identity-2"))
                         (f1 (build-path d0 "f1"))
                         (s1 (build-path d0 "s1")))
                    (make-directory d0)
                    (call-with-output-file f1 (lambda (out) (display "my contents\n" out)))
                    (make-file-or-directory-link "f1" s1)
                    (equal? (file-or-directory-identity f1 #true)
                            (file-or-directory-identity s1 #true)))
                  #false))

        (or windows?
            (test 'file-identity-3
                  (let* ((d0 (build-path test-dir "file-identity-3"))
                         (f1 (build-path d0 "f1"))
                         (s1 (build-path d0 "s1"))
                         (s2 (build-path d0 "s2")))
                    (make-directory d0)
                    (call-with-output-file f1 (lambda (out) (display "my contents\n" out)))
                    (make-file-or-directory-link "f1" s1)
                    (make-file-or-directory-link "f1" s2)
                    (equal? (file-or-directory-identity s1 #true)
                            (file-or-directory-identity s2 #true)))
                  #false))

        ;;        (or windows?
        ;;            (test 'resolve-path-1
        ;;                  (let ((d0 (build-path test-dir "resolve-path-1")))
        ;;                    (make-directory d0)
        ;;                    (make-directory (build-path d0 "d1"))
        ;;                    (make-directory (build-path d0 "d1" "d2"))
        ;;                    (make-file-or-directory-link "d1" (build-path d0 "s1"))
        ;;                    (make-file-or-directory-link "d2" (build-path d0 "d1" "s2"))
        ;;                    (parameterize ((current-directory d0))
        ;;                      (resolve-path "s1")))
        ;;                  (string->path "d1")))
        ;;        (or windows?
        ;;            (test 'resolve-path-2
        ;;                  (let ((d0 (build-path test-dir "resolve-path-2")))
        ;;                    (make-directory d0)
        ;;                    (make-directory (build-path d0 "d1"))
        ;;                    (make-directory (build-path d0 "d1" "d2"))
        ;;                    (make-file-or-directory-link "d1" (build-path d0 "s1"))
        ;;                    (make-file-or-directory-link "d2" (build-path d0 "d1" "s2"))
        ;;                    (parameterize ((current-directory d0))
        ;;                      (resolve-path "s1/s2")))
        ;;                  (string->path "d2")))
        (or windows?
            (let ((d0 (build-path test-dir "canonicalize-1")))
              (test 'canonicalize-1
                    (begin (make-directory d0)
                           (make-directory (build-path d0 "d1"))
                           (make-directory (build-path d0 "d1" "d2"))
                           (make-file-or-directory-link "d1" (build-path d0 "s1"))
                           (make-file-or-directory-link "d2" (build-path d0 "d1" "s2"))
                           (parameterize ((current-directory d0))
                             (canonicalize-path "s1/s2")))
                    (build-path d0 "d1" "d2"))))
        (or windows?
            (let* ((d0 (build-path test-dir "canonicalize-2"))
                   (d1 (build-path d0 "d1"))
                   (s1 (build-path d0 "d1" "s1"))
                   (s2 (build-path d0 "s2")))
              (test 'canonicalize-2
                    (begin (make-directory d0)
                           (make-directory d1)
                           (make-file-or-directory-link "../s2" s1)
                           (make-file-or-directory-link "d1"    s2)
                           (parameterize ((current-directory d0))
                             (canonicalize-path "d1/s1")))
                    d1)))
        (or windows?
            (let* ((d0 (build-path test-dir "canonicalize-infinite-loop-1"))
                   (s1 (build-path d0 "s1"))
                   (s2 (build-path d0 "s2")))
              (test #:id   'canonicalize-infinite-loop-1
                    #:code (begin (make-directory d0)
                                  (make-file-or-directory-link "s2" s1)
                                  (make-file-or-directory-link "s1" s2)
                                  (parameterize ((current-directory d0))
                                    (canonicalize-path "s1")))
                    #:exn  (lambda (e)
                             (equal? (exn-message e)
                                     (format "canonicalize-path: path \"s1\" has infinite loop (detected at ~S)"
                                             s1))))))

        ;; TODO: More tests
        )
      (lambda ()
        (delete-directory/files test-dir)))))

(doc (section "Known Issues")

     (itemlist

      (item (racket canonicalize-path)
            " needs more testing, especially on non-Unix-like systems.")))

(doc history

     (#:planet 1:0 #:date "2012-11-27"
               (itemlist
                (item "Initial release."))))