;;; @Package     tabexpand.scm
;;; @Subtitle    Tab Character Expansion in Scheme
;;; @HomePage
;;; @Author      Neil W. Van Dyke
;;; @AuthorEmail
;;; @Version     0.2
;;; @Date        2005-02-25

;; $Id: tabexpand.scm,v 1.10 2005/02/25 23:41:52 neil Exp $

;;; @legal
;;; Copyright @copyright{} 2004 - 2005 Neil W. Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 2.1 of the License, or (at your option) any
;;; later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See the GNU Lesser
;;; General Public License [LGPL] for details.  For other license options and
;;; commercial consulting, contact the author.
;;; @end legal

;; (load "~/collects/testeez/testeez.scm")
(define-syntax %tabexpand:testeez
  (syntax-rules () ((_ x ...)
                    ;; (testeez x ...)
                    (error "Tests disabled.")

;;; @section Introduction

;;; There is no denying that ASCII tab characters are an archaic abomination
;;; [JWZ].  Savvy Emacs users might have noticed that the [Quack] option
;;; variable @code{quack-tabs-are-evil-p} defaults to true.  Note also that
;;; @code{quack-tidy} gladly slays any tab in sight, laughing maniacally as
;;; only the truly righteous can.  Sadly, not all strings in the universe are
;;; Scheme code subject to the wrath of Quack, therefore...
;;; This very simple Scheme library provides procedures for expanding tab
;;; characters.  It was written early one Sunday morning to complement the
;;; plethora of PLT-specific solutions being offered to the problem on the
;;; nascent Schematics cookbook Wiki.  Its source code is a bit verbose, but it
;;; tries not to generate much garbage, it supports non-zero starting columns,
;;; and it should work with any R5RS Scheme implementation that supports
;;; [SRFI-6].  (A future edition of this continuing epic might remove the
;;; dependency on SRFI-6, should we bother to benchmark and find that some
;;; implementations are not as efficient as we'd like.)
;;; At time of this writing, the author notes with no small amount of interest
;;; that the Internet domain name @code{} has not yet been taken.

;;; @section Procedures

;;; Three procedures are provided.  Most applications will use the simple
;;; @code{tabexpand}.

;;; @defproc  tabexpand/stop/col str stop col
;;; @defprocx tabexpand/stop     str stop
;;; @defprocx tabexpand          str
;;; Yields a new string that is equivalent to string @var{str} except that any
;;; ASCII tab characters have been expanded to space characters.  @var{stop}, a
;;; positive integer defaulting to @code{8}, is used as the tabstop.
;;; @var{col}, a nonnegative integer defaulting to @code{0}, is the context
;;; starting column for the beginning of the string, with respect to which tabs
;;; positions should be calculated.  All characters other than tab are treated
;;; as if they were normal printable characters with no special effect on the
;;; column.

(define tabexpand/stop/col
  (letrec ((tab-char (integer->char 9))
            (vector ""
                    (make-string 1 #\space)
                    (make-string 2 #\space)
                    (make-string 3 #\space)
                    (make-string 4 #\space)
                    (make-string 5 #\space)
                    (make-string 6 #\space)
                    (make-string 7 #\space)
                    (make-string 8 #\space)))
            (lambda (n)
              (if (<= 0 n 8)
                  (vector-ref tab-space-8-vector n)
                  (make-string n #\space)))))
    (lambda (str stop col)
      (let ((len (string-length str)))
        (let find-first-tab ((col col)
                             (i   0))
          (if (= i len)
              (string-copy str)
              (if (eqv? (string-ref str i) tab-char)
                  (let ((os (open-output-string)))
                    ;; Note: We could see whether iterating over the substring
                    ;; and calling write-char is faster than allocating a
                    ;; substring for a particular Scheme implementation.
                    (display (substring str 0 i) os)
                    (let expand-tab-and-find-next ((col col)
                                                   (i   i))
                      (let* ((spaces0 (- stop (modulo col stop)))
                             (spaces  (if (= spaces0 0) stop spaces0)))
                        (display (tab-space-string spaces) os)
                        (let find-next-tab ((col (+ col spaces))
                                            (i   (+ 1 i)))
                          (if (= i len)
                              (let ((result (get-output-string os)))
                                (close-output-port os)
                              (let ((c (string-ref str i)))
                                (if (eqv? c tab-char)
                                    (expand-tab-and-find-next col i)
                                    (begin (write-char c os)
                                           (find-next-tab (+ 1 col)
                                                          (+ 1 i))))))))))
                  (find-first-tab (+ 1 col) (+ 1 i)))))))))

(define (tabexpand/stop str stop)
  (tabexpand/stop/col str stop 0))

(define (tabexpand str)
  (tabexpand/stop/col str 8 0))

;;; @section Tests

;;; The @code{tabexpand.scm} test suite can be enabled by editing the source
;;; code file and loading [Testeez]; the test suite is disabled by default.

(define (%tabexpand:test)

   (test/equal "" (tabexpand "\t")           "        ")
   (test/equal "" (tabexpand "a\tb")         "a       b")
   (test/equal "" (tabexpand "a\tbc")        "a       bc")

   (test/equal "" (tabexpand "a\t")          "a       ")
   (test/equal "" (tabexpand "ab\t")         "ab      ")
   (test/equal "" (tabexpand "abc\t")        "abc     ")
   (test/equal "" (tabexpand "abcd\t")       "abcd    ")
   (test/equal "" (tabexpand "abcde\t")      "abcde   ")
   (test/equal "" (tabexpand "abcdef\t")     "abcdef  ")
   (test/equal "" (tabexpand "abcdefg\t")    "abcdefg ")
   (test/equal "" (tabexpand "abcdefgh\t")   "abcdefgh        ")
   (test/equal "" (tabexpand "abcdefghi\t")  "abcdefghi       ")

   (test/equal "" (tabexpand "\t\tabcdefghi") "                abcdefghi")
   (test/equal "" (tabexpand "\ta\tbcdefghi") "        a       bcdefghi")
   (test/equal "" (tabexpand "\tab\tcdefghi") "        ab      cdefghi")
   (test/equal "" (tabexpand "\tabc\tdefghi") "        abc     defghi")
   (test/equal "" (tabexpand "\tabcd\tefghi") "        abcd    efghi")
   (test/equal "" (tabexpand "\tabcde\tfghi") "        abcde   fghi")
   (test/equal "" (tabexpand "\tabcdef\tghi") "        abcdef  ghi")
   (test/equal "" (tabexpand "\tabcdefg\thi") "        abcdefg hi")
   (test/equal "" (tabexpand "\tabcdefgh\ti") "        abcdefgh        i")

   (test/equal "" (tabexpand "abcdefghijklmnop") "abcdefghijklmnop")

   (test/equal "" (tabexpand "ab\tcd\tef\tg")   "ab      cd      ef      g")
   (test/equal "" (tabexpand "ab\tcd\t\tef")    "ab      cd              ef")


;;; @unnumberedsec History

;;; @table @asis
;;; @item Version 0.2 --- 2005-02-24
;;; Added Testeez test cases.  Packaged for PLaneT.
;;; @item Version 0.1 --- 2004-05-09
;;; Wrote as a joke that also made a point about code patterns vs. libraries.
;;; @end table

;;; @unnumberedsec References

;;; @table @asis
;;; @item [JWZ]
;;; Jamie Zawinski, ``Tabs versus Spaces: An Eternal Holy War,'' 2000.@*
;;; @item [LGPL]
;;; Free Software Foundation, ``GNU Lesser General Public License,'' Version
;;; 2.1, 1999-02, 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.@*
;;; @uref{}
;;; @item [Quack]
;;; @uref{}
;;; @item [SRFI-6]
;;; William D. Clinger, ``Basic String Ports,'' SRFI 6, 1999-07-01.@*
;;; @uref{}
;;; @item [Testeez]
;;; Neil W. Van Dyke, ``Testeez: Simple Test Mechanism for Scheme,'' Version
;;; 0.1.@*
;;; @uref{}
;;; @end table