tabexpand.ss
;;; @Package     tabexpand
;;; @Subtitle    Tab Character Expansion in Scheme
;;; @HomePage    http://www.neilvandyke.org/tabexpand-scheme/
;;; @Author      Neil Van Dyke
;;; @Version     0.3
;;; @Date        2009-03-03
;;; @PLaneT      neil/tabexpand:1:1

;; $Id: tabexpand.ss,v 1.12 2009/03/04 04:41:01 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2004--2009 Neil Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

#lang scheme/base

;;; @section Introduction

;;; There is no denying that ASCII tab characters are an archaic abomination.
;;; Savvy Emacs users might have noticed that the
;;; @uref{http://www.neilvandyke.org/quack/, Quack} option variable
;;; @code{quack-tabs-are-evil-p} defaults to true.  Note also that
;;; @code{quack-tidy} gladly slays any tab in sight, laughing maniacally as
;;; only the truly righteous can.  Sadly, not all strings in the universe are
;;; Scheme code subject to the wrath of Quack, therefore...
;;;
;;; This very simple Scheme library provides procedures for expanding tab
;;; characters.  It was written early one Sunday morning to complement the
;;; plethora of PLT-specific solutions being offered to the problem on the
;;; nascent Schematics cookbook Wiki.  Its source code is a bit verbose, but it
;;; tries not to generate much garbage, it supports non-zero starting columns,
;;; and it should work with any R5RS Scheme implementation that supports
;;; SRFI-6.  (A future edition of this continuing epic might remove the
;;; dependency on SRFI-6, should we bother to benchmark and find that some
;;; implementations are not as efficient as we'd like.)
;;;
;;; At time of this writing, the author notes with no small amount of interest
;;; that the Internet domain name @code{tabexpand.com} has not yet been taken.

;;; @section Procedures

;;; Three procedures are provided.  Most applications will use the simple
;;; @code{tabexpand}.

;;; @defproc  tabexpand/stop/col str stop col
;;; @defprocx tabexpand/stop     str stop
;;; @defprocx tabexpand          str
;;;
;;; Yields a new string that is equivalent to string @var{str} except that any
;;; ASCII tab characters have been expanded to space characters.  @var{stop}, a
;;; positive integer defaulting to @code{8}, is used as the tabstop.
;;; @var{col}, a nonnegative integer defaulting to @code{0}, is the context
;;; starting column for the beginning of the string, with respect to which tabs
;;; positions should be calculated.  All characters other than tab are treated
;;; as if they were normal printable characters with no special effect on the
;;; column.

(define tabexpand/stop/col
  (letrec ((tab-char (integer->char 9))
           (tab-space-8-vector
            (vector ""
                    (make-string 1 #\space)
                    (make-string 2 #\space)
                    (make-string 3 #\space)
                    (make-string 4 #\space)
                    (make-string 5 #\space)
                    (make-string 6 #\space)
                    (make-string 7 #\space)
                    (make-string 8 #\space)))
           (tab-space-string
            (lambda (n)
              (if (<= 0 n 8)
                  (vector-ref tab-space-8-vector n)
                  (make-string n #\space)))))
    (lambda (str stop col)
      (let ((len (string-length str)))
        (let find-first-tab ((col col)
                             (i   0))
          (if (= i len)
              (string-copy str)
              (if (eqv? (string-ref str i) tab-char)
                  (let ((os (open-output-string)))
                    ;; Note: We could see whether iterating over the substring
                    ;; and calling write-char is faster than allocating a
                    ;; substring for a particular Scheme implementation.
                    (display (substring str 0 i) os)
                    (let expand-tab-and-find-next ((col col)
                                                   (i   i))
                      (let* ((spaces0 (- stop (modulo col stop)))
                             (spaces  (if (= spaces0 0) stop spaces0)))
                        (display (tab-space-string spaces) os)
                        (let find-next-tab ((col (+ col spaces))
                                            (i   (+ 1 i)))
                          (if (= i len)
                              (let ((result (get-output-string os)))
                                (close-output-port os)
                                result)
                              (let ((c (string-ref str i)))
                                (if (eqv? c tab-char)
                                    (expand-tab-and-find-next col i)
                                    (begin (write-char c os)
                                           (find-next-tab (+ 1 col)
                                                          (+ 1 i))))))))))
                  (find-first-tab (+ 1 col) (+ 1 i)))))))))

(define (tabexpand/stop str stop)
  (tabexpand/stop/col str stop 0))

(define (tabexpand str)
  (tabexpand/stop/col str 8 0))

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.3 --- 2009-03-03 --- PLaneT @code{(1 1)}
;;; License is now LGPL 3.  Converted to author's new Scheme administration
;;; system.
;;;
;;; @item Version 0.2 --- 2005-02-24 --- PLaneT @code{(1 0)}
;;; Added Testeez test cases.  Packaged for PLaneT.
;;;
;;; @item Version 0.1 --- 2004-05-09
;;; Wrote as a joke that also made a point about code patterns vs. libraries.
;;;
;;; @end table

(provide
 tabexpand
 tabexpand/stop
 tabexpand/stop/col)