numspell.ss
;;; @Package     numspell
;;; @Subtitle    Spelling Numbers as English in Scheme
;;; @HomePage    http://www.neilvandyke.org/numspell-scheme/
;;; @Author      Neil Van Dyke
;;; @Version     0.2
;;; @Date        2009-02-24
;;; @PLaneT      neil/numspell:1:1

;; $Id: numspell.ss,v 1.35 2009/02/24 16:47:06 neilpair Exp $

;;; @legal
;;; Copyright @copyright{} 2006--2009 Neil Van Dyke.  This program is Free
;;; Software; you can redistribute it and/or modify it under the terms of the
;;; GNU Lesser General Public License as published by the Free Software
;;; Foundation; either version 3 of the License (LGPL 3), or (at your option)
;;; any later version.  This program is distributed in the hope that it will be
;;; useful, but without any warranty; without even the implied warranty of
;;; merchantability or fitness for a particular purpose.  See
;;; @indicateurl{http://www.gnu.org/licenses/} for details.  For other licenses
;;; and consulting, please contact the author.
;;; @end legal

#lang scheme/base

;;; @section Introduction

;;; The @b{numspell} library provides the ability to ``spell'' Scheme
;;; numbers in English.  This is useful for writing numbers on banking checks
;;; and other legal documents, as well as for speech generation.
;;;
;;; Most rational numbers in Scheme are presently supported.  For example:
;;;
;;; @lisp
;;; (number->english 123456)
;;; @result{} "one hundred twenty-three thousand four hundred fifty-six"
;;; (number->english (/ 4 -6))
;;; @result{} "negative two over three"
;;; (number->english (exact->inexact (/ 4 -6)))
;;; @result{} "negative zero point six six six six six six" ; (approx.)
;;; @end lisp
;;;
;;; The number names supported by @b{numspell} are taken from a version
;;; of the @url{http://en.wikipedia.org/wiki/Names_of_large_numbers, Wikipedia
;;; ``Names of large numbers''} article.  Both
;;; @url{http://en.wikipedia.org/wiki/Long_and_short_scales, short and long
;;; scales} are supported, through different procedures, with short scale being
;;; the default.  For example:
;;;
;;; @lisp
;;; (number->english             (expt 10 15))
;;; @result{} "one quadrillion"
;;; (number->short-scale-english (expt 10 15))
;;; @result{} "one quadrillion"
;;; (number->long-scale-english  (expt 10 15))
;;; @result{} "one thousand billion"
;;; @end lisp
;;;
;;; Note: Some numbers, such as very large and very small non-integers printed
;;; by some Scheme implementations in exponential notation, are not supported
;;; by the current version of @b{numspell}.
;;;
;;; @b{numspell} requires R5RS, SRFI-6 (string ports), and SRFI-11
;;; (@code{let-values}).

;;; @section Interface

;;; The public interface consists of a few procedures.

(define-syntax %call-with-output-string
  (syntax-rules ()
    ((_ PROC)
     (let ((port (open-output-string)))
       (PROC port)
       (let ((str (get-output-string port)))
         (close-output-port port)
         str)))))

(define %short-scale-english
  '(#f
    "thousand"
    "million"
    "billion"
    "trillion"
    "quadrillion"
    "quintillion"
    "sextillion"
    "septillion"
    "octillion"
    "nonillion"
    "decillion"
    "undecillion"
    "deuodecillion"
    "tredecillion"
    "quattuordecillion"
    "quindecillion"
    "sexdecillion"
    "septendecillion"
    "octodecillion"
    "novemdecillion"
    "vigintillion"
    ))

(define %long-scale-english
  '(#f
    "thousand"
    "million"           "thousand million"
    "billion"           "thousand billion"
    "trillion"          "thousand trillion"
    "quadrillion"       "thousand quadrillion"
    "quintillion"       "thousand quintillion"
    "sextillion"        "thousand sextillion"
    "septillion"        "thousand septillion"
    "octillion"         "thousand octillion"
    "nonillion"         "thousand nonillion"
    "decillion"         "thousand decillion"
    "undecillion"       "thousand undecillion"
    "deuodecillion"     "thousand deuodecillion"
    "tredecillion"      "thousand tredecillion"
    "quattuordecillion" "thousand quattuordecillion"
    "quindecillion"     "thousand quindecillion"
    "sexdecillion"      "thousand sexdecillion"
    "septendecillion"   "thousand septendecillion"
    "octodecillion"     "thousand octodecillion"
    "novemdecillion"    "thousand novemdecillion"
    "vigintillion"      "thousand vigintillion"
    ))

;;; @defproc  write-number-as-english             num port
;;; @defprocx write-number-as-short-scale-english num port
;;; @defprocx write-number-as-long-scale-english  num port
;;;
;;; Spell number @var{num} to output port @var{port}.  If @var{num} cannot be
;;; spelt, an error is signaled.

(define (write-number-as-english num port)
  (write-number-as-short-scale-english num port))

(define (write-number-as-short-scale-english num port)
  (%spell-number num port %short-scale-english))

(define (write-number-as-long-scale-english num port)
  (%spell-number num port %long-scale-english))

;; TODO: Handle printing infinity.
;;
;; (let ((x (sqrt -1)))                     (list x (- x) (/ x 0.0)))
;; (let ((x (/ 1 0.0)))                     (list x (- x) (/ x 0.0)))
;; (let ((x (exact->inexact (expt 9 999)))) (list x (- x) (/ x 0.0)))
;;
;; +infinity      -infinity      Complex    Implementations
;; -------------  -------------  ---------  ---------------
;; "+inf"         "-inf"         ?          Chicken 2.3 (uses C)
;; "+inf.0"       "-inf.0"       0+inf.0i   PLT MzScheme 301
;; "#{Inf}"       "#{-Inf}"      ?          Scheme 48 1.3
;; "#[+inf]"      ?              +#[+inf]i  MIT Scheme
;; "+infinity.0"  "-infinity.0"  ?          SISC
;; "+Infinity.0"  "-Infinity.0"  ?          Chicken (uses C)
;; "#i1/0"        "#i-1/0"       ?          Gauche 0.8.6
;; ?              ?              ?          Gambit-C
;;
;; TODO: Look at: http://swissnet.ai.mit.edu/~jaffer/III/RAWI

(define (%spell-number num port scale)
  (cond
   ((not (number? num)) (error "not a number:" num))
   ((integer?     num ) (%spell-integer    num port scale))
   ((rational?    num ) (%spell-noninteger num port scale))
   (else (error "cannot spell number:" num))))

(define (%spell-integer num port scale)
  (or (integer? num) (error "not an integer:" num))
  (let spell ((num num))
    (if (< num 0)
        (begin (display "negative " port)
               (spell (- num)))
        (%spell-nonnegative-integer num port scale))))

(define (%spell-integer-substring str start end port scale)
  ;; Note: We could implement this more efficiently, at the cost of maintaining
  ;; two seperate algorithms (one that takes an integer, and one that takes a
  ;; string).
  (%spell-integer (string->number (substring str start end))
                  port
                  scale))

(define (%spell-noninteger num port scale)
  (or (and (number? num) (rational? num)) (error "not a rational number:" num))
  (let spell ((num num))
    (if (< num 0)
        (begin (display "negative " port)
               (spell (- num)))
        (%spell-nonnegative-noninteger num port scale))))

(define %spell-nonnegative-integer
  (letrec ((split-integer
            (lambda (num divisor)
              (let ((first (truncate (/ num divisor))))
                (values first (- num (* first divisor))))))
           (zero-through-nineteen
            '#("zero" "one" "two" "three" "four" "five" "six" "seven" "eight"
               "nine" "ten" "eleven" "twelve" "thirteen" "fourteen" "fifteen"
               "sixteen" "seventeen" "eighteen" "nineteen"))
           (twenty-through-ninety
            '#("twenty" "thirty" "forty" "fifty" "sixty" "seventy" "eighty"
               "ninety")))
    (lambda (orig-num port scale)
      (let loop ((num   orig-num)
                 (names scale))
        (let-values (((thousands nonthousands) (split-integer num 1000)))

          (display `(*DEBUG* ((thousands ,thousands) (names ,names))))
          (newline)

          (or (zero? thousands)
              (if (null? names)
                  (error "scale names exhausted for:" orig-num)
                  (loop thousands (cdr names))))

          (if (zero? nonthousands)
              (and (zero? thousands)
                   (display "zero" port))
              (let-values (((hundreds nonhundreds)
                            (split-integer nonthousands 100)))
                (or (zero? hundreds)
                    (begin
                      (or (zero? thousands)
                          (write-char #\space port))
                      (display (vector-ref zero-through-nineteen hundreds)
                               port)
                      (display " hundred" port)))
                (or (zero? nonhundreds)
                    (begin
                      (or (and (zero? thousands) (zero? hundreds))
                          (write-char #\space port))
                      (if (< nonhundreds 20)
                          (display (vector-ref zero-through-nineteen
                                               nonhundreds)
                                   port)
                          (let-values (((tens ones)
                                        (split-integer nonhundreds 10)))
                            (display (vector-ref twenty-through-ninety
                                                 (- tens 2))
                                     port)
                            (or (zero? ones)
                                (begin
                                  (write-char #\- port)
                                  (display (vector-ref zero-through-nineteen
                                                       ones)
                                           port)))))))
                (cond ((car names) => (lambda (scale)
                                        (write-char #\space port)
                                        (display scale port)))))))))))

(define (%spell-nonnegative-noninteger num port scale)
  (or (and (number? num) (rational? num))
      (error "wrong kind of number:" num))
  (let* ((str (number->string num))
         (len (string-length str)))
    (let loop-for-point ((i 0))
      (if (= i len)
          (error "number string empty:" num str)
          (case (string-ref str i)
            ((#\/)
             (if (zero? i)
                 (display "zero" port)
                 (%spell-integer-substring str 0 i port scale))
             (let ((start (+ 1 i)))
               (let loop-for-decimal-digits ((i start))
                 (if (= i len)
                     (if (= start i)
                         (error "number string empty after slash:" num str)
                         (begin
                           (display " over " port)
                           (%spell-integer-substring
                            str start i port scale)))
                     (case (string-ref str i)
                       ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
                        (loop-for-decimal-digits (+ 1 i)))
                       (else
                        (error
                         "number string has unknown character after slash:"
                         num str i)))))))
            ((#\. #\,)
             ;; Note: We permit comma as a point character, although we've not
             ;; yet heard of a Scheme implementation using a comma that way.
             (if (zero? i)
                 (display "zero" port)
                 (%spell-integer-substring str 0 i port scale))
             (display " point" port)
             (if (= (+ 1 i) len)
                 (display " zero" port)
                 (let loop-for-decimal-digits ((i (+ 1 i)))
                   (and (< i len)
                        (begin
                          (display
                           (case (string-ref str i)
                             ((#\0) " zero")
                             ((#\1) " one")
                             ((#\2) " two")
                             ((#\3) " three")
                             ((#\4) " four")
                             ((#\5) " five")
                             ((#\6) " six")
                             ((#\7) " seven")
                             ((#\8) " eight")
                             ((#\9) " nine")
                             (else
                              (error "cannot spell number with string:"
                                     num str)))
                           port)
                          (loop-for-decimal-digits (+ 1 i)))))))
            ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
             (loop-for-point (+ 1 i)))
            (else (error "cannot spell number with string:" num str)))))))

;;; @defproc  number->english             num
;;; @defprocx number->short-scale-english num
;;; @defprocx number->long-scale-english  num
;;;
;;; Yield a string that spells number @var{num}.  If @var{num} cannot be spelt,
;;; an error is signaled.

(define (number->english num)
  (number->short-scale-english num))

(define (number->short-scale-english num)
  (%call-with-output-string
   (lambda (port)
     (write-number-as-short-scale-english num port))))

(define (number->long-scale-english num)
  (%call-with-output-string
   (lambda (port)
     (write-number-as-long-scale-english num port))))

;;; @unnumberedsec History

;;; @table @asis
;;;
;;; @item Version 0.2 --- 2009-02-24 --- PLaneT @code{(1 1)}
;;; License is now LGPL 3.  Converted to author's new Scheme administration
;;; system.  Test suite moved out of main file.  A descriptive error message is
;;; now generated when scale names are exhausted by a very large number.
;;; Changed some @code{if} to @code{and} for PLT 4.x.
;;;
;;; @item Version 0.1 --- 2006-05-07 --- PLaneT @code{(1 0)}
;;; Initial release.
;;;
;;; @end table

(provide
 number->english
 number->long-scale-english
 number->short-scale-english
 write-number-as-english
 write-number-as-long-scale-english
 write-number-as-short-scale-english)