lib/srfi/n14.ss
(library (srfi n14)
  (export char-set? char-set=
	  char-set<= char-set-hash char-set-cursor char-set-ref
	  char-set-cursor-next end-of-char-set? char-set-fold char-set-unfold
	  char-set-unfold!  char-set-for-each char-set-map char-set-copy
	  char-set

	  list->char-set  string->char-set 
	  list->char-set! string->char-set! 

	  char-set-filter  ucs-range->char-set

	  ; the SRFI defines ->CHAR-SET, but that isn't a legal identifier
	  x->char-set
	  
	  char-set-filter! ucs-range->char-set!

	  char-set->list char-set->string

	  char-set-size char-set-count char-set-contains?
	  char-set-every char-set-any

	  char-set-adjoin  char-set-delete 
	  char-set-adjoin! char-set-delete!
 

	  char-set-complement  char-set-union  char-set-intersection  
	  char-set-complement! char-set-union! char-set-intersection! 

	  char-set-difference  char-set-xor  char-set-diff+intersection
	  char-set-difference! char-set-xor! char-set-diff+intersection!

	  char-set:lower-case	char-set:upper-case	char-set:title-case
	  char-set:letter	char-set:digit		char-set:letter+digit
	  char-set:graphic	char-set:printing	char-set:whitespace
	  char-set:iso-control	char-set:punctuation	char-set:symbol
	  char-set:hex-digit	char-set:blank		char-set:ascii
	  char-set:empty	char-set:full)
  (import (rnrs base)
          (rename (rnrs base) 
		  (integer->char scalar-value->char)
		  (char->integer char->scalar-value))
	  (rename (rnrs lists)
		  (partition partition-list))
          (rename (rnrs bytevectors) 
		  (bytevector-u8-ref byte-vector-ref)
		  (bytevector-length byte-vector-length)
		  (bytevector-copy! copy-bytes!)
		  (bytevector-u8-set! byte-vector-set!)
		  (make-bytevector make-byte-vector))
             
          (rnrs mutable-strings) ;; only one string-set!, could be eliminated.
          (rnrs control)
          (rnrs unicode)
          (rnrs arithmetic bitwise)
          (rnrs r5rs)
          (srfi n9)
          (ubik opt-lambda)
          (ubik inversion-lists))

(define (unspecific) (if #f #f))

;; Scheme 48 implementation of SRFI 14.
;; http://s48.org/

;; Accessed via development repository, 6 March 2008.
;; http://www.deinprogramm.de/cgi-bin/hgs48.cgi/file/1cbbf64c92f6/scheme/srfi/srfi-14.scm

;; *R6RS* changes: removed definition of make-char-set-immutable!.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
; Copyright (c) 2005-2006 by Basis Technology Corporation.  See file COPYING.

; This is basically a complete re-implementation, suitable for Unicode.

; Some bits and pieces from Olin's reference implementation remain,
; but none from the MIT Scheme code.  For whatever remains, the
; following copyright holds:

; Copyright (c) 1994-2003 by Olin Shivers
;
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
; 1. Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
; 2. Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the distribution.
; 3. The name of the authors may not be used to endorse or promote products
;    derived from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

(define-record-type :char-set
  (make-char-set simple i-list)
  char-set?
  ;; byte vector for the Latin-1 part
  (simple char-set-simple
	  set-char-set-simple!)
  ;; inversion list for the rest
  (i-list char-set-i-list
	  set-char-set-i-list!))

; *R6RS*: removed, not part of SRFI, not referenced here.
;(define (make-char-set-immutable! char-set)
;  (make-immutable! char-set)
;  (make-immutable! (char-set-simple char-set)))
;  ; inversion lists are always immutable

;;; "Simple Csets"---we use mutable byte vectors for the Latin-1 part

(define *simple-cset-boundary* 256)

(define (simple-char? c)
  (< (char->scalar-value c) *simple-cset-boundary*))

(define (make-empty-simple-cset)
  (make-byte-vector *simple-cset-boundary* 0))

(define (make-full-simple-cset)
  (make-byte-vector *simple-cset-boundary* 1))

(define (copy-simple-cset s)
  (byte-vector-copy s))

; don't mistake these for abstractions
(define (simple-cset-code-not-member? s i) (zero? (byte-vector-ref s i)))
(define (simple-cset-code-member? s i) (not (simple-cset-code-not-member? s i)))
(define (simple-cset-ref s i) (byte-vector-ref s i))
(define (simple-cset-set! s i v) (byte-vector-set! s i v))
(define (simple-cset-remove-code! s i) (byte-vector-set! s i 0))
(define (simple-cset-adjoin-code! s i) (byte-vector-set! s i 1))

(define (simple-cset-contains? s char)
  (simple-cset-code-member? s (char->scalar-value char)))

(define (simple-cset=? s1 s2)
  (byte-vector=? s1 s2))

(define (simple-cset<=? s1 s2)
  (or (eq? s1 s2)
      (let loop ((i 0))
	(if (>= i *simple-cset-boundary*)
	    #t
	    (and (<= (simple-cset-ref s1 i) (simple-cset-ref s2 i))
		 (loop (+ 1 i)))))))

(define (simple-cset-size s)
  (let loop ((i 0) (size 0))
    (if (>= i *simple-cset-boundary*)
	size
	(loop (+ 1 i) (+ size (simple-cset-ref s i))))))

(define (simple-cset-count pred s)
  (let loop ((i 0) (count 0))
    (if (>= i *simple-cset-boundary*)
	count
	(loop (+ 1 i)
	      (if (and (simple-cset-code-member? s i) (pred (scalar-value->char i)))
		  (+ count 1)
		  count)))))

(define (simple-cset-modify! set s chars)
  (for-each (lambda (c) (set s (char->scalar-value c)))
	    chars)
  s)

(define (simple-cset-modify set s chars)
  (simple-cset-modify! set (copy-simple-cset s) chars))

(define (simple-cset-adjoin s . chars)
  (simple-cset-modify simple-cset-adjoin-code! s chars))
(define (simple-cset-adjoin! s . chars)
  (simple-cset-modify! simple-cset-adjoin-code! s chars))
(define (simple-cset-delete s . chars)
  (simple-cset-modify simple-cset-remove-code! s chars))
(define (simple-cset-delete! s . chars)
  (simple-cset-modify! simple-cset-remove-code! s chars))

;;; If we represented char sets as a bit set, we could do the following
;;; trick to pick the lowest bit out of the set:
;;;   (count-bits (xor (- cset 1) cset))
;;; (But first mask out the bits already scanned by the cursor first.)

(define (simple-cset-cursor-next s cursor)
  (let loop ((cur cursor))
    (let ((cur (- cur 1)))
      (if (or (< cur 0) (simple-cset-code-member? s cur))
	  cur
	  (loop cur)))))

(define (end-of-simple-cset? cursor)
  (negative? cursor))

(define (simple-cset-cursor-ref cursor)
  (scalar-value->char cursor))

(define (simple-cset-for-each proc s)
  (let loop ((i 0))
    (if (< i *simple-cset-boundary*)
	(begin
	  (if (simple-cset-code-member? s i)
	      (proc (scalar-value->char i)))
	  (loop (+ 1 i))))))

(define (simple-cset-fold kons knil s)
  (let loop ((i 0) (ans knil))
    (if (>= i *simple-cset-boundary*)
	ans
	(loop (+ 1 i)
	      (if (simple-cset-code-not-member? s i)
		  ans
		  (kons (scalar-value->char i) ans))))))

(define (simple-cset-every? pred s)
  (let loop ((i 0))
    (cond
     ((>= i *simple-cset-boundary*)
      #t)
     ((or (simple-cset-code-not-member? s i)
	  (pred (scalar-value->char i)))
      (loop (+ 1 i)))
     (else
      #f))))

(define (simple-cset-any pred s)
  (let loop ((i 0))
    (cond
     ((>= i *simple-cset-boundary*) #f)
     ((and (simple-cset-code-member? s i)
	   (pred (scalar-value->char i))))
     (else
      (loop (+ 1 i))))))

(define (ucs-range->simple-cset lower upper)
  (let ((s (make-empty-simple-cset)))
    (let loop ((i lower))
      (if (< i upper)
	  (begin
	    (simple-cset-adjoin-code! s i)
	    (loop (+ 1 i)))))
    s))

; Algebra

; These do various "s[i] := s[i] op val" operations

(define (simple-cset-invert-code! s i v)
  (simple-cset-set! s i (- 1 v)))

(define (simple-cset-and-code! s i v)
  (if (zero? v)
      (simple-cset-remove-code! s i)))
(define (simple-cset-or-code! s i v)
  (if (not (zero? v))
      (simple-cset-adjoin-code! s i)))
(define (simple-cset-minus-code! s i v)
  (if (not (zero? v))
      (simple-cset-remove-code! s i)))
(define (simple-cset-xor-code! s i v)
  (if (not (zero? v))
      (simple-cset-set! s i (- 1 (simple-cset-ref s i)))))

(define (simple-cset-complement s)
  (simple-cset-complement! (copy-simple-cset s)))

(define (simple-cset-complement! s)
  (byte-vector-iter (lambda (i v) (simple-cset-invert-code! s i v)) s)
  s)

(define (simple-cset-op! s simple-csets code-op!)
  (for-each (lambda (s2)
	      (let loop ((i 0))
		(if (< i *simple-cset-boundary*)
		    (begin
		      (code-op! s i (simple-cset-ref s2 i))
		      (loop (+ 1 i))))))
	    simple-csets)
  s)

(define (simple-cset-union! s1 . ss)
  (simple-cset-op! s1 ss simple-cset-or-code!))

(define (simple-cset-union . ss)
  (if (pair? ss)
      (apply simple-cset-union!
	     (byte-vector-copy (car ss))
	     (cdr ss))
      (make-empty-simple-cset)))

(define (simple-cset-intersection! s1 . ss)
  (simple-cset-op! s1 ss simple-cset-and-code!))

(define (simple-cset-intersection . ss)
  (if (pair? ss)
      (apply simple-cset-intersection!
	     (byte-vector-copy (car ss))
	     (cdr ss))
      (make-full-simple-cset)))

(define (simple-cset-difference! s1 . ss)
  (simple-cset-op! s1 ss simple-cset-minus-code!))

(define (simple-cset-difference s1 . ss)
  (if (pair? ss)
      (apply simple-cset-difference! (copy-simple-cset s1) ss)
      (copy-simple-cset s1)))

(define (simple-cset-xor! s1 . ss)
  (simple-cset-op! s1 ss simple-cset-xor-code!))

(define (simple-cset-xor . ss)
  (if (pair? ss)
      (apply simple-cset-xor!
	     (byte-vector-copy (car ss))
	     (cdr ss))
      (make-empty-simple-cset)))

(define (simple-cset-diff+intersection! s1 s2 . ss)
  (byte-vector-iter (lambda (i v)
		       (cond
			((zero? v)
			 (simple-cset-remove-code! s2 i))
			((simple-cset-code-member? s2 i)
			 (simple-cset-remove-code! s1 i))))
		     s1)

  (for-each (lambda (s)
	      (byte-vector-iter (lambda (i v)
				  (if (and (not (zero? v))
					   (simple-cset-code-member? s1 i))
				      (begin
					(simple-cset-remove-code! s1 i)
					(simple-cset-adjoin-code! s2 i))))
				 s))
	    ss)

  (values s1 s2))



; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown
; in to keep the intermediate values small. (We do the calculation
; with just enough bits to represent BOUND, masking off high bits at
; each step in calculation. If this screws up any important properties
; of the hash function I'd like to hear about it. -Olin)

(define (simple-cset-hash s bound)
  ;; The mask that will cover BOUND-1:
  (let ((mask (let loop ((i #x10000)) ; Let's skip first 16 iterations, eh?
		(if (>= i bound) (- i 1) (loop (+ i i))))))
    (let loop ((i (- *simple-cset-boundary* 1)) (ans 0))
      (if (< i 0)
	  (modulo ans bound)
	  (loop (- i 1)
	      (if (simple-cset-code-not-member? s i)
		  ans
		  (bitwise-and mask (+ (* 37 ans) i))))))))

;;; Now for the real character sets

(define (make-empty-char-set)
  (make-char-set (make-empty-simple-cset)
		 (make-empty-inversion-list *simple-cset-boundary* (+ 1 #x10ffff))))
(define (make-full-char-set)
  (make-char-set (make-full-simple-cset)
		 (range->inversion-list *simple-cset-boundary* (+ 1 #x10ffff)
					*simple-cset-boundary* (+ 1 #x10ffff))))

(define (char-set-copy cs)
  (make-char-set (copy-simple-cset (char-set-simple cs))
		 (inversion-list-copy (char-set-i-list cs))))

; n-ary version
(define (char-set= . rest)
  (or (null? rest)
      (let ((cs1  (car rest))
	    (rest (cdr rest)))
	(let loop ((rest rest))
	  (or (not (pair? rest))
	      (and (char-set=/2 cs1 (car rest))
		   (loop (cdr rest))))))))

; binary version
(define (char-set=/2 cs-1 cs-2)
  (and (simple-cset=? (char-set-simple cs-1) (char-set-simple cs-2))
       (inversion-list=? (char-set-i-list cs-1)
			 (char-set-i-list cs-2))))

; n-ary
(define (char-set<= . rest)
  (or (null? rest)
      (let ((cs1  (car rest))
	    (rest (cdr rest)))
	(let loop ((cs1 cs1)  (rest rest))
	  (or (not (pair? rest))
	      (and (char-set<=/2 cs1 (car rest))
		   (loop (car rest) (cdr rest))))))))

; binary
(define (char-set<=/2 cs-1 cs-2)
  (and (simple-cset<=? (char-set-simple cs-1) (char-set-simple cs-2))
       (inversion-list<=? (char-set-i-list cs-1)
			  (char-set-i-list cs-2))))

(define (inversion-list<=? i-list-1 i-list-2)
  (inversion-list=? i-list-1
		    (inversion-list-intersection i-list-1 i-list-2)))

;;; Hash

; We follow Olin's reference implementation:
;
; If you keep BOUND small enough, the intermediate calculations will
; always be fixnums. How small is dependent on the underlying Scheme system;
; we use a default BOUND of 2^22 = 4194304, which should hack it in
; Schemes that give you at least 29 signed bits for fixnums. The core
; calculation that you don't want to overflow is, worst case,
;     (+ 65535 (* 37 (- bound 1)))
; where 65535 is the max character code. Choose the default BOUND to be the
; biggest power of two that won't cause this expression to fixnum overflow,
; and everything will be copacetic.

(define char-set-hash
  (opt-lambda (cs (bound 4194304))
    (if (not (and (integer? bound)
		  (exact? bound)
		  (<= 0 bound)))
	(assertion-violation 'char-set-hash "invalid bound" bound))
    (let ((bound (if (zero? bound) 4194304 bound)))
      (modulo (+ (simple-cset-hash (char-set-simple cs) bound)
		 (* 37 (inversion-list-hash (char-set-i-list cs) bound)))
	      bound))))

(define (char-set-contains? cs char)
  (if (simple-char? char)
      (simple-cset-contains? (char-set-simple cs) char)
      (inversion-list-member? (char->scalar-value char)
			      (char-set-i-list cs))))

(define (char-set-size cs)
  (+ (simple-cset-size (char-set-simple cs))
     (inversion-list-size (char-set-i-list cs))))

(define (char-set-count pred cset)
  (+ (simple-cset-count pred (char-set-simple cset))
     (inversion-list-count pred (char-set-i-list cset))))

(define (inversion-list-count pred i-list)
  (inversion-list-fold/done? (lambda (v count)
			       (if (pred (scalar-value->char v))
				   (+ 1 count)
				   count))
			     0
			     (lambda (v) #f)
			     i-list))

(define (make-char-set-char-op simple-cset-op inversion-list-op)
  (lambda (cs . chars)
    (call-with-values
	(lambda () (partition-list simple-char? chars))
      (lambda (simple-chars non-simple-chars)
	(make-char-set (apply simple-cset-op (char-set-simple cs) simple-chars)
		       (apply inversion-list-op (char-set-i-list cs)
			      (map char->scalar-value non-simple-chars)))))))

(define (make-char-set-char-op! simple-cset-op! simple-cset-op
				inversion-list-op)
  (lambda (cs . chars)
    (call-with-values
	(lambda () (partition-list simple-char? chars))
      (lambda (simple-chars non-simple-chars)
	(if (null? non-simple-chars)
	    (apply simple-cset-op! (char-set-simple cs) simple-chars)
	    (begin
	      (set-char-set-simple! cs
				    (apply simple-cset-op (char-set-simple cs)
					   simple-chars))
	      (set-char-set-i-list! cs
				    (apply inversion-list-op (char-set-i-list cs)
					   (map char->scalar-value non-simple-chars)))))))
    cs))

(define char-set-adjoin
  (make-char-set-char-op simple-cset-adjoin inversion-list-adjoin))
(define char-set-adjoin!
  (make-char-set-char-op! simple-cset-adjoin! simple-cset-adjoin
			  inversion-list-adjoin))
(define char-set-delete
  (make-char-set-char-op simple-cset-delete inversion-list-remove))
(define char-set-delete!
  (make-char-set-char-op! simple-cset-delete! simple-cset-delete
			  inversion-list-remove))

;;; Cursors

; A cursor is either an integer index into the mark vector (-1 for the
; end-of-char-set cursor) as in the reference implementation, and an
; inversion-list cursor otherwise.

(define (char-set-cursor cset)
  (let ((simple-cursor
	 (simple-cset-cursor-next (char-set-simple cset) 
				  *simple-cset-boundary*)))
    (if (end-of-simple-cset? simple-cursor)
	(inversion-list-cursor (char-set-i-list cset))
	simple-cursor)))
  
(define (end-of-char-set? cursor)
  (and (inversion-list-cursor? cursor)
       (inversion-list-cursor-at-end? cursor)))

(define (char-set-ref cset cursor)
  (if (number? cursor)
      (simple-cset-cursor-ref cursor)
      (scalar-value->char (inversion-list-cursor-ref cursor))))

(define (char-set-cursor-next cset cursor)
  (cond
   ((number? cursor)
    (let ((next (simple-cset-cursor-next (char-set-simple cset) cursor)))
      (if (end-of-simple-cset? next)
	  (inversion-list-cursor (char-set-i-list cset))
	  next)))
   (else
    (inversion-list-cursor-next (char-set-i-list cset) cursor))))

(define (char-set-for-each proc cs)
  (simple-cset-for-each proc (char-set-simple cs))
  (inversion-list-fold/done? (lambda (n _)
			       (proc (scalar-value->char n))
			       (unspecific))
			     #f
			     (lambda (_) #f)
			     (char-set-i-list cs)))

; this is pretty inefficent
(define (char-set-map proc cs)
  (let ((simple-cset (make-empty-simple-cset))
	(other-scalar-values '()))
    
    (define (adjoin! c)
      (let ((c (proc c)))
	(if (simple-char? c)
	    (simple-cset-adjoin! simple-cset c)
	    (set! other-scalar-values
		  (cons (char->scalar-value c) other-scalar-values)))))

    (char-set-for-each adjoin! cs)

    (make-char-set simple-cset
		   (apply numbers->inversion-list
			  *simple-cset-boundary* (+ 1 #x10ffff)
			  other-scalar-values))))

(define (char-set-fold kons knil cs)
  (inversion-list-fold/done? (lambda (n v)
			       (kons (scalar-value->char n) v))
			     (simple-cset-fold kons knil (char-set-simple cs))
			     (lambda (_) #f)
			     (char-set-i-list cs)))

(define (char-set-every pred cs)
  (and (simple-cset-every? pred (char-set-simple cs))
       (inversion-list-fold/done? (lambda (n v)
				    (and v
					 (pred (scalar-value->char n))))
				  #t
				  not
				  (char-set-i-list cs))))

(define (char-set-any pred cs)
  (or (simple-cset-any pred (char-set-simple cs))
      (inversion-list-fold/done? (lambda (n v)
				   (or v
				       (pred (scalar-value->char n))))
				 #f
				 values
				 (char-set-i-list cs))))

(define (base-char-set maybe-base-cs)
  (if maybe-base-cs
      (char-set-copy maybe-base-cs)
      (make-empty-char-set)))

(define char-set-unfold
  (opt-lambda (p f g seed (maybe-base-cs #f))
    (char-set-unfold! p f g seed
		      (base-char-set maybe-base-cs))))

(define (char-set-unfold! p f g seed base-cs)
  (let loop ((seed seed) (cs base-cs))
    (if (p seed) cs			; P says we are done.
	(loop (g seed)			; Loop on (G SEED).
	      (char-set-adjoin! cs (f seed)))))) ; Add (F SEED) to set.

; converting from and to lists

(define (char-set . chars)
  (list->char-set chars))

(define list->char-set
  (opt-lambda (chars (maybe-base-cs #f))
    (list->char-set! chars
		     (base-char-set maybe-base-cs))))

(define (list->char-set! chars cs)
  (for-each (lambda (c)
	      (char-set-adjoin! cs c))
	    chars)
  cs)

(define (char-set->list cs)
  (char-set-fold cons '() cs))

; converting to and from strings

(define string->char-set
  (opt-lambda (str (maybe-base-cs #f))
    (string->char-set! str
		       (base-char-set maybe-base-cs))))

(define (string->char-set! str cs)
  (do ((i (- (string-length str) 1) (- i 1)))
      ((< i 0))
    (char-set-adjoin! cs (string-ref str i)))
  cs)

(define (char-set->string cs)
  (let ((ans (make-string (char-set-size cs))))
    (char-set-fold (lambda (ch i)
		     (string-set! ans i ch)
		     (+ i 1))
		   0
		   cs)
    ans))

(define ucs-range->char-set
  (opt-lambda (lower upper (error? #f) (maybe-base-cs #f))
    (ucs-range->char-set! lower upper error?
			  (base-char-set maybe-base-cs))))

(define (ucs-range->char-set! lower upper error? base-cs)
  (if (negative? lower)
      (assertion-violation 'ucs-range->char-set! "negative lower bound" lower))
  (if (> lower #x10ffff)
      (assertion-violation 'ucs-range->char-set! "invalid lower bound" lower))
  (if (negative? upper)
      (assertion-violation 'ucs-range->char-set! "negative upper bound" upper))
  (if (> upper #x110000)
      (assertion-violation 'ucs-range->char-set! "invalid lower bound" upper))
  (if (not (<= lower upper))
      (assertion-violation 'ucs-range->char-set! "decreasing bounds" lower upper))

  (let ((create-inversion-list
	 (lambda (lower upper)
	   (cond
	    ((and (>= lower #xD800)
		  (>= #xe000 upper))
	     (make-empty-inversion-list *simple-cset-boundary* (+ 1 #x10ffff)))
	    ((<= upper #xe000)
	     (range->inversion-list *simple-cset-boundary* (+ 1 #x10ffff)
				    lower (min #xd800 upper)))
	    ((>= lower #xd800)
	     (range->inversion-list *simple-cset-boundary* (+ 1 #x10ffff)
				    (max #xe000 lower) upper))
	    (else
	     ;; hole
	     (ranges->inversion-list *simple-cset-boundary* (+ 1 #x10ffff)
				     (cons lower #xd800)
				     (cons #xe000 upper)))))))
    (char-set-union!
     base-cs
     (cond
      ((>= lower *simple-cset-boundary*)
       (make-char-set (make-empty-simple-cset)
		      (create-inversion-list lower upper)))
      ((< upper *simple-cset-boundary*)
       (make-char-set (ucs-range->simple-cset lower upper)
		      (make-empty-inversion-list *simple-cset-boundary* (+ 1 #x10ffff))))
      (else
       (make-char-set (ucs-range->simple-cset lower *simple-cset-boundary*)
		      (create-inversion-list *simple-cset-boundary* upper)))))))

(define char-set-filter
  (opt-lambda (predicate domain (maybe-base-cs #f))
    (char-set-filter! predicate
		      domain
		      (base-char-set maybe-base-cs))))

(define (char-set-filter! predicate domain base-cs)
  (char-set-fold (lambda (ch _)
		   (if (predicate ch)
		       (char-set-adjoin! base-cs ch)))
		 (unspecific)
		 domain)
  base-cs)

; {string, char, char-set, char predicate} -> char-set

; This is called ->CHAR-SET in the SRFI, but that's not a valid R5RS
; identifier.

(define (x->char-set x)
  (cond ((char-set? x) x)
	((string? x) (string->char-set x))
	((char? x) (char-set x))
	(else (assertion-violation 'x->char-set "Not a charset, string or char."))))


; Set algebra

(define *surrogate-complement-i-list*
  (inversion-list-complement
   (range->inversion-list *simple-cset-boundary* (+ 1 #x10ffff)
			  #xd800 #xe000)))

(define (char-set-complement cs)
  (make-char-set (simple-cset-complement (char-set-simple cs))
		 (inversion-list-intersection
		  (inversion-list-complement (char-set-i-list cs))
		  *surrogate-complement-i-list*)))

(define (char-set-complement! cs)
  (set-char-set-simple! cs
			(simple-cset-complement! (char-set-simple cs)))
  (set-char-set-i-list! cs
			(inversion-list-intersection
			 (inversion-list-complement (char-set-i-list cs))
			 *surrogate-complement-i-list*))
  cs)

(define (make-char-set-op! simple-cset-op! inversion-list-op)
  (lambda (cset1 . csets)
    (set-char-set-simple! cset1
			  (apply simple-cset-op!
				 (char-set-simple cset1)
				 (map char-set-simple csets)))
    (set-char-set-i-list! cset1
			  (apply inversion-list-op
				 (char-set-i-list cset1)
				 (map char-set-i-list csets)))
    cset1))

(define (make-char-set-op char-set-op! make-neutral)
  (lambda csets
    (if (pair? csets)
	(apply char-set-op! (char-set-copy (car csets)) (cdr csets))
	(make-neutral))))

(define char-set-union!
  (make-char-set-op! simple-cset-union! inversion-list-union))
(define char-set-union
  (make-char-set-op char-set-union! make-empty-char-set))

(define char-set-intersection!
  (make-char-set-op! simple-cset-intersection! inversion-list-intersection))
(define char-set-intersection
  (make-char-set-op char-set-intersection! make-full-char-set))

(define char-set-difference!
  (make-char-set-op! simple-cset-difference! inversion-list-difference))

(define (char-set-difference cset1 . csets)
  (apply char-set-difference! (char-set-copy cset1) csets))

; copied from inversion-list.scm
(define (binary->n-ary proc/2)
  (lambda (arg-1 . args)
    (if (and (pair? args)
	     (null? (cdr args)))
	(proc/2 arg-1 (car args))
	(let loop ((args args)
		   (result arg-1))
	  (if (null? args)
	      result
	      (loop (cdr args) (proc/2 result (car args))))))))

(define inversion-list-xor
  (binary->n-ary
   (lambda (i-list-1 i-list-2)
     (inversion-list-union (inversion-list-intersection
			    (inversion-list-complement i-list-1)
			    i-list-2)
			   (inversion-list-intersection
			    i-list-1
			    (inversion-list-complement i-list-2))))))

; Really inefficient for things outside Latin-1
; WHO NEEDS THIS NONSENSE, ANYWAY?
(define char-set-xor!
  (make-char-set-op! simple-cset-xor! inversion-list-xor))

(define char-set-xor
  (make-char-set-op char-set-xor! make-empty-char-set))

(define (char-set-diff+intersection! cs1 cs2 . csets)
  (call-with-values
      (lambda () (apply simple-cset-diff+intersection!
			(char-set-simple cs1) (char-set-simple cs2)
			(map char-set-simple csets)))
    (lambda (simple-diff simple-intersection)
      (set-char-set-simple! cs1 simple-diff)
      (set-char-set-simple! cs2 simple-intersection)
      (set-char-set-i-list! cs1
			    (apply inversion-list-difference
				   (char-set-i-list cs1)
				   (char-set-i-list cs2)
				   (map char-set-i-list csets)))
      (set-char-set-i-list! cs2 (inversion-list-intersection
				 (char-set-i-list cs1)
				 (apply inversion-list-union
					(char-set-i-list cs2)
					(map char-set-i-list csets))))
      (values cs1 cs2))))

(define (char-set-diff+intersection cs1 . csets)
  (apply char-set-diff+intersection!
	 (char-set-copy cs1)
	 (make-empty-char-set)
	 csets))

;; Byte vector utilities

(define (byte-vector-copy b)
  (let* ((size (byte-vector-length b))
	 (result (make-byte-vector size 0)))
    (copy-bytes! b 0 result 0 size)
    result))

;;; Apply P to each index and its char code in S: (P I VAL).
;;; Used by the set-algebra ops.

(define (byte-vector-iter p s)
  (let loop ((i (- (byte-vector-length s) 1)))
    (if (>= i 0)
	(begin
	  (p i (byte-vector-ref s i))
	  (loop (- i 1))))))

(define (byte-vector=? b1 b2)
  (let ((size-1 (byte-vector-length b1))
	(size-2 (byte-vector-length b2)))
    (and (= size-1 size-2)
	 (let loop ((i 0))
	   (cond
	    ((>= i size-1) #t)
	    ((= (byte-vector-ref b1 i) (byte-vector-ref b2 i))
	     (loop (+ 1 i)))
	    (else
	      #f))))))

;; Utility for srfi-14-base-char-sets.scm, which follows

; The range vector is an even-sized vector with [lower, upper)
; pairs.

(define (range-vector->char-set range-vector)
  (let ((size (vector-length range-vector))
	(simple-cset (make-empty-simple-cset)))

    (let loop ((index 0) (ranges '()))
      (if (>= index size)
	  (make-char-set simple-cset
			 (apply ranges->inversion-list
				*simple-cset-boundary* (+ 1 #x10ffff)
				ranges))
	  (let ((lower (vector-ref range-vector index))
		(upper (vector-ref range-vector (+ 1 index))))
	    
	    (define (fill-simple-cset! lower upper)
	      (let loop ((scalar-value lower))
		(if (< scalar-value upper)
		    (begin
		      (simple-cset-adjoin-code! simple-cset scalar-value)
		      (loop (+ 1 scalar-value))))))

	    
	    (cond
	     ((>= lower *simple-cset-boundary*)
	      (loop (+ 2 index) (cons (cons lower upper) ranges)))
	     ((< upper *simple-cset-boundary*)
	      (fill-simple-cset! lower upper)
	      (loop (+ 2 index) ranges))
	     (else
	      (fill-simple-cset! lower *simple-cset-boundary*)
	      (loop (+ 2 index)
		    (cons (cons *simple-cset-boundary* upper) ranges)))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Automatically generated by WRITE-SRFI-14-BASE-CHAR-SETS; do not edit.

(define char-set:lower-case (range-vector->char-set '#(97 123 181 182 223 247 248 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 378 379 380 381 382 385 387 388 389 390 392 393 396 398 402 403 405 406 409 412 414 415 417 418 419 420 421 422 424 425 427 428 429 430 432 433 436 437 438 439 441 443 445 446 447 448 454 455 457 458 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 497 499 500 501 502 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 570 572 573 575 577 578 579 583 584 585 586 587 588 589 590 591 610 611 618 619 628 629 630 631 641 642 655 656 660 666 667 669 671 672 673 675 684 686 688 837 838 867 880 891 894 912 913 940 975 976 978 981 983 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1011 1013 1014 1016 1017 1019 1020 1072 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1377 1416 6448 6457 7426 7427 7432 7434 7441 7445 7446 7448 7453 7456 7522 7544 7545 7547 7548 7550 7551 7579 7626 7627 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7836 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7936 7944 7952 7958 7968 7976 7984 7992 8000 8006 8016 8024 8032 8040 8048 8062 8064 8072 8080 8088 8096 8104 8112 8117 8118 8120 8126 8127 8130 8133 8134 8136 8144 8148 8150 8152 8160 8168 8178 8181 8182 8184 64256 64263 64275 64280 65345 65371 66600 66640 917601 917627)))
(define char-set:upper-case (range-vector->char-set '#(65 91 192 215 216 223 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 378 379 380 381 382 385 387 388 389 390 392 393 396 398 402 403 405 406 409 412 414 415 417 418 419 420 421 422 424 425 426 428 429 430 432 433 436 437 438 439 441 444 445 452 453 455 456 458 459 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 497 498 500 501 502 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 570 572 573 575 577 578 579 583 584 585 586 587 588 589 590 591 902 903 904 907 908 909 910 912 913 930 931 940 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1012 1013 1015 1016 1017 1019 1021 1072 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1329 1367 4256 4294 7547 7548 7550 7551 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7944 7952 7960 7966 7976 7984 7992 8000 8008 8014 8025 8026 8027 8028 8029 8030 8031 8032 8040 8048 8072 8080 8088 8096 8104 8112 8120 8125 8136 8141 8152 8156 8168 8173 8184 8189 65313 65339 66560 66600 917569 917595)))
(define char-set:title-case (range-vector->char-set '#(453 454 456 457 459 460 498 499 8072 8080 8088 8096 8104 8112 8124 8125 8140 8141 8188 8189)))
(define char-set:letter (range-vector->char-set '#(65 91 97 123 170 171 181 182 186 187 192 215 216 247 248 706 710 722 736 741 750 751 890 894 902 903 904 907 908 909 910 930 931 975 976 1014 1015 1154 1162 1300 1329 1367 1369 1370 1377 1416 1488 1515 1520 1523 1569 1595 1600 1611 1646 1648 1649 1748 1749 1750 1765 1767 1774 1776 1786 1789 1791 1792 1808 1809 1810 1840 1869 1902 1920 1958 1969 1970 1994 2027 2036 2038 2042 2043 2308 2362 2365 2366 2384 2385 2392 2402 2427 2432 2437 2445 2447 2449 2451 2473 2474 2481 2482 2483 2486 2490 2493 2494 2510 2511 2524 2526 2527 2530 2544 2546 2565 2571 2575 2577 2579 2601 2602 2609 2610 2612 2613 2615 2616 2618 2649 2653 2654 2655 2674 2677 2693 2702 2703 2706 2707 2729 2730 2737 2738 2740 2741 2746 2749 2750 2768 2769 2784 2786 2821 2829 2831 2833 2835 2857 2858 2865 2866 2868 2869 2874 2877 2878 2908 2910 2911 2914 2929 2930 2947 2948 2949 2955 2958 2961 2962 2966 2969 2971 2972 2973 2974 2976 2979 2981 2984 2987 2990 3002 3077 3085 3086 3089 3090 3113 3114 3124 3125 3130 3168 3170 3205 3213 3214 3217 3218 3241 3242 3252 3253 3258 3261 3262 3294 3295 3296 3298 3333 3341 3342 3345 3346 3369 3370 3386 3424 3426 3461 3479 3482 3506 3507 3516 3517 3518 3520 3527 3585 3633 3634 3636 3648 3655 3713 3715 3716 3717 3719 3721 3722 3723 3725 3726 3732 3736 3737 3744 3745 3748 3749 3750 3751 3752 3754 3756 3757 3761 3762 3764 3773 3774 3776 3781 3782 3783 3804 3806 3840 3841 3904 3912 3913 3947 3976 3980 4096 4130 4131 4136 4137 4139 4176 4182 4256 4294 4304 4347 4348 4349 4352 4442 4447 4515 4520 4602 4608 4681 4682 4686 4688 4695 4696 4697 4698 4702 4704 4745 4746 4750 4752 4785 4786 4790 4792 4799 4800 4801 4802 4806 4808 4823 4824 4881 4882 4886 4888 4955 4992 5008 5024 5109 5121 5741 5743 5751 5761 5787 5792 5867 5888 5901 5902 5906 5920 5938 5952 5970 5984 5997 5998 6001 6016 6068 6103 6104 6108 6109 6176 6264 6272 6313 6400 6429 6480 6510 6512 6517 6528 6570 6593 6600 6656 6679 6917 6964 6981 6988 7424 7616 7680 7836 7840 7930 7936 7958 7960 7966 7968 8006 8008 8014 8016 8024 8025 8026 8027 8028 8029 8030 8031 8062 8064 8117 8118 8125 8126 8127 8130 8133 8134 8141 8144 8148 8150 8156 8160 8173 8178 8181 8182 8189 8305 8306 8319 8320 8336 8341 8450 8451 8455 8456 8458 8468 8469 8470 8473 8478 8484 8485 8486 8487 8488 8489 8490 8494 8495 8506 8508 8512 8517 8522 8526 8527 8579 8581 11264 11311 11312 11359 11360 11373 11380 11384 11392 11493 11520 11558 11568 11622 11631 11632 11648 11671 11680 11687 11688 11695 11696 11703 11704 11711 11712 11719 11720 11727 11728 11735 11736 11743 12293 12295 12337 12342 12347 12349 12353 12439 12445 12448 12449 12539 12540 12544 12549 12589 12593 12687 12704 12728 12784 12800 13312 19894 19968 40892 40960 42125 42775 42779 43008 43010 43011 43014 43015 43019 43020 43043 43072 43124 44032 55204 63744 64046 64048 64107 64112 64218 64256 64263 64275 64280 64285 64286 64287 64297 64298 64311 64312 64317 64318 64319 64320 64322 64323 64325 64326 64434 64467 64830 64848 64912 64914 64968 65008 65020 65136 65141 65142 65277 65313 65339 65345 65371 65382 65471 65474 65480 65482 65488 65490 65496 65498 65501 65536 65548 65549 65575 65576 65595 65596 65598 65599 65614 65616 65630 65664 65787 66304 66335 66352 66369 66370 66378 66432 66462 66464 66500 66504 66512 66560 66718 67584 67590 67592 67593 67594 67638 67639 67641 67644 67645 67647 67648 67840 67862 68096 68097 68112 68116 68117 68120 68121 68148 73728 74607 119808 119893 119894 119965 119966 119968 119970 119971 119973 119975 119977 119981 119982 119994 119995 119996 119997 120004 120005 120070 120071 120075 120077 120085 120086 120093 120094 120122 120123 120127 120128 120133 120134 120135 120138 120145 120146 120486 120488 120513 120514 120539 120540 120571 120572 120597 120598 120629 120630 120655 120656 120687 120688 120713 120714 120745 120746 120771 120772 120780 131072 173783 194560 195102)))
(define char-set:digit (range-vector->char-set '#(48 58 1632 1642 1776 1786 1984 1994 2406 2416 2534 2544 2662 2672 2790 2800 2918 2928 3046 3056 3174 3184 3302 3312 3430 3440 3664 3674 3792 3802 3872 3882 4160 4170 6112 6122 6160 6170 6470 6480 6608 6618 6992 7002 65296 65306 66720 66730 120782 120832)))
(define char-set:mark (range-vector->char-set '#(768 880 1155 1159 1160 1162 1425 1470 1471 1472 1473 1475 1476 1478 1479 1480 1552 1558 1611 1631 1648 1649 1750 1757 1758 1765 1767 1769 1770 1774 1809 1810 1840 1867 1958 1969 2027 2036 2305 2308 2364 2365 2366 2382 2385 2389 2402 2404 2433 2436 2492 2493 2494 2501 2503 2505 2507 2510 2519 2520 2530 2532 2561 2564 2620 2621 2622 2627 2631 2633 2635 2638 2672 2674 2689 2692 2748 2749 2750 2758 2759 2762 2763 2766 2786 2788 2817 2820 2876 2877 2878 2884 2887 2889 2891 2894 2902 2904 2946 2947 3006 3011 3014 3017 3018 3022 3031 3032 3073 3076 3134 3141 3142 3145 3146 3150 3157 3159 3202 3204 3260 3261 3262 3269 3270 3273 3274 3278 3285 3287 3298 3300 3330 3332 3390 3396 3398 3401 3402 3406 3415 3416 3458 3460 3530 3531 3535 3541 3542 3543 3544 3552 3570 3572 3633 3634 3636 3643 3655 3663 3761 3762 3764 3770 3771 3773 3784 3790 3864 3866 3893 3894 3895 3896 3897 3898 3902 3904 3953 3973 3974 3976 3984 3992 3993 4029 4038 4039 4140 4147 4150 4154 4182 4186 4959 4960 5906 5909 5938 5941 5970 5972 6002 6004 6070 6100 6109 6110 6155 6158 6313 6314 6432 6444 6448 6460 6576 6593 6600 6602 6679 6684 6912 6917 6964 6981 7019 7028 7616 7627 7678 7680 8400 8432 12330 12336 12441 12443 43010 43011 43014 43015 43019 43020 43043 43048 64286 64287 65024 65040 65056 65060 68097 68100 68101 68103 68108 68112 68152 68155 68159 68160 119141 119146 119149 119155 119163 119171 119173 119180 119210 119214 119362 119365 917760 918000)))
(define char-set:separator (range-vector->char-set '#(32 33 160 161 5760 5761 6158 6159 8192 8203 8232 8234 8239 8240 8287 8288 12288 12289)))
(define char-set:punctuation (range-vector->char-set '#(33 36 37 43 44 48 58 60 63 65 91 94 95 96 123 124 125 126 161 162 171 172 183 184 187 188 191 192 894 895 903 904 1370 1376 1417 1419 1470 1471 1472 1473 1475 1476 1478 1479 1523 1525 1548 1550 1563 1564 1566 1568 1642 1646 1748 1749 1792 1806 2039 2042 2404 2406 2416 2417 3572 3573 3663 3664 3674 3676 3844 3859 3898 3902 3973 3974 4048 4050 4170 4176 4347 4348 4961 4969 5741 5743 5787 5789 5867 5870 5941 5943 6100 6103 6104 6107 6144 6155 6468 6470 6622 6624 6686 6688 7002 7009 8208 8232 8240 8260 8261 8274 8275 8287 8317 8319 8333 8335 9001 9003 10088 10102 10181 10183 10214 10220 10627 10649 10712 10716 10748 10750 11513 11517 11518 11520 11776 11800 11804 11806 12289 12292 12296 12306 12308 12320 12336 12337 12349 12350 12448 12449 12539 12540 43124 43128 64830 64832 65040 65050 65072 65107 65108 65122 65123 65124 65128 65129 65130 65132 65281 65284 65285 65291 65292 65296 65306 65308 65311 65313 65339 65342 65343 65344 65371 65372 65373 65374 65375 65382 65792 65794 66463 66464 66512 66513 67871 67872 68176 68185 74864 74868)))
(define char-set:symbol (range-vector->char-set '#(36 37 43 44 60 63 94 95 96 97 124 125 126 127 162 170 172 173 174 178 180 181 182 183 184 185 215 216 247 248 706 710 722 736 741 750 751 768 884 886 900 902 1014 1015 1154 1155 1547 1548 1550 1552 1769 1770 1789 1791 2038 2039 2546 2548 2554 2555 2801 2802 2928 2929 3059 3067 3313 3315 3647 3648 3841 3844 3859 3864 3866 3872 3892 3893 3894 3895 3896 3897 4030 4038 4039 4045 4047 4048 4960 4961 5008 5018 6107 6108 6464 6465 6624 6656 7009 7019 7028 7037 8125 8126 8127 8130 8141 8144 8157 8160 8173 8176 8189 8191 8260 8261 8274 8275 8314 8317 8330 8333 8352 8374 8448 8450 8451 8455 8456 8458 8468 8469 8470 8473 8478 8484 8485 8486 8487 8488 8489 8490 8494 8495 8506 8508 8512 8517 8522 8526 8592 9001 9003 9192 9216 9255 9280 9291 9372 9450 9472 9885 9888 9907 9985 9989 9990 9994 9996 10024 10025 10060 10061 10062 10063 10067 10070 10071 10072 10079 10081 10088 10132 10133 10136 10160 10161 10175 10176 10181 10183 10187 10192 10214 10224 10627 10649 10712 10716 10748 10750 11035 11040 11044 11493 11499 11904 11930 11931 12020 12032 12246 12272 12284 12292 12293 12306 12308 12320 12321 12342 12344 12350 12352 12443 12445 12688 12690 12694 12704 12736 12752 12800 12831 12842 12868 12880 12881 12896 12928 12938 12977 12992 13055 13056 13312 19904 19968 42128 42183 42752 42775 42784 42786 43048 43052 64297 64298 65020 65022 65122 65123 65124 65127 65129 65130 65284 65285 65291 65292 65308 65311 65342 65343 65344 65345 65372 65373 65374 65375 65504 65511 65512 65519 65532 65534 65794 65795 65847 65856 65913 65930 118784 119030 119040 119079 119082 119141 119146 119149 119171 119173 119180 119210 119214 119262 119296 119362 119365 119366 119552 119639 120513 120514 120539 120540 120571 120572 120597 120598 120629 120630 120655 120656 120687 120688 120713 120714 120745 120746 120771 120772)))
(define char-set:space-separator (range-vector->char-set '#(32 33 160 161 5760 5761 6158 6159 8192 8203 8239 8240 8287 8288 12288 12289)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.

; This constructs the SRFI 14 char sets from thin air and what's defined in
; srfi-14-base-char-sets.scm.

; Defined there:
; lower-case, upper-case, title-case, letter, digit, punctuation, symbol

(define char-set:empty (char-set))
(define char-set:full (char-set-complement char-set:empty))

(define char-set:letter+digit
  (char-set-union char-set:letter char-set:digit))

(define char-set:graphic
  (char-set-union char-set:mark
		  char-set:letter
		  char-set:digit
		  char-set:symbol
		  char-set:punctuation))

(define char-set:whitespace
  (char-set-union char-set:separator
		  (list->char-set (map scalar-value->char
				       '(9 ; tab
					 10 ; newline
					 11 ; vtab
					 12 ; page
					 13 ; return
					 )))))


(define char-set:printing
  (char-set-union char-set:whitespace char-set:graphic))

(define char-set:iso-control
  (char-set-union (ucs-range->char-set 0 #x20)
		  (ucs-range->char-set #x7f #xa0)))

(define char-set:blank
  (char-set-union char-set:space-separator
		  (char-set (scalar-value->char 9)))) ; tab

(define char-set:ascii (ucs-range->char-set 0 128))
(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF"))
#|  
(make-char-set-immutable! char-set:empty)
(make-char-set-immutable! char-set:full)
(make-char-set-immutable! char-set:lower-case)
(make-char-set-immutable! char-set:upper-case)
(make-char-set-immutable! char-set:letter)
(make-char-set-immutable! char-set:digit)
(make-char-set-immutable! char-set:hex-digit)
(make-char-set-immutable! char-set:letter+digit)
(make-char-set-immutable! char-set:punctuation)
(make-char-set-immutable! char-set:symbol)
(make-char-set-immutable! char-set:graphic)
(make-char-set-immutable! char-set:whitespace)
(make-char-set-immutable! char-set:printing)
(make-char-set-immutable! char-set:blank)
(make-char-set-immutable! char-set:iso-control)
(make-char-set-immutable! char-set:ascii)
|#
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
) ; end library