statistics.ss
```#lang scheme/base
;;; PLT Scheme Science Collection
;;; statistics.ss
;;; Copyright (c) 2004-2008 M. Douglas Williams
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; version 2.1 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA.
;;;
;;; -------------------------------------------------------------------
;;;
;;; This modules implements various statistical functions.  It is based
;;; on the Statistics in the GNU Scientific Library, which is licensed
;;; under the GPL.
;;;
;;; Version  Date      Description
;;; 1.0.0    09/24/04  Marked as ready for Release 1.0.  (Doug
;;;                    Williams)
;;; 2.0.0    11/19/07  Added unchecked versions of functions.  (Doug
;;;                    Williams)
;;; 2.1.0    06/07/08  More V4.0 changes.  (Doug Williams)

(require (lib "contract.ss"))

;;; Predicates and/or flat contracts for contracts

;;; nonempty-vector-of-reals? : flat contract
;;; #t for nonempty vectors of real numbers, #f otherwise.  Safe for
;;; all values.
(define nonempty-vector-of-reals?
(flat-named-contract
"nonempty-vector-of-reals?"
(lambda (x)
(and (vector? x)
(> (vector-length x) 0)
(let ((n (vector-length x)))
(let/ec exit
(do ((i 0 (+ i 1)))
((= i n) #t)
(when (not (real? (vector-ref x i)))
(exit #f)))))))))

;;; sorted?: flat contract
;;; #t for sorted vectors, #f otherwise.  Will fail for vectors
;;; containing any element that is not a real number (because > will
;;; fail).
(define sorted?
(flat-named-contract
"sorted vector"
(lambda (x)
(and (vector? x)
(let ((n (vector-length x)))
(let/ec exit
(do ((i 0 (+ i 1)))
((>= i (- n 1)) #t)
(when (> (vector-ref x i)
(vector-ref x (+ i 1)))
(exit #f)))))))))

(provide
(rename-out (mean unchecked-mean)
(variance unchecked-variance)
(standard-deviation unchecked-standard-deviation)
(variance-with-fixed-mean unchecked-variance-with-fixed-mean)
(standard-deviation-with-fixed-mean unchecked-standard-deviation-with-fixed-mean)
(absolute-deviation unchecked-absolute-deviation)
(skew unchecked-skew)
(kurtosis unchecked-kurtosis)
(lag-1-autocorrelation unchecked-lag-1-autocorrelation)
(covariance unchecked-covariance)
(covariance-with-fixed-means unchecked-covariance-with-fixed-means)
(weighted-mean unchecked-weighted-mean)
(weighted-variance unchecked-weighted-variance)
(weighted-standard-deviation unchecked-weighted-standard-deviation)
(weighted-variance-with-fixed-mean unchecked-weighted-variance-with-fixed-mean)
(weighted-standard-deviation-with-fixed-mean unchecked-weighted-standard-deviation-with-fixed-mean)
(weighted-absolute-deviation unchecked-weighted-absolute-deviation)
(weighted-skew unchecked-weighted-skew)
(weighted-kurtosis unchecked-weighted-kurtosis)
(maximum unchecked-maximum)
(minimum unchecked-minimum)
(minimum-maximum unchecked-minimum-maximum)
(minimum-index unchecked-minimum-index)
(maximum-index unchecked-maximum-index)
(minimum-maximum-index unchecked-minimum-maximum-index)
(median-from-sorted-data unchecked-median-from-sorted-data)
(quantile-from-sorted-data unchecked-quantile-from-sorted-data)))

(provide/contract
(mean
(-> (vectorof real?) real?))
(variance
(case-> (-> (vectorof real?) real? (>=/c 0.0))
(-> (vectorof real?) (>=/c 0.0))))
(standard-deviation
(case-> (-> (vectorof real?) real? (>=/c 0.0))
(-> (vectorof real?) (>=/c 0.0))))
(variance-with-fixed-mean
(-> (vectorof real?) real? (>=/c 0.0)))
(standard-deviation-with-fixed-mean
(-> (vectorof real?) real? (>=/c 0.0)))
(absolute-deviation
(case-> (-> (vectorof real?) real? (>=/c 0.0))
(-> (vectorof real?) (>=/c 0.0))))
(skew
(case-> (-> (vectorof real?) real? (>=/c 0.0) real?)
(-> (vectorof real?) real?)))
(kurtosis
(case-> (-> (vectorof real?) real? (>=/c 0.0) real?)
(-> (vectorof real?) real?)))
(lag-1-autocorrelation
(case-> (-> nonempty-vector-of-reals? real? real?)
(-> nonempty-vector-of-reals? real?)))
(covariance
(case-> (->r ((data1 (vectorof real?))
(data2 (and/c (vectorof real?)
(lambda (x)
(= (vector-length data1)
(vector-length data2)))))
(mu1 real?)
(mu2 real?))
real?)
(->r ((data1 (vectorof real?))
(data2 (and/c (vectorof real?)
(lambda (x)
(= (vector-length data1)
(vector-length data2))))))
real?)))
(covariance-with-fixed-means
(->r ((data1 (vectorof real?))
(data2 (and/c (vectorof real?)
(lambda (x)
(= (vector-length data1)
(vector-length data2)))))
(mu1 real?)
(mu2 real?))
real?))
(weighted-mean
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
real?))
(weighted-variance
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0))
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
(>=/c 0.0))))
(weighted-standard-deviation
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0))
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
(>=/c 0.0))))
(weighted-variance-with-fixed-mean
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0)))
(weighted-standard-deviation-with-fixed-mean
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0)))
(weighted-absolute-deviation
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0))
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
(>=/c 0.0))))
(weighted-skew
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?)
(sigma (>=/c 0.0)))
real?)
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
real?)))
(weighted-kurtosis
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?)
(sigma (>=/c 0.0)))
real?)
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
real?)))
(maximum
(-> nonempty-vector-of-reals? real?))
(minimum
(-> nonempty-vector-of-reals? real?))
(minimum-maximum
(-> nonempty-vector-of-reals? (values real? real?)))
(maximum-index
(-> nonempty-vector-of-reals? natural-number/c))
(minimum-index
(-> nonempty-vector-of-reals? natural-number/c))
(minimum-maximum-index
(-> nonempty-vector-of-reals? (values natural-number/c natural-number/c)))
(median-from-sorted-data
(-> (and/c nonempty-vector-of-reals? sorted?) real?))
(quantile-from-sorted-data
(-> (and/c nonempty-vector-of-reals? sorted?) (real-in 0.0 1.0) real?)))

;;; Mean, Standard Deviation, and Variance

;;; mean: vector -> real
(define (mean data)
(let ((n (vector-length data))
(mu 0.0))
(do ((i 0 (+ i 1)))
((= i n) mu)
(set! mu (+ mu (/ (- (vector-ref data i) mu) (+ i 1)))))))

;;; variance: vector x real -> real
;;; variance: vector -> real
(define variance
(case-lambda
((data mu)
(let ((n (vector-length data)))
(* (variance-with-fixed-mean data mu)
(exact->inexact (/ n (- n 1))))))
((data)
(variance data (mean data)))))

;;; standard-deviation: vector x real -> real
;;; standard-deviation: vector -> real
(define standard-deviation
(case-lambda
((data mu)
(sqrt (variance data mu)))
((data)
(sqrt (variance data)))))

;;; variance-with-fixed-mean: vector x real -> real
(define (variance-with-fixed-mean data mu)
(let ((n (vector-length data))
(var 0.0))
(do ((i 0 (+ i 1)))
((= i n) var)
(let ((delta (- (vector-ref data i) mu)))
(set! var (+ var (/ (- (* delta delta) var) (+ i 1))))))))

;;; standard-deviation-with-fixed-mean: vector x real -> real
(define (standard-deviation-with-fixed-mean data mu)
(sqrt (variance-with-fixed-mean data mu)))

;;; Absolute Deviation

;;; absolute-deviation: vector x real -> real
;;; absolute-deviation: vector -> real
(define absolute-deviation
(case-lambda
((data mu)
(let ((n (vector-length data))
(sum 0.0))
(do ((i 0 (+ i 1)))
((= i n) (/ sum n))
(let ((delta (abs (- (vector-ref data i) mu))))
(set! sum (+ sum delta))))))
((data)
(absolute-deviation data (mean data)))))

;;; Higher Moments (Skewness and Kurtosis)

;;; skew: vector x real x real -> real
;;; skew: vector -> real
(define skew
(case-lambda
((data mu sigma)
(let ((n (vector-length data))
(skew 0.0))
(do ((i 0 (+ i 1)))
((= i n) skew)
(let ((x (/ (- (vector-ref data i) mu) sigma)))
(set! skew (+ skew (/ (- (* x x x) skew) (+ i 1))))))))
((data)
(let* ((mu (mean data))
(sigma (standard-deviation data mu)))
(skew data mu sigma)))))

;;; kurtosis: vector x real x real -> real
;;; kurtosis: vector -> real
(define kurtosis
(case-lambda
((data mu sigma)
(let ((n (vector-length data))
(avg 0.0))
(do ((i 0 (+ i 1)))
((= i n) (- avg 3.0))
(let ((x (/ (- (vector-ref data i) mu) sigma)))
(set! avg (+ avg (/ (- (* x x x x) avg) (+ i 1))))))))
((data)
(let* ((mu (mean data))
(sigma (standard-deviation data mu)))
(kurtosis data mu sigma)))))

;;; Autocorrelation

;;; lag-1-autocorrelation: vector x real -> real
;;; lag-1-autocorrelation: vector -> real
(define lag-1-autocorrelation
(case-lambda
((data mu)
(let ((n (vector-length data))
(q 0.0)
(v (* (- (vector-ref data 0) mu) (- (vector-ref data 0) mu))))
(do ((i 1 (+ i 1)))
((= i n) (/ q v))
(let ((delta0 (- (vector-ref data (- i 1)) mu))
(delta1 (- (vector-ref data i) mu)))
(set! q (+ q (/ (- (* delta0 delta1) q) (+ i 1))))
(set! v (+ v (/ (- (* delta1 delta1) v) (+ i 1))))))))
((data)
(lag-1-autocorrelation data (mean data)))))

;;; Covariance

;;; covariance: vector x vector x real x real -> real
;;; covariance: vector x vector -> real
(define covariance
(case-lambda
((data1 data2 mu1 mu2)
(let ((n (vector-length data1)))
(* (covariance-with-fixed-means data1 data2 mu1 mu2)
(exact->inexact (/ n (- n 1))))))
((data1 data2)
(covariance data1 data2 (mean data1) (mean data2)))))

;;; covariance-with-fixed-means: vector x vector x real x real -> real
(define (covariance-with-fixed-means data1 data2 mu1 mu2)
(let ((n (vector-length data1))
(covar 0.0))
(do ((i 0 (+ i 1)))
((= i n) covar)
(let ((delta1 (- (vector-ref data1 i) mu1))
(delta2 (- (vector-ref data2 i) mu2)))
(set! covar (+ covar (/ (- (* delta1 delta2) covar) (+ i 1))))))))

;;; Weighted Samples

;;; weighted-mean: vector x vector -> real
(define (weighted-mean w data)
(let ((n (vector-length data))
(wmu 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wmu)
(let ((wi (vector-ref w i)))
(when (> wi 0.0)
(set! wsum (+ wsum wi))
(set! wmu (+ wmu (* (- (vector-ref data i) wmu)
(/ wi wsum)))))))))

;;; scale-factor: vector
;;; Internal function to compute the scale factor to be applied to
;;; weighted sample statistics.
(define (scale-factor w)
(let ((n (vector-length w))
(a 0.0)
(b 0.0))
(do ((i 0 (+ i 1)))
((= i n) (/ (* a a) (- (* a a) b)))
(let ((wi (vector-ref w i)))
(when (> wi 0.0)
(set! a (+ a wi))
(set! b (+ b (* wi wi))))))))

;;; weighted-variance: vector x vector x real -> real
;;; weighted-variance: vector x vector -> real
(define weighted-variance
(case-lambda
((w data wmu)
(* (scale-factor w)
(weighted-variance-with-fixed-mean w data wmu)))
((w data)
(weighted-variance w data (weighted-mean w data)))))

;;; weighted-standard-deviation: vector x vector x real -> real
;;; weighted-standard-deviation: vector x vector -> real
(define weighted-standard-deviation
(case-lambda
((w data wmu)
(sqrt (weighted-variance w data wmu)))
((w data)
(sqrt (weighted-variance w data)))))

;;; weighted-variance-with-fixed-mean: vector x vector x real -> real
(define (weighted-variance-with-fixed-mean w data wmu)
(let ((n (vector-length data))
(wvar 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wvar)
(let ((wi (vector-ref w i)))
(when (> wi 0.0)
(let ((delta (- (vector-ref data i) wmu)))
(set! wsum (+ wsum wi))
(set! wvar (+ wvar (* (- (* delta delta) wvar)
(/ wi wsum))))))))))

;;; weighted-standard-deviation-with-fixed-mean:
;;;   vector x vector x real -> real
(define (weighted-standard-deviation-with-fixed-mean w data wmu)
(sqrt (weighted-variance-with-fixed-mean w data wmu)))

;;; weighted-absolute-deviation: vector x vector x real -> real
;;; weighted-absoulte-deviation: vector x vector -> real
(define weighted-absolute-deviation
(case-lambda
((w data wmu)
(let ((n (vector-length data))
(wabsdev 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wabsdev)
(let ((wi (vector-ref w i)))
(when (> wi 0.0)
(let ((delta (abs (- (vector-ref data i) wmu))))
(set! wsum (+ wsum wi))
(set! wabsdev (+ wabsdev (* (- delta wabsdev)
(/ wi wsum))))))))))
((w data)
(weighted-absolute-deviation w data (weighted-mean w data)))))

;;; weighted-skew: vector x vector x real x real -> real
;;; weighted-skew: vector x vector -> real
(define weighted-skew
(case-lambda
((w data wmu wsigma)
(let ((n (vector-length data))
(wskew 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wskew)
(let ((wi (vector-ref w i)))
(when (> wi 0.0)
(let ((x (/ (- (vector-ref data i) wmu) wsigma)))
(set! wsum (+ wsum wi))
(set! wskew (+ wskew (* (- (* x x x) wskew)
(/ wi wsum))))))))))
((w data)
(let* ((wmu (weighted-mean w data))
(wsigma (weighted-standard-deviation w data wmu)))
(weighted-skew w data wmu wsigma)))))

;;; weighted-kurtosis: vector x vector x real x real -> real
;;; weighted-kurtosis: vector x vector -> real
(define weighted-kurtosis
(case-lambda
((w data wmu wsigma)
(let ((n (vector-length data))
(wavg 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) (- wavg 3.0))
(let ((wi (vector-ref w i)))
(when (> wi 0.0)
(let ((x (/ (- (vector-ref data i) wmu) wsigma)))
(set! wsum (+ wsum wi))
(set! wavg (+ wavg (* (- (* x x x x) wavg)
(/ wi wsum))))))))))
((w data)
(let* ((wmu (weighted-mean w data))
(wsigma (weighted-standard-deviation w data wmu)))
(weighted-kurtosis w data wmu wsigma)))))

;;; Maximum and Minimum Values

;;; minimum-maximum-and-indices:
;;;   vector -> number x number x integer x integer
(define (minimum-maximum-and-indices data)
(let ((n (vector-length data))
(dmin (vector-ref data 0))
(dmax (vector-ref data 0))
(dmin-ndx 0)
(dmax-ndx 0))
(do ((i 0 (+ i 1)))
((= i n) (values dmin dmax dmin-ndx dmax-ndx))
(let ((di (vector-ref data i)))
(when (< di dmin)
(set! dmin di)
(set! dmin-ndx i))
(when (> di dmax)
(set! dmax di)
(set! dmax-ndx i))))))

;;; maximum: vector -> number
(define (maximum data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmax))

;;; minimum: vector -> number
(define (minimum data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmin))

;;; minimum-maximum: vector -> number x number
(define (minimum-maximum data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
(values dmin dmax)))

;;; maximum-index: vector -> integer
(define (maximum-index data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmax-ndx))

;;; minimum-index: vector -> integer
(define (minimum-index data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmin-ndx))

;;; minimum-maximum-index: vector -> integer x integer
(define (minimum-maximum-index data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
(values dmin-ndx dmax-ndx)))

;;; Median and Percentiles

;;; median-from-sorted-data: sorted-vector -> real
(define (median-from-sorted-data sorted-data)
(let* ((n (vector-length sorted-data))
(lhs (quotient (- n 1) 2))
(rhs (quotient n 2)))
(if (= lhs rhs)
(vector-ref sorted-data lhs)
(/ (+ (vector-ref sorted-data lhs)
(vector-ref sorted-data rhs))
2.0))))

;;; quantile: sorted-vector x real -> real
(define (quantile-from-sorted-data sorted-data f)
(let* ((n (vector-length sorted-data))
(index (* f (- n 1)))
(lhs (inexact->exact (truncate index)))
(delta (- index lhs)))
(if (= lhs (- n 1))
(vector-ref sorted-data lhs)
(+ (* (- 1.0 delta) (vector-ref sorted-data lhs))
(* delta (vector-ref sorted-data (+ lhs 1)))))))```