statistics.ss
```;;; PLT Scheme Science Collection
;;; statistics.ss
;;; Copyright (c) 2004 M. Douglas Williams
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 2.1 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA.
;;;
;;; -------------------------------------------------------------------
;;;
;;; This modules implements various statistical functions.  It is based
;;; on the Statistics in the GNU Scientific Library, which is licensed
;;; under the GPL.
;;;
;;; Version  Date      Description
;;; 1.0.0    09/24/04  Marked as ready for Release 1.0.  (Doug
;;;                    Williams)

(module statistics mzscheme

(require (lib "contract.ss"))

;; Predicates and/or flat contracts for contracts

;; nonempty-vector-of-reals? : flat contract
;; #t for nonempty vectors of real numbers, #f otherwise.  Safe for
;; all values.
(define nonempty-vector-of-reals?
(flat-named-contract "nonempty-vector-of-reals?"
(lambda (x)
(and (vector? x)
(> (vector-length x) 0)
(let ((n (vector-length x)))
(let/ec exit
(do ((i 0 (+ i 1)))
((= i n) #t)
(if (not (real? (vector-ref x i)))
(exit #f)))))))))

;; sorted?: flat contract
;; #t for sorted vectors, #f otherwise.  Will fail for vectors
;; containing any element that is not a real number (because > will
;; fail).
(define sorted?
(flat-named-contract "sorted vector"
(lambda (x)
(and (vector? x)
(let ((n (vector-length x)))
(let/ec exit
(do ((i 0 (+ i 1)))
((>= i (- n 1)) #t)
(if (> (vector-ref x i)
(vector-ref x (+ i 1)))
(exit #f)))))))))

(provide/contract
(mean
(-> (vectorof real?) real?))
(variance
(case-> (-> (vectorof real?) real? (>=/c 0.0))
(-> (vectorof real?) (>=/c 0.0))))
(standard-deviation
(case-> (-> (vectorof real?) real? (>=/c 0.0))
(-> (vectorof real?) (>=/c 0.0))))
(variance-with-fixed-mean
(-> (vectorof real?) real? (>=/c 0.0)))
(standard-deviation-with-fixed-mean
(-> (vectorof real?) real? (>=/c 0.0)))
(absolute-deviation
(case-> (-> (vectorof real?) real? (>=/c 0.0))
(-> (vectorof real?) (>=/c 0.0))))
(skew
(case-> (-> (vectorof real?) real? (>=/c 0.0) real?)
(-> (vectorof real?) real?)))
(kurtosis
(case-> (-> (vectorof real?) real? (>=/c 0.0) real?)
(-> (vectorof real?) real?)))
(lag-1-autocorrelation
(case-> (-> nonempty-vector-of-reals? real? real?)
(-> nonempty-vector-of-reals? real?)))
(covariance
(case-> (->r ((data1 (vectorof real?))
(data2 (and/c (vectorof real?)
(lambda (x)
(= (vector-length data1)
(vector-length data2)))))
(mu1 real?)
(mu2 real?))
real?)
(->r ((data1 (vectorof real?))
(data2 (and/c (vectorof real?)
(lambda (x)
(= (vector-length data1)
(vector-length data2))))))
real?)))
(covariance-with-fixed-means
(->r ((data1 (vectorof real?))
(data2 (and/c (vectorof real?)
(lambda (x)
(= (vector-length data1)
(vector-length data2)))))
(mu1 real?)
(mu2 real?))
real?))
(weighted-mean
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
real?))
(weighted-variance
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0))
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
(>=/c 0.0))))
(weighted-standard-deviation
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0))
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
(>=/c 0.0))))
(weighted-variance-with-fixed-mean
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0)))
(weighted-standard-deviation-with-fixed-mean
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0)))
(weighted-absolute-deviation
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?))
(>=/c 0.0))
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
(>=/c 0.0))))
(weighted-skew
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?)
(sigma (>=/c 0.0)))
real?)
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
real?)))
(weighted-kurtosis
(case-> (->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data)))))
(mu real?)
(sigma (>=/c 0.0)))
real?)
(->r ((w (vectorof real?))
(data (and/c (vectorof real?)
(lambda (x)
(= (vector-length w)
(vector-length data))))))
real?)))
(maximum
(-> nonempty-vector-of-reals? real?))
(minimum
(-> nonempty-vector-of-reals? real?))
(minimum-maximum
(-> nonempty-vector-of-reals? (values real? real?)))
(maximum-index
(-> nonempty-vector-of-reals? natural-number/c))
(minimum-index
(-> nonempty-vector-of-reals? natural-number/c))
(minimum-maximum-index
(-> nonempty-vector-of-reals? (values natural-number/c natural-number/c)))
(median-from-sorted-data
(-> (and/c nonempty-vector-of-reals? sorted?) real?))
(quantile-from-sorted-data
(-> (and/c nonempty-vector-of-reals? sorted?) (real-in 0.0 1.0) real?)))

;; Mean, Standard Deviation, and Variance

;; mean: vector -> real
(define (mean data)
(let ((n (vector-length data))
(mu 0.0))
(do ((i 0 (+ i 1)))
((= i n) mu)
(set! mu (+ mu (/ (- (vector-ref data i) mu) (+ i 1)))))))

;; variance: vector x real -> real
;; variance: vector -> real
(define variance
(case-lambda
((data mu)
(let ((n (vector-length data)))
(* (variance-with-fixed-mean data mu)
(exact->inexact (/ n (- n 1))))))
((data)
(variance data (mean data)))))

;; standard-deviation: vector x real -> real
;; standard-deviation: vector -> real
(define standard-deviation
(case-lambda
((data mu)
(sqrt (variance data mu)))
((data)
(sqrt (variance data)))))

;; variance-with-fixed-mean: vector x real -> real
(define (variance-with-fixed-mean data mu)
(let ((n (vector-length data))
(var 0.0))
(do ((i 0 (+ i 1)))
((= i n) var)
(let ((delta (- (vector-ref data i) mu)))
(set! var (+ var (/ (- (* delta delta) var) (+ i 1))))))))

;; standard-deviation-with-fixed-mean: vector x real -> real
(define (standard-deviation-with-fixed-mean data mu)
(sqrt (variance-with-fixed-mean data mu)))

;; Absolute Deviation

;; absolute-deviation: vector x real -> real
;; absolute-deviation: vector -> real
(define absolute-deviation
(case-lambda
((data mu)
(let ((n (vector-length data))
(sum 0.0))
(do ((i 0 (+ i 1)))
((= i n) (/ sum n))
(let ((delta (abs (- (vector-ref data i) mu))))
(set! sum (+ sum delta))))))
((data)
(absolute-deviation data (mean data)))))

;; Higher Moments (Skewness and Kurtosis)

;; skew: vector x real x real -> real
;; skew: vector -> real
(define skew
(case-lambda
((data mu sigma)
(let ((n (vector-length data))
(skew 0.0))
(do ((i 0 (+ i 1)))
((= i n) skew)
(let ((x (/ (- (vector-ref data i) mu) sigma)))
(set! skew (+ skew (/ (- (* x x x) skew) (+ i 1))))))))
((data)
(let* ((mu (mean data))
(sigma (standard-deviation data mu)))
(skew data mu sigma)))))

;; kurtosis: vector x real x real -> real
;; kurtosis: vector -> real
(define kurtosis
(case-lambda
((data mu sigma)
(let ((n (vector-length data))
(avg 0.0))
(do ((i 0 (+ i 1)))
((= i n) (- avg 3.0))
(let ((x (/ (- (vector-ref data i) mu) sigma)))
(set! avg (+ avg (/ (- (* x x x x) avg) (+ i 1))))))))
((data)
(let* ((mu (mean data))
(sigma (standard-deviation data mu)))
(kurtosis data mu sigma)))))

;; Autocorrelation

;; lag-1-autocorrelation: vector x real -> real
;; lag-1-autocorrelation: vector -> real
(define lag-1-autocorrelation
(case-lambda
((data mu)
(let ((n (vector-length data))
(q 0.0)
(v (* (- (vector-ref data 0) mu) (- (vector-ref data 0) mu))))
(do ((i 1 (+ i 1)))
((= i n) (/ q v))
(let ((delta0 (- (vector-ref data (- i 1)) mu))
(delta1 (- (vector-ref data i) mu)))
(set! q (+ q (/ (- (* delta0 delta1) q) (+ i 1))))
(set! v (+ v (/ (- (* delta1 delta1) v) (+ i 1))))))))
((data)
(lag-1-autocorrelation data (mean data)))))

;; Covariance

;; covariance: vector x vector x real x real -> real
;; covariance: vector x vector -> real
(define covariance
(case-lambda
((data1 data2 mu1 mu2)
(let ((n (vector-length data1)))
(* (covariance-with-fixed-means data1 data2 mu1 mu2)
(exact->inexact (/ n (- n 1))))))
((data1 data2)
(covariance data1 data2 (mean data1) (mean data2)))))

;; covariance-with-fixed-means: vector x vector x real x real -> real
(define (covariance-with-fixed-means data1 data2 mu1 mu2)
(let ((n (vector-length data1))
(covar 0.0))
(do ((i 0 (+ i 1)))
((= i n) covar)
(let ((delta1 (- (vector-ref data1 i) mu1))
(delta2 (- (vector-ref data2 i) mu2)))
(set! covar (+ covar (/ (- (* delta1 delta2) covar) (+ i 1))))))))

;; Weighted Samples

;; weighted-mean: vector x vector -> real
(define (weighted-mean w data)
(let ((n (vector-length data))
(wmu 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wmu)
(let ((wi (vector-ref w i)))
(if (> wi 0.0)
(begin
(set! wsum (+ wsum wi))
(set! wmu (+ wmu (* (- (vector-ref data i) wmu)
(/ wi wsum))))))))))

;; scale-factor: vector
;; Internal function to compute the scale factor to be applied to
;; weighted sample statistics.
(define (scale-factor w)
(let ((n (vector-length w))
(a 0.0)
(b 0.0))
(do ((i 0 (+ i 1)))
((= i n) (/ (* a a) (- (* a a) b)))
(let ((wi (vector-ref w i)))
(if (> wi 0.0)
(begin
(set! a (+ a wi))
(set! b (+ b (* wi wi)))))))))

;; weighted-variance: vector x vector x real -> real
;; weighted-variance: vector x vector -> real
(define weighted-variance
(case-lambda
((w data wmu)
(* (scale-factor w)
(weighted-variance-with-fixed-mean w data wmu)))
((w data)
(weighted-variance w data (weighted-mean w data)))))

;; weighted-standard-deviation: vector x vector x real -> real
;; weighted-standard-deviation: vector x vector -> real
(define weighted-standard-deviation
(case-lambda
((w data wmu)
(sqrt (weighted-variance w data wmu)))
((w data)
(sqrt (weighted-variance w data)))))

;; weighted-variance-with-fixed-mean: vector x vector x real -> real
(define (weighted-variance-with-fixed-mean w data wmu)
(let ((n (vector-length data))
(wvar 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wvar)
(let ((wi (vector-ref w i)))
(if (> wi 0.0)
(let ((delta (- (vector-ref data i) wmu)))
(set! wsum (+ wsum wi))
(set! wvar (+ wvar (* (- (* delta delta) wvar)
(/ wi wsum))))))))))

;; weighted-standard-deviation-with-fixed-mean:
;;   vector x vector x real -> real
(define (weighted-standard-deviation-with-fixed-mean w data wmu)
(sqrt (weighted-variance-with-fixed-mean w data wmu)))

;; weighted-absolute-deviation: vector x vector x real -> real
;; weighted-absoulte-deviation: vector x vector -> real
(define weighted-absolute-deviation
(case-lambda
((w data wmu)
(let ((n (vector-length data))
(wabsdev 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wabsdev)
(let ((wi (vector-ref w i)))
(if (> wi 0.0)
(let ((delta (abs (- (vector-ref data i) wmu))))
(set! wsum (+ wsum wi))
(set! wabsdev (+ wabsdev (* (- delta wabsdev)
(/ wi wsum))))))))))
((w data)
(weighted-absolute-deviation w data (weighted-mean w data)))))

;; weighted-skew: vector x vector x real x real -> real
;; weighted-skew: vector x vector -> real
(define weighted-skew
(case-lambda
((w data wmu wsigma)
(let ((n (vector-length data))
(wskew 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) wskew)
(let ((wi (vector-ref w i)))
(if (> wi 0.0)
(let ((x (/ (- (vector-ref data i) wmu) wsigma)))
(set! wsum (+ wsum wi))
(set! wskew (+ wskew (* (- (* x x x) wskew)
(/ wi wsum))))))))))
((w data)
(let* ((wmu (weighted-mean w data))
(wsigma (weighted-standard-deviation w data wmu)))
(weighted-skew w data wmu wsigma)))))

;; weighted-kurtosis: vector x vector x real x real -> real
;; weighted-kurtosis: vector x vector -> real
(define weighted-kurtosis
(case-lambda
((w data wmu wsigma)
(let ((n (vector-length data))
(wavg 0.0)
(wsum 0.0))
(do ((i 0 (+ i 1)))
((= i n) (- wavg 3.0))
(let ((wi (vector-ref w i)))
(if (> wi 0.0)
(let ((x (/ (- (vector-ref data i) wmu) wsigma)))
(set! wsum (+ wsum wi))
(set! wavg (+ wavg (* (- (* x x x x) wavg)
(/ wi wsum))))))))))
((w data)
(let* ((wmu (weighted-mean w data))
(wsigma (weighted-standard-deviation w data wmu)))
(weighted-kurtosis w data wmu wsigma)))))

;; Maximum and Minimum Values

;; minimum-maximum-and-indices:
;;   vector -> number x number x integer x integer
(define (minimum-maximum-and-indices data)
(let ((n (vector-length data))
(dmin (vector-ref data 0))
(dmax (vector-ref data 0))
(dmin-ndx 0)
(dmax-ndx 0))
(do ((i 0 (+ i 1)))
((= i n) (values dmin dmax dmin-ndx dmax-ndx))
(let ((di (vector-ref data i)))
(if (< di dmin)
(begin
(set! dmin di)
(set! dmin-ndx i)))
(if (> di dmax)
(begin
(set! dmax di)
(set! dmax-ndx i)))))))

;; maximum: vector -> number
(define (maximum data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmax))

;; minimum: vector -> number
(define (minimum data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmin))

;; minimum-maximum: vector -> number x number
(define (minimum-maximum data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
(values dmin dmax)))

;; maximum-index: vector -> integer
(define (maximum-index data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmax-ndx))

;; minimum-index: vector -> integer
(define (minimum-index data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
dmin-ndx))

;; minimum-maximum-index: vector -> integer x integer
(define (minimum-maximum-index data)
(let-values (((dmin dmax dmin-ndx dmax-ndx)
(minimum-maximum-and-indices data)))
(values dmin-ndx dmax-ndx)))

;; Median and Percentiles

;; median-from-sorted-data: sorted-vector -> real
(define (median-from-sorted-data sorted-data)
(let* ((n (vector-length sorted-data))
(lhs (quotient (- n 1) 2))
(rhs (quotient n 2)))
(if (= lhs rhs)
(vector-ref sorted-data lhs)
(/ (+ (vector-ref sorted-data lhs)
(vector-ref sorted-data rhs))
2.0))))

;; quantile: sorted-vector x real -> real
(define (quantile-from-sorted-data sorted-data f)
(let* ((n (vector-length sorted-data))
(index (* f (- n 1)))
(lhs (inexact->exact (truncate index)))
(delta (- index lhs)))
(if (= lhs (- n 1))
(vector-ref sorted-data lhs)
(+ (* (- 1.0 delta) (vector-ref sorted-data lhs))
(* delta (vector-ref sorted-data (+ lhs 1)))))))

)```