#lang racket
(require racket/generator)
(provide make-csv-reader)
(provide row-gen->list)
(define (row-gen->list row-gen)
(for/list ([row (in-producer row-gen)]
#:break (eof-object? row))
row))
(define (function-map function-list . argn) (map (λ(x) (apply x argn)) function-list))
(define (tokenizer match-functions port)
(generator () (let loop ([segment (list)]
[prev-char #f]
[this-char (read-char port)]
[next-char (read-char port)]
[skip-chars 0])
(cond [(eof-object? this-char) (cons this-char segment)]
[(> skip-chars 0) (loop (cons this-char segment) this-char next-char (read-char port) (sub1 skip-chars))]
[else (let*
([match-results (function-map match-functions this-char prev-char next-char)] [skip-chars (apply max (for/list ([j match-results]) (car j)))] [match-booleans (for/list ([j match-results]) (car (cdr j)))] [match? (for/or ([j match-results]) (car (cdr j)))] [next-next-char (read-char port)])
(cond [match?
(begin (yield (cons this-char segment))
(loop (list) this-char next-char next-next-char skip-chars))] [else (loop (cons this-char segment) this-char next-char next-next-char skip-chars)]))]))))
(define (make-csv-reader
#:delimiter [ delimiter #\, ]
#:lineterminator [ lineterminator #\newline ]
#:escapechar [ escapechar #\\ ]
#:doublequote [ doublequote #t ]
#:quotechar [ quotechar #\" ]
#:quoting [ quoting #t ]
#:skipinitialspace [ skipinitialspace #t ])
(define (assembler token-gen)
(generator () (let loop ([undigested-string (list)]
[il (list)]
[inside-quotes? #f])
(let* ([token (token-gen)]
[token-without-match-char (cdr token)]
[match-char (car token)]
[eof? (eof-object? match-char)]
[delimiter? (equal? delimiter match-char)]
[lineterminator? (equal? lineterminator match-char)]
[quote? (equal? quotechar match-char)]
[digest (compose list->string reverse)])
(cond [eof? (if (empty? il) match-char
(begin
(yield (cons (digest (append token-without-match-char undigested-string)) il))
match-char))]
[inside-quotes? (loop (append token undigested-string) il (not quote?))]
[delimiter? (loop (list) (cons (digest (append token-without-match-char undigested-string)) il) #f)]
[lineterminator? (begin (yield (reverse (cons (digest (append token-without-match-char undigested-string)) il)))
(loop (list) (list) #f))]
[(and quote? (not inside-quotes?)) (loop token il #t)])))))
(define (match-quote this-char prev-char next-char)
(cond [(equal? this-char quotechar)
(let* ([escaped? (and escapechar (equal? escapechar prev-char))]
[doublequoted? (and doublequote (equal? next-char quotechar))])
(cond [escaped? '(0 #f)]
[doublequoted? '(1 #f)]
[else '(0 #t)]))] [else '(0 #f)]))
(define (match-delimiter this-char prev-char next-char)
(cond [(equal? this-char delimiter)
(let* ([escaped? (and escapechar (equal? escapechar prev-char))])
(cond [escaped? '(0 #f)]
[else '(0 #t)]))]
[else '(0 #f)]))
(define (match-lineterminator this-char prev-char next-char)
(list 0 (equal? this-char lineterminator)))
(define token-gen (curry tokenizer (list match-delimiter match-lineterminator match-quote)))
(define csv-parser (λ(x) (assembler (token-gen x))))
csv-parser)