csv-reader.rkt
#lang racket

(require racket/generator)
(provide make-csv-reader)
(provide row-gen->list)

(define (row-gen->list row-gen)
  (for/list ([row (in-producer row-gen)]
             #:break (eof-object? row))
    row))
 
(define (function-map function-list . argn) ; similar to map, but instead takes a list of functions, and applies it to input arguments
  (map (λ(x) (apply x argn)) function-list)) ; returns a list of return values

;lmf : list of match functions
;eos : End of string

; return value (list  token)
; EOF stays true once the input port has been expended
(define (tokenizer match-functions port)
  (generator () (let loop ([segment (list)]
                           [prev-char #f]
                           [this-char (read-char port)]
                           [next-char (read-char port)]
                           [skip-chars 0])
                  (cond [(eof-object? this-char) (cons this-char segment)]
                        [(> skip-chars 0) (loop (cons this-char segment) this-char next-char (read-char port) (sub1 skip-chars))]
                        [else (let*
                                   ([match-results (function-map match-functions this-char prev-char next-char)] ; list of (skip-chars #f/#t) for match functions
                                    [skip-chars (apply max (for/list ([j match-results]) (car j)))] ; find the max of all skip characters
                                    [match-booleans (for/list ([j match-results]) (car (cdr j)))] ; create a list of only match? from match-functions
                                    [match? (for/or ([j match-results]) (car (cdr j)))] ; or of all boolean values from match-functions
                                    [next-next-char (read-char port)])
                                 (cond [match?
                                        (begin (yield (cons this-char segment))
                                               (loop (list) this-char next-char next-next-char skip-chars))] ; if there is a match return (i match? match-booleans next-index)
                                       [else (loop (cons this-char segment) this-char next-char next-next-char skip-chars)]))]))))



(define (make-csv-reader
         #:delimiter [ delimiter #\, ]
         #:lineterminator [ lineterminator #\newline ]
         #:escapechar [ escapechar #\\ ] 
         #:doublequote [ doublequote #t ]
         #:quotechar [ quotechar #\" ]
         #:quoting [ quoting #t ]
         ; not implemented yet
         #:skipinitialspace [ skipinitialspace #t ])

  (define (assembler token-gen)
  (generator () (let loop ([undigested-string (list)]
                           [il (list)]
                           [inside-quotes? #f])
                  (let* ([token (token-gen)]
                         [token-without-match-char (cdr token)]
                         [match-char (car token)]
                         [eof? (eof-object? match-char)]
                         [delimiter? (equal? delimiter match-char)]
                         [lineterminator? (equal? lineterminator match-char)]
                         [quote? (equal? quotechar match-char)]
                         [digest (compose list->string reverse)])
                    (cond [eof?  (if (empty? il) match-char
                                     (begin
                                       (yield (cons (digest (append token-without-match-char undigested-string)) il))
                                       match-char))]
                          [inside-quotes?   (loop (append token undigested-string) il (not quote?))] 
                          [delimiter?       (loop (list) (cons (digest (append token-without-match-char undigested-string)) il) #f)]
                          [lineterminator?  (begin (yield (reverse (cons (digest (append token-without-match-char undigested-string)) il)))
                                              (loop (list) (list) #f))]
                          [(and quote? (not inside-quotes?))           (loop token il #t)])))))
  
  (define (match-quote this-char prev-char next-char)
    (cond [(equal? this-char quotechar)
           (let* ([escaped? (and escapechar (equal? escapechar prev-char))]
                  [doublequoted? (and doublequote (equal? next-char quotechar))])
             (cond [escaped? '(0 #f)]
                   [doublequoted? '(1 #f)]
                   [else '(0 #t)]))] ;Skip the next character in matching
          [else '(0 #f)]))

  (define (match-delimiter this-char prev-char next-char)
    (cond [(equal? this-char delimiter)
           (let* ([escaped? (and escapechar (equal? escapechar prev-char))])
             (cond [escaped? '(0 #f)]
                   [else '(0 #t)]))]
          [else '(0 #f)]))

  (define (match-lineterminator this-char prev-char next-char)
    (list 0 (equal? this-char lineterminator)))

  (define token-gen (curry tokenizer (list match-delimiter match-lineterminator match-quote)))

  (define csv-parser (λ(x) (assembler (token-gen x))))
 csv-parser)