#lang scheme
(require net/mime )
(define-struct email (headers messages) #:inspector #f)
(define-struct (exn:malformed-email exn) () #:inspector #f)
(provide/contract
(struct email ((headers (listof (cons/c symbol? string?)))
(messages (listof (listof string?)))))
(parse-archive (path? . -> . (listof email?)))
(parse-emails ((or/c none/c input-port?) . -> . (listof email?)))
(parse-email ((or/c none/c input-port?) . -> . email?))
(write-email (email? output-port? . -> . any))
(an-email-file? (path? . -> . any))
(get-email-header-val (any/c email? . -> . any))
(tokenise-email-addresses (string? . -> . (listof string?)))
)
(provide (struct-out exn:malformed-email))
(define parse-emails
(lambda ((ip (current-input-port)))
(let loop ((line (peek-line ip)))
(cond ((eof-object? line) '())
((new-email? line) (let* ((parsed (parse-email
(open-input-string (get-first-email ip))))
(parsed-rest (loop (peek-line ip))))
(cons parsed parsed-rest)))
((an-email? line) (list (parse-email ip))) (else (raise
(make-exn:malformed-email
(string->immutable-string
(format "~a: ~a"
"Expected a \"From ...\", got"
line))
(current-continuation-marks))))))))
(define parse-email
(lambda ((ip (current-input-port)))
(let ((analysis (mime-analyze ip)))
(if (multi-message? analysis)
(parse-email-multi analysis)
(parse-email-single analysis)))))
(define/contract parse-email-multi
(message? . -> . email?)
(lambda (analysis)
(make-email
(message-fields->assoc (message-fields analysis))
(map message->body
(entity-parts (message-entity analysis))))))
(define/contract parse-email-single
(message? . -> . email?)
(lambda (analysis)
(make-email
(message-fields->assoc (message-fields analysis))
(list (message->body analysis)))))
(define (multi-message? analysis)
(symbol=? (entity-type (message-entity analysis))
'multipart))
(define (message->body message)
(entity-body->body (entity-body (message-entity message))))
(define (entity-body->body body)
(let ((o (open-output-string)))
(body o)
(cdr (string->los (get-output-string o)))))
(define (string->los s)
(regexp-split #px"\n" s))
(define/contract peek-line
(->* () (input-port?) (or/c eof-object? string?))
(lambda ((ip (current-input-port)))
(let loop ((acc "")
(c (peek-char ip))
(col 1))
(cond
((eof-object? c) (if (string=? acc "") c acc))
((char=? c #\newline) acc)
(else (loop (string-append acc (string c))
(peek-char ip col)
(+ col 1)))))))
(define (new-email? line)
(regexp-match #px"^From .*\\d{4}$" line))
(define (an-email? line)
(regexp-match #px"^(?mi:From|To|Envelope.*|Received|Return-Path|Date|Subject|Content\\-.*|MIME-Version|Forwarded|Message.*)" line))
(define/contract message-fields->assoc
((listof string?) . -> . (listof (cons/c symbol? string?)))
(lambda (fields)
(filter pair?
(map
(lambda (field)
(let ((n-v (regexp-match #px"^(\\S+): *(.*)" field)))
(if n-v
(cons (string->symbol (cadr n-v))
(apply string-append (cddr n-v)))
'())))
fields))))
(define/contract get-first-email
(input-port? . -> . string?)
(lambda (ip)
(let loop ((acc "")
(line (peek-line ip))
(seen-first #f))
(cond
((eof-object? line) acc)
((new-email? line) (if seen-first
acc
(let* ((a (read-line ip))
(l (peek-line ip)))
(loop a l #t))))
(else (let ((a (read-line ip))
(l (peek-line ip)))
(loop (format "~a~n~a" acc a) l seen-first)))))))
(define (parse-archive email-archive-path)
(append-map
(lambda (email-file-path)
(call-with-input-file email-file-path parse-emails))
(find-files an-email-file? email-archive-path)))
(define (an-email-file? path)
(let ((file (file-name-from-path path)))
(if (not (directory-exists? path))
(regexp-match
#px"(?mi:\\d+|\\.(mbx|mbox|email|eml))$"
(path->string (file-name-from-path path)))
#f)))
(define (write-email message (out-port (current-output-port)))
(printf "THIS IS NOT WORKING ~V:~V~N" message out-port))
(define (tokenise-email-addresses string)
(if (string? string)
(map
(lambda (a-string)
(string-foldcase
(car (regexp-match
#px"[^<]+@[^>]+"
(car (regexp-match #px"\\S+@\\S+" a-string)))))) (regexp-split #px",\\s*" string))
null))
(define (get-email-header-val key message)
(let* ((headers-alist (email-headers message))
(result (assoc key headers-alist)))
(if (pair? result) (cdr result) #f)))