#lang scheme
(require mzlib/contract mzlib/etc mzlib/pregexp net/mime )
(define-struct email (headers messages) #:inspector #f)
(define-struct (exn:malformed-email exn) () #:inspector #f)
(provide/contract
(struct email ((headers (listof (cons/c symbol? string?)))
(messages (listof (listof string?)))))
(parse-emails ((or/c none/c input-port?) . -> . (listof email?)))
(parse-email ((or/c none/c input-port?) . -> . email?)))
(provide (struct-out exn:malformed-email))
(define parse-emails
(lambda ((ip (current-input-port)))
(let loop ((line (peek-line ip)))
(cond ((eof-object? line) '())
((new-email? line) (let* ((parsed (parse-email
(open-input-string (get-first-email ip))))
(parsed-rest (loop (peek-line ip))))
(printf "an mbox collection")
(cons parsed parsed-rest)))
((an-email? line) (list (parse-email ip))) (else (raise
(make-exn:malformed-email
(string->immutable-string
(format "~a: ~a"
"Expected a \"From ...\", got"
line))
(current-continuation-marks))))))))
(define parse-email
(lambda ((ip (current-input-port)))
(let ((analysis (mime-analyze ip)))
(if (multi-message? analysis)
(parse-email-multi analysis)
(parse-email-single analysis)))))
(define/contract parse-email-multi
(message? . -> . email?)
(lambda (analysis)
(make-email
(message-fields->assoc (message-fields analysis))
(map message->body
(entity-parts (message-entity analysis))))))
(define/contract parse-email-single
(message? . -> . email?)
(lambda (analysis)
(make-email
(message-fields->assoc (message-fields analysis))
(list (message->body analysis)))))
(define (multi-message? analysis)
(symbol=? (entity-type (message-entity analysis))
'multipart))
(define (message->body message)
(entity-body->body (entity-body (message-entity message))))
(define (entity-body->body body)
(let ((o (open-output-string)))
(body o)
(cdr (string->los (get-output-string o)))))
(define (string->los s)
(pregexp-split "\n" s))
(define/contract peek-line
(() (input-port?) . opt-> . (union eof-object? string?))
(lambda ((ip (current-input-port)))
(let loop ((acc "")
(c (peek-char ip))
(col 1))
(cond
((eof-object? c) (if (string=? acc "") c acc))
((char=? c #\newline) acc)
(else (loop (string-append acc (string c))
(peek-char ip col)
(+ col 1)))))))
(define (new-email? line)
(pregexp-match #px"^From .*\\d{4}$" line)
)
(define (an-email? line)
(pregexp-match #px"^(?mi:From|To|Envelope.*|Received|Return-Path|Date|Subject|Content\\-.*|MIME-Version|Forwarded|Message.*)" line)
)
(define/contract message-fields->assoc
((listof string?) . -> . (listof (cons/c symbol? string?)))
(lambda (fields)
(filter pair?
(map
(lambda (field)
(let ((n-v (pregexp-match "^(\\S+): *(.*)" field)))
(if n-v
(cons (string->symbol (cadr n-v))
(apply string-append (cddr n-v)))
'())))
fields))))
(define/contract get-first-email
(input-port? . -> . string?)
(lambda (ip)
(let loop ((acc "")
(line (peek-line ip))
(seen-first #f))
(cond
((eof-object? line) acc)
((new-email? line) (if seen-first
acc
(let* ((a (read-line ip))
(l (peek-line ip)))
(loop a l #t))))
(else (let ((a (read-line ip))
(l (peek-line ip)))
(loop (format "~a~n~a" acc a) l seen-first)))))))