(module ahocorasick mzscheme
(provide make
make-from-strings
add
string-add
prepare
size
(rename AhoCorasickTree-root root)
start-search
start-string-search
start-port-search
result-output
result-end-position
continue-search
string-search
port-search
)
(require (lib "plt-match.ss")
(prefix s: "state.ss")
(prefix q: "queue.ss")
)
(define-struct AhoCorasickTree (root initialized?))
(define (make-from-strings strings)
(let ((tree (make)))
(let loop ((strings strings))
(when (not (null? strings))
(add tree (string->list (car strings)) (car strings))
(loop (cdr strings))))
(prepare tree)
tree))
(define (make)
(make-AhoCorasickTree (s:make 0) #f))
(define add
(case-lambda
[(tree labels)
(add tree labels labels)]
[(tree labels output)
(match tree
[(struct AhoCorasickTree (root initialized?))
(when initialized?
(error 'add "Can't add to a prepared tree"))
(let ((state (s:extend* root labels)))
(s:add-output! state output))]
[else (raise-type-error 'add-keyword "AhoCorasickTree" tree)])]))
(define string-add
(case-lambda
[(tree string)
(add tree (string->list string) string)]
[(tree string output)
(add tree (string->list string) output)]))
(define (size tree)
(define (size-state state)
(+ 1
(apply +
(map (lambda (l) (size-state (s:goto state l)))
(s:out-labels state)))))
(match tree
[(struct AhoCorasickTree (root initialized?))
(size-state root)]
[else (raise-type-error 'size "AhoCorasickTree" tree)]))
(define (prepare tree)
(initialize-fail-transitions tree)
(set-AhoCorasickTree-initialized?! tree #t))
(define (initialize-fail-transitions tree)
(define (init-unit-depth root)
(for-each (lambda (x) (s:set-fail! x root))
(s:out-states root)))
(define (init-state state)
(for-each
(lambda (e) (init-helper state (s:goto state e) e))
(s:out-labels state)))
(define (init-helper r s a) (let loop ((state (s:fail r)))
(if (void? (s:goto state a))
(loop (s:fail state))
(begin
(s:set-fail! s (s:goto state a))
(s:set-output! s
(append (s:output (s:goto state a))
(s:output s)))))))
(match tree
[(struct AhoCorasickTree (root initialized?))
(init-unit-depth root)
(s:breadth-first init-state
(q:insert* q:empty (s:out-states root)))]
[else (raise-type-error 'initialize-fail-transitions
"AhoCorasickTree" tree)]))
(define-struct SearchResult (output end-position cont-f))
(define (start-search tree source-f)
(define (search-state state)
(let/cc return
(let loop ((state state)
(next-label (source-f))
(i 0))
(when (not (null? (s:output state)))
(set!
return
(let/cc restart-f
(return (make-SearchResult (s:output state) i restart-f)))))
(cond ((eof-object? next-label)
(return #f))
((not (void? (s:goto state next-label)))
(loop (s:goto state next-label) (source-f) (+ i 1)))
(else
(loop (s:fail state) next-label i))))))
(check-initialized! tree)
(match tree
[(struct AhoCorasickTree (root initialized?))
(search-state root)]
[else (raise-type-error 'search "AhoCorasickTree" tree)]))
(define (check-initialized! tree)
(when (not (AhoCorasickTree-initialized? tree))
(error 'start-search
"Can't start a search against unprepared tree.")))
(define (start-string-search tree str)
(let ((p (open-input-string str)))
(start-port-search tree p)))
(define (start-port-search tree port)
(start-search tree (lambda () (read-char port))))
(define (continue-search last-search-result)
(let/cc return
((SearchResult-cont-f last-search-result) return)))
(define (port-search tree port)
(let loop ((s (start-port-search tree port))
(rev-results (list)))
(if s
(let ((output (result-output s))
(end-pos (result-end-position s)))
(loop (continue-search s)
(append (map (lambda (o)
(list o
(- end-pos (string-length o))
end-pos))
(reverse output))
rev-results)))
(reverse rev-results))))
(define (string-search tree string)
(let ((port (open-input-string string)))
(port-search tree port)))
(define (result-output last-search-result)
(SearchResult-output last-search-result))
(define (result-end-position last-search-result)
(SearchResult-end-position last-search-result))
)