#lang racket/base
;; Some utilities.

(require racket/contract

(define false-thunk (lambda () #f))

;; longest-common-substring: string string -> string
;; Returns the longest common substring between the two strings.
(provide/contract (longest-common-substring (-> string? string? string?)))
(define (longest-common-substring s1 s2)
  (label->string (longest-common-sublabel (string->label/with-sentinel s1)
                                          (string->label/with-sentinel s2))))

;; longest-common-sublabel: label label -> label
;; Naive use of suffix trees to find longest common sublabel between
;; two labels.  Note that there's a better way to do this with
;; matching statistics: I'll try using matching statistics as soon
;; as I get this version running.
;; This approach simply adds both labels to a common suffix tree,
;; does a postorder traversal to mark up the inner nodes, and then
;; finds the inner node with the deepest string depth.
(provide/contract (longest-common-sublabel (-> label? label? label?)))
(define (longest-common-sublabel label-1 label-2)
  (let ((label-1-marks (make-hasheq))
        (label-2-marks (make-hasheq))
        (deepest-node #f)
        (deepest-depth 0))
          (lambda ()
            (let ((tree (make-tree)))
              (tree-add! tree label-1)
              (tree-add! tree label-2)
              (mark-up-inner-nodes! (tree-root tree) 0)
              (path-label deepest-node))))

          (lambda (node depth)
            (if (null? (node-children node))
                (begin (when (label-source-eq? (node-up-label node) label-1)
                         (mark-with-label-1! node))
                       (when (label-source-eq? (node-up-label node) label-2)
                         (mark-with-label-2! node)))
                (begin (for-each
                        (lambda (child)
                           (+ depth (label-length (node-up-label child)))))
                        (node-children node))
                       (absorb-children-marks! node depth)))))

          (lambda (node)
            (hash-set! label-1-marks node #t)))

          (lambda (node)
            (hash-set! label-2-marks node #t)))

          (lambda (node)
            (hash-ref label-1-marks node false-thunk)))

          (lambda (node)
            (hash-ref label-2-marks node false-thunk)))
          (lambda (node)
            (and (marked-by-label-1? node)
                 (marked-by-label-2? node))))
          (lambda (node depth)
            (let/ec escape
              (for-each (lambda (child)
                          (when (marked-by-label-1? child)
                            (mark-with-label-1! node))
                          (when (marked-by-label-2? child)
                            (mark-with-label-2! node))
                          (when (marked-by-both? node)
                        (node-children node)))
            (when (and (marked-by-both? node)
                       (> depth deepest-depth))
              (set! deepest-depth depth)
              (set! deepest-node node))))
      (if (or (= 0 (label-length label-1))
              (= 0 (label-length label-2)))
          (string->label "")

;; path-label: node -> label
;; Returns a new label that represents the path from the tree root
;; to this node.
;; Fixme: optimize the representation of label to be able to do this
;; without much reallocation.  Maybe another label class that uses a
;; rope data structure might be better...  I need to read Hans
;; Boehm's paper on "Ropes, an alternative to strings" to see how
;; much work this would be.
(provide/contract (path-label (-> node? label?)))
(define path-label
        (lambda (current-node collected-labels total-length)
          (if current-node
              (collect-loop (node-parent current-node)
                            (cons (node-up-label current-node)
                            (+ total-length
                               (label-length (node-up-label current-node))))
              (build-new-label collected-labels total-length))))

        (lambda (src-label dest-vector dest-offset)
          (let loop ((i 0))
            (when (< i (label-length src-label))
              (vector-set! dest-vector
                           (+ i dest-offset)
                           (label-ref src-label i))
              (loop (add1 i))))))
        (lambda (labels total-length)
          (let ((vector (make-vector total-length)))
            (let loop ((labels labels)
                       (i 0))
              (if (null? labels)
                    (vector->label vector))
                    (vector-blit! (car labels) vector i)
                    (loop (cdr labels)
                          (+ i (label-length (car labels))))))))))]
    (lambda (node)
      (collect-loop node '() 0))))