filetransfer.rkt
#lang racket
(provide start-listen finish-transfer kill-transfer wait-transfer send-file filetransfer?)

(require racket/tcp)
(require racket/async-channel)
(require file/sha1)
(require srfi/13)
(require srfi/14)
(require (planet williams/packed-binary/packed-binary))
(require (planet erast/fileutils:2:=1))

; INTERFACE

; listen on local-port for incoming filetransfer connection
; from-ip is the ip of the client connecting to the (if from-ip is
; #f any connection is allowed), progress-proc is called with a symbol in '(receiving preparing), the path,
; and the percentage completed, final proc with a symbol in '(ok error) and the path, and file-table
; is a hash table that holds checksum+name keys with paths as values to check whether a requested file
; already exists (if the name and checksum are in the table and the corresponding file exists, then download
; will be resumed by sending an offset to the sending remote host)
; if during transfer a block is not received within timeout seconds, then exn:fail:tcp-timeout exception is raised
; returns the filetransfer, which can be shut down by stop-transfer or kill-transfer
(define (start-listen local-port save-path from-ip progress-proc final-proc
                      [timeout 60.0] [file-table (make-hash)] [listen-timeout 604800.0] )
  (define custodian (make-custodian))
  (define finalize-channel (make-async-channel))
  (define listener (tcp-listen local-port 1 #t #f))
  (define (accept-connection/maybe listener)
    (with-handlers ([exn:fail:network? (lambda (exn) #f)])
      (let ((ports (sync/timeout/enable-break listen-timeout (tcp-accept-evt listener))))
        (if (or (not from-ip) (string=? (let-values (((in out) (tcp-addresses (first ports) #f))) in)
                         from-ip))
            ports
            #f))))
  (make-filetransfer
   listener
   (thread
    (lambda ()
      (progress-proc 'listening save-path 0)
      (let loop ((in-out-ports (accept-connection/maybe listener)))
        (when in-out-ports
          (receive-file (first in-out-ports) (second in-out-ports) save-path progress-proc final-proc timeout file-table #:channel finalize-channel)
          (unless (thread-try-receive)
            (loop (accept-connection/maybe listener)))))))
   custodian
   finalize-channel))

; receive a file from in-port, sending back data to out-port (bidirectional channel required)
; see send-file below for protocol
; the file-table is a hashtable containing paths based on file checksums+names as keys
; returns the path where the file is stored (which may differ from local path if the download
; has been resumed)
(define (receive-file in-port out-port local-path progress-proc final-proc [timeout 60.0] [file-table (make-hash)] #:channel finalize-channel)
  (progress-proc 'preparing local-path 0)        ; preparing for transmission, save-path is directory to save to
  (define checksum (sync/network-timeout timeout (read-bytes-evt 20 in-port))) ; read 20 bytes checksum
  (define name-len (car (unpack "<H" (sync/network-timeout timeout (read-bytes-evt 2 in-port))))) ; read name length 2 bytes
  (define name (sync/network-timeout timeout (read-bytes-evt name-len in-port))) ; read the name as bytes
  (define key (bytes-append checksum name))  ; the key is checksum+name (so files with different names will be transmitted twice!)
  (define path (hash-ref file-table key #f))
  (cond
    ((and path (file-exists? path)) (write-packed "<q" out-port (file-size path))
                                    (flush-output out-port)
                                    (read-raw-file in-port path progress-proc final-proc timeout finalize-channel)
                                    path)
    (else (let ((local-name (build-path local-path (sanitize (bytes->string/utf-8 name) local-path))))
            (hash-set! file-table key local-name)
            (write-packed "<q" out-port 0)
            (flush-output out-port)
            (read-raw-file in-port local-name progress-proc final-proc timeout finalize-channel)
            local-name))))


(define (percent n m)
  (ceiling (* (/ n m) 100)))

; this should only be called when the listening process has to be terminated unexpectedly
(define (kill-transfer t)
  (when (filetransfer-listener t)
    (tcp-close (filetransfer-listener t)))
  (custodian-shutdown-all (filetransfer-custodian t)))

; waits until the current transfer has finished, then stops the transfer
(define (finish-transfer t [timeout 259200.0])
  (thread-send (filetransfer-thread t) #t)
  (wait-transfer t timeout)
      (kill-transfer t))

; waits until the transfer has finished, returns #t if the transfer finished within timeout,
; #f otherwise
(define (wait-transfer t [timeout 259200.0])
  (sync/timeout timeout (filetransfer-finalized? t)))

; send the file at path to a remote host at port, where suggested-filename indicates
; how the receiver should name the file (but, of course, he may modify this name)
; the progress proc takes 3 arguments, a symbol in '(preparing resuming starting sending), a path and
; the percentage completed, and the final-proc is called with the path as argument when the transfer is completed
(define (send-file remote-hostname remote-port path suggested-filename progress-proc final-proc [timeout 60.0])
  (define custodian (make-custodian))
  (define finalize-channel (make-async-channel))
  (progress-proc 'connecting path 0)
  (let-values (((in-port out-port) (tcp-connect/enable-break remote-hostname remote-port)))
    (make-filetransfer #f
                   (thread 
                    (lambda () (send-file-to-port suggested-filename path out-port in-port progress-proc final-proc timeout #:channel finalize-channel)))
                   custodian
                   finalize-channel)))
  
; sends a file over the port
(define (send-file-to-port suggested-name file-path out-port in-port progress-proc final-proc [timeout 60.0] #:channel finalize-channel)
  (progress-proc 'preparing file-path 0)
  (define file-name-bytes (string->bytes/utf-8 suggested-name))
  ; ==> file-checksum 20 bytes sha1 partial checksum
  ; ==> suggested-name-length  SHORT 16 bit
  ; ==> suggested-name suggested-name-length bytes
  ; <== offset LONG 64 bit
  ; ==> file-length LONG 64 bit (minus offset, i.e. actual data to be sent)
  ; ==> file-length bytes of data
  (write-bytes (checksum-file file-path) out-port)
  (write-packed "<H" out-port (bytes-length file-name-bytes))
  ; write name (as bytes, UTF-8 encoded)
  (write-bytes file-name-bytes out-port)
  (flush-output out-port)
  ; get the offset and send file-length - offset
  (define offset (car (unpack "<q" (sync/network-timeout timeout (read-bytes-evt 8 in-port)))))
  (define size (file-size file-path))
  (if (> offset 0)
      (progress-proc 'resuming file-path 0)
      (progress-proc 'starting file-path 0))
  (write-packed "<q" out-port (- size offset))
  ; write raw file data with offset
  (define time_0 (current-milliseconds))
  (when (< offset size)
    (write-file file-path out-port offset size progress-proc final-proc timeout))
  (flush-output out-port)
  (final-proc 'finished file-path (- (current-milliseconds) time_0) size)
  (async-channel-put finalize-channel #t))

; IMPLEMENTATION

(define-struct filetransfer (listener thread custodian finalized?))

; exn:failk:tcp-timeout is raised if a network operation has not succeeded within the
; specified timeout seconds
(define-struct (exn:fail:network:timeout
                exn:fail:network)
  (a-srcloc)
  #:property prop:exn:srclocs
  (lambda (a-struct)
    (match a-struct
      [(struct exn:fail:network:timeout
         (msg marks a-srcloc))
       (list a-srcloc)])))

(define-syntax sync/network-timeout
  (syntax-rules ()
    ((_ timeout expr ...) (let ((result (sync/timeout timeout expr ...)))
                           (if result
                               result
                               (raise exn:fail:network:timeout #t))))))

(define (time)
  (current-milliseconds))

; read the raw file consisting of 64 bit file length plus all data
(define (read-raw-file in-port path progress-proc final-proc timeout finalize-channel)
  (define time_0 (time))
  (define buffsize 16384)
  (define buffer (make-bytes buffsize))
  (define size (car (unpack "<q" (sync/network-timeout timeout (read-bytes-evt 8 in-port)))))
  (define out-port (open-output-file path #:mode 'binary #:exists 'append))
  (when (> size 0)
    (let loop ((total-read 0))
      (cond
        ((> (+ total-read buffsize) size) (read-bytes! buffer in-port 0 (- size total-read))
                                          (write-bytes buffer out-port 0 (- size total-read))
                                          (progress-proc 'receiving path 
                                                         (percent size size)))
        (else (read-bytes! buffer in-port)
              (when (> (percent total-read size) (percent (- total-read buffsize) size))
                 (progress-proc 'receiving path 
                                (percent total-read size)))
              (write-bytes buffer out-port)
              (loop (+ total-read buffsize))))))
  (close-output-port out-port)
  (final-proc 'finished path (- (time) time_0) size)
  (async-channel-put finalize-channel #t))

; this is the suggested file name, meaning it's untrusted (from remote host)
; so it needs to be checked - we only allow a quite strict set of alphanumeric set of characters
; plus few special characters CHECK this could be a problem with foreign languages/unicode
(define char-set:allowed-in-file-name (char-set-union char-set:letter+digit (char-set #\_ #\- #\.)))

(define (sanitize file-name local-path)
  (let ((name (string-filter char-set:allowed-in-file-name file-name)))
    (if (string=? (filename-main name) "")
        (if (string=? (filename-suffix name) "")
            (make-unique-name (number->string (current-milliseconds)) local-path)
            (make-unique-name (string-append (number->string (current-milliseconds)) "." (filename-suffix name)) local-path))
        (make-unique-name name local-path))))

; compute a fast (incomplete) 20 bytes checksum of a file
(define (checksum-file path)
  (let ((in-port (open-input-file path #:mode 'binary))
        (buff (make-bytes 16384))
        (size (file-size path)))
    (file-position in-port (max 0 (- (quotient (file-size path) 2) 8192)))
    (read-bytes-avail!* buff in-port)
    (close-input-port in-port)
    (sha1-bytes (open-input-bytes buff))))

; open file and write it to out-port as raw data
(define (write-file path out-port offset size progress-proc final-proc timeout)
  (define in-port (open-input-file path #:mode 'binary))
  (define buffsize 16384)
  (define buffer (make-bytes buffsize))
  (let loop ((total-read 0))
    (when (> (percent total-read size) (percent (- total-read buffsize) size))
      (progress-proc 'sending path (percent total-read size)))
    (cond
      ((> (+ total-read buffsize) size) (read-bytes! buffer in-port 0 (- size total-read))
                                        (write-bytes buffer out-port 0 (- size total-read))
                                        (progress-proc 'sending path (percent size size)))
      (else (read-bytes! buffer in-port)
            (write-bytes buffer out-port) 
            (loop (+ total-read buffsize)))))
  (close-input-port in-port))



; todo wait-transfer for listener will immediately succeed if it is used when the second or higher file is received, because async-channel-put queues (instead of just 1 value)
; need to remove previous values from queue when looping in the listener (start-listen)