match.rkt
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; match.rkt
;; Richard Cobbe
;; January 2011
;;
;; Matches a sequence of arguments against a command-line spec and
;; invokes the user's action function.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

#lang racket

(require (prefix-in srfi-1: srfi/1)
         "spec-ast.rkt"
         "usage.rkt")

;; We throw detailed exceptions for various error conditions, even though the
;; functional-command function handles them all in the same way (except for
;; help-request), so we can write test cases that distinguish between the
;; cases.

;; general exception type; shouldn't be instantiated directly
(struct exn:fc:match exn:fail:contract (usage-info) #:transparent)
;;   message: error message, if any
;;   usage-info :: String: usage info to be printed to stdout before terminating

;; command name doesn't appear in spec
(struct exn:fc:bad-command exn:fc:match () #:transparent)

;; user supplied a switch that doesn't exist on this command
(struct exn:fc:bad-switch exn:fc:match () #:transparent)

;; user requested usage info
(struct exn:fc:help-request exn:fc:match () #:transparent)

;; positional argument(s) (either to command or switch) missing
(struct exn:fc:missing-argument exn:fc:match () #:transparent)

;; command has extra positional arguments
;;  (cannot be switch; extra positional arguments interpreted as belonging to
;;  command)
(struct exn:fc:extra-argument exn:fc:match () #:transparent)

;; once-each switch occurs twice
(struct exn:fc:duplicate-switch exn:fc:match () #:transparent)

;; once-any collision
(struct exn:fc:multiple-group-members exn:fc:match () #:transparent)

(provide/contract
 [match-spec (spec? (listof string?) . -> . any)])

;; too lazy to retype the same three field contracts over and over and over
;; again for all the exceptions.  Not worth it.
(provide
 (struct-out exn:fc:match)
 (struct-out exn:fc:bad-command)
 (struct-out exn:fc:bad-switch)
 (struct-out exn:fc:help-request)
 (struct-out exn:fc:missing-argument)
 (struct-out exn:fc:extra-argument)
 (struct-out exn:fc:duplicate-switch)
 (struct-out exn:fc:multiple-group-members))

;; matches the given args against the spec and invokes the appropriate handler
;; function.  argv[0] should not be in `args'.  (It's not part of
;; (current-command-line-arguments).)
(define match-spec
  (lambda (spec args)
    (match args
      [(list) (print-usage spec)]
      [(or (list "help") (list "--help"))
       (raise (exn:fc:help-request
               ""
               (current-continuation-marks)
               (format-usage spec)))]
      [(cons (? switch?) _)
       (raise (exn:fc:bad-command
               (format "~a: expected command~n"
                       (spec-program spec))
               (current-continuation-marks)
               (format-usage spec)))]
      [(list-rest "help" command _)
       (let ([cmd-spec (find-command spec command)])
         (if cmd-spec
             (raise (exn:fc:help-request
                     ""
                     (current-continuation-marks)
                     (format-command-usage cmd-spec)))
             (raise (exn:fc:bad-command
                     (format "Unknown command ~a" command)
                     (current-continuation-marks)
                     (format-usage spec)))))]
      [(list-rest cmd-name args)
       (let ([cmd-spec (find-command spec cmd-name)])
         (if cmd-spec
             (match-command cmd-spec args)
             (raise (exn:fc:bad-command
                     (format "Unknown command ~a" cmd-name)
                     (current-continuation-marks)
                     (format-usage spec)))))])))

;; Switch-Map ::= (FHash Keyword (U (Listof String) (Listof (Listof String))))
;; map containing keywords that we've seen as we parse the actual command line
;; arguments.  Lists are list of args (for optional/group) or list of list of
;; args (for multi).

(struct match-state (args cmd-spec formal-positionals
                          actual-positionals rest-positionals
                          keyword-table groups)
        #:transparent)
;; Match-State ::= (match-state (Listof String)
;;                              Command
;;                              (Listof Symbol)
;;                              (Listof String)
;;                              (Listof String)
;;                              Switch-Map
;;                              (F-Hash-Eq Group String))
;;   args: unparsed parameters from command line
;;   cmd-spec: Command specifying structure of command line
;;   formal-positionals: required formal positional args for which we haven't
;;     yet seen actuals
;;   actual-positionals: actual positional args we've seen so far, in order
;;   rest-positionals: actual arguments for positional rest arg, in order
;;   keyword-table: map of all switches seen so far, indexed by keyword
;;   groups: map from groups we've seen so far to the switches we've seen.

;; match-command :: Command (Listof String) -> whatever
;; Processes args according to cmd-spec.  Args should not include the
;; subcommand name.
;;
;; We do not currently support "--".
(define match-command
  (lambda (cmd-spec args)
    (match-main-loop
     (match-state args cmd-spec (command-positional-args cmd-spec)
                  null null (hasheq) (hasheq)))))

;; We structure the parsing of the actual command line as a state machine, with
;; states represented as the following functions:
;;   - match-command-main-loop: start state.  We're looking for either a
;;     positional argument, a switch, or the end of the argument list.
;;   - match-command-switch: we've just seen a switch, and now we need to look
;;     for its arguments.
;;   - match-optional-switch: look for an optional switch's arguments
;;   - match-multi-switch: look for a multi-switch's arguments
;;   - match-group-switch: look for a grouped switch's arguments

;; match-main-loop :: Match-State -> whatever
(define match-main-loop
  (lambda (state)
    (match state
      [(match-state (list) cmd (list) actual-positionals (list) keyword-table _)
       (invoke-cmd-fn (command-function cmd)
                      actual-positionals
                      null
                      keyword-table)]
      [(match-state (list) cmd (list) actual-positionals rest-positionals
                    keyword-table _)
       (cond
        [(not (command-rest-arg cmd))
         (raise (exn:fc:extra-argument
                 (format "Extra positional argument~a: ~a"
                         (plural-check rest-positionals)
                         (format-list rest-positionals))
                 (current-continuation-marks)
                 (format-command-usage cmd)))]
        [else (invoke-cmd-fn (command-function cmd)
                             actual-positionals
                             rest-positionals
                             keyword-table)])]
      [(match-state (list) cmd formal-positionals _ _ _ _)
       (raise-missing-argument formal-positionals cmd)]
      [(match-state (list-rest (? switch? s) args-rest) _ _ _ _ _ _)
       (match-command-switch
        s
        (struct-copy match-state state [args args-rest]))]
      [(match-state (list-rest args-first args-rest) _ (list) _ rest-pos _ _)
       (match-main-loop
        (struct-copy match-state state
                     [args args-rest]
                     [rest-positionals (snoc args-first rest-pos)]))]
      [(match-state (list-rest args-first args-rest) _ formals actuals _ _ _)
       (match-main-loop
        (struct-copy match-state state
                     [args args-rest]
                     [formal-positionals (cdr formals)]
                     [actual-positionals (snoc args-first actuals)]))])))

;; Design decision: if user specifies command that can take both --foo and
;; --bar long switches, and --foo requires an argument, then what do we do with
;;    command subcommand --foo --bar ?
;; Treat "--bar" as argument to --foo, or treat it as separate switch and
;; signal an error for --foo's missing argument?
;; Both Gnu's getopt and Racket's command-line treat it as argument to --foo.
;; Rationale for changing behavior: better error detection & reporting.
;; For now, stick with getopt/racket behavior.

;; match-command-switch :: String Match-State -> Whatever
;; state's args should not contain the switch itself.
(define match-command-switch
  (lambda (switch state)
    (match (find-switch switch (command-flags (match-state-cmd-spec state)))
      [(optional-flag _ kwd _ formal-args)
       (match-optional-switch state switch kwd formal-args)]
      [(multi-flag _ kwd _ formal-args)
       (match-multi-switch state switch kwd formal-args)]
      [(cons (optional-flag _ kwd _ formal-args) g)
       (match-group-switch state switch kwd formal-args g)]
      [#f
       (raise (exn:fc:bad-switch
               (format "Unknown switch: ~a~n" switch)
               (current-continuation-marks)
               (format-command-usage (match-state-cmd-spec state))))])))

;; match-command-optional-switch :: Match-State String Keyword (Listof Symbol)
;;                               -> Whatever
;; match an optional switch's arguments
;;   state: matcher state; its args should not contain the switch itself
;;   switch: string containing switch being matched (for exns)
;;   kwd: keyword corresponding to switch being processed
;;   formal-args: switch's formal arguments, if any
(define match-optional-switch
  (lambda (state switch kwd formal-args)
    (let ([args (match-state-args state)]
          [num-formals (length formal-args)]
          [kwd-table (match-state-keyword-table state)])
      (cond
       [(hash-has-key? kwd-table kwd)
        (raise (exn:fc:duplicate-switch
                (format "Switch ~a may only appear once~n" switch)
                (current-continuation-marks)
                (format-command-usage (match-state-cmd-spec state))))]
       [(< (length args) num-formals)
        (let ([missing-args (list-tail formal-args (length args))])
          (raise-missing-argument missing-args (match-state-cmd-spec state)))]
       [else
        (let-values ([(actuals rest) (srfi-1:split-at args num-formals)])
          (match-main-loop
           (struct-copy match-state state
                        [args rest]
                        [keyword-table (hash-set kwd-table kwd actuals)])))]))))

;; match-multi-switch :: State String Keyword (NEListof Symbol) -> Whatever
;; args as with match-optional-switch
(define match-multi-switch
  (lambda (state switch kwd formal-args)
    (let ([args (match-state-args state)]
          [num-formals (length formal-args)])
      (cond
       [(< (length args) num-formals)
        (let ([missing-args (list-tail formal-args (length args))])
          (raise-missing-argument missing-args (match-state-cmd-spec state)))]
       [else
        (let-values ([(actuals rest)
                      (srfi-1:split-at args num-formals)])
          (match-main-loop
           (struct-copy match-state state
                        [args rest]
                        [keyword-table
                         (hash-update
                          (match-state-keyword-table state)
                          kwd
                          (lambda (args)
                            (map snoc actuals args))
                          (map (lambda (x) null) actuals))])))]))))

;; match-group-switch :: State String Keyword (Listof Symbol) Group -> Whatever
;; args as with match-optional-switch; `g' is the switch group containing
;; the switch being processed.
(define match-group-switch
  (lambda (state switch kwd formal-args g)
    (let ([args (match-state-args state)]
          [num-formals (length formal-args)])
      (cond
       [(hash-ref (match-state-groups state) g #f) =>
        (lambda (prev-switch)
          (raise (exn:fc:multiple-group-members
                  (format "Switches ~a and ~a may not both appear.~n"
                          prev-switch
                          switch)
                  (current-continuation-marks)
                  (format-command-usage (match-state-cmd-spec state)))))]
       [(< (length args) num-formals)
        (let ([missing-args (list-tail formal-args (length args))])
          (raise-missing-argument missing-args (match-state-cmd-spec state)))]
       [else
        (let-values ([(actuals rest)
                      (srfi-1:split-at args num-formals)])
          (match-main-loop
           (struct-copy match-state state
                        [args rest]
                        [keyword-table
                         (hash-set (match-state-keyword-table state)
                                   kwd
                                   actuals)]
                        [groups
                         (hash-set (match-state-groups state) g switch)])))]))))

;; invoke-cmd-fn :: Procedure (Listof String) (Listof String) Switch-Map
;;               -> Whatever
;; tail-calls fn, following protocol for user-supplied action procedures.
(define invoke-cmd-fn
  (lambda (fn actuals rest-args kwd-table)
    (let* ([unsorted-kwd-args (hash-map kwd-table cons)]
           [sorted-kwd-args (sort unsorted-kwd-args keyword<? #:key car)])
      (keyword-apply fn
                     (map car sorted-kwd-args)
                     (map cdr sorted-kwd-args)
                     (append actuals rest-args)))))

;; find-switch :: String (Listof Flag)
;;             -> (Union Optional-Flag Multi-Flag
;;                       (Cons Optional-Flag Group)
;;                       #f)
;; Looks up Flag corresponding to given switch; returns #f if not found.
;; When flag is member of group, returns pair of flag & containing group.
(define find-switch
  (lambda (sw flags)
    (match flags
      [(list) #f]
      [(cons (and flag (or (optional-flag switches _ _ _)
                           (multi-flag switches _ _ _))) rest)
       (if (set-member? switches sw)
           flag
           (find-switch sw rest))]
      [(cons (and grp (group flags)) rest)
       (let ([spec (find-switch sw flags)])
         (if spec
             (cons spec grp)
             (find-switch sw rest)))])))

;; snoc :: a (List a) -> (List a)
;; functionally appends x to xs.
(define snoc
  (lambda (x xs)
    (cond
     [(null? xs) (cons x null)]
     [else (cons (car xs) (snoc x (cdr xs)))])))

;; plural-check :: (Listof x) [#:singular String] [#:plural String]
;;              -> String
;; Returns #:singular if one element in list, else #:plural
(define plural-check
  (lambda (lst #:singular (singular "") #:plural (plural "s"))
    (match lst
      [(list _) singular]
      [else plural])))

;; format-list :: (Listof x) -> String
;; formats list for user message: space separated, no parens
(define format-list
  (lambda (l)
    (string-join (map (lambda (x) (format "~a" x)) l) " ")))

;; raise-missing-argument :: (NEListof Symbol) Command -> a
;; creates & raises a missing-argument exception
(define raise-missing-argument
  (lambda (missing-formals cmd-spec)
    (raise (exn:fc:missing-argument
            (format "Missing required argument~a: ~a"
                    (plural-check missing-formals)
                    (format-list missing-formals))
            (current-continuation-marks)
            (format-command-usage cmd-spec)))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; assume all switches appear before all positional args
;; don't allow -xyz yet; require -x -y -z.
;; can't just run tokenize on all arguments up front, since we don't want to
;; parse arguments to switch as a switch.

;; to test print-usage, override current-exit-handler to escape.

;; use exit code from print usage?  Loss of modularity in the context of a
;; larger program that uses exit codes to convey information.  Alternatively,
;; could parameterize over desired exit code, possibly w/ parameter, but that
;; seems awfully baroque.