language/sequences.rkt
#lang racket

#|

File: language/sequences.rkt
Author: Bill Turtle (wrturtle)

This file contains the functionality of the sequence types.
This code is in the parenthetical language (pypar) layer,
because, for example, in the parenthetical expression
(+ "cat" "dog"), + has the semantics of string-append, not
the semantics of Racket's + operator.

|#


(require "../utilities.rkt")
(require "../printer.rkt")
(require racket/match)
(require (prefix-in pysem:
                    (only-in "../semantics/beginner-funs.rkt"
                             member
                             append
                             string_append)))
(require (prefix-in pysem: "../semantics/strings.rkt"))
(require (prefix-in pysem: "../semantics/hash-percents.rkt"))

(require (for-syntax "../utilities.rkt"))
(require (for-syntax "indent.rkt"))

#;(provide sequence+ sequence* sequence? sequence-in? pyret-sequence-ref sequence-slice sequence-len)
(provide sequence+ sequence*)

;; define which value types are sequences
;; at the moment, we only support lists and unicode
;; strings.
(define (sequence? e)
  (or
    (list? e)
    (string? e)))

;; given a sequence, return the empty version of this sequence
(define/contract (empty-sequence s)
  (-> sequence? sequence?)
  (match s
    [(regexp "")
     ""]
    [(list lvp ...)
     '()]))

;; (sequence-in? x s)
;;
;; True if an item of s is equal to x, else False
;; When s is a unicode string, this acts like
;; a substring test.
(define (sequence-in? x s xloc sloc)
  (match s
   [(regexp "")
    ;; should match any string
    (if (string? x)
        (pysem:string_contains s x)
        (raise-pyret-error (string-append "in: expected a string, but given "
                                          (something-else x))
                           xloc))]
   [(list lvp ...)
    (pysem:member x s)]
   [else
    (raise-pyret-error (string-append "in: expected a sequence, but given "
                                      (something-else s))
                       sloc)]))

;; (sequence+ s t)
;;
;; the concatenation of s and t
(define (sequence+ s t sloc tloc)
  (match s
   [(regexp "")
    (if (string? t)
        (pysem:#%app pysem:string_append s t)
        (raise-pyret-error (string-append "+: this should be a string")
                           tloc))]
   [(list lvp ...)
    (if (list? t)
        (pysem:#%app pysem:append s t)
        (raise-pyret-error (string-append "+: this should be a list") sloc))]
   [else
    (raise-pyret-error (string-append "+: expected a sequence, but given "
                                      (something-else s)))]))

;; (sequence* n s sloc)
;;
;; n_ shallow copies of s
(define/contract (sequence* n_ s)
  (-> (lambda (e) (>= e 0)) sequence? sequence?)
  (match s
   [(regexp "")
    (let loop ([n n_])
      (if (zero? n)
          ""
          (string-append s (loop (sub1 n)))))]
   [(list lvp ...)
    (let loop ([n n_])
      (if (zero? n)
          empty
          (append s (loop (sub1 n)))))]
   [else
    (error 'sequence* "internal error: reached end of match statement")]))
                                     
;; (pyret-sequence-ref seq n)
;;
;; Gets the nth element of seq, origin 0
(define (pyret-sequence-ref seq n seq-stx n-stx seq-loc n-loc expr-loc)
  (unless (and (number? n) (>= n 0))
    (raise-pyret-error
     (format
      (string-append
       "ref"
       ": expected a number greater than or equal to 0, but given ~e")
      n)
     n-loc))
  (match seq
    [(regexp "")
     (let ([len (string-length seq)])
       (when (>= n len)
         (raise-pyret-error
          (format
           (string-append
            "ref"
            ": string ~e only has ~e characters")
           seq
           len)
          expr-loc))
       (string (string-ref seq n)))]
    [(list lvp ...)
     (let ([len (length seq)])
       (when (>= n len)
         (raise-pyret-error
          (format
           (string-append
            "ref"
            ": the list ~a only has ~e elements")
           (pyret-print-to-string seq)
           len)
          expr-loc))
       (list-ref seq n))]
    [else
     (raise-pyret-error
      (string-append
       "ref"
       (format "expected a sequence, but given ~a"
               (something-else seq)))
      seq-loc)]))

;; s[i:j:k] => elements of s from i to (but not including j)
;;             in k increments
(define (sequence-slice seq i j k seq-loc k-loc)
  (unless (sequence? seq)
    (raise-pyret-error (format 
                         "slice: expected a sequence, but given ~a" 
                         (something-else seq))
                       seq-loc))
  (unless (> k 0)
    (raise-pyret-error (format "slice: third argument must be nonnegative, not ~a" k)
                       k-loc))
  (let ([len (sequence-len seq seq-loc)])
    ;; figure out what the real start and end locations are
    (let ([first-start (if (< i 0) (+ len i) i)]
          [first-end (if (< j 0) (+ len j) j)])
      (let ([real-start (if (< first-start 0) 0 first-start)]
            [real-end (if (< first-end 0) 0 first-end)])
        (cond
          [(< real-end real-start) (empty-sequence seq)]
          [(>= real-start len) (empty-sequence seq)]
          [else
           (let-values ([(plus ref) 
                         (match seq
                           [(regexp "") 
                            (values string-append (lambda (str i) (string (string-ref str i))))]
                           [(list lvp ...) (values append list-ref)]
                           [else
                            (error 'sequence-slice "Pyret internal error: match fell through")])])
             (let loop ([s real-start]
                        [ret (empty-sequence seq)])
               (if (< s (min real-end len))
                   (loop (+ s k) (plus ret (ref seq s)))
                   ret)))])))))

;; len(s) => length of sequences
(define (sequence-len seq seq-loc)
  (match seq
    [(regexp "")
     (string-length seq)]
    [(list lvp ...)
     (length seq)]
    [else
     (raise-pyret-error (format "len: expected a sequence, but given ~a" (something-else seq)) seq-loc)]))

;; -----------------------------------------------------------------------------
;; macros

;; in
(define-syntax (pypar:in stx)
  (syntax-case stx ()
    [(kw_ x_ s_)
     (begin
       (check-indent 'SLGC #'x_ #'kw_)
       (check-indent 'SLGC #'kw_ #'s_)
       (with-syntax ([xloc (syntax->vector #'x_)]
                     [sloc (syntax->vector #'s_)])
         (syntax/loc stx
           (sequence-in? x_ s_ xloc sloc))))]))

;; not in
(define-syntax (pypar:not-in stx)
  (syntax-case stx ()
    [(kw x s)
     (syntax/loc stx
       (pysem:not (pypar:in kw x s)))]))

;; s[i]
(define-syntax (pypar:ref stx)
  (syntax-case stx ()
    [(_ sequence_ index_)
     (begin
       (check-indent 'SLGC #'sequence_ #'index_)
       (let ([s (syntax sequence_)]
             [i (syntax index_)])
         (with-syntax ([seq-stx (syntax-e s)]
                       [i-stx (syntax-e i)]
                       [expr-loc (syntax->vector stx)]
                       [seq-loc (syntax->vector s)]
                       [i-loc (syntax->vector i)])
           (syntax/loc stx
             (pyret-sequence-ref sequence_ index_ (quote seq-stx) (quote i-stx) seq-loc i-loc expr-loc)))))]))

;; slice
(define-syntax (pypar:slice stx)
  (syntax-case stx ()
    [(kw_ seq_ i_ j_)
     (syntax/loc stx
       (kw_ seq_ i_ j_ 1))]
    [(kw_ seq_ i_ j_ k_)
     (begin
       (check-indent 'SLGC #'kw_ #'seq_)
       (check-indent 'SLGC #'seq_ #'i_)
       (check-indent 'SLGC #'i_ #'j_)
       (check-indent 'SLGC #'j_ #'k_)
       (with-syntax ([seq-loc (syntax->vector #'seq_)]
                     [k-loc (syntax->vector #'k_)])
         (syntax/loc stx
           (sequence-slice seq_ i_ j_ k_ seq-loc k-loc))))]))

;; len
(define-syntax (pypar:len stx)
  (syntax-case stx ()
    [(kw_ seq_)
     (begin
       (check-indent 'SLGC #'kw_ #'seq_)
       (with-syntax ([seq-loc (syntax->vector #'seq_)])
         (syntax/loc stx
           (sequence-len seq_ seq-loc))))]))

(provide (rename-out [pypar:in pyret-in]
                     [pypar:not-in pyret-not-in]
                     [pypar:ref pyret-ref]
                     [pypar:slice pyret-slice]
                     [pypar:len len]))