#lang racket
(require parser-tools/lex
(prefix-in : parser-tools/lex-sre)
plai/test-harness)
(provide value-tokens keyword-tokens op-tokens letter digit identifier
line-break comment expression-lexer)
(define-tokens value-tokens (NUMBER IDENTIFIER STRING CHAR))
(define-empty-tokens keyword-tokens (FUN DEF LET IN WHERE IF ELIF ELSE RETURN AND OR NOT TRUE FALSE PRINT IS STRUCT))
(define-empty-tokens op-tokens (newline
OP CP OB CB
COMMA SEMI COLON PERIOD
LESS-EQUAL GREATER-EQUAL NOT-EQUAL-C NOT-EQUAL-ML = < > + - * /
== %
** EOF))
(define-lex-abbrevs
[letter (:or (:/ "a" "z") (:/ "A" "Z"))]
[digit (:/ #\0 #\9)]
[identifier (:: letter (:* (:or letter digit #\_ #\?)))]
[line-break #\newline]
[comment (:: "#" (complement (:: any-string line-break any-string)) line-break)])
(define expression-lexer
(lexer-src-pos
[(eof) 'EOF]
[whitespace
(return-without-pos (expression-lexer input-port))]
[comment
(return-without-pos (expression-lexer input-port))]
[(:or "=" "+" "-" "*" "/" "<" ">" "**" "%" "==") (string->symbol lexeme)]
["(" 'OP]
[")" 'CP]
["[" 'OB]
["]" 'CB]
["," 'COMMA]
[";" 'SEMI]
[":" 'COLON]
["." 'PERIOD]
["!" 'NOT]
["<=" 'LESS-EQUAL]
[">=" 'GREATER-EQUAL]
["!=" 'NOT-EQUAL-C]
["<>" 'NOT-EQUAL-ML]
["fun" 'FUN]
["let" 'LET]
["in" 'IN]
["where" 'WHERE]
["def" 'DEF]
["if" 'IF]
["elif" 'ELIF]
["else" 'ELSE]
["and" 'AND]
["or" 'OR]
["not" 'NOT]
["return" 'RETURN]
["True" 'TRUE]
["False" 'FALSE]
["print" 'PRINT]
["is" 'IS]
["struct" 'STRUCT]
[identifier
(token-IDENTIFIER (string->symbol (regexp-replace* #rx"_" lexeme "-"))
(string->symbol lexeme))] [(:+ digit) (token-NUMBER (string->number lexeme))]
[(:: (:+ digit) #\. (:* digit)) (token-NUMBER (string->number lexeme))] [(:: #\' any-char #\')
(token-CHAR (string-ref lexeme 1))]
[(:: #\" any-string #\")
(let* ([len (string-length lexeme)]
[sub (substring lexeme 1 (- len 1))]) (token-STRING sub))]))
(define (test-lexer string)
(define p (open-input-string string))
(position-token-token (expression-lexer p)))