#lang at-exp racket/base
(require "python-tokenizer.rkt"
rackunit
racket/match
racket/sequence
racket/generator)
;; r : any -> any
;; Identity function.  Under `#lang at-exp`, `@r{some text}` reads as
;; `(r "some text")`, so this lets a test write Python source containing
;; double quotes without backslash-escaping each one.
(define (r x) x)
;; dump-tokens : string -> (listof (list type text start end))
;; Tokenizes the Python source text `s` with `generate-tokens` and collects the
;; tokens up to, but not including, the first ENDMARKER token.  Every token is
;; expected to be a five-element list; its last element is dropped so the
;; expectations below only have to spell out type, text, start and end.
;; Any exception raised while tokenizing propagates to the caller.
(define (dump-tokens s)
  ;; #t exactly when `tok` is a list whose first element is 'ENDMARKER.
  (define (endmarker? tok)
    (match tok
      [(list 'ENDMARKER _ ...) #t]
      [_ #f]))
  (for/list ([tok (generate-tokens (open-input-string s))]
             #:break (endmarker? tok))
    (match tok
      [(list kind lexeme from to _)
       (list kind lexeme from to)])))
;; Smoke test: two integer literals joined by a binary operator.
;; Every expected entry has the shape (type text start end).  Judging by the
;; data, start and end are (line column) pairs with lines counted from 1 and
;; columns counted from 0, and `end` points just past the token's last character.
(check-equal? (dump-tokens "1 + 1")
'((NUMBER "1" (1 0) (1 1))
(OP "+" (1 2) (1 3))
(NUMBER "1" (1 4) (1 5))))
;; NL versus NEWLINE, plus INDENT/DEDENT bookkeeping.
;; A comment-only line ends in NL, a logical line ends in NEWLINE, the first
;; indented logical line yields INDENT, and end of input closes the block with
;; a DEDENT on line 4.
;; The two body lines are indented by exactly four spaces: that is what the
;; expected columns require (COMMENT starts at column 4, INDENT spans columns
;; 0-4, and "True" starts at column 4).
(check-equal? (dump-tokens (string-append "if False:\n"
                                          "    # NL\n"
                                          "    True = False # NEWLINE\n"))
              '((NAME "if" (1 0) (1 2))
                (NAME "False" (1 3) (1 8))
                (OP ":" (1 8) (1 9))
                (NEWLINE "\n" (1 9) (1 10))
                (COMMENT "# NL" (2 4) (2 8))
                (NL "\n" (2 8) (2 9))
                (INDENT "    " (3 0) (3 4))
                (NAME "True" (3 4) (3 8))
                (OP "=" (3 9) (3 10))
                (NAME "False" (3 11) (3 16))
                (COMMENT "# NEWLINE" (3 17) (3 26))
                (NEWLINE "\n" (3 26) (3 27))
                (DEDENT "" (4 0) (4 0))))
;; Inconsistent dedent must be rejected.
;; The function body is indented four spaces, then the next line dedents to two
;; spaces, a column that matches no enclosing indentation level.  The tokenizer
;; is expected to signal this with an exn:fail:indentation exception.
;; The text is a here-string: the leading spaces on the body lines are part of
;; the test input and must be preserved exactly, and the closing EOF must start
;; in column 0.
(check-exn exn:fail:indentation?
           (lambda ()
             (dump-tokens #<<EOF
def k(x):
    x += 2
  x += 5
EOF
                          )))
;; ---- Ordinary integer literals ----
;; Each prefixed literal (0x, 0b, 0o) and each leading-zero literal is expected
;; to come back as a single NUMBER token whose text is copied verbatim.

;; Hexadecimal literal compared with a decimal one.
(check-equal? (dump-tokens "0xff <= 255")
'((NUMBER "0xff" (1 0) (1 4))
(OP "<=" (1 5) (1 7))
(NUMBER "255" (1 8) (1 11))))
;; Binary literal.
(check-equal? (dump-tokens "0b10 <= 255")
'((NUMBER "0b10" (1 0) (1 4))
(OP "<=" (1 5) (1 7))
(NUMBER "255" (1 8) (1 11))))
;; 0o-prefixed literal next to a leading-zero literal.
(check-equal? (dump-tokens "0o123 <= 0123")
'((NUMBER "0o123" (1 0) (1 5))
(OP "<=" (1 6) (1 8))
(NUMBER "0123" (1 9) (1 13))))
;; "~" is emitted as its own OP, directly followed by the hex literal.
(check-equal? (dump-tokens "01234567 > ~0x15")
'(( NUMBER "01234567" (1 0) (1 8))
(OP ">" (1 9) (1 10))
(OP "~" (1 11) (1 12))
(NUMBER "0x15" (1 12) (1 16))))
;; "!=" is a single two-character OP.
(check-equal? (dump-tokens "2134568 != 01231515")
'((NUMBER "2134568" (1 0) (1 7))
(OP "!=" (1 8) (1 10))
(NUMBER "01231515" (1 11) (1 19))))
;; A minus sign is never folded into the literal: both "-" are separate OPs.
(check-equal? (dump-tokens "(-124561-1) & 0200000000")
'((OP "(" (1 0) (1 1))
(OP "-" (1 1) (1 2))
(NUMBER "124561" (1 2) (1 8))
(OP "-" (1 8) (1 9))
(NUMBER "1" (1 9) (1 10))
(OP ")" (1 10) (1 11))
(OP "&" (1 12) (1 13))
(NUMBER "0200000000" (1 14) (1 24))))
;; Hex digits a-f stay inside the NUMBER token.
(check-equal? (dump-tokens "0xdeadbeef != -1")
'((NUMBER "0xdeadbeef" (1 0) (1 10))
(OP "!=" (1 11) (1 13))
(OP "-" (1 14) (1 15))
(NUMBER "1" (1 15) (1 16))))
;; Hex literal mixing letters and digits.
(check-equal? (dump-tokens "0xdeadc0de & 012345")
'((NUMBER "0xdeadc0de" (1 0) (1 10))
(OP "&" (1 11) (1 12))
(NUMBER "012345" (1 13) (1 19))))
;; Upper-case hex digits, with "&" and "|" as operators.
(check-equal? (dump-tokens "0xFF & 0x15 | 1234")
'((NUMBER "0xFF" (1 0) (1 4))
(OP "&" (1 5) (1 6))
(NUMBER "0x15" (1 7) (1 11))
(OP "|" (1 12) (1 13))
(NUMBER "1234" (1 14) (1 18))))
;; ---- Long integers ----
;; A trailing "L" or "l" suffix is expected to stay inside the NUMBER token.

;; Upper-case suffix on zero.
(check-equal? (dump-tokens "x = 0L")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "0L" (1 4) (1 6))))
;; Long hex literal without a suffix.
(check-equal? (dump-tokens "x = 0xfffffffffff")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "0xfffffffffff" (1 4) (1 17))))
;; Lower-case suffix on a many-digit decimal.
(check-equal? (dump-tokens "x = 123141242151251616110l")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "123141242151251616110l" (1 4) (1 26))))
;; The leading minus is a separate OP; the suffix remains in the NUMBER.
(check-equal? (dump-tokens "x = -15921590215012591L")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(OP "-" (1 4) (1 5))
(NUMBER "15921590215012591L" (1 5) (1 23))))
;; ---- Floating point numbers ----
;; Every spelling below is expected to come back as one NUMBER token.

;; Digits on both sides of the point.
(check-equal? (dump-tokens "x = 3.14159")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "3.14159" (1 4) (1 11))))
;; Trailing point with no fraction digits.
(check-equal? (dump-tokens "x = 314159.")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "314159." (1 4) (1 11))))
;; Leading point with no integer digits.
(check-equal? (dump-tokens "x = .314159")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER ".314159" (1 4) (1 11))))
;; Lower-case exponent marker.
(check-equal? (dump-tokens "x = 3e14159")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "3e14159" (1 4) (1 11))))
;; Upper-case exponent marker.
(check-equal? (dump-tokens "x = 3E123")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "3E123" (1 4) (1 9))))
;; The "-" inside an exponent belongs to the NUMBER, unlike the free-standing
;; "+" between x and y, which is an OP.
(check-equal? (dump-tokens "x+y = 3e-1230")
'((NAME "x" (1 0) (1 1))
(OP "+" (1 1) (1 2))
(NAME "y" (1 2) (1 3))
(OP "=" (1 4) (1 5))
(NUMBER "3e-1230" (1 6) (1 13))))
;; Fraction and exponent combined.
(check-equal? (dump-tokens "x = 3.14e159")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "3.14e159" (1 4) (1 12))))
;; ---- String literals ----
;; The STRING token text includes the surrounding quotes and any prefix letters.

;; Empty strings in both quote styles, separated by ";".
(check-equal? (dump-tokens "x = ''; y = \"\"")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "''" (1 4) (1 6))
(OP ";" (1 6) (1 7))
(NAME "y" (1 8) (1 9))
(OP "=" (1 10) (1 11))
(STRING "\"\"" (1 12) (1 14))))
;; A quote character of the other kind inside a string does not end it.
(check-equal? (dump-tokens "x = '\"'; y = \"'\"")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "'\"'" (1 4) (1 7))
(OP ";" (1 7) (1 8))
(NAME "y" (1 9) (1 10))
(OP "=" (1 11) (1 12))
(STRING "\"'\"" (1 13) (1 16))))
;; An unescaped double quote ends the string early, so this input tokenizes as
;; STRING NAME STRING rather than as one string.
(check-equal? (dump-tokens "x = \"doesn't \"shrink\", does it\"")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "\"doesn't \"" (1 4) (1 14))
(NAME "shrink" (1 14) (1 20))
(STRING "\", does it\"" (1 20) (1 31))))
;; u / U prefixes with single quotes.
(check-equal? (dump-tokens "x = u'abc' + U'ABC'")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "u'abc'" (1 4) (1 10))
(OP "+" (1 11) (1 12))
(STRING "U'ABC'" (1 13) (1 19))))
;; u / U prefixes with double quotes.
(check-equal? (dump-tokens "y = u\"ABC\" + U\"ABC\"")
'((NAME "y" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "u\"ABC\"" (1 4) (1 10))
(OP "+" (1 11) (1 12))
(STRING "U\"ABC\"" (1 13) (1 19))))
;; All four case combinations of the two-letter "ur" prefix, single quotes.
(check-equal? (dump-tokens "x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "ur'abc'" (1 4) (1 11))
(OP "+" (1 12) (1 13))
(STRING "Ur'ABC'" (1 14) (1 21))
(OP "+" (1 22) (1 23))
(STRING "uR'ABC'" (1 24) (1 31))
(OP "+" (1 32) (1 33))
(STRING "UR'ABC'" (1 34) (1 41))))
;; Same with double quotes.  The input is written as an @-expression so the
;; embedded double quotes need no backslashes.
(check-equal? (dump-tokens @r{y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"})
'((NAME "y" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(STRING "ur\"abc\"" (1 4) (1 11))
(OP "+" (1 12) (1 13))
(STRING "Ur\"ABC\"" (1 14) (1 21))
(OP "+" (1 22) (1 23))
(STRING "uR\"ABC\"" (1 24) (1 31))
(OP "+" (1 32) (1 33))
(STRING "UR\"ABC\"" (1 34) (1 41))))
;; ---- Function definitions ----
;; Keywords such as "def" and "pass" are reported as plain NAME tokens.

;; Default arguments and a *args parameter; "*" is its own OP.
(check-equal? (dump-tokens "def d22(a, b, c=2, d=2, *k): pass")
'((NAME "def" (1 0) (1 3))
(NAME "d22" (1 4) (1 7))
(OP "(" (1 7) (1 8))
(NAME "a" (1 8) (1 9))
(OP "," (1 9) (1 10))
(NAME "b" (1 11) (1 12))
(OP "," (1 12) (1 13))
(NAME "c" (1 14) (1 15))
(OP "=" (1 15) (1 16))
(NUMBER "2" (1 16) (1 17))
(OP "," (1 17) (1 18))
(NAME "d" (1 19) (1 20))
(OP "=" (1 20) (1 21))
(NUMBER "2" (1 21) (1 22))
(OP "," (1 22) (1 23))
(OP "*" (1 24) (1 25))
(NAME "k" (1 25) (1 26))
(OP ")" (1 26) (1 27))
(OP ":" (1 27) (1 28))
(NAME "pass" (1 29) (1 33))))
;; An identifier with digits and a trailing underscore; "**" is a single
;; two-character OP, distinct from "*".
(check-equal? (dump-tokens "def d01v_(a=1, *k, **w): pass")
'((NAME "def" (1 0) (1 3))
(NAME "d01v_" (1 4) (1 9))
(OP "(" (1 9) (1 10))
(NAME "a" (1 10) (1 11))
(OP "=" (1 11) (1 12))
(NUMBER "1" (1 12) (1 13))
(OP "," (1 13) (1 14))
(OP "*" (1 15) (1 16))
(NAME "k" (1 16) (1 17))
(OP "," (1 17) (1 18))
(OP "**" (1 19) (1 21))
(NAME "w" (1 21) (1 22))
(OP ")" (1 22) (1 23))
(OP ":" (1 23) (1 24))
(NAME "pass" (1 25) (1 29))))
;; ---- Comparison operators ----
;; One long chained condition.  Symbolic comparisons (<, >, ==, >=, <=, !=) are
;; OP tokens, while the word operators (and, in, not, is, or) are NAME tokens;
;; "not in" and "is not" each arrive as two separate NAMEs.
;; The input is split over two strings only to keep the source line short.
(check-equal? (dump-tokens (string-append
"if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
"1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass"))
'((NAME "if" (1 0) (1 2))
(NUMBER "1" (1 3) (1 4))
(OP "<" (1 5) (1 6))
(NUMBER "1" (1 7) (1 8))
(OP ">" (1 9) (1 10))
(NUMBER "1" (1 11) (1 12))
(OP "==" (1 13) (1 15))
(NUMBER "1" (1 16) (1 17))
(OP ">=" (1 18) (1 20))
(NUMBER "5" (1 21) (1 22))
(OP "<=" (1 23) (1 25))
(NUMBER "0x15" (1 26) (1 30))
(OP "<=" (1 31) (1 33))
(NUMBER "0x12" (1 34) (1 38))
(OP "!=" (1 39) (1 41))
(NUMBER "1" (1 42) (1 43))
(NAME "and" (1 44) (1 47))
(NUMBER "5" (1 48) (1 49))
(NAME "in" (1 50) (1 52))
(NUMBER "1" (1 53) (1 54))
(NAME "not" (1 55) (1 58))
(NAME "in" (1 59) (1 61))
(NUMBER "1" (1 62) (1 63))
(NAME "is" (1 64) (1 66))
(NUMBER "1" (1 67) (1 68))
(NAME "or" (1 69) (1 71))
(NUMBER "5" (1 72) (1 73))
(NAME "is" (1 74) (1 76))
(NAME "not" (1 77) (1 80))
(NUMBER "1" (1 81) (1 82))
(OP ":" (1 82) (1 83))
(NAME "pass" (1 84) (1 88))))
;; ---- Shift operators ----
;; "<<" and ">>" are single two-character OPs.
(check-equal? (dump-tokens "x = 1 << 1 >> 5")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "1" (1 4) (1 5))
(OP "<<" (1 6) (1 8))
(NUMBER "1" (1 9) (1 10))
(OP ">>" (1 11) (1 13))
(NUMBER "5" (1 14) (1 15))))
;; ---- Additive operators ----
;; "+" and "-" between numbers and names, ending with a subscript a[5] whose
;; brackets are OP tokens.
(check-equal? (dump-tokens "x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "1" (1 4) (1 5))
(OP "-" (1 6) (1 7))
(NAME "y" (1 8) (1 9))
(OP "+" (1 10) (1 11))
(NUMBER "15" (1 12) (1 14))
(OP "-" (1 15) (1 16))
(NUMBER "01" (1 17) (1 19))
(OP "+" (1 20) (1 21))
(NUMBER "0x124" (1 22) (1 27))
(OP "+" (1 28) (1 29))
(NAME "z" (1 30) (1 31))
(OP "+" (1 32) (1 33))
(NAME "a" (1 34) (1 35))
(OP "[" (1 35) (1 36))
(NUMBER "5" (1 36) (1 37))
(OP "]" (1 37) (1 38))))
;; ---- Multiplicative operators ----
;; No whitespace between tokens: "//" must be recognized as one OP rather than
;; as two "/" tokens.
(check-equal? (dump-tokens "x = 1//1*1/5*12%0x12")
'((NAME "x" (1 0) (1 1))
(OP "=" (1 2) (1 3))
(NUMBER "1" (1 4) (1 5))
(OP "//" (1 5) (1 7))
(NUMBER "1" (1 7) (1 8))
(OP "*" (1 8) (1 9))
(NUMBER "1" (1 9) (1 10))
(OP "/" (1 10) (1 11))
(NUMBER "5" (1 11) (1 12))
(OP "*" (1 12) (1 13))
(NUMBER "12" (1 13) (1 15))
(OP "%" (1 15) (1 16))
(NUMBER "0x12" (1 16) (1 20))))
;; ---- Unary operators ----
;; "~" and unary "-" are emitted as OP tokens in front of the NUMBER they apply
;; to; "|1" shows an operator touching its operand with no space between.
(check-equal? (dump-tokens "~1 ^ 1 & 1 |1 ^ -1")
'((OP "~" (1 0) (1 1))
(NUMBER "1" (1 1) (1 2))
(OP "^" (1 3) (1 4))
(NUMBER "1" (1 5) (1 6))
(OP "&" (1 7) (1 8))
(NUMBER "1" (1 9) (1 10))
(OP "|" (1 11) (1 12))
(NUMBER "1" (1 12) (1 13))
(OP "^" (1 14) (1 15))
(OP "-" (1 16) (1 17))
(NUMBER "1" (1 17) (1 18))))
;; A run of "---" yields three separate one-character "-" OPs, whereas "//" and
;; "**" are each a single two-character OP.
(check-equal? (dump-tokens "-1*1/1+1*1//1 - ---1**1")
'((OP "-" (1 0) (1 1))
(NUMBER "1" (1 1) (1 2))
(OP "*" (1 2) (1 3))
(NUMBER "1" (1 3) (1 4))
(OP "/" (1 4) (1 5))
(NUMBER "1" (1 5) (1 6))
(OP "+" (1 6) (1 7))
(NUMBER "1" (1 7) (1 8))
(OP "*" (1 8) (1 9))
(NUMBER "1" (1 9) (1 10))
(OP "//" (1 10) (1 12))
(NUMBER "1" (1 12) (1 13))
(OP "-" (1 14) (1 15))
(OP "-" (1 16) (1 17))
(OP "-" (1 17) (1 18))
(OP "-" (1 18) (1 19))
(NUMBER "1" (1 19) (1 20))
(OP "**" (1 20) (1 22))
(NUMBER "1" (1 22) (1 23))))
;; ---- Selectors ----
;; Attribute access, subscripting and a call across two lines.  The first line
;; ends with a NEWLINE token; the second line has no trailing newline in the
;; input, and no NEWLINE token is expected after the final ")".
(check-equal? (dump-tokens "import sys, time\nx = sys.modules['time'].time()")
'((NAME "import" (1 0) (1 6))
(NAME "sys" (1 7) (1 10))
(OP "," (1 10) (1 11))
(NAME "time" (1 12) (1 16))
(NEWLINE "\n" (1 16) (1 17))
(NAME "x" (2 0) (2 1))
(OP "=" (2 2) (2 3))
(NAME "sys" (2 4) (2 7))
(OP "." (2 7) (2 8))
(NAME "modules" (2 8) (2 15))
(OP "[" (2 15) (2 16))
(STRING "'time'" (2 16) (2 22))
(OP "]" (2 22) (2 23))
(OP "." (2 23) (2 24))
(NAME "time" (2 24) (2 28))
(OP "(" (2 28) (2 29))
(OP ")" (2 29) (2 30))))
;; ---- Decorators ----
;; "@" is an OP token, immediately followed by the decorator's NAME.
(check-equal? (dump-tokens "@staticmethod\ndef foo(x,y): pass")
'((OP "@" (1 0) (1 1))
(NAME "staticmethod" (1 1) (1 13))
(NEWLINE "\n" (1 13) (1 14))
(NAME "def" (2 0) (2 3))
(NAME "foo" (2 4) (2 7))
(OP "(" (2 7) (2 8))
(NAME "x" (2 8) (2 9))
(OP "," (2 9) (2 10))
(NAME "y" (2 10) (2 11))
(OP ")" (2 11) (2 12))
(OP ":" (2 12) (2 13))
(NAME "pass" (2 14) (2 18))))
;; ---- Mixed tab/space indentation ----
;; Line 2 is indented with one tab; line 3 with eight spaces followed by a tab.
;; The second INDENT token therefore carries nine characters of text and spans
;; columns 0-9, with "pass" starting at column 9.  Both open blocks are closed
;; by DEDENT tokens at end of input (line 4).
(check-equal? (dump-tokens "def f():\n\tif x\n        \tpass")
              '((NAME "def" (1 0) (1 3))
                (NAME "f" (1 4) (1 5))
                (OP "(" (1 5) (1 6))
                (OP ")" (1 6) (1 7))
                (OP ":" (1 7) (1 8))
                (NEWLINE "\n" (1 8) (1 9))
                (INDENT "\t" (2 0) (2 1))
                (NAME "if" (2 1) (2 3))
                (NAME "x" (2 4) (2 5))
                (NEWLINE "\n" (2 5) (2 6))
                (INDENT "        \t" (3 0) (3 9))
                (NAME "pass" (3 9) (3 13))
                (DEDENT "" (4 0) (4 0))
                (DEDENT "" (4 0) (4 0))))