(module parser mzscheme
(provide parse-css-port parse-css-file parse-css-string unparse-css unparse-css-to-string)
(require (lib "lex.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools"))
(lib "cfg-parser.ss" "algol60")
(lib "yacc.ss" "parser-tools")
(lib "readerr.ss" "syntax")
(lib "match.ss")
(only (lib "13.ss" "srfi") string-drop-right)
(planet "io.ss" ("dherman" "io.plt")))
(define-lex-abbrevs
(atoz (:or (:/ #\a #\z) (:/ #\A #\Z)))
(atof (:or (:/ #\a #\f) (:/ #\A #\F)))
(digit (:/ #\0 #\9))
(hex (:or digit atof))
(non-ascii (:/ #\u0080 #\u00ff))
(unicode (:seq #\\ (:** 1 6 hex)
(:? (:or (:seq #\return #\newline)
#\space #\tab #\return #\newline #\page))))
(escape (:or unicode (:seq #\\ (:~ #\return #\newline #\page hex))))
(nmstart (:or #\_ atoz non-ascii escape))
(nmchar (:or #\_ atoz digit #\- non-ascii escape))
(string1 (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
(:seq #\\ nl)
escape))
#\"))
(string2 (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
(:seq #\\ nl)
escape))
#\'))
(invalid1 (:seq #\" (:* (:or (:~ #\newline #\return #\page #\")
(:seq #\\ nl)
escape))))
(invalid2 (:seq #\' (:* (:or (:~ #\newline #\return #\page #\')
(:seq #\\ nl)
escape))))
(ident (:seq (:? #\-) nmstart (:* nmchar)))
(name (:+ nmchar))
(num (:or (:+ digit)
(:seq (:* digit) #\. (:+ digit))))
(String (:or string1 string2))
(invalid (:or invalid1 invalid2))
(url (:* (:or #\! #\# #\$ #\% #\&
(char-range #\* #\~) non-ascii
escape)))
(s (:or #\space #\tab #\return #\newline #\page))
(w (:* s))
(nl (:or #\newline (:seq #\return #\newline) #\return #\page))
(A (:or #\a #\A (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\1) (:seq #\6 #\1)) (:? (:or (:seq #\return #\newline) s)))))
(B (:or #\b #\B (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\2) (:seq #\6 #\2)) (:? (:or (:seq #\return #\newline) s)))))
(C (:or #\c #\C (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\3) (:seq #\6 #\3)) (:? (:or (:seq #\return #\newline) s)))))
(D (:or #\d #\D (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\4) (:seq #\6 #\4)) (:? (:or (:seq #\return #\newline) s)))))
(E (:or #\e #\E (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\5) (:seq #\6 #\5)) (:? (:or (:seq #\return #\newline) s)))))
(F (:or #\f #\F (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\6) (:seq #\6 #\6)) (:? (:or (:seq #\return #\newline) s)))))
(G (:or #\g #\G (:seq #\\ #\\ #\g) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\7) (:seq #\6 #\7)) (:? (:or (:seq #\return #\newline) s)))))
(H (:or #\h #\H (:seq #\\ #\\ #\h) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\8) (:seq #\6 #\8)) (:? (:or (:seq #\return #\newline) s)))))
(I (:or #\i #\I (:seq #\\ #\\ #\i) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\9) (:seq #\6 #\9)) (:? (:or (:seq #\return #\newline) s)))))
(J (:or #\j #\J (:seq #\\ #\\ #\j) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\a) (:seq #\6 #\a)) (:? (:or (:seq #\return #\newline) s)))))
(K (:or #\k #\K (:seq #\\ #\\ #\k) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\b) (:seq #\6 #\b)) (:? (:or (:seq #\return #\newline) s)))))
(L (:or #\l #\L (:seq #\\ #\\ #\l) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\c) (:seq #\6 #\c)) (:? (:or (:seq #\return #\newline) s)))))
(M (:or #\m #\M (:seq #\\ #\\ #\m) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\d) (:seq #\6 #\d)) (:? (:or (:seq #\return #\newline) s)))))
(N (:or #\n #\N (:seq #\\ #\\ #\n) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\e) (:seq #\6 #\e)) (:? (:or (:seq #\return #\newline) s)))))
(O (:or #\o #\O (:seq #\\ #\\ #\o) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\4 #\f) (:seq #\6 #\f)) (:? (:or (:seq #\return #\newline) s)))))
(P (:or #\p #\P (:seq #\\ #\\ #\p) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\0) (:seq #\7 #\0)) (:? (:or (:seq #\return #\newline) s)))))
(Q (:or #\q #\Q (:seq #\\ #\\ #\q) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\1) (:seq #\7 #\1)) (:? (:or (:seq #\return #\newline) s)))))
(R (:or #\r #\R (:seq #\\ #\\ #\r) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\2) (:seq #\7 #\2)) (:? (:or (:seq #\return #\newline) s)))))
(S (:or #\s #\S (:seq #\\ #\\ #\s) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\3) (:seq #\7 #\3)) (:? (:or (:seq #\return #\newline) s)))))
(T (:or #\t #\T (:seq #\\ #\\ #\t) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\4) (:seq #\7 #\4)) (:? (:or (:seq #\return #\newline) s)))))
(U (:or #\u #\U (:seq #\\ #\\ #\u) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\5) (:seq #\7 #\5)) (:? (:or (:seq #\return #\newline) s)))))
(V (:or #\v #\V (:seq #\\ #\\ #\v) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\6) (:seq #\7 #\6)) (:? (:or (:seq #\return #\newline) s)))))
(W (:or #\w #\W (:seq #\\ #\\ #\w) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\7) (:seq #\7 #\7)) (:? (:or (:seq #\return #\newline) s)))))
(X (:or #\x #\X (:seq #\\ #\\ #\x) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\8) (:seq #\7 #\8)) (:? (:or (:seq #\return #\newline) s)))))
(Y (:or #\y #\Y (:seq #\\ #\\ #\y) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\9) (:seq #\7 #\9)) (:? (:or (:seq #\return #\newline) s)))))
(Z (:or #\z #\Z (:seq #\\ #\\ #\z) (:seq #\\ (:** 0 4 #\0) (:or (:seq #\5 #\a) (:seq #\7 #\a)) (:? (:or (:seq #\return #\newline) s)))))
)
(define-tokens non-terminals (<angle> <dimension> <ems> <exs> <freq>
<function> <hash> <ident> <invalid>
<length> <number> <percentage>
<string> <time> <uri>
CDO CDC CHARSET-SYM
COLON
COMMA
DASHMATCH
DOT
EOF
EQUAL
IMPORT-SYM
IMPORTANT-SYM
INCLUDES
GREATER
LBRACE
LBRACKET
MEDIA-SYM
MINUS
PAGE-SYM
PLUS
S SLASH
RBRACE
RBRACKET
RPAREN
SEMICOLON
STAR
UNPARSEABLE))
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
(define-syntax (token stx)
(syntax-case stx ()
[(_ name val)
(identifier? (syntax name))
(let ([name (syntax name)])
(with-syntax ([token-name (datum->syntax-object
name
(string->symbol
(format "token-~a" (syntax-e name))))]
[source-name (datum->syntax-object name 'source-name)]
[start-pos (datum->syntax-object name 'start-pos)]
[end-pos (datum->syntax-object name 'end-pos)])
(syntax
(token-name
(datum->syntax-object #f val
(list
source-name
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos)
(- (position-offset end-pos)
(position-offset start-pos)))
stx-for-original-property)))))]))
(define-syntax (ttoken stx)
(syntax-case stx ()
[(_ name)
(identifier? (syntax name))
(syntax (token name 'name))]))
(define (lex source-name)
(letrec ([loop
(lexer
((:seq "url(" w String w ")") (token <uri> lexeme))
((:seq "url(" w url w ")") (token <uri> lexeme))
((:: "/*" (complement (:: any-string "*/" any-string)) "*/")
(loop input-port))
((:: (:+ s) "/*" (complement (:: any-string "*/" any-string)) "*/")
(token S lexeme))
((:seq "/*"
(:* (:~ #\*)) (:+ #\*)
(:* (:~ #\/ #\*) (:* (:~ #\*)) (:+ #\*))
"/")
(loop input-port))
((:seq (:+ s) "/*"
(:~ #\*) (:+ #\*)
(:* (:~ #\/ #\*) (:* (:~ #\*)) (:+ #\*))
"/") (token S lexeme))
((:+ s) (token S lexeme))
("<!--" (token CDO '<!--))
("--!>" (token CDC '--!>))
("~=" (token INCLUDES '~=))
("|=" (token DASHMATCH (string->symbol "|=")))
((:seq w #\{) (token LBRACE '|{|))
((:seq w #\}) (token RBRACE '|}|))
((:seq w #\+) (token PLUS '+))
((:seq w #\>) (token GREATER '>))
((:seq w #\,) (token COMMA '|,|))
(String (token <string> lexeme))
(invalid (token <invalid> lexeme))
(ident (token <ident> lexeme))
((:seq #\# name) (token <hash> lexeme))
("@import" (token IMPORT-SYM '@import))
("@page" (token PAGE-SYM '@page))
("@media" (token MEDIA-SYM '@media))
("@charset" (token CHARSET-SYM '@charset))
((:seq #\! w
"important") (token IMPORTANT-SYM '!important))
((:seq num E M) (token <ems> lexeme))
((:seq num E X) (token <exs> lexeme))
((:seq num P X) (token <length> lexeme))
((:seq num C M) (token <length> lexeme))
((:seq num M M) (token <length> lexeme))
((:seq num I N) (token <length> lexeme))
((:seq num P T) (token <length> lexeme))
((:seq num P C) (token <length> lexeme))
((:seq num D E G) (token <angle> lexeme))
((:seq num R A D) (token <angle> lexeme))
((:seq num G R A D) (token <angle> lexeme))
((:seq num M S) (token <time> lexeme))
((:seq num S) (token <time> lexeme))
((:seq num H Z) (token <freq> lexeme))
((:seq num K H Z) (token <freq> lexeme))
((:seq num ident) (token <dimension> lexeme))
((:seq num #\%) (token <percentage> lexeme))
((:seq num) (token <number> lexeme))
((:seq ident "(") (token <function> lexeme))
((eof) (ttoken EOF))
(#\; (token SEMICOLON '| (#\: (token COLON '|:|))
(#\/ (token SLASH '/))
(#\- (token MINUS '-))
(#\+ (token PLUS '+))
(#\> (token GREATER '>))
(#\. (token DOT '|.|))
(#\* (token STAR '*))
(#\[ (token LBRACKET '|[|))
(#\] (token RBRACKET '|]|))
(#\= (token EQUAL '=))
(#\) (token RPAREN '|)|))
(any-char (token UNPARSEABLE (string->symbol lexeme)))
)])
loop))
(define parse
(cfg-parser
(tokens non-terminals)
(start stylesheet)
(end EOF)
(error (lambda (a b stx)
(display stx)
(raise-read-error (format "parse error near ~a" (syntax-e stx))
(syntax-source stx)
(syntax-line stx)
(syntax-column stx)
(syntax-position stx)
(syntax-span stx))))
(suppress)
(grammar
(S* ((S* S) 'skip)
(() 'skip))
(S+ ((S+ S) 'skip)
((S) 'skip))
(stylesheet ((stylesheet1 stylesheet2* stylesheet3*) (make-css:stylesheet $1 (reverse $2) (reverse $3)))
(( stylesheet2* stylesheet3*) (make-css:stylesheet #f (reverse $1) (reverse $2))))
(stylesheet1 ((CHARSET-SYM S* <string> S* SEMICOLON) (syntax-e $3)))
(stylesheet2* ((s-cdo-cds* stylesheet22*) (reverse $2))) (s-cdo-cds ((S) 'S)
((CDO) 'CDO)
((CDC) 'CDC))
(s-cdo-cds* ((s-cdo-cds* s-cdo-cds) (cons $2 $1))
(() '()))
(stylesheet22 ((import s-cdo-cds*) $1)
((import) $1))
(stylesheet22* ((stylesheet22* stylesheet22) (cons $2 $1))
(() '()))
(stylesheet3 ((ruleset s-cdo-cds*) $1)
((media s-cdo-cds*) $1)
((page s-cdo-cds*) $1))
(stylesheet3* ((stylesheet3* stylesheet3) (cons $2 $1))
(() '()))
(import ((IMPORT-SYM S* string-or-uri S* import1 SEMICOLON S*) (make-css:import $3 (reverse $5)))
((IMPORT-SYM S* string-or-uri S* SEMICOLON S*) (make-css:import $3 '())))
(string-or-uri ((<string>) (syntax-e $1))
((<uri>) (syntax-e $1)))
(import1 ((medium comma-medium-list) (cons $1 $2)))
(media ((MEDIA-SYM S* medium comma-medium-list LBRACE S* ruleset-start RBRACE S*)
(make-css:media (cons $3 (reverse $4)) (reverse $7))))
(comma-medium-list ((comma-medium-list COMMA S* medium) (cons $4 $1))
(() '()))
(medium ((<ident> S*) (make-css:medium (syntax-e $1))))
(page ((PAGE-SYM S* pseudo-page S* LBRACE S* declaration semi-declaration-star RBRACE S*)
(make-css:page $3 (cons $7 (reverse $8))))
((PAGE-SYM S* S* LBRACE S* declaration semi-declaration-star RBRACE S*)
(make-css:page #f (cons $6 (reverse $7)))))
(pseudo-page ((COLON <ident>) (make-css:pseudo-page (syntax-e $2))))
(combinator ((PLUS S*) (make-css:combinator '+ 'hole)) ((GREATER S*) (make-css:combinator '> 'hole)) ((S+) (make-css:combinator '| | 'hole))) (unary-operator ((MINUS) '-)
((PLUS) '+))
(property ((<ident> S*) (syntax-e $1)))
(ruleset ((selector comma-selector-star LBRACE S* declaration semi-declaration-star RBRACE S*)
(make-css:ruleset (cons $1 (reverse $2)) (cons $5 (reverse $6)))))
(comma-selector-star ((comma-selector-star COMMA S* selector) (cons $4 $1))
(() '()))
(semi-declaration-star ((semi-declaration-star SEMICOLON S* declaration) (cons $4 $1))
(() '()))
(ruleset-start ((ruleset-start ruleset) (cons $1 $2))
(() '()))
(selector ((simple-selector combinator-simple-selector-star)
(make-css:selector $1 $2)))
(combinator-simple-selector-star ((combinator-simple-selector-star combinator simple-selector)
(begin (set-css:combinator-simple-selector! $2 $3)
(cons $2 $1)))
(() '()))
(simple-selector ((element-name component-star)
(make-css:simple-selector $1 (reverse $2)))
((component-plus)
(make-css:simple-selector '* (reverse $1))))
(component ((<hash>) (make-css:id (syntax-e $1))) ((class) $1)
((attrib) $1)
((pseudo) $1))
(component-star ((component-star component) (cons $2 $1))
(() '()))
(component-plus ((component-star component) (cons $2 $1))
((component) (list $1)))
(class ((DOT <ident>) (make-css:class (syntax-e $2))))
(element-name ((<ident>) (syntax-e $1))
((STAR) '*))
(attrib ((LBRACKET <ident> S* RBRACKET) (make-css:attrib 'simple (syntax-e $2) #f))
((LBRACKET <ident> S* attrib1 RBRACKET) (make-css:attrib (car $4) (syntax-e $2) (cdr $4))))
(attrib1 ((equal/includes/dashmatch S* ident/string S*) (cons $1 $3)))
(equal/includes/dashmatch ((EQUAL) 'exact)
((INCLUDES) 'partial)
((DASHMATCH) 'lang))
(ident/string ((<ident>) (syntax-e $1))
((<string>) (syntax-e $1)))
(pseudo ((COLON <ident>) (make-css:pseudo (syntax-e $2) #f))
((COLON <function> S* RPAREN) (make-css:pseudo (syntax-e $2) '()))
((COLON <function> S* <ident> S* RPAREN) (make-css:pseudo (syntax-e $2) (syntax-e $4))))
(declaration ((property COLON S* expr) (make-css:declaration $1 $4 #f))
((property COLON S* expr prio) (make-css:declaration $1 $4 #t))
(() (make-css:empty-declaration)))
(prio ((IMPORTANT-SYM S*) 'IMPORTANT))
(expr ((term operator-term-star) (make-css:expr $1 (reverse $2))))
(operator-term-star ((operator-term-star operator term) (begin (set-css:operator-term! $2 $3)
(cons $2 $1)))
(() '()))
(operator ((SLASH S*) (make-css:operator '/ ' 'hole))
((COMMA S*) (make-css:operator '|,| 'hole))
(() (make-css:operator '| | 'hole)))
(term ((term2) (make-css:term $1))
((unary-operator term1) (make-css:term
(make-css:unary-operator
$1 (syntax-e $2)))) ((term1) (make-css:term (syntax-e $1))))
(term1 ((<number> S*) $1)
((<percentage> S*) $1)
((<length> S*) $1)
((<ems> S*) $1)
((<exs> S*) $1)
((<angle> S*) $1)
((<time> S*) $1)
((<freq> S*) $1)
((<string> S*) $1)
((<ident> S*) $1)
((<uri> S*) $1))
(term2 ((hexcolor) (syntax-e $1))
((function) $1))
(function ((<function> S* expr RPAREN S*) (make-css:function
(string-drop-right (syntax-e $1) 1) $3)))
(hexcolor ((<hash> S*) $1)))))
(define-syntax (define-css-structs stx)
(syntax-case stx ()
[(_ (struct-name (field ...)) ...)
(with-syntax ([(css:struct ...) (map (lambda (id)
(datum->syntax-object
id
(string->symbol
(format "css:~a" (syntax-e id)))))
(syntax->list (syntax (struct-name ...))))])
(syntax (begin (define-struct css:struct (field ...) (make-inspector)) ...
(provide (struct css:struct (field ...)) ...))))]))
(define-css-structs
(stylesheet (charset imports ruleset/media/page))
(import (address mediums))
(media (mediums rulesets))
(medium (name))
(page (pseudo-page declarations))
(pseudo-page (name))
(ruleset (selectors declarations))
(selector (simple-selector combinators))
(combinator (combinator simple-selector)) (simple-selector (name components)) (class (name))
(attrib (type name value)) (pseudo (name argument)) (id (name))
(declaration (property expr important)) (empty-declaration ())
(expr (term operators))
(term (term))
(operator (operator term))
(unary-operator (operator term))
(function (name arg)))
(define (unparse/seperator l s)
(cond
[(null? l) 'done]
[(null? (cdr l)) (unparse-css (car l))]
[else (begin
(unparse-css (car l))
(display s)
(unparse/seperator (cdr l) s))]))
(define (unparse/terminator l t)
(cond
[(null? l) (display t)]
[(null? (cdr l)) (begin
(unparse-css (car l))
(display t))]
[else (begin
(unparse-css (car l))
(display t)
(unparse/terminator (cdr l) t))]))
(define (unparse-css css)
(define unparse unparse-css)
(match css
[($ css:stylesheet charset imports ruleset/media/page)
(begin
(when charset
(printf "@charset ~s;\n" charset))
(for-each unparse imports)
(for-each unparse ruleset/media/page))]
[($ css:import address mediums)
(printf "@import ~s " address)
(unparse/seperator mediums ", ")
(display ";\n")]
[($ css:media mediums rulesets)
(printf "@media ")
(unparse/seperator mediums ", ")
(display " { ")
(unparse/terminator rulesets "\n")
(display "}\n")]
[($ css:medium name)
(display name)]
[($ css:page pseudo-page declarations)
(printf "@page ")
(when pseudo-page
(unparse pseudo-page))
(display " { ")
(unparse/seperator declarations ";\n")
(display "}\n")]
[($ css:pseudo-page name)
(printf ":~a" name)]
[($ css:ruleset selectors declarations)
(unparse/seperator selectors ", ")
(display " {\n")
(unparse/seperator declarations ";\n")
(display "}\n")]
[($ css:selector simple-selector combinators)
(unparse simple-selector)
(for-each unparse combinators)]
[($ css:combinator comb simple-selector)
(display comb)
(unparse simple-selector)]
[($ css:simple-selector name components)
(display name)
(for-each unparse components)]
[($ css:class name)
(printf ".~a" name)]
[($ css:attrib type name value)
(printf "[~a" name)
(case type
[(simple) 'skip]
[(exact) (printf "=~a" value)]
[(partial) (printf "~~=\"~a\"" value)]
[(lang) (printf "lang|=\"~a\"" value)])
(display #\])]
[($ css:pseudo name argument)
(display #\:)
(display name)
(match argument
[#f 'skip]
[() (display "()")]
[(name) (printf "(~a)" name)])]
[($ css:id name)
(display name)] [($ css:declaration property expr important)
(display property)
(display ": ")
(unparse expr)
(when important
(display " !important"))]
[($ css:empty-declaration)
'skip]
[($ css:expr term operators)
(unparse term)
(for-each unparse operators)]
[($ css:term t)
(if (struct? t)
(unparse t)
(display t))]
[($ css:operator op term)
(display op)
(unparse term)]
[($ css:unary-operator op term)
(display op)
(unparse term)]
[($ css:function name arg)
(display name)
(display "(")
(unparse arg)
(display ")")
]))
(define (parse-css-port port file)
(let ([lexer (lex file)])
(port-count-lines! port)
(parse
(lambda ()
(let loop ()
(let ([v (lexer port)])
(if (void? v)
(loop)
v)))))))
(define (parse-css-file file)
(with-input-from-file file
(lambda ()
(parse-css-port (current-input-port)
(path->complete-path file)))))
(define (parse-css-string string)
(let ([port (open-input-string string)])
(parse-css-port port (object-name port))))
(define (unparse-css-to-string css)
(with-output-to-string
(unparse-css css)))
(print-struct #t)
)