caml.lua (3685B)
-- Lexer definition for Caml-family source code, built from LPeg patterns.
-- The chunk receives its arguments via `...`, forwards them to `lexer.new`,
-- registers its rules in priority order and returns the lexer object.
-- NOTE(review): `lpeg` is used as a global here; presumably the host
-- environment provides it -- confirm before adding an explicit require.
local l = require("lexer")
local lex = l.new(...)
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

--- Tag `patt` with the token name `name` on this lexer.
local function T(name, patt)
  return lex:tag(name, patt)
end

-- Basic character classes.
local any = P(1)
local bin = R"01"
local oct = R"07"
local dec = R"09"
local hex = R("09", "AF", "af")
local alpha = R("AZ", "az")
local letter = alpha + dec + "_"
-- A word starts with a letter or underscore and continues with `letter`s.
local word = (alpha + P"_") * letter^0
local hws = S"\t " -- horizontal whitespace
local vws = S"\n\r" -- vertical whitespace
local ws = hws + vws

--- Build a pattern matching `s` case-insensitively.
-- Each character of `s` is matched in either its lower- or upper-case form.
-- For an empty string the result is the always-succeeding pattern.
local function I(s)
  local p = P(true)
  for i = 1, #s do
    local c = s:sub(i, i)
    p = p * (P(c:lower()) + P(c:upper()))
  end
  return p
end

local whitespace = T("whitespace", ws^1)

-- Comments: `(* ... *)`, nestable, with a few highlighted marker words.
-- The marker must not be followed by a word character. `-letter` is used
-- instead of `#(any - letter)` so that a marker sitting at the very end of
-- the input (e.g. an unterminated comment) is still recognised.
local comment_keyword = T("comment_keyword",
  (I"todo" + I"xxx" + I"fixme") * -letter)
local comment_text = T("comment_text",
  (any - comment_keyword - P"(*" - P"*)")^1)
local comment_open = T("comment_text", P"(*")
local comment_close = T("comment_text", P"*)")
-- Recursive grammar: V(1) re-enters the rule to handle nested comments.
-- The closing delimiter is optional, so an unterminated comment still lexes.
local comment = P{
  comment_open * (comment_text + comment_keyword + V(1))^0 * comment_close^-1
}

-- Integer literals. Digits may be separated by underscores after the first.
local intlit_bin = P"0" * S"Bb" * bin * (bin + "_")^0
local intlit_oct = P"0" * S"Oo" * oct * (oct + "_")^0
local intlit_dec = dec * (dec + "_")^0
local intlit_hex = P"0" * S"Xx" * hex * (hex + "_")^0
local intlit_suffix = P"l" + P"L" + P"n"
-- LPeg choice is ordered: every prefixed form starts with "0", which
-- `intlit_dec` would also accept, so all prefixed forms must be tried
-- before the plain decimal one (otherwise "0x1F" lexes as "0" + "x1F").
local intlit = (intlit_bin + intlit_oct + intlit_hex + intlit_dec)
  * intlit_suffix^-1

-- TODO: fltlit
local numlit = T("numlit", intlit)

-- Escape sequences shared by character and string literals.
local escape =
    P"\\" * S"\\\"'nrtb "
  + P"\\" * dec * dec * dec
  + P"\\x" * hex * hex
  + P"\\o" * oct * oct * oct
-- Any other backslash + character pair is tagged separately.
local bad_escape = T("bad_escape", P"\\" * any)

-- Character literals: a single character or escape between single quotes.
local chrlit_escape = T("chrlit_escape", escape)
local chrlit_delim = T("chrlit_delim", P"'")
local chrlit_text = T("chrlit_text", any - vws - S"'\\")
local chrlit = chrlit_delim * (chrlit_text + chrlit_escape + bad_escape)
  * chrlit_delim

-- String literals: additionally accept `\u{...}` escapes and a backslash
-- followed by a newline and optional horizontal whitespace.
local strlit_escape = T("strlit_escape",
  escape + P"\\u{" * hex^1 * P"}" + P"\\\n" * hws^0)
local strlit_delim = T("strlit_delim", P"\"")
local strlit_text = T("strlit_text", (any - vws - S"\"\\")^1)
local strlit = strlit_delim * (strlit_text + strlit_escape + bad_escape)^0
  * strlit_delim

local delimiter = T("delimiter", S",;(){}")

--[[
local core_operator_char = S"$&*+-/=>@^|"
local operator_char = S"~!?%<:." + core_operator_char
local infix = (core_operator_char + S"%<") * operator_char^0
	+ P"#" * operator_char^1
local prefix = P"!" * operator_char^0 + S"?~" * operator_char^1
local operator = T("operator", infix + prefix)
--]]
-- Simplified operator rule: each operator character is its own token.
local operator = T("operator", S"$&*+-/=>@^|~!?%<:.#[]")

local keyword = T("keyword", l.word_match{
  "and", "as", "assert", "asr", "begin", "class",
  "constraint", "do", "done", "downto", "else", "end",
  "exception", "external", "false", "for", "fun", "function",
  "functor", "if", "in", "include", "inherit", "initializer",
  "land", "lazy", "let", "lor", "lsl", "lsr",
  "lxor", "match", "method", "mod", "module", "mutable",
  "new", "nonrec", "object", "of", "open", "or",
  "private", "rec", "sig", "struct", "then", "to",
  "true", "try", "type", "val", "virtual", "when",
  "while", "with"
})

-- A quote followed by a word (e.g. 'a) is tagged as a type.
local type_ = T("type", P"'" * word)

-- Identifiers may end in any number of trailing quotes.
local identifier = T("identifier", word * P"'"^0)

-- Rule registration order matters: `chrlit` is added before `type` so a
-- quoted character is tried before a quote-prefixed word, and `keyword`
-- is added before `identifier`.
lex:add_rule("whitespace", whitespace)
lex:add_rule("comment", comment)
lex:add_rule("numlit", numlit)
lex:add_rule("chrlit", chrlit)
lex:add_rule("strlit", strlit)
lex:add_rule("delimiter", delimiter)
lex:add_rule("operator", operator)
lex:add_rule("keyword", keyword)
lex:add_rule("type", type_)
lex:add_rule("identifier", identifier)

-- Catch-all: any character no other rule accepted is tagged as an error.
lex:add_rule("error", T("error", any)) -- TODO: TEMP

return lex