caml.lua (4086B)
1 local M = {_NAME = "caml"} 2 3 local l = require("lexer") 4 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V 5 local T = l.token 6 local any = P(1) 7 local bin = R"01" 8 local oct = R"07" 9 local dec = R"09" 10 local hex = R("09", "AF", "af") 11 local alpha = R("AZ", "az") 12 local letter = alpha + dec + "_" 13 local word = (alpha + P"_") * letter^0 14 local hws = S"\t " 15 local vws = S"\n\r" 16 local ws = hws + vws 17 local function I(s) -- Case-insensitive string match 18 local p = P(true) 19 for i = 1, #s do 20 local c = s:sub(i, i) 21 p = p * (P(c:lower()) + P(c:upper())) 22 end 23 return p 24 end 25 26 local whitespace = T("whitespace", ws^1) 27 28 local comment_keyword = T("comment_keyword", (I"todo" + I"xxx" + I"fixme") * #(any - letter)) 29 local comment_text = T("comment_text", (any - comment_keyword - P"(*" - P"*)")^1) 30 local comment_open = T("comment_text", P"(*") 31 local comment_close = T("comment_text", P"*)") 32 local comment = P{comment_open * (comment_text + comment_keyword + V(1))^0 * comment_close^-1} 33 34 local intlit_bin = P"0" * S"Bb" * bin * (bin + "_")^0 35 local intlit_oct = P"0" * S"Oo" * oct * (oct + "_")^0 36 local intlit_dec = dec * (dec + "_")^0 37 local intlit_hex = P"0" * S"Xx" * hex * (hex + "_")^0 38 local intlit_suffix = P"l" + P"L" + P"n" 39 local intlit = (intlit_bin + intlit_oct + intlit_dec + intlit_hex) * intlit_suffix^-1 40 41 -- TODO: fltlit 42 local numlit = T("numlit", intlit) 43 44 local escape = 45 P"\\" * S"\\\"'nrtb " 46 + P"\\" * dec * dec * dec 47 + P"\\x" * hex * hex 48 + P"\\o" * oct * oct * oct 49 local bad_escape = T("bad_escape", P"\\" * any) 50 51 local chrlit_escape = T("chrlit_escape", escape) 52 local chrlit_delim = T("chrlit_delim", P"'") 53 local chrlit_text = T("chrlit_text", any - vws - S"'\\") 54 local chrlit = chrlit_delim * (chrlit_text + chrlit_escape + bad_escape) 55 * chrlit_delim 56 57 local strlit_escape = T("strlit_escape", 58 escape + P"\\u{" * hex^1 * P"}" + P"\\\n" * hws^0) 59 local strlit_delim = T("strlit_delim", P"\"") 60 local strlit_text = T("strlit_text", (any - vws - S"\"\\")^1) 61 local strlit = strlit_delim * (strlit_text + strlit_escape + bad_escape)^0 62 * strlit_delim 63 64 local delimiter = T("delimiter", S",;(){}") 65 66 --[[ 67 local core_operator_char = S"$&*+-/=>@^|" 68 local operator_char = S"~!?%<:." + core_operator_char 69 local infix = (core_operator_char + S"%<") * operator_char^0 70 + P"#" * operator_char^1 71 local prefix = P"!" * operator_char^0 + S"?~" * operator_char^1 72 local operator = T("operator", infix + prefix) 73 --]] 74 local operator = T("operator", S"$&*+-/=>@^|~!?%<:.#[]") 75 76 local keyword = T("keyword", l.word_match{ 77 "and", "as", "assert", "asr", "begin", "class", 78 "constraint", "do", "done", "downto", "else", "end", 79 "exception", "external", "false", "for", "fun", "function", 80 "functor", "if", "in", "include", "inherit", "initializer", 81 "land", "lazy", "let", "lor", "lsl", "lsr", 82 "lxor", "match", "method", "mod", "module", "mutable", 83 "new", "nonrec", "object", "of", "open", "or", 84 "private", "rec", "sig", "struct", "then", "to", 85 "true", "try", "type", "val", "virtual", "when", 86 "while", "with" 87 }) 88 89 local type_ = T("type", P"'" * word) 90 91 local identifier = T("identifier", word * P"'"^0) 92 93 M._rules = { 94 {"whitespace", whitespace}, 95 {"comment", comment}, 96 {"numlit", numlit}, 97 {"chrlit", chrlit}, 98 {"strlit", strlit}, 99 {"delimiter", delimiter}, 100 {"operator", operator}, 101 {"keyword", keyword}, 102 {"type", type_}, 103 {"identifier", identifier}, 104 105 {"error", T("error", any)}, -- TODO: TEMP 106 } 107 108 M._tokenstyles = { 109 whitespace = l.STYLE_WHITESPACE, 110 111 comment_keyword = l.STYLE_COMMENT_KEYWORD, 112 comment_text = l.STYLE_COMMENT, 113 114 numlit = l.STYLE_NUMBER, 115 116 bad_escape = l.STYLE_ERROR, 117 118 chrlit_escape = l.STYLE_ESCAPE, 119 chrlit_delim = l.STYLE_NUMBER, 120 chrlit_text = l.STYLE_NUMBER, 121 122 strlit_escape = l.STYLE_ESCAPE, 123 strlit_delim = l.STYLE_STRING, 124 strlit_text = l.STYLE_STRING, 125 126 delimiter = l.STYLE_DELIMITER, 127 operator = l.STYLE_OPERATOR, 128 129 keyword = l.STYLE_KEYWORD, 130 ["type"] = l.STYLE_TYPE, 131 identifier = l.STYLE_IDENTIFIER, 132 } 133 134 return M