dotfiles

dot files
git clone git://git.rr3.xyz/dotfiles
Log | Files | Refs

caml.lua (4086B)


      1 local M = {_NAME = "caml"}
      2 
      3 local l = require("lexer")
      4 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
      5 local T = l.token
      6 local any = P(1)
      7 local bin = R"01"
      8 local oct = R"07"
      9 local dec = R"09"
     10 local hex = R("09", "AF", "af")
     11 local alpha = R("AZ", "az")
     12 local letter = alpha + dec + "_"
     13 local word = (alpha + P"_") * letter^0
     14 local hws = S"\t "
     15 local vws = S"\n\r"
     16 local ws = hws + vws
     17 local function I(s)  -- Case-insensitive string match
     18 	local p = P(true)
     19 	for i = 1, #s do
     20 		local c = s:sub(i, i)
     21 		p = p * (P(c:lower()) + P(c:upper()))
     22 	end
     23 	return p
     24 end
     25 
     26 local whitespace = T("whitespace", ws^1)
     27 
     28 local comment_keyword = T("comment_keyword", (I"todo" + I"xxx" + I"fixme") * #(any - letter))
     29 local comment_text = T("comment_text", (any - comment_keyword - P"(*" - P"*)")^1)
     30 local comment_open = T("comment_text", P"(*")
     31 local comment_close = T("comment_text", P"*)")
     32 local comment = P{comment_open * (comment_text + comment_keyword + V(1))^0 * comment_close^-1}
     33 
     34 local intlit_bin = P"0" * S"Bb" * bin * (bin + "_")^0
     35 local intlit_oct = P"0" * S"Oo" * oct * (oct + "_")^0
     36 local intlit_dec =                dec * (dec + "_")^0
     37 local intlit_hex = P"0" * S"Xx" * hex * (hex + "_")^0
     38 local intlit_suffix = P"l" + P"L" + P"n"
     39 local intlit = (intlit_bin + intlit_oct + intlit_dec + intlit_hex) * intlit_suffix^-1
     40 
     41 -- TODO: fltlit
     42 local numlit = T("numlit", intlit)
     43 
     44 local escape = 
     45 	  P"\\" * S"\\\"'nrtb "
     46 	+ P"\\" * dec * dec * dec
     47 	+ P"\\x" * hex * hex
     48 	+ P"\\o" * oct * oct * oct
     49 local bad_escape = T("bad_escape", P"\\" * any)
     50 
     51 local chrlit_escape = T("chrlit_escape", escape)
     52 local chrlit_delim = T("chrlit_delim", P"'")
     53 local chrlit_text = T("chrlit_text", any - vws - S"'\\")
     54 local chrlit = chrlit_delim * (chrlit_text + chrlit_escape + bad_escape)
     55 	* chrlit_delim
     56 
     57 local strlit_escape = T("strlit_escape",
     58 	escape + P"\\u{" * hex^1 * P"}" + P"\\\n" * hws^0)
     59 local strlit_delim = T("strlit_delim", P"\"")
     60 local strlit_text = T("strlit_text", (any - vws - S"\"\\")^1)
     61 local strlit = strlit_delim * (strlit_text + strlit_escape + bad_escape)^0
     62 	* strlit_delim
     63 
     64 local delimiter = T("delimiter", S",;(){}")
     65 
     66 --[[
     67 local core_operator_char = S"$&*+-/=>@^|"
     68 local operator_char = S"~!?%<:." + core_operator_char
     69 local infix = (core_operator_char + S"%<") * operator_char^0
     70 	+ P"#" * operator_char^1
     71 local prefix = P"!" * operator_char^0 + S"?~" * operator_char^1
     72 local operator = T("operator", infix + prefix)
     73 --]]
     74 local operator = T("operator", S"$&*+-/=>@^|~!?%<:.#[]")
     75 
     76 local keyword = T("keyword", l.word_match{
     77 	"and",        "as",       "assert", "asr",     "begin",   "class",
     78 	"constraint", "do",       "done",   "downto",  "else",    "end",
     79 	"exception",  "external", "false",  "for",     "fun",     "function",
     80 	"functor",    "if",       "in",     "include", "inherit", "initializer",
     81 	"land",       "lazy",     "let",    "lor",     "lsl",     "lsr",
     82 	"lxor",       "match",    "method", "mod",     "module",  "mutable",
     83 	"new",        "nonrec",   "object", "of",      "open",    "or",
     84 	"private",    "rec",      "sig",    "struct",  "then",    "to",
     85 	"true",       "try",      "type",   "val",     "virtual", "when",
     86 	"while",      "with"
     87 })
     88 
     89 local type_ = T("type", P"'" * word)
     90 
     91 local identifier = T("identifier", word * P"'"^0)
     92 
     93 M._rules = {
     94 	{"whitespace", whitespace},
     95 	{"comment", comment},
     96 	{"numlit", numlit},
     97 	{"chrlit", chrlit},
     98 	{"strlit", strlit},
     99 	{"delimiter", delimiter},
    100 	{"operator", operator},
    101 	{"keyword", keyword},
    102 	{"type", type_},
    103 	{"identifier", identifier},
    104 
    105 	{"error", T("error", any)},  -- TODO: TEMP
    106 }
    107 
    108 M._tokenstyles = {
    109 	whitespace = l.STYLE_WHITESPACE,
    110 
    111 	comment_keyword = l.STYLE_COMMENT_KEYWORD,
    112 	comment_text = l.STYLE_COMMENT,
    113 
    114 	numlit = l.STYLE_NUMBER,
    115 
    116 	bad_escape = l.STYLE_ERROR,
    117 
    118 	chrlit_escape = l.STYLE_ESCAPE,
    119 	chrlit_delim = l.STYLE_NUMBER,
    120 	chrlit_text = l.STYLE_NUMBER,
    121 
    122 	strlit_escape = l.STYLE_ESCAPE,
    123 	strlit_delim = l.STYLE_STRING,
    124 	strlit_text = l.STYLE_STRING,
    125 
    126 	delimiter = l.STYLE_DELIMITER,
    127 	operator = l.STYLE_OPERATOR,
    128 
    129 	keyword = l.STYLE_KEYWORD,
    130 	["type"] = l.STYLE_TYPE,
    131 	identifier = l.STYLE_IDENTIFIER,
    132 }
    133 
    134 return M