ansi_c.lua (7488B)
-- LPeg-based lexer definition for C source code (module name "ansi_c").
-- Token patterns are built with the host `lexer` module and exported as
-- M._rules (an ordered list of {name, pattern} pairs) and M._tokenstyles
-- (token name -> style).
local M = {_NAME = "ansi_c"}

local l = require("lexer")
-- NOTE(review): `lpeg` is read as a global here; presumably the host or the
-- `lexer` module defines it -- confirm.
local P, R, S = lpeg.P, lpeg.R, lpeg.S
local T = l.token

-- Basic character classes.
local any = P(1)
local oct = R"07"
local dec = R"09"
local hex = R("09", "AF", "af")
local alpha = R("AZ", "az")
local letter = alpha + dec + "_"           -- any identifier character
local word = (alpha + P"_") * letter^0     -- identifier: must not start with a digit
local pm = S"+-"
local hws = S"\t "                         -- horizontal whitespace
local vws = S"\n\r"                        -- vertical whitespace
local ws = hws + vws

--- Build a pattern matching string `s` with each character accepted in
-- either lower or upper case.
-- @param s literal string to match case-insensitively
-- @return LPeg pattern
local function I(s)
  local p = P(true)
  for i = 1, #s do
    local c = s:sub(i, i)
    p = p * (P(c:lower()) + P(c:upper()))
  end
  return p
end

--- Build a pattern matching between `min` and `max` repetitions of `p`.
-- At most `max` repetitions are consumed; anything after them is left for the
-- following pattern. This matters for C universal character names: in
-- "\u00e9abc" the escape is exactly "\u00e9" and "abc" is ordinary text, even
-- though a, b and c are hex digits.
-- @param p   LPeg pattern to repeat
-- @param min minimum number of repetitions
-- @param max maximum number of repetitions (defaults to `min`)
-- @return LPeg pattern
local function N(p, min, max)
  max = max or min
  local q = P(true)
  for _ = 1, min do
    q = q * p
  end
  -- Guarded because p^-0 is treated by LPeg as p^0 (zero or more).
  if max > min then
    q = q * p^-(max - min)
  end
  return q
end

local whitespace = T("whitespace", ws^1)

-- Comments. Marker words (case-insensitive) get their own token. The
-- lookahead requires a following non-identifier character, so a marker that
-- is the very last thing in the input is not matched.
local comment_keyword = T("comment_keyword", (I"todo" + I"xxx" + I"fixme" + I"sync" + I"fallthrough" + I"unreachable") * #(any - letter))
-- Line comment text stops at a marker word or newline; a backslash-newline
-- pair (or a lone backslash) is consumed as text so the comment continues.
local line_comment_text = T("comment_text", (any - comment_keyword - S"\\\n" + P"\\\n" + P"\\")^1)
local line_comment = T("comment_text", P"//") * (line_comment_text + comment_keyword)^0 * (T("whitespace", P"\n") + P"")
local block_comment_text = T("comment_text", (any - comment_keyword - P"*/")^1)
-- The closing "*/" is optional, so an unterminated block comment still lexes.
local block_comment = T("comment_text", P"/*") * (block_comment_text + comment_keyword)^0 * T("comment_text", P"*/")^-1
local comment = line_comment + block_comment

-- Floating point literals (decimal, and hexadecimal with mandatory p-exponent).
local fltlit_dec_exp = S"eE" * pm^-1 * dec^1
local fltlit_dec = dec^1 * (P"." * dec^0 * fltlit_dec_exp^-1 + fltlit_dec_exp) + P"." * dec^1 * fltlit_dec_exp^-1
local fltlit_hex_exp = S"pP" * pm^-1 * dec^1
local fltlit_hex = P"0" * S"xX" * (hex^1 * (P"." * hex^0)^-1 + P"." * hex^1) * fltlit_hex_exp
local fltlit_suffix = S"fFlL"
-- local fltlit = pm^-1 * (fltlit_hex + fltlit_dec) * fltlit_suffix^-1
local fltlit = (fltlit_hex + fltlit_dec) * fltlit_suffix^-1

-- Integer literals with optional unsigned/width suffix in either order.
local intlit_oct = P"0" * oct^1
local intlit_dec = dec^1
local intlit_hex = P"0" * S"xX" * hex^1
local intlit_width = P"ll" + P"l" + P"LL" + P"L"
local intlit_suffix = (S"uU" * intlit_width^-1) + (intlit_width * S"uU"^-1)
-- local intlit = pm^-1 * (intlit_hex + intlit_oct + intlit_dec) * intlit_suffix^-1
local intlit = (intlit_hex + intlit_oct + intlit_dec) * intlit_suffix^-1

-- Floats are tried first so that "1.5" is not split after the integer part.
local numlit = T("numlit", fltlit + intlit)

-- Escape sequences inside character and string literals.
local escape = T("escape",
  P"\\" * S"'\"?\\abfnrtv"
  + P"\\" * oct * oct^-2       -- one to three octal digits
  + P"\\x" * hex^1
  + P"\\u" * N(hex, 4)         -- exactly four hex digits
  + P"\\U" * N(hex, 8))        -- exactly eight hex digits
-- Any other backslash + character pair is flagged.
local bad_escape = T("bad_escape", P"\\" * any)

-- Character literals.
local charlit_text = T("charlit_text", (any - vws - S"'\\")^1)
local charlit_prefix = T("charlit_delim", P"u8" + P"u" + P"U" + P"L")
local charlit_delim = T("charlit_delim", P"'")
local charlit = charlit_prefix^-1 * charlit_delim
  * (charlit_text + escape + bad_escape)^0 * charlit_delim

-- printf-style conversion specifications inside string literals.
local strlit_format_param = dec^1 * P"$"
local strlit_format = T("strlit_format", P"%"
  * strlit_format_param^-1
  * S"-+ 0'#"^0 -- flags
  * (R"19" * dec^0 + (P"*" * strlit_format_param^-1))^-1 -- width
  * (P"." * (P"*" * strlit_format_param^-1 + dec^0))^-1 -- precision
  -- Two-character modifiers must be tried first: LPeg's ordered choice
  -- commits to the first alternative that matches, so a leading single
  -- "h"/"l" would make "hh"/"ll" unreachable and "%lld" would not match.
  * (P"hh" + P"ll" + S"Lhjltz")^-1 -- length
  * S"%diuoxXeEfFgGaAcspn") -- type
local strlit_text = T("strlit_text", (any - vws - S"\"\\" - strlit_format)^1)
local strlit_prefix = T("strlit_delim", P"u8" + P"u" + P"U" + P"L")
local strlit_delim = T("strlit_delim", P"\"")
local strlit = strlit_prefix^-1 * strlit_delim
  * (strlit_text + strlit_format + escape + bad_escape)^0 * strlit_delim

local delimiter = T("delimiter", S"(){};,\\")

-- Word-like operators need a boundary check so that an identifier such as
-- "sizeof_array" is not split into "sizeof" + "_array". `-letter` also
-- succeeds at end of input.
local operator = T("operator", S"+-*/%!&|^~<=>?:.[]"
  + (P"sizeof" + P"_Alignof" + P"alignof") * -letter)

-- Preprocessor directives. Longer directive names are listed before their
-- prefixes ("ifdef"/"ifndef" before "if").
local preproc_cond = T("preproc_cond", P"#") * T("whitespace", hws^0)
  * T("preproc_cond", P"ifdef" + P"ifndef" + P"if" + P"elif" + P"else" + P"endif")
-- "#define NAME" / "#define NAME(": the name is tagged as a function when
-- immediately followed by "(".
local preproc_def = T("preproc", P"#") * T("whitespace", hws^0)
  * T("preproc", P"define") * T("whitespace", hws^0)
  * (T("function", word) * #P"(" + T("identifier", word))
-- "#include <...>": the angle-bracket path is styled like a string literal.
local preproc_inc = T("preproc", P"#") * T("whitespace", hws^0)
  * T("preproc", P"include") * (
    T("whitespace", hws^0)
    * T("strlit_delim", P"<")
    * T("strlit_text", (any - vws - P">")^0)
    * T("strlit_delim", P">")
  )^-1
local preproc_other = T("preproc", P"#") * T("whitespace", hws^0)
  * T("preproc", P"undef" + P"pragma" + P"include" + P"error" + P"warning" + P"line")
-- A bare "#" is the fallback when no known directive follows.
local preproc = preproc_cond + preproc_def + preproc_inc + preproc_other + T("preproc", "#")

local keyword = T("keyword", l.word_match{
  -- Storage classes
  "extern", "static", "auto", "register", "_Thread_local",
  "thread_local", -- <threads.h>

  -- Type qualifiers
  "const", "restrict", "volatile", "_Atomic",

  -- Function specifier
  "inline", "_Noreturn",
  "noreturn", -- <stdnoreturn.h>

  -- Control flow
  "return", "break", "continue", "goto",
  "if", "else",
  "switch", "case", "default",
  "do", "while", "for",

  -- Misc
  "typedef", "_Alignas", "_Generic", "_Static_assert",
  "alignas", -- <stdalign.h>
  "static_assert", -- <assert.h>

  -- GNU
  "__typeof__", "typeof",
  "__attribute__",
  "__asm__", "asm",

  -- rcx macros
  "likely", "unlikely",
  "unreachable",
})

local constant = T("constant", l.word_match{
  -- Very special constants only.
  "__DATE__", "__FILE__", "__LINE__", "__TIME__", "__func__",
  "__VA_ARGS__",
  "NULL",
  "true", "false", -- <stdbool.h>
})

-- Type names.
local type_builtin = l.word_match{
  "void",
  "_Bool", "char", "int", "float", "double",
  "bool", -- <stdbool.h>
  "short", "long",
  "signed", "unsigned",
  "_Complex", "_Imaginary",
  "complex", "imaginary", -- <complex.h>
  "struct", "union", "enum",
}
local type_std = l.word_match{
  "ptrdiff_t", "size_t", "ssize_t", "max_align_t", "wchar_t",
}
local bit_width = P"8" + P"16" + P"32" + P"64" + P"128"
-- <stdint.h> style names: [u]int[_least|_fast]<bits>_t, [u]intmax_t, [u]intptr_t.
local type_stdint = P"u"^-1 * P"int" * ((P"_least" + P"_fast")^-1 * bit_width + P"max" + P"ptr") * P"_t" * #(any - letter)
-- Short names: optional "__", optional v<digits> prefix, one of u/s/i/f/c, a
-- bit width, optional a<digits> suffix and optional "_t"; plus a few
-- abbreviated spellings.
local type_nice = P"__"^-1 * (P"v" * dec^1)^-1 * S"usifc" * bit_width * (P"a" * dec^1)^-1 * P"_t"^-1 * #(any - letter) + l.word_match{
  "schar", "uchar", "ushort", "uint", "ulong", "llong", "ullong",
}
local type_rcx = (S"iu" * (P"max" + P"ptr" + P"size") + P"rune" + P"maxalign") * #(any - letter)
local type_ = T("type", type_builtin + type_std + type_stdint + type_nice + type_rcx)

-- "name:" (goto label).
local label = T("whitespace", hws^0) * T("label", word) * T("delimiter", P":")

-- An identifier immediately followed by "(" is tagged as a function.
--local function_ = T("function", word) * T("whitespace", ws^0) * #P"("
local function_ = T("function", word) * #P"("

local identifier = T("identifier", word)

-- Rule list. NOTE(review): presumably the host lexer tries these in order
-- (first match wins) -- confirm against the `lexer` module.
M._rules = {
  {"whitespace", whitespace},
  {"comment", comment},
  {"numlit", numlit},
  {"charlit", charlit},
  {"strlit", strlit},
  {"delimiter", delimiter},
  {"operator", operator},
  {"preproc", preproc},
  {"keyword", keyword},
  {"constant", constant},
  {"type", type_},
  {"label", label},
  {"function", function_},
  {"identifier", identifier},

  {"error", T("error", any)}, -- TODO: TEMP
}

-- Token name -> style mapping.
M._tokenstyles = {
  whitespace = l.STYLE_WHITESPACE,

  comment_text = l.STYLE_COMMENT,
  comment_keyword = l.STYLE_COMMENT_KEYWORD,

  numlit = l.STYLE_NUMBER,

  escape = l.STYLE_ESCAPE,
  bad_escape = l.STYLE_ERROR,

  charlit_delim = l.STYLE_NUMBER,
  charlit_text = l.STYLE_NUMBER,

  strlit_delim = l.STYLE_STRING,
  strlit_text = l.STYLE_STRING,
  strlit_format = l.STYLE_STRING_FORMAT,

  delimiter = l.STYLE_DELIMITER,
  operator = l.STYLE_OPERATOR,

  preproc = l.STYLE_PREPROCESSOR,
  preproc_cond = l.STYLE_PREPROCESSOR_CONDITIONAL,

  keyword = l.STYLE_KEYWORD,
  constant = l.STYLE_CONSTANT,
  ["type"] = l.STYLE_TYPE,
  label = l.STYLE_LABEL,
  ["function"] = l.STYLE_FUNCTION,
  identifier = l.STYLE_IDENTIFIER,
}

return M