ansi_c.lua (6951B)
-- Lexer definition for C source code, built from LPeg patterns on top of the
-- project's `lexer` module (lexer.new / lex:tag / lex:add_rule / word_match).
-- NOTE(review): the API used here looks like Scintillua's -- confirm against
-- the host editor.
--
-- The file builds one pattern per token class, registers them with
-- `lex:add_rule` and returns the lexer object.

local l = require("lexer")
local lex = l.new(...)

-- Use the host-provided global when present, otherwise load the module, so
-- the file does not depend on `lpeg` having been exported as a global.
local lpeg = _G.lpeg or require("lpeg")
local P, R, S = lpeg.P, lpeg.R, lpeg.S

--- Tag `patt` so that its match is reported as a token named `name`.
local T = function(name, patt) return lex:tag(name, patt) end

-- Basic character classes -----------------------------------------------------

local any = P(1)
local oct = R"07"
local dec = R"09"
local hex = R("09", "AF", "af")
local alpha = R("AZ", "az")
local letter = alpha + dec + "_"          -- any identifier character
local word = (alpha + P"_") * letter^0    -- a C identifier
local word_end = -letter                  -- boundary: next char (if any) is not an identifier char
local pm = S"+-"
local hws = S"\t "                        -- horizontal whitespace
local vws = S"\n\r"                       -- vertical whitespace
local ws = hws + vws

--- Build a pattern matching the string `s` case-insensitively.
-- Each character is matched as either its lower- or upper-case form.
local function I(s)
  local p = P(true)
  for i = 1, #s do
    local c = s:sub(i, i)
    p = p * (P(c:lower()) + P(c:upper()))
  end
  return p
end

--- Build a pattern matching between `min` and `max` repetitions of `p`.
-- `max` defaults to `min` (i.e. exactly `min` repetitions).
-- The match is greedy up to `max` and does NOT look at what follows, so
-- `N(hex, 4)` applied to "00e9abc" consumes "00e9" and leaves "abc" alone.
-- (The previous `p^min - p^(max+1)` formulation failed outright whenever more
-- than `max` repetitions were available, so `\u00e9abc` was not an escape.)
local function N(p, min, max)
  max = max or min
  local patt = P(true)
  for _ = 1, min do
    patt = patt * p
  end
  -- Guard: p^-0 is the same as p^0 ("zero or more"), which is not wanted here.
  if max > min then
    patt = patt * p^-(max - min)
  end
  return patt
end

-- Whitespace ------------------------------------------------------------------

local whitespace = T("whitespace", ws^1)

-- Comments --------------------------------------------------------------------

-- Marker words highlighted inside comments; must end at a word boundary.
local comment_keyword = T("comment_keyword",
  (I"todo" + I"xxx" + I"fixme" + I"sync" + I"fallthrough" + I"unreachable")
  * word_end)
-- Line comment body: anything up to the newline; a backslash-newline pair is
-- consumed as part of the comment text.
local line_comment_text = T("comment_text",
  (any - comment_keyword - S"\\\n" + P"\\\n" + P"\\")^1)
local line_comment = T("comment_text", P"//")
  * (line_comment_text + comment_keyword)^0
  * (T("whitespace", P"\n") + P"")
local block_comment_text = T("comment_text", (any - comment_keyword - P"*/")^1)
-- The closing "*/" is optional so an unterminated comment is still tagged.
local block_comment = T("comment_text", P"/*")
  * (block_comment_text + comment_keyword)^0
  * T("comment_text", P"*/")^-1
local comment = line_comment + block_comment

-- Numeric literals ------------------------------------------------------------

local fltlit_dec_exp = S"eE" * pm^-1 * dec^1
local fltlit_dec = dec^1 * (P"." * dec^0 * fltlit_dec_exp^-1 + fltlit_dec_exp)
  + P"." * dec^1 * fltlit_dec_exp^-1
local fltlit_hex_exp = S"pP" * pm^-1 * dec^1
local fltlit_hex = P"0" * S"xX"
  * (hex^1 * (P"." * hex^0)^-1 + P"." * hex^1)
  * fltlit_hex_exp
local fltlit_suffix = S"fFlL"
-- A leading sign is deliberately not part of the literal:
-- local fltlit = pm^-1 * (fltlit_hex + fltlit_dec) * fltlit_suffix^-1
local fltlit = (fltlit_hex + fltlit_dec) * fltlit_suffix^-1

local intlit_oct = P"0" * oct^1
local intlit_dec = dec^1
local intlit_hex = P"0" * S"xX" * hex^1
local intlit_width = P"ll" + P"l" + P"LL" + P"L"
local intlit_suffix = (S"uU" * intlit_width^-1) + (intlit_width * S"uU"^-1)
-- A leading sign is deliberately not part of the literal:
-- local intlit = pm^-1 * (intlit_hex + intlit_oct + intlit_dec) * intlit_suffix^-1
local intlit = (intlit_hex + intlit_oct + intlit_dec) * intlit_suffix^-1

-- Floats are tried first so "1.5" is not split into an integer and ".5".
local numlit = T("numlit", fltlit + intlit)

-- Escape sequences (shared by character and string literals) -------------------

local escape = T("escape",
    P"\\" * S"'\"?\\abfnrtv"
  + P"\\" * oct * oct^-2
  + P"\\x" * hex^1
  + P"\\u" * N(hex, 4)
  + P"\\U" * N(hex, 8))
-- Any other backslash + character pair.
local bad_escape = T("bad_escape", P"\\" * any)

-- Character literals ------------------------------------------------------------

local charlit_text = T("charlit_text", (any - vws - S"'\\")^1)
local charlit_prefix = T("charlit_delim", P"u8" + P"u" + P"U" + P"L")
local charlit_delim = T("charlit_delim", P"'")
local charlit = charlit_prefix^-1 * charlit_delim
  * (charlit_text + escape + bad_escape)^0 * charlit_delim

-- String literals ---------------------------------------------------------------

local strlit_format_param = dec^1 * P"$"
-- printf-style conversion specification.
local strlit_format = T("strlit_format", P"%"
  * strlit_format_param^-1
  * S"-+ 0'#"^0                                             -- flags
  * (R"19" * dec^0 + (P"*" * strlit_format_param^-1))^-1    -- width
  * (P"." * (P"*" * strlit_format_param^-1 + dec^0))^-1     -- precision
  -- Two-character modifiers must be tried first: LPeg's choice is ordered, so
  -- the single-character set would otherwise consume the first "h"/"l" and
  -- "%hhd" / "%lld" would never match.
  * (P"hh" + P"ll" + S"Lhjltz")^-1                          -- length
  * S"%diuoxXeEfFgGaAcspn")                                 -- type
local strlit_text = T("strlit_text", (any - vws - S"\"\\" - strlit_format)^1)
local strlit_prefix = T("strlit_delim", P"u8" + P"u" + P"U" + P"L")
local strlit_delim = T("strlit_delim", P"\"")
local strlit = strlit_prefix^-1 * strlit_delim
  * (strlit_text + strlit_format + escape + bad_escape)^0 * strlit_delim

-- Punctuation -------------------------------------------------------------------

local delimiter = T("delimiter", S"(){};,\\")

-- Word operators need a boundary check: the operator rule is registered before
-- the identifier rule, and a bare P"sizeof" also matches the start of an
-- identifier such as `sizeof_header`.
local operator = T("operator",
  S"+-*/%!&|^~<=>?:.[]"
  + (P"sizeof" + P"_Alignof" + P"alignof") * word_end)

-- Preprocessor ------------------------------------------------------------------

local preproc_cond = T("preproc_cond", P"#") * T("whitespace", hws^0)
  * T("preproc_cond", P"ifdef" + P"ifndef" + P"if" + P"elif" + P"else" + P"endif")
-- "#define NAME" / "#define NAME(": the name is tagged as a function when it
-- is immediately followed by "(".
local preproc_def = T("preproc", P"#") * T("whitespace", hws^0)
  * T("preproc", P"define") * T("whitespace", hws^0)
  * (T("function", word) * #P"(" + T("identifier", word))
-- "#include", optionally followed by a <header> which is tagged like a string.
local preproc_inc = T("preproc", P"#") * T("whitespace", hws^0)
  * T("preproc", P"include") * (
    T("whitespace", hws^0)
    * T("strlit_delim", P"<")
    * T("strlit_text", (any - vws - P">")^0)
    * T("strlit_delim", P">")
  )^-1
local preproc_other = T("preproc", P"#") * T("whitespace", hws^0)
  * T("preproc", P"undef" + P"pragma" + P"include" + P"error" + P"warning" + P"line")
-- A lone "#" is the final fallback.
local preproc = preproc_cond + preproc_def + preproc_inc + preproc_other
  + T("preproc", "#")

-- Keywords ----------------------------------------------------------------------

local keyword = T("keyword", l.word_match{
  -- Storage classes
  "extern", "static", "auto", "register", "_Thread_local",
  "thread_local", -- <threads.h>

  -- Type qualifiers
  "const", "restrict", "volatile", "_Atomic",

  -- Function specifier
  "inline", "_Noreturn",
  "noreturn", -- <stdnoreturn.h>

  -- Control flow
  "return", "break", "continue", "goto",
  "if", "else",
  "switch", "case", "default",
  "do", "while", "for",

  -- Misc
  "typedef", "_Alignas", "_Generic", "_Static_assert",
  "alignas", -- <stdalign.h>
  "static_assert", -- <assert.h>

  -- GNU
  "__typeof__", "typeof",
  "__attribute__",
  "__asm__", "asm",

  -- rcx macros
  "likely", "unlikely",
  "unreachable",
})

-- Constants ---------------------------------------------------------------------

local constant = T("constant", l.word_match{
  -- Very special constants only.
  "__DATE__", "__FILE__", "__LINE__", "__TIME__", "__func__",
  "__VA_ARGS__",
  "NULL",
  "true", "false", -- <stdbool.h>
})

-- Types -------------------------------------------------------------------------

local type_builtin = l.word_match{
  "void",
  "_Bool", "char", "int", "float", "double",
  "bool", -- <stdbool.h>
  "short", "long",
  "signed", "unsigned",
  "_Complex", "_Imaginary",
  "complex", "imaginary", -- <complex.h>
  "struct", "union", "enum",
}
local type_std = l.word_match{
  "ptrdiff_t", "size_t", "ssize_t", "max_align_t", "wchar_t",
}
local bit_width = P"8" + P"16" + P"32" + P"64" + P"128"
-- <stdint.h> names: [u]int[_least|_fast]N_t, [u]intmax_t, [u]intptr_t.
local type_stdint = P"u"^-1 * P"int"
  * ((P"_least" + P"_fast")^-1 * bit_width + P"max" + P"ptr")
  * P"_t" * word_end
-- Short fixed-width names such as u8, i32, f64, __v4u32, u64a8_t, plus a few
-- abbreviated integer names.
local type_nice = P"__"^-1 * (P"v" * dec^1)^-1 * S"usifc" * bit_width
  * (P"a" * dec^1)^-1 * P"_t"^-1 * word_end
  + l.word_match{
    "schar", "uchar", "ushort", "uint", "ulong", "llong", "ullong",
  }
local type_rcx = (S"iu" * (P"max" + P"ptr" + P"size") + P"rune" + P"maxalign")
  * word_end
local type_ = T("type", type_builtin + type_std + type_stdint + type_nice + type_rcx)

-- Labels, functions, identifiers --------------------------------------------------

-- An identifier immediately followed by ":".
local label = T("whitespace", hws^0) * T("label", word) * T("delimiter", P":")

-- An identifier immediately followed by "(" (whitespace in between is not
-- accepted; the variant below would allow it):
--local function_ = T("function", word) * T("whitespace", ws^0) * #P"("
local function_ = T("function", word) * #P"("

local identifier = T("identifier", word)

-- Rule registration ---------------------------------------------------------------

lex:add_rule("whitespace", whitespace)
lex:add_rule("comment", comment)
lex:add_rule("numlit", numlit)
lex:add_rule("charlit", charlit)
lex:add_rule("strlit", strlit)
lex:add_rule("delimiter", delimiter)
lex:add_rule("operator", operator)
lex:add_rule("preproc", preproc)
lex:add_rule("keyword", keyword)
lex:add_rule("constant", constant)
lex:add_rule("type", type_)
lex:add_rule("label", label)
lex:add_rule("function", function_)
lex:add_rule("identifier", identifier)

-- Catch-all: any character no rule above accepted.
lex:add_rule("error", T("error", any)) -- TODO: TEMP

return lex