suq.lua (3699B)
-- LPeg-based lexer definition for the "suq" language.
-- NOTE(review): relies on the host providing `lpeg` as a global and a
-- `lexer` module exposing `token` and `word_match` -- confirm for your host.
local M = {_NAME = "suq"}

local l = require("lexer")
local P, R, S = lpeg.P, lpeg.R, lpeg.S
local T = l.token

local any = P(1)

-- Digit classes, one per supported radix.
local dec = R"09"
local bin = R"01"
local qat = R"03"
local oct = R"07"
local hex = R("09", "AF", "af")

local dec_ = dec + "_"

local alpha = R("AZ", "az")
local letter = alpha + dec + "_"
local word = (alpha + P"_") * letter^0

local hws = S"\t "
local vws = S"\n\r"
local ws = hws + vws

--- Build a pattern that matches the string `s` in any letter case.
-- Each character is matched as either its lower- or upper-case form.
local function I(s)
  local p = P(true)
  for i = 1, #s do
    local c = s:sub(i, i)
    p = p * (P(c:lower()) + P(c:upper()))
  end
  return p
end

--- Build a pattern that matches between `min` and `max` repetitions of `p`.
-- `max` defaults to `min`, i.e. exactly `min` repetitions.
-- The pattern consumes at most `max` repetitions and puts no constraint on
-- what follows, so e.g. four hex digits followed by further hex digits
-- still match as four.
local function N(p, min, max)
  max = max or min
  local patt = P(true)
  for _ = 1, min do
    patt = patt * p
  end
  -- Guard: `p^-0` is the same as `p^0` ("zero or more"), which would make
  -- the pattern unbounded when max == min.
  if max > min then
    patt = patt * p^-(max - min)
  end
  return patt
end

local whitespace = T("whitespace", ws^1)

-- Comments start with two backslashes and run to the end of the line.
-- Keywords (todo/xxx/fixme, any case) inside a comment get their own token.
local comment_keyword = T("comment_keyword", I"todo" + I"xxx" + I"fixme")
local comment_text = T("comment_text", (any - comment_keyword - P"\n")^1)
-- The terminating newline is optional so that a comment on the last line of
-- the input (no trailing newline) is still recognised.
local comment = T("comment_text", P"\\\\")
  * (comment_text + comment_keyword)^0
  * T("whitespace", P"\n")^-1

-- A number literal must not be immediately followed by a letter, digit or
-- underscore. A negative lookahead is used (rather than requiring a
-- non-letter character) so that a literal at the very end of input matches.
local numterm = -letter

--- Digits of the class `nums`, with optional leading/interleaved underscores.
local function numerals(nums)
  return P"_"^0 * nums^1 * (nums + "_")^0
end

--- Literal of the form `0<marker><digits>[.<digits>]` for a given radix.
-- `markers` is the set of accepted radix letters (e.g. "xX").
local function radix_literal(markers, digits)
  return P"0" * S(markers) * numerals(digits)
    * (P"." * numerals(digits))^-1 * numterm
end

local numlit_dec = dec^1 * dec_^0 * (P"." * numerals(dec))^-1 * numterm
local numlit_bin = radix_literal("bB", bin)
local numlit_qat = radix_literal("qQ", qat)
local numlit_oct = radix_literal("oO", oct)
local numlit_hex = radix_literal("xX", hex)
local numlit = T("numlit",
  numlit_dec + numlit_bin + numlit_qat + numlit_oct + numlit_hex)

-- Escapes: backslash + one of ' " \ n r t, backslash + two hex digits,
-- \u + four hex digits, or \U + six hex digits.
local escape = T("escape",
    P"\\" * S"'\"\\nrt"
  + P"\\" * hex * hex
  + P"\\u" * N(hex, 4)
  + P"\\U" * N(hex, 6))
-- Any other backslash sequence is tagged as an invalid escape.
local bad_escape = T("bad_escape", P"\\" * any)

local charlit_text = T("charlit_text", (any - S"'\\")^1)
local charlit_delim = T("charlit_delim", P"'")
local charlit = charlit_delim
  * (charlit_text + escape + bad_escape)^0
  * charlit_delim

local strlit_text = T("strlit_text", (any - S"\"\\")^1)
local strlit_delim = T("strlit_delim", P"\"")
local strlit = strlit_delim
  * (strlit_text + escape + bad_escape)^0
  * strlit_delim

local delimiter = T("delimiter", S"(){}[];,")

local operator = T("operator", S"!%^&*-+=|:.<>?~")

local keyword = T("keyword", l.word_match{
  "type", "fn", "match", "with", "if", "then", "else", "iff", "do", "for", "in",
})

-- Types: a fixed set of names, `#` directly before an opening bracket, or
-- one of I/N/F followed by a bit width or `size`.
local type_defs = l.word_match{
  "Unit", "Void", "Char", "Str", "Ptr", "Bool",
}
local type_cons = P"#" * #(S"({[")
-- Negative lookahead so a sized type at the very end of input still matches.
local type_num = S"INF" * (dec^1 + P"size") * -letter
local type_ = T("type", type_defs + type_cons + type_num)

-- `@` directly before an opening bracket, or `@` followed by a word.
local cons = T("cons", P"@" * (#(S"({[") + word))

-- Identifiers are plain words or arbitrary text enclosed in backticks.
local tick = P"`" * (any - S"`")^0 * P"`"
local identifier = T("identifier", word + tick)

-- Rule list, in the order defined here; the final rule tags any single
-- character not matched by an earlier rule as an error.
M._rules = {
  {"whitespace", whitespace},
  {"comment", comment},
  {"numlit", numlit},
  {"charlit", charlit},
  {"strlit", strlit},
  {"delimiter", delimiter},
  {"operator", operator},
  {"keyword", keyword},
  {"type", type_},
  {"cons", cons}, -- XXX
  {"identifier", identifier},

  {"error", T("error", any)}, -- TODO: TEMP
}

-- Style assigned to each custom token name.
-- NOTE(review): the STYLE_* constants come from the `lexer` module; confirm
-- that every constant referenced here is defined by the host's version.
M._tokenstyles = {
  whitespace = l.STYLE_WHITESPACE,

  comment_text = l.STYLE_COMMENT,
  comment_keyword = l.STYLE_COMMENT_KEYWORD,

  numlit = l.STYLE_NUMBER,

  escape = l.STYLE_ESCAPE,
  bad_escape = l.STYLE_ERROR,

  charlit_delim = l.STYLE_NUMBER,
  charlit_text = l.STYLE_NUMBER,

  strlit_delim = l.STYLE_STRING,
  strlit_text = l.STYLE_STRING,

  delimiter = l.STYLE_DELIMITER,
  operator = l.STYLE_OPERATOR,

  keyword = l.STYLE_KEYWORD,
  ["type"] = l.STYLE_TYPE,
  ["cons"] = l.STYLE_FUNCTION, -- XXX
  identifier = l.STYLE_IDENTIFIER,
}

return M