haskell.lua (5890B)
-- LPeg-based lexer definition for Haskell.
-- The returned module table carries `_NAME`, the ordered `_rules` list and the
-- `_tokenstyles` mapping from token names to styles of the `lexer` module.
local M = {_NAME = "haskell"}

local l = require("lexer")
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local T = l.token

-- Succeeds only at the end of the input.
local eof = -P(1)

-- Case-insensitive string match: builds a pattern that accepts every
-- character of `s` in either its lower- or upper-case form.
local function I(s)
  local p = P(true)
  for i = 1, #s do
    local c = s:sub(i, i)
    p = p * (P(c:lower()) + P(c:upper()))
  end
  return p
end

-- Variables beginning with "hs_" are (subsets of) the official nonterminals in
-- the Haskell 2010 grammar of the same name.
local hs_special = S"(),;[]`{}"
local hs_whitechar = S" \t\n\r"
-- ASCII lower-case letters and "_", plus a fixed list of Greek letters.
local hs_small = R"az" + P"_"
  + P"Γ" + P"Δ" + P"Θ" + P"Λ" + P"Ξ" + P"Π" + P"Σ" + P"Φ" + P"Ψ" + P"Ω"
  + P"α" + P"β" + P"γ" + P"δ" + P"ε" + P"ζ" + P"η" + P"θ" + P"ι" + P"κ"
  + P"λ" + P"μ" + P"ν" + P"ξ" + P"ο" + P"π" + P"ρ" + P"σ" + P"τ" + P"υ"
  + P"φ" + P"χ" + P"ψ" + P"ω" + P"ϵ" + P"ϑ" + P"ϰ" + P"ϖ" + P"ϱ" + P"ς"
  + P"ϕ"
local hs_large = R"AZ"
-- ASCII operator characters plus a fixed list of arrows, turnstiles and
-- box-drawing characters.
local hs_symbol = S"!#$%&*+./<=>?@\\^|-~:"
  + P"⋅"
  + P"→" + P"↑" + P"←" + P"↓"
  + P"⇒" + P"⇑" + P"⇐" + P"⇓"
  + P"⊢" + P"⊥" + P"⊣" + P"⊤"
  + P"─" + P"│" + P"┌" + P"┐" + P"└" + P"┘" + P"├" + P"┤" + P"┬" + P"┴"
  + P"┼" + P"╭" + P"╮" + P"╯" + P"╰" + P"╴" + P"╵" + P"╶" + P"╷"
local hs_digit = R"09"
local hs_octit = R"07"
local hs_hexit = R("09", "AF", "af")
local hs_graphic = hs_small + hs_large + hs_symbol + hs_digit + hs_special + S"\"'"
local hs_any = hs_graphic + S" \t"
local hs_ANY = hs_graphic + hs_whitechar
-- Any character that may continue an identifier after its first character.
local letter = hs_small + hs_large + hs_digit + P"'"
-- Within each choice below, a word that has another listed word as a prefix
-- (e.g. "infixl" / "infix", "instance" / "in") comes before that prefix.
local hs_reservedid = (
  P"case" + P"class" + P"data" + P"default" + P"deriving" + P"do" + P"else"
  + P"foreign" + P"if" + P"import" + P"infixl" + P"infixr" + P"infix"
  + P"instance" + P"in" + P"let" + P"module" + P"newtype" + P"of"
  + P"then" + P"type" + P"where" + P"_"
) * #-letter
local hs_varid = hs_small * letter^0 - hs_reservedid
local hs_conid = hs_large * letter^0
-- An ordered choice commits to the first alternative that matches and does not
-- come back to a later one when the trailing lookahead fails. "::" and "=>"
-- therefore have to be tried before their prefixes ":" and "=".
local hs_reservedop = (
  P".." + P"::" + P":" + P"=>" + P"=" + P"\\" + P"<-" + P"->" + P"@" + P"~"
) * #-hs_symbol
local hs_varsym = (hs_symbol - P":") * hs_symbol^0 - hs_reservedop
local hs_consym = P":" * hs_symbol^0 - hs_reservedop
local ghc_keywords = (P"forall" + P"pattern" + P"family") * #-letter
local import_keywords = (P"qualified" + P"as" + P"hiding") * #-letter
local pragma = (
  P"INLINE" + P"NOINLINE" + P"SPECIALIZE" + P"SPECIALISE" + P"LANGUAGE"
  + P"OPTIONS_GHC" + P"INCLUDE" + P"WARNING" + P"DEPRECATED"
  + P"MINIMAL" + P"INLINABLE" + P"OPAQUE" + P"LINE" + P"COLUMN"
  + P"RULES" + P"UNPACK" + P"NOUNPACK" + P"SOURCE" + P"COMPLETE"
  + P"OVERLAPPING" + P"OVERLAPPABLE" + P"OVERLAPS" + P"INCOHERENT"
) * #-letter

local whitespace = T("whitespace", hs_whitechar^1)

-- "todo", "xxx" or "fixme" in any letter case, provided the next character is
-- not a letter, digit, "_" or "'". The end of the input also terminates the
-- keyword, so a comment that ends the file is still recognised.
local comment_keyword = T("comment_keyword",
  (I"todo" + I"xxx" + I"fixme")
  * (#(hs_ANY - R("09", "AZ", "az") - S"_'") + eof))
local line_comment_text = T("comment_text", (hs_any - comment_keyword)^1)
-- Two or more dashes start a line comment unless a symbol character follows
-- (in which case the dashes are part of an operator). A run of dashes at the
-- very end of the input counts as a comment as well.
local line_comment = T("comment_text", P"-"^2 * (#(hs_ANY - hs_symbol) + eof))
  * (line_comment_text + comment_keyword)^0
  * (T("whitespace", P"\n") + P"")
local block_comment_text = T("comment_text", (hs_ANY - comment_keyword - P"{-" - P"-}")^1)
-- Block comments nest: V(1) re-enters this single-rule grammar for every inner
-- "{-". An opening "{-#" followed by a known pragma name is not a comment.
-- The closing "-}" is optional, so an unterminated comment is still styled.
local block_comment = P{
  T("comment_text", P"{-" - P"{-#" * hs_whitechar^0 * pragma)
  * (block_comment_text + comment_keyword + V(1))^0
  * T("comment_text", P"-}")^-1
}
local comment = line_comment + block_comment

local intlit_dec = hs_digit^1
local intlit_oct = P"0" * S"oO" * hs_octit^1
local intlit_hex = P"0" * S"xX" * hs_hexit^1
-- The prefixed forms go first; otherwise the decimal alternative would accept
-- the leading "0" on its own and leave "x1F" / "o17" for the following rules.
local intlit = intlit_oct + intlit_hex + intlit_dec

local fltlit_exp = S"eE" * S"+-"^-1 * hs_digit^1
local fltlit = hs_digit^1 * (P"." * hs_digit^1 * fltlit_exp^-1 + fltlit_exp)

-- Floats are tried before integers for the same reason: an integer match would
-- stop in front of the "." or the exponent.
local numlit = T("numlit", fltlit + intlit)

local escape_char = S"abfnrtv\\\"'&"
local escape_ascii = P"^" * (R"AZ" + S"@[\\]^_")
  -- It's important here that "SOH" precedes "SO"; it's the only ambiguous case.
  + P"NUL" + P"SOH" + P"STX" + P"ETX" + P"EOT" + P"ENQ" + P"ACK"
  + P"BEL" + P"BS" + P"HT" + P"LF" + P"VT" + P"FF" + P"CR" + P"SO" + P"SI" + P"DLE"
  + P"DC1" + P"DC2" + P"DC3" + P"DC4" + P"NAK" + P"SYN" + P"ETB" + P"CAN"
  + P"EM" + P"SUB" + P"ESC" + P"FS" + P"GS" + P"RS" + P"US" + P"SP" + P"DEL"
local escape_number = hs_digit^1 + P"o" * hs_octit^1 + P"x" * hs_hexit^1
local escape = T("escape", P"\\" * (escape_char + escape_ascii + escape_number))
-- A string gap: backslash, one or more whitespace characters, backslash.
local gap = T("escape", P"\\" * hs_whitechar^1 * P"\\")

local chrlit_delim = T("chrlit_delim", P"'")
local chrlit_text = T("chrlit_text", hs_graphic - S"'\\" + P" ")
local chrlit = chrlit_delim * (chrlit_text + escape) * chrlit_delim

local strlit_delim = T("strlit_delim", P"\"")
local strlit_text = T("strlit_text", (hs_graphic - S"\"\\" + P" ")^1)
local strlit = strlit_delim * (strlit_text + escape + gap)^0 * strlit_delim

local delimiter = T("delimiter", P"{-#" + P"#-}" + hs_special)
local keyword = T("keyword", hs_reservedid + hs_reservedop + ghc_keywords + import_keywords + pragma)

-- Zero or more "Conid." prefixes in front of a qualified name.
local qualifier = T("qualifier", (hs_conid * P".")^0)
local operator = qualifier * T("operator", hs_varsym + hs_consym)
local type_ = qualifier * T("type", hs_conid)
local identifier = qualifier * T("identifier", hs_varid)

-- Rules are listed in the order in which they are meant to be tried.
M._rules = {
  {"whitespace", whitespace},
  {"comment", comment},
  {"numlit", numlit},
  {"chrlit", chrlit},
  {"strlit", strlit},
  {"delimiter", delimiter},
  {"keyword", keyword},
  {"operator", operator},
  {"type", type_},
  {"identifier", identifier},

  {"error", T("error", P(1))}, -- TODO: TEMP
}

-- Style assigned to each token name produced by the patterns above.
M._tokenstyles = {
  whitespace = l.STYLE_WHITESPACE,

  comment_text = l.STYLE_COMMENT,
  comment_keyword = l.STYLE_COMMENT_KEYWORD,

  numlit = l.STYLE_NUMBER,

  escape = l.STYLE_ESCAPE,

  chrlit_delim = l.STYLE_NUMBER,
  chrlit_text = l.STYLE_NUMBER,

  strlit_delim = l.STYLE_STRING,
  strlit_text = l.STYLE_STRING,

  delimiter = l.STYLE_DELIMITER,
  keyword = l.STYLE_KEYWORD,

  qualifier = l.STYLE_IDENTIFIER,
  operator = l.STYLE_OPERATOR,
  ["type"] = l.STYLE_TYPE,
  identifier = l.STYLE_IDENTIFIER,
}

return M