lexer.mll (2992B)
1 { 2 open Node 3 module E = Error 4 open Parser 5 6 let abort lexbuf e = 7 let beg_pos = Lexing.lexeme_start_p lexbuf in 8 let end_pos = Lexing.lexeme_end_p lexbuf in 9 let loc = Loc.of_positions beg_pos end_pos in 10 E.abort loc e 11 } 12 13 let ascii_horizontal = ['\t' ' '-'~'] 14 15 let dnumeral = ['0'-'9'] 16 let bnumeral = ['0'-'1'] 17 let onumeral = ['0'-'7'] 18 let xnumeral = ['0'-'9' 'a'-'f' 'A'-'F'] 19 20 let ulatin = ['A'-'Z'] 21 let llatin = ['a'-'z'] 22 let latin = ulatin | llatin 23 24 (* We allow only those Greek letters that can't be confused with Latin letters 25 * in most fonts. *) 26 let ugreek = "Γ" | "Δ" | "Θ" | "Λ" | "Ξ" | "Π" | "Σ" | "Φ" | "Ψ" | "Ω" 27 let lgreek = "α" | "β" | "γ" | "δ" | "ε" | "ζ" | "η" | "θ" | "ι" | "κ" | "λ" 28 | "μ" | "ν" | "ξ" | "π" | "ρ" | "σ" | "τ" | "υ" | "φ" | "χ" | "ψ" | "ω" 29 let lgreekalt = "ϵ" | "ϑ" | "ϰ" | "ϖ" | "ϱ" | "ς" | "ϕ" 30 let greek = ugreek | lgreek | lgreekalt 31 32 let comment = '\\' (ascii_horizontal | greek)* 33 34 (* Keep in sync with non_start and non_cont in name.ml. *) 35 let ident_start = latin | greek | '_' 36 let ident_cont = latin | greek | '_' | dnumeral | '\'' | '"' 37 let ident_quote = (ascii_horizontal # '`') | greek 38 let name = (ident_start ident_cont* as x) | '`' (ident_quote* as x) '`' 39 let tag = (ident_cont+ as x) | '`' (ident_quote* as x) '`' 40 41 (* This format is accepted by Z.of_string. *) 42 let dnumber = dnumeral (dnumeral | '_')* 43 let bnumber = '0' ['b' 'B'] bnumeral (bnumeral | '_')* 44 let onumber = '0' ['o' 'O'] onumeral (onumeral | '_')* 45 let xnumber = '0' ['x' 'X'] xnumeral (xnumeral | '_')* 46 let number = dnumber | bnumber | onumber | xnumber 47 48 (* This regex is used to prevent input like "0b02" from successfully lexing as 49 * the two tokens "0b0" and "2". *) 50 let invalid_number_terminator = ident_cont 51 52 rule token = parse 53 | [' ' '\t' '\r'] { token lexbuf } 54 | comment? '\n' { Lexing.new_line lexbuf; token lexbuf } 55 | comment? eof { EOF } 56 57 | ";" { SEMICOLON } 58 | ":" { COLON } 59 | "," { COMMA } 60 | ":=" { COLON_EQ } 61 | "?" { QUESTION } 62 | "->" { SINGLE_ARROW } 63 | "=>" { DOUBLE_ARROW } 64 65 | "#(" { HASH_LPAREN } 66 | "(" { LPAREN } 67 | ")" { RPAREN } 68 69 | "[" { LBRACK } 70 | "]" { RBRACK } 71 72 | "#{" { HASH_LBRACE } 73 | "{" { LBRACE } 74 | "}" { RBRACE } 75 76 | "||" { PIPE2 } 77 | "&&" { AMPERSAND2 } 78 79 | "<" { LT } 80 | "<=" { LE } 81 | "=" { EQ } 82 | ">=" { GE } 83 | ">" { GT } 84 | "<>" { NE } 85 86 | "+" { PLUS } 87 | "-" { MINUS } 88 | "*" { ASTERISK } 89 | "/" { SLASH } 90 | "%" { PERCENT } 91 92 | "|" { PIPE } 93 | "&" { AMPERSAND } 94 95 | "rec" { REC } 96 | "match" { MATCH } 97 | "if" { IF } 98 | "then" { THEN } 99 | "else" { ELSE } 100 | "iff" { IFF } 101 | "do" { DO } 102 | "fn" { FN } 103 104 | "_" { UNDERSCORE } 105 106 | "@" tag { TAG x } 107 108 | name "#" (number as i) { 109 let z = Z.of_string i in 110 (* TODO: Standardize this limit. *) 111 if Z.fits_int z then 112 INDEXED_NAME (x, Z.to_int z) 113 else 114 abort lexbuf E.LexerNameIndexTooBig 115 } 116 | name { NAME x } 117 118 | number as n { NAT (Z.of_string n) } 119 | number invalid_number_terminator 120 { abort lexbuf E.LexerInvalidNumberTerminator } 121 122 | _ { abort lexbuf E.LexerNoRule } 123 124 { 125 }