dtlc

dependently-typed lambda calculus toy
git clone git://git.rr3.xyz/dtlc
Log | Files | Refs | README | LICENSE

lexer.mll (2992B)


      1 {
      2 open Node
      3 module E = Error
      4 open Parser
      5 
      6 let abort lexbuf e =
      7 	let beg_pos = Lexing.lexeme_start_p lexbuf in
      8 	let end_pos = Lexing.lexeme_end_p lexbuf in
      9 	let loc = Loc.of_positions beg_pos end_pos in
     10 	E.abort loc e
     11 }
     12 
     13 let ascii_horizontal = ['\t' ' '-'~']
     14 
     15 let dnumeral = ['0'-'9']
     16 let bnumeral = ['0'-'1']
     17 let onumeral = ['0'-'7']
     18 let xnumeral = ['0'-'9' 'a'-'f' 'A'-'F']
     19 
     20 let ulatin = ['A'-'Z']
     21 let llatin = ['a'-'z']
     22 let latin = ulatin | llatin
     23 
     24 (* We allow only those Greek letters that can't be confused with Latin letters
     25  * in most fonts. *)
     26 let ugreek = "Γ" | "Δ" | "Θ" | "Λ" | "Ξ" | "Π" | "Σ" | "Φ" | "Ψ" | "Ω"
     27 let lgreek = "α" | "β" | "γ" | "δ" | "ε" | "ζ" | "η" | "θ" | "ι" | "κ" | "λ"
     28 	| "μ" | "ν" | "ξ" | "π" | "ρ" | "σ" | "τ" | "υ" | "φ" | "χ" | "ψ" | "ω"
     29 let lgreekalt = "ϵ" | "ϑ" | "ϰ" | "ϖ" | "ϱ" | "ς" | "ϕ"
     30 let greek = ugreek | lgreek | lgreekalt
     31 
     32 let comment = '\\' (ascii_horizontal | greek)*
     33 
     34 (* Keep in sync with non_start and non_cont in name.ml. *)
     35 let ident_start = latin | greek | '_'
     36 let ident_cont  = latin | greek | '_' | dnumeral | '\'' | '"'
     37 let ident_quote = (ascii_horizontal # '`') | greek
     38 let name = (ident_start ident_cont* as x) | '`' (ident_quote* as x) '`'
     39 let tag = (ident_cont+ as x) | '`' (ident_quote* as x) '`'
     40 
     41 (* This format is accepted by Z.of_string. *)
     42 let dnumber = dnumeral (dnumeral | '_')*
     43 let bnumber = '0' ['b' 'B'] bnumeral (bnumeral | '_')*
     44 let onumber = '0' ['o' 'O'] onumeral (onumeral | '_')*
     45 let xnumber = '0' ['x' 'X'] xnumeral (xnumeral | '_')*
     46 let number = dnumber | bnumber | onumber | xnumber
     47 
     48 (* This regex is used to prevent input like "0b02" from successfully lexing as
     49  * the two tokens "0b0" and "2". *)
     50 let invalid_number_terminator = ident_cont
     51 
     52 rule token = parse
     53 | [' ' '\t' '\r'] { token lexbuf }
     54 | comment? '\n' { Lexing.new_line lexbuf; token lexbuf }
     55 | comment? eof { EOF }
     56 
     57 | ";"  { SEMICOLON }
     58 | ":"  { COLON }
     59 | ","  { COMMA }
     60 | ":=" { COLON_EQ }
     61 | "?"  { QUESTION }
     62 | "->" { SINGLE_ARROW }
     63 | "=>" { DOUBLE_ARROW }
     64 
     65 | "#(" { HASH_LPAREN }
     66 | "("  { LPAREN }
     67 | ")"  { RPAREN }
     68 
     69 | "["  { LBRACK }
     70 | "]"  { RBRACK }
     71 
     72 | "#{" { HASH_LBRACE }
     73 | "{"  { LBRACE }
     74 | "}"  { RBRACE }
     75 
     76 | "||" { PIPE2 }
     77 | "&&" { AMPERSAND2 }
     78 
     79 | "<"  { LT }
     80 | "<=" { LE }
     81 | "="  { EQ }
     82 | ">=" { GE }
     83 | ">"  { GT }
     84 | "<>" { NE }
     85 
     86 | "+" { PLUS }
     87 | "-" { MINUS }
     88 | "*" { ASTERISK }
     89 | "/" { SLASH }
     90 | "%" { PERCENT }
     91 
     92 | "|" { PIPE }
     93 | "&" { AMPERSAND }
     94 
     95 | "rec" { REC }
     96 | "match" { MATCH }
     97 | "if"   { IF }
     98 | "then" { THEN }
     99 | "else" { ELSE }
    100 | "iff" { IFF }
    101 | "do"  { DO }
    102 | "fn" { FN }
    103 
    104 | "_" { UNDERSCORE }
    105 
    106 | "@" tag { TAG x }
    107 
    108 | name "#" (number as i) {
    109 	let z = Z.of_string i in
    110 	(* TODO: Standardize this limit. *)
    111 	if Z.fits_int z then
    112 		INDEXED_NAME (x, Z.to_int z)
    113 	else
    114 		abort lexbuf E.LexerNameIndexTooBig
    115 }
    116 | name { NAME x }
    117 
    118 | number as n { NAT (Z.of_string n) }
    119 | number invalid_number_terminator
    120 	{ abort lexbuf E.LexerInvalidNumberTerminator }
    121 
    122 | _ { abort lexbuf E.LexerNoRule }
    123 
    124 {
    125 }