matita.cs.unibo.it Git - helm.git/commitdiff
- now unicode characters are counted :)
author Ferruccio Guidi <ferruccio.guidi@unibo.it>
Fri, 4 Jul 2014 18:13:50 +0000 (18:13 +0000)
committer Ferruccio Guidi <ferruccio.guidi@unibo.it>
Fri, 4 Jul 2014 18:13:50 +0000 (18:13 +0000)
- now identifiers are counted as single characters

matita/components/binaries/mac/lexer.mll

index d5d5c90a2290eca97e242888eff7b402beee1bec..c6e56176e0a4220c34530329a0c9942595b1e45a 100644 (file)
@@ -19,19 +19,22 @@ let OL  = "(*"
 let CL  = "*)" 
 let UNI = ['\x80'-'\xBF']+
 let SPC = ['\r' '\n' '\t' ' ']+
+let WRD = ['0'-'9' 'A'-'Z' 'a'-'z' '_']+
 let QT  = '"'
 
 rule token = parse
    | OL     { out "COM"; block lexbuf; token lexbuf                     }
    | QT     { out "STR"; O.count := !O.count + str lexbuf; token lexbuf }
    | SPC    { out "SPC"; incr O.count; token lexbuf                     }
-   | UNI    { out "UNI"; token lexbuf                                   }
+   | UNI    { out "UNI"; incr O.count; token lexbuf                     }
+   | WRD    { out "WRD"; incr O.count; token lexbuf                     }
    | _      { out "CHR"; incr O.count; token lexbuf                     }
    | eof    { out "EOF"                                                 }
 and str = parse
    | QT     { 2 }
    | "\\\"" { succ (str lexbuf)                                         }
-   | UNI    { str lexbuf                                                }
+   | UNI    { succ (str lexbuf)                                         }
+   | WRD    { succ (str lexbuf)                                         }
    | _      { succ (str lexbuf)                                         }
 and block = parse
    | CL     { ()                                                        }