From 0ae40e5497e2ed1e231594d68d862acc4a79fe03 Mon Sep 17 00:00:00 2001 From: Ferruccio Guidi Date: Fri, 4 Jul 2014 18:13:50 +0000 Subject: [PATCH] - now unicode characters are counted :) - now identifiers are counted as single characters --- matita/components/binaries/mac/lexer.mll | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/matita/components/binaries/mac/lexer.mll b/matita/components/binaries/mac/lexer.mll index d5d5c90a2..c6e56176e 100644 --- a/matita/components/binaries/mac/lexer.mll +++ b/matita/components/binaries/mac/lexer.mll @@ -19,19 +19,22 @@ let OL = "(*" let CL = "*)" let UNI = ['\x80'-'\xBF']+ let SPC = ['\r' '\n' '\t' ' ']+ +let WRD = ['0'-'9' 'A'-'Z' 'a'-'z' '_']+ let QT = '"' rule token = parse | OL { out "COM"; block lexbuf; token lexbuf } | QT { out "STR"; O.count := !O.count + str lexbuf; token lexbuf } | SPC { out "SPC"; incr O.count; token lexbuf } - | UNI { out "UNI"; token lexbuf } + | UNI { out "UNI"; incr O.count; token lexbuf } + | WRD { out "WRD"; incr O.count; token lexbuf } | _ { out "CHR"; incr O.count; token lexbuf } | eof { out "EOF" } and str = parse | QT { 2 } | "\\\"" { succ (str lexbuf) } - | UNI { str lexbuf } + | UNI { succ (str lexbuf) } + | WRD { succ (str lexbuf) } | _ { succ (str lexbuf) } and block = parse | CL { () } -- 2.39.2