matita.cs.unibo.it Git - helm.git/blobdiff - matita/components/binaries/mac/lexer.mll
- now unicode characters are counted :)
[helm.git] / matita / components / binaries / mac / lexer.mll
index 4068879ecd1b66e8055a4278f0a515c0072c10f5..c6e56176e0a4220c34530329a0c9942595b1e45a 100644 (file)
@@ -1,3 +1,14 @@
+(*
+    ||M||  This file is part of HELM, an Hypertextual, Electronic        
+    ||A||  Library of Mathematics, developed at the Computer Science     
+    ||T||  Department, University of Bologna, Italy.                     
+    ||I||                                                                
+    ||T||  HELM is free software; you can redistribute it and/or         
+    ||A||  modify it under the terms of the GNU General Public License   
+    \   /  version 2 or (at your option) any later version.      
+     \ /   This software is distributed as is, NO WARRANTY.     
+      V_______________________________________________________________ *)
+
 {
    module O = Options
    
@@ -8,19 +19,22 @@ let OL  = "(*"
 let CL  = "*)" 
 let UNI = ['\x80'-'\xBF']+
 let SPC = ['\r' '\n' '\t' ' ']+
+let WRD = ['0'-'9' 'A'-'Z' 'a'-'z' '_']+
 let QT  = '"'
 
 rule token = parse
    | OL     { out "COM"; block lexbuf; token lexbuf                     }
    | QT     { out "STR"; O.count := !O.count + str lexbuf; token lexbuf }
    | SPC    { out "SPC"; incr O.count; token lexbuf                     }
-   | UNI    { out "UNI"; token lexbuf                                   }
+   | UNI    { out "UNI"; incr O.count; token lexbuf                     }
+   | WRD    { out "WRD"; incr O.count; token lexbuf                     }
    | _      { out "CHR"; incr O.count; token lexbuf                     }
    | eof    { out "EOF"                                                 }
 and str = parse
    | QT     { 2 }
    | "\\\"" { succ (str lexbuf)                                         }
-   | UNI    { str lexbuf                                                }
+   | UNI    { succ (str lexbuf)                                         }
+   | WRD    { succ (str lexbuf)                                         }
    | _      { succ (str lexbuf)                                         }
 and block = parse
    | CL     { ()                                                        }