X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Fsoftware%2Fcomponents%2Fcontent_pres%2FcicNotationLexer.ml;h=9d7f2f99d379e3672246ebd56d8537044527cc2b;hb=08e9d02504942642a917c0d3e4b4795e65172d89;hp=749731bdafcf9038923c213757f652b896c0d56c;hpb=b531c938515b0ea6cb92df2e8732c587e0bc026b;p=helm.git diff --git a/helm/software/components/content_pres/cicNotationLexer.ml b/helm/software/components/content_pres/cicNotationLexer.ml index 749731bda..9d7f2f99d 100644 --- a/helm/software/components/content_pres/cicNotationLexer.ml +++ b/helm/software/components/content_pres/cicNotationLexer.ml @@ -30,6 +30,7 @@ open Printf exception Error of int * int * string let regexp number = xml_digit+ +let regexp utf8_blank = " " | "\r\n" | "\n" | "\t" | [160] (* this is a nbsp *) (* ZACK: breaks unicode's binder followed by an ascii letter without blank *) (* let regexp ident_letter = xml_letter *) @@ -75,7 +76,7 @@ let regexp meta_ident = "$" ident let regexp meta_anonymous = "$_" let regexp qstring = '"' [^ '"']* '"' -let regexp begincomment = "(**" xml_blank +let regexp begincomment = "(**" utf8_blank let regexp beginnote = "(*" let regexp endcomment = "*)" (* let regexp comment_char = [^'*'] | '*'[^')'] @@ -109,7 +110,7 @@ let level2_ast_keywords = Hashtbl.create 23 let _ = List.iter (fun k -> Hashtbl.add level2_ast_keywords k ()) [ "CProp"; "Prop"; "Type"; "Set"; "let"; "rec"; "corec"; "match"; - "with"; "in"; "and"; "to"; "as"; "on"; "return" ] + "with"; "in"; "by"; "and"; "to"; "as"; "on"; "return"; "done" ] let add_level2_ast_keyword k = Hashtbl.add level2_ast_keywords k () let remove_level2_ast_keyword k = Hashtbl.remove level2_ast_keywords k @@ -143,14 +144,8 @@ let error_at_end lexbuf msg = raise (Error (begin_cnum, end_cnum, msg)) let return_with_loc token begin_cnum end_cnum = - (* TODO handle line/column numbers *) - let flocation_begin = - { Lexing.pos_fname = ""; - Lexing.pos_lnum = -1; Lexing.pos_bol = -1; - Lexing.pos_cnum = begin_cnum } - in - let flocation_end = { flocation_begin with Lexing.pos_cnum = end_cnum } in - (token, (flocation_begin, flocation_end)) + let flocation = HExtlib.floc_of_loc (begin_cnum,end_cnum) in + token, flocation let return lexbuf token = let begin_cnum, end_cnum = Ulexing.loc lexbuf in @@ -168,17 +163,17 @@ let mk_lexer token = (* let lexbuf = Ulexing.from_utf8_stream stream in *) (** XXX Obj.magic rationale. * The problem. - * camlp4 constraints the tok_func field of Token.glexer to have type: + * camlp5 constraints the tok_func field of Token.glexer to have type: * Stream.t char -> (Stream.t 'te * flocation_function) * In order to use ulex we have (in theory) to instantiate a new lexbuf each * time a char Stream.t is passed, destroying the previous lexbuf which may * have consumed a character from the old stream which is lost forever :-( * The "solution". - * Instead of passing to camlp4 a char Stream.t we pass a lexbuf, casting it to + * Instead of passing to camlp5 a char Stream.t we pass a lexbuf, casting it to * char Stream.t with Obj.magic where needed. *) let lexbuf = Obj.magic stream in - Token.make_stream_and_flocation + Token.make_stream_and_location (fun () -> try token lexbuf @@ -229,7 +224,7 @@ let read_unparsed_group token_name lexbuf = let rec level2_meta_token = lexer - | xml_blank+ -> level2_meta_token lexbuf + | utf8_blank+ -> level2_meta_token lexbuf | ident -> let s = Ulexing.utf8_lexeme lexbuf in begin @@ -279,7 +274,7 @@ let rec ligatures_token k = and level2_ast_token = lexer - | xml_blank+ -> ligatures_token level2_ast_token lexbuf + | utf8_blank+ -> ligatures_token level2_ast_token lexbuf | meta -> let s = Ulexing.utf8_lexeme lexbuf in return lexbuf ("META", String.sub s 1 (String.length s - 1)) @@ -320,7 +315,7 @@ and level2_ast_token = and level1_pattern_token = lexer - | xml_blank+ -> ligatures_token level1_pattern_token lexbuf + | utf8_blank+ -> ligatures_token level1_pattern_token lexbuf | number -> return lexbuf ("NUMBER", Ulexing.utf8_lexeme lexbuf) | ident -> let s = Ulexing.utf8_lexeme lexbuf in