X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fpxp%2Flexers%2Fpxp_lex_aux_utf8.ml;fp=helm%2FDEVEL%2Fpxp%2Fpxp%2Flexers%2Fpxp_lex_aux_utf8.ml;h=0b2c577e9499b19d408fd258fa36bf0c3502249a;hb=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5;hp=0000000000000000000000000000000000000000;hpb=758057e85325f94cd88583feb1fdf6b038e35055;p=helm.git diff --git a/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml new file mode 100644 index 000000000..0b2c577e9 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/lexers/pxp_lex_aux_utf8.ml @@ -0,0 +1,95 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_iso88591 *) + + open Pxp_types + open Pxp_lexer_types + + let get_name_end s k = + (* Get the index of the end+1 of the name beginning at position k *) + let l = String.length s in + let rec find j = + if j < l then + match s.[j] with + | ('\009'|'\010'|'\013'|'\032') -> j + |_ -> find (j+1) + else + l + in + find k + + let get_ws_end s k = + let l = String.length s in + let rec find j = + if j < l then + match s.[j] with + (' '|'\t'|'\r'|'\n') -> find (j+1) + | _ -> j + else + l + in + find k + + let scan_pi pi xml_scanner = + let s = String.sub pi 2 (String.length pi - 4) in + (* the PI without the leading "" *) + let xml_lexbuf = Lexing.from_string (s ^ " ") in + (* Add space because the lexer expects whitespace after every + * clause; by adding a space there is always whitespace at the + * end of the string. + *) + + (* The first word of a PI must be a name: Extract it. *) + + let s_name, s_len = + match xml_scanner xml_lexbuf with + Pro_name n -> + let ltok = String.length (Lexing.lexeme xml_lexbuf) in + if String.length n = ltok then + (* No whitespace after the name *) + raise (WF_error ("Bad processing instruction")); + n, ltok + | _ -> raise (WF_error ("Bad processing instruction")) + in + + (* Note: s_len is the length of s_name + the whitespace following s_name *) + + match s_name with + "xml" -> begin + (* It is a PI: Get the other tokens *) + let rec collect () = + let t = xml_scanner xml_lexbuf in + (* prerr_endline (string_of_int (Lexing.lexeme_end xml_lexbuf)); *) + if t = Pro_eof then + [] + else + t :: collect() + in + PI_xml (collect()) + end + | _ -> + let len_param = String.length s - s_len in + (* It is possible that len_param = -1 *) + if len_param >= 1 then + PI(s_name, String.sub s s_len len_param) + else + PI(s_name, "") + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:32 lpadovan + * Initial revision + * + * Revision 1.2 2000/05/29 23:53:12 gerd + * Updated because Markup_* modules have been renamed to Pxp_*. + * + * Revision 1.1 2000/05/20 20:33:25 gerd + * Initial revision. + * + * + *)