X-Git-Url: http://matita.cs.unibo.it/gitweb/?p=helm.git;a=blobdiff_plain;f=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnethtml.ml;fp=helm%2FDEVEL%2Fpxp%2Fnetstring%2Fnethtml.ml;h=0000000000000000000000000000000000000000;hp=7f9d983cddd85529fb0af589cdef78ae2a2d4848;hb=869549224eef6278a48c16ae27dd786376082b38;hpb=89262281b6e83bd2321150f81f1a0583645eb0c8 diff --git a/helm/DEVEL/pxp/netstring/nethtml.ml b/helm/DEVEL/pxp/netstring/nethtml.ml deleted file mode 100644 index 7f9d983cd..000000000 --- a/helm/DEVEL/pxp/netstring/nethtml.ml +++ /dev/null @@ -1,276 +0,0 @@ -(* $Id$ - * ---------------------------------------------------------------------- - * - *) - -open Nethtml_scanner;; - -type document = - Element of (string * (string*string) list * document list) - | Data of string -;; - - -exception End_of_scan;; - - -let no_end_tag = (* empty HTML elements *) - ref - [ "isindex"; - "base"; - "meta"; - "link"; - "hr"; - "input"; - "img"; - "param"; - "basefont"; - "br"; - "area"; - ] -;; - - -let special_tag = (* other lexical rules *) - ref - [ "script"; - "style"; - ] -;; - - -let rec parse_comment buf = - let t = scan_comment buf in - match t with - Mcomment -> - parse_comment buf - | Eof -> - raise End_of_scan - | _ -> - () -;; - - -let rec parse_doctype buf = - let t = scan_doctype buf in - match t with - Mdoctype -> - parse_doctype buf - | Eof -> - raise End_of_scan - | _ -> - () -;; - - -let parse_document buf = - let current_name = ref "" in - let current_atts = ref [] in - let current_subs = ref [] in - let stack = Stack.create() in - - let parse_atts() = - let rec next_no_space() = - match scan_element buf with - Space _ -> next_no_space() - | t -> t - in - - let rec parse_atts_lookahead next = - match next with - Relement -> [] - | Name n -> - begin match next_no_space() with - Is -> - begin match next_no_space() with - Name v -> - (String.lowercase n, String.uppercase v) :: - parse_atts_lookahead (next_no_space()) - | Literal v -> - (String.lowercase n,v) :: - parse_atts_lookahead (next_no_space()) - | Eof -> - raise End_of_scan - | Relement -> - (* Illegal *) - [] - | _ -> - (* Illegal *) - parse_atts_lookahead (next_no_space()) - end - | Eof -> - raise End_of_scan - | Relement -> - (* <==> *) - [ String.lowercase n, String.lowercase n ] - | next' -> - (* assume <==> *) - ( String.lowercase n, String.lowercase n ) :: - parse_atts_lookahead next' - end - | Eof -> - raise End_of_scan - | _ -> - (* Illegal *) - parse_atts_lookahead (next_no_space()) - in - parse_atts_lookahead (next_no_space()) - in - - let rec parse_special name = - (* Parse until *) - match scan_special buf with - Lelementend n -> - if n = name then - "" - else - " - raise End_of_scan - | Cdata s -> - s ^ parse_special name - | _ -> - (* Illegal *) - parse_special name - in - - let rec skip_element() = - (* Skip until ">" *) - match scan_element buf with - Relement -> - () - | Eof -> - raise End_of_scan - | _ -> - skip_element() - in - - let rec parse_next() = - let t = scan_document buf in - match t with - Lcomment -> - parse_comment buf; - parse_next() - | Ldoctype -> - parse_doctype buf; - parse_next() - | Lelement name -> - let name = String.lowercase name in - if List.mem name !no_end_tag then begin - let atts = parse_atts() in - current_subs := (Element(name, atts, [])) :: !current_subs; - parse_next() - end - else if List.mem name !special_tag then begin - let atts = parse_atts() in - let data = parse_special name in - (* Read until ">" *) - skip_element(); - current_subs := (Element(name, atts, [Data data])) :: !current_subs; - parse_next() - end - else begin - let atts = parse_atts() in - Stack.push (!current_name, !current_atts, !current_subs) stack; - current_name := name; - current_atts := atts; - current_subs := []; - parse_next() - end - | Cdata data -> - current_subs := (Data data) :: !current_subs; - parse_next() - | Lelementend name -> - let name = String.lowercase name in - (* Read until ">" *) - skip_element(); - (* Search the element to close on the stack: *) - let found = ref (name = !current_name) in - Stack.iter - (fun (old_name, _, _) -> - if name = old_name then found := true) - stack; - (* If not found, the end tag is wrong. Simply ignore it. *) - if not !found then - parse_next() - else begin - (* Put the current element on to the stack: *) - Stack.push (!current_name, !current_atts, !current_subs) stack; - (* If found: Remove the elements from the stack, and append - * them to the previous element as sub elements - *) - let rec remove() = - let old_name, old_atts, old_subs = Stack.pop stack in - (* or raise Stack.Empty *) - if old_name = name then - old_name, old_atts, old_subs - else - let older_name, older_atts, older_subs = remove() in - older_name, - older_atts, - (Element (old_name, old_atts, List.rev old_subs) :: older_subs) - in - let old_name, old_atts, old_subs = remove() in - (* Remove one more element: the element containing the element - * currently being closed. - *) - let new_name, new_atts, new_subs = Stack.pop stack in - current_name := new_name; - current_atts := new_atts; - current_subs := (Element (old_name, old_atts, List.rev old_subs)) - :: new_subs; - (* Go on *) - parse_next() - end - | Eof -> - raise End_of_scan - | _ -> - parse_next() - in - try - parse_next(); - List.rev !current_subs - with - End_of_scan -> - (* Close all remaining elements: *) - Stack.push (!current_name, !current_atts, !current_subs) stack; - let rec remove() = - let old_name, old_atts, old_subs = Stack.pop stack in - (* or raise Stack.Empty *) - try - let older_name, older_atts, older_subs = remove() in - older_name, - older_atts, - (Element (old_name, old_atts, List.rev old_subs) :: older_subs) - with - Stack.Empty -> - old_name, old_atts, old_subs - in - let name, atts, subs = remove() in - List.rev subs -;; - - -let parse_string s = - let buf = Lexing.from_string s in - parse_document buf -;; - - -let parse_file fd = - let buf = Lexing.from_channel fd in - parse_document buf -;; - -(* ====================================================================== - * History: - * - * $Log$ - * Revision 1.1 2000/11/17 09:57:28 lpadovan - * Initial revision - * - * Revision 1.1 2000/03/03 01:07:25 gerd - * Initial revision. - * - * - *)