From 630f5b03a7e5da46d7795fc0124a288a1efb8562 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Wed, 8 Feb 2006 10:34:37 +0000 Subject: [PATCH] added splitting engine for .html generated by docbook -> xhtml conversion --- helm/software/matita/help/C/sec_terms.xml | 8 +- helm/software/matita/help/C/split.ml | 180 ++ .../matita/help/C/xhtml1-transitional.dtd | 1717 +++++++++++++++++ 3 files changed, 1901 insertions(+), 4 deletions(-) create mode 100755 helm/software/matita/help/C/split.ml create mode 100644 helm/software/matita/help/C/xhtml1-transitional.dtd diff --git a/helm/software/matita/help/C/sec_terms.xml b/helm/software/matita/help/C/sec_terms.xml index d92518d8d..4499f2344 100644 --- a/helm/software/matita/help/C/sec_terms.xml +++ b/helm/software/matita/help/C/sec_terms.xml @@ -6,22 +6,22 @@ Terms - TODO + &TODO; Definitions - TODO + &TODO; Declarations (of inductive types) - TODO + &TODO; Proofs - TODO + &TODO; diff --git a/helm/software/matita/help/C/split.ml b/helm/software/matita/help/C/split.ml new file mode 100755 index 000000000..d20e4aea8 --- /dev/null +++ b/helm/software/matita/help/C/split.ml @@ -0,0 +1,180 @@ +#!/usr/bin/ocamlrun /usr/bin/ocaml + +let fname = + try Sys.argv.(1) + with Invalid_argument _ -> + prerr_endline "Usage: split.ml "; + exit 1 ;; + +#use "topfind";; +#require "unix";; +#require "pxp-engine";; +#require "pxp-lex-utf8";; + +let xhtml_header = +" + + + +" + +let xhtml_trailer = +" + +" + +type 'a node = + ('a Pxp_document.node #Pxp_document.extension as 'a) Pxp_document.node + +let resolver = + Pxp_reader.lookup_public_id_as_file [ + "-//W3C//DTD XHTML 1.0 Transitional//EN", "xhtml1-transitional.dtd"; + "-//W3C//ENTITIES Latin 1 for XHTML//EN", "xhtml-lat1.ent"; + "-//W3C//ENTITIES Symbols for XHTML//EN", "xhtml-symbol.ent"; + "-//W3C//ENTITIES Special for XHTML//EN", "xhtml-special.ent"; ] + +let parse_xml fname = + Pxp_tree_parser.parse_wfdocument_entity + { Pxp_types.default_config with Pxp_types.encoding = `Enc_utf8 } + (Pxp_types.from_file ~alt:[ resolver ] fname) + Pxp_tree_parser.default_spec + + (** pattern matching like predicate on pxp nodes *) +let match_elt tag attr_name ?attr_value () node = + node#node_type = Pxp_document.T_element tag + && (match attr_value with + | Some attr_value -> + (try node#attribute attr_name = Pxp_types.Value attr_value + with Not_found -> false) + | None -> List.mem attr_name node#attribute_names) + +let slice doc = + let article = + try + Pxp_document.find ~deeply:true + (match_elt "div" "class" ~attr_value:"article" ()) doc#root + with Not_found -> failwith "Can't find article
" in + let titlepage = + Pxp_document.find ~deeply:false + (match_elt "div" "class" ~attr_value:"titlepage" ()) article in + let toc = + Pxp_document.find ~deeply:false + (match_elt "div" "class" ~attr_value:"toc" ()) article in + let secs = + Pxp_document.find_all ~deeply:false + (match_elt "div" "class" ~attr_value:"sect1" ()) article in + titlepage :: toc :: secs + +let localize_ids secs = + let id2sec = Hashtbl.create 1023 in + let sec_ids = ref [] in + List.iter + (fun sec -> + match Pxp_document.find_all ~deeply:true (match_elt "a" "id" ()) sec with + | sec_id :: ids -> + let sec_id = sec_id#required_string_attribute "id" in + sec_ids := sec_id :: !sec_ids; + List.iter + (fun id -> + let id = id#required_string_attribute "id" in + Hashtbl.add id2sec id sec_id) + ids + | _ -> failwith "can't find section id") + secs; + !sec_ids, id2sec + +let fname_of_sec sec_name = sec_name ^ ".html" + +let get_sec_name sec = + let id = + try + Pxp_document.find ~deeply:true (match_elt "a" "id" ()) sec + with Not_found -> failwith "can't find section id" in + id#required_string_attribute "id" + +let iter_xrefs f node = + let a_s = Pxp_document.find_all ~deeply:true (match_elt "a" "href" ()) node in + List.iter + (fun (node: 'a node) -> + let href = node#required_string_attribute "href" in + assert (String.length href > 0); + if href.[0] = '#' then + let xref = String.sub href 1 (String.length href - 1) in + f node xref) + a_s + +let patch_toc sec_ids id2sec toc = + iter_xrefs + (fun node xref -> + if List.mem xref sec_ids then + node#set_attribute "href" (Pxp_types.Value (fname_of_sec xref)) + else + try + let sec = Hashtbl.find id2sec xref in + node#set_attribute "href" + (Pxp_types.Value (fname_of_sec sec ^ "#" ^ xref)) + with Not_found -> ()) + toc + +(* let a_s = Pxp_document.find_all ~deeply:true (match_elt "a" "href" ()) toc in + List.iter + (fun (node: 'a node) -> + let href = node#required_string_attribute "href" in + assert (String.length href > 0); + if href.[0] = '#' then + let xref = String.sub href 1 (String.length href - 1) in + if List.mem xref sec_ids then + node#set_attribute "href" (Pxp_types.Value (fname_of_sec xref)) + else + try + let sec = Hashtbl.find id2sec xref in + node#set_attribute "href" + (Pxp_types.Value (fname_of_sec sec ^ "#" ^ xref)) + with Not_found -> ()) + a_s *) + +let patch_sec sec_ids id2sec sec = + let sec_name = get_sec_name sec in + iter_xrefs + (fun node xref -> + try + let xref_sec = Hashtbl.find id2sec xref in + if xref_sec <> sec_name then + node#set_attribute "href" + (Pxp_types.Value (fname_of_sec xref_sec ^ "#" ^ xref)) + with Not_found -> ()) + sec + +let open_formatter fname = + Unix.open_process_out (Printf.sprintf "xmllint --format -o %s -" fname) + +let close_formatter oc = ignore (Unix.close_process_out oc) + +let output_index (titlepage: 'a node) (toc: 'a node) fname = + let oc = open_formatter fname in + output_string oc xhtml_header; + titlepage#write (`Out_channel oc) `Enc_utf8; + toc#write (`Out_channel oc) `Enc_utf8; + output_string oc xhtml_trailer; + close_formatter oc + +let output_sec (sec: 'a node) fname = + let oc = open_formatter fname in + output_string oc xhtml_header; + sec#write (`Out_channel oc) `Enc_utf8; + output_string oc xhtml_trailer; + close_formatter oc + +let main () = + let doc = parse_xml fname in + match slice doc with + | titlepage :: toc :: secs -> + let sec_ids, id2sec = localize_ids secs in + patch_toc sec_ids id2sec toc; + List.iter (patch_sec sec_ids id2sec) secs; + output_index titlepage toc "index.html"; + List.iter (fun sec -> output_sec sec (get_sec_name sec ^ ".html")) secs + | _ -> failwith "Unrecognized document structure" + +let _ = main () + diff --git a/helm/software/matita/help/C/xhtml1-transitional.dtd b/helm/software/matita/help/C/xhtml1-transitional.dtd new file mode 100644 index 000000000..388a59434 --- /dev/null +++ b/helm/software/matita/help/C/xhtml1-transitional.dtd @@ -0,0 +1,1717 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- 2.39.2