1 (* Copyright (C) 2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
30 open Http_getter_common
32 open Http_getter_types
34 exception Not_implemented of string
35 exception UnexpectedGetterOutput
42 type logger_callback = HelmLogger.html_tag -> unit
44 let stdout_logger tag = print_string (HelmLogger.string_of_html_tag tag)
46 let not_implemented s = raise (Not_implemented ("Http_getter." ^ s))
48 let index_line_sep_RE = Pcre.regexp "[ \t]+"
49 let index_sep_RE = Pcre.regexp "\r\n|\r|\n"
50 let trailing_types_RE = Pcre.regexp "\\.types$"
51 let heading_cic_RE = Pcre.regexp "^cic:"
52 let heading_theory_RE = Pcre.regexp "^theory:"
53 let heading_nuprl_RE = Pcre.regexp "^nuprl:"
54 let types_RE = Pcre.regexp "\\.types$"
55 let types_ann_RE = Pcre.regexp "\\.types\\.ann$"
56 let body_RE = Pcre.regexp "\\.body$"
57 let body_ann_RE = Pcre.regexp "\\.body\\.ann$"
58 let proof_tree_RE = Pcre.regexp "\\.proof_tree$"
59 let proof_tree_ann_RE = Pcre.regexp "\\.proof_tree\\.ann$"
60 let theory_RE = Pcre.regexp "\\.theory$"
61 let basepart_RE = Pcre.regexp
62 "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$"
63 let slash_RE = Pcre.regexp "/"
64 let pipe_RE = Pcre.regexp "\\|"
65 let til_slash_RE = Pcre.regexp "^.*/"
66 let no_slashes_RE = Pcre.regexp "^[^/]*$"
67 let fix_regexp_RE = Pcre.regexp ("^" ^ (Pcre.quote "(cic|theory)"))
68 let showable_file_RE =
69 Pcre.regexp "(\\.con|\\.ind|\\.var|\\.body|\\.types|\\.proof_tree)$"
71 let xml_suffix = ".xml"
72 let theory_suffix = ".theory"
74 (* global maps, shared by all threads *)
76 let ends_with_slash s =
78 s.[String.length s - 1] = '/'
79 with Invalid_argument _ -> false
81 (* should we use a remote getter or not *)
84 Helm_registry.get "getter.mode" = "remote"
85 with Helm_registry.Key_not_found _ -> false
87 let getter_url () = Helm_registry.get "getter.url"
89 (* Remote interface: getter methods implemented using a remote getter *)
92 let getxml_remote uri = not_implemented "getxml_remote"
93 let getxslt_remote uri = not_implemented "getxslt_remote"
94 let getdtd_remote uri = not_implemented "getdtd_remote"
95 let clean_cache_remote () = not_implemented "clean_cache_remote"
96 let list_servers_remote () = not_implemented "list_servers_remote"
97 let add_server_remote ~logger ~position name =
98 not_implemented "add_server_remote"
99 let remove_server_remote ~logger position =
100 not_implemented "remove_server_remote"
101 let getalluris_remote () = not_implemented "getalluris_remote"
102 let ls_remote lsuri = not_implemented "ls_remote"
103 let exists_remote uri = not_implemented "exists_remote"
106 let resolve_remote ~writable uri =
107 (* deliver resolve request to http_getter *)
109 Http_getter_wget.get (sprintf "%sresolve?uri=%s&writable=%b" (getter_url ())
112 let res = ref Unknown in
113 let start_element tag attrs =
117 res := Resolved (List.assoc "value" attrs)
118 with Not_found -> ())
119 | "unresolvable" -> res := Exception (Unresolvable_URI uri)
120 | "not_found" -> res := Exception (Key_not_found uri)
124 XmlPushParser.default_callbacks with
125 XmlPushParser.start_element = Some start_element
127 let xml_parser = XmlPushParser.create_parser callbacks in
128 XmlPushParser.parse xml_parser (`String doc);
129 XmlPushParser.final xml_parser;
131 | Unknown -> raise UnexpectedGetterOutput
132 | Exception e -> raise e
133 | Resolved url -> url
135 let deref_index_theory ~local uri =
136 (* if Http_getter_storage.exists ~local (uri ^ xml_suffix) then uri *)
137 if is_theory_uri uri && Filename.basename uri = "index.theory" then
138 strip_trailing_slash (Filename.dirname uri) ^ theory_suffix
144 let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ())
146 let exists ~local uri =
147 (* prerr_endline ("Http_getter.exists " ^ uri); *)
151 let uri = deref_index_theory ~local uri in
152 Http_getter_storage.exists ~local (uri ^ xml_suffix)
156 s <> UriManager.buri_of_uri (UriManager.uri_of_string s)
157 with UriManager.IllFormedUri _ -> true
159 let resolve ~local ~writable uri =
161 resolve_remote ~writable uri
163 let uri = deref_index_theory ~local uri in
165 if is_an_obj uri then
166 Http_getter_storage.resolve ~writable ~local (uri ^ xml_suffix)
168 Http_getter_storage.resolve ~writable ~local uri
169 with Http_getter_storage.Resource_not_found _ -> raise (Key_not_found uri)
171 let filename ~local ~writable uri =
173 raise (Key_not_found uri)
175 let uri = deref_index_theory ~local uri in
177 Http_getter_storage.resolve
178 ~must_exists:false ~writable ~local uri
179 with Http_getter_storage.Resource_not_found _ -> raise (Key_not_found uri)
182 if remote () then getxml_remote uri
184 let uri' = deref_index_theory ~local:false uri in
186 Http_getter_storage.filename ~local:false (uri' ^ xml_suffix)
187 with Http_getter_storage.Resource_not_found _ -> raise (Key_not_found uri))
191 if remote () then getxslt_remote uri
192 else Http_getter_storage.filename ~local:false ~find:true ("xslt:/" ^ uri)
198 let fname = Http_getter_env.get_dtd_dir () ^ "/" ^ uri in
199 if not (Sys.file_exists fname) then raise (Dtd_not_found uri);
205 clean_cache_remote ()
207 Http_getter_storage.clean_cache ()
209 let (++) (oldann, oldtypes, oldbody, oldtree)
210 (newann, newtypes, newbody, newtree) =
211 ((if newann > oldann then newann else oldann),
212 (if newtypes > oldtypes then newtypes else oldtypes),
213 (if newbody > oldbody then newbody else oldbody),
214 (if newtree > oldtree then newtree else oldtree))
216 let store_obj tbl o =
217 (* prerr_endline ("Http_getter.store_obj " ^ o); *)
218 if Pcre.pmatch ~rex:showable_file_RE o then begin
219 let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in
220 let no_flags = false, No, No, No in
223 Hashtbl.find tbl basepart
224 with Not_found -> (* no ann, no types, no body, no proof tree *)
229 | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No)
230 | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No)
231 | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No)
232 | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No)
233 | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes)
234 | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann)
237 Hashtbl.replace tbl basepart (oldflags ++ newflags)
240 let store_dir set_ref d =
241 set_ref := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !set_ref
243 let collect_ls_items dirs_set objs_tbl =
244 let items = ref [] in
245 StringSet.iter (fun dir -> items := Ls_section dir :: !items) dirs_set;
246 Http_getter_misc.hashtbl_sorted_iter
247 (fun uri (annflag, typesflag, bodyflag, treeflag) ->
250 uri = uri; ann = annflag;
251 types = typesflag; body = bodyflag; proof_tree = treeflag
256 let contains_object = (<>) []
258 (** non regexp-aware version of ls *)
259 let rec dumb_ls ~local uri_prefix =
260 (* prerr_endline ("Http_getter.dumb_ls " ^ uri_prefix); *)
261 if is_cic_obj_uri uri_prefix then begin
262 let dirs = ref StringSet.empty in
263 let objs = Hashtbl.create 17 in
266 if ends_with_slash fname then
270 store_obj objs (strip_suffix ~suffix:xml_suffix fname)
271 with Invalid_argument _ -> ())
272 (Http_getter_storage.ls ~local uri_prefix);
273 collect_ls_items !dirs objs
274 end else if is_theory_uri uri_prefix then begin
275 let items = ref [] in
276 let add_theory fname =
279 uri = fname; ann = false; types = No; body = No; proof_tree = No }
283 Pcre.replace_first ~rex:heading_theory_RE ~templ:"cic:" uri_prefix
287 if ends_with_slash fname then
288 items := Ls_section (strip_trailing_slash fname) :: !items
291 let fname = strip_suffix ~suffix:xml_suffix fname in
292 let theory_name = strip_suffix ~suffix:theory_suffix fname in
293 let sub_theory = normalize_dir cic_uri_prefix ^ theory_name ^ "/" in
294 if is_empty_theory ~local sub_theory then add_theory fname
295 with Invalid_argument _ -> ())
296 (Http_getter_storage.ls ~local uri_prefix);
298 if contains_object (dumb_ls ~local cic_uri_prefix)
299 && exists ~local:false (strip_trailing_slash uri_prefix ^ theory_suffix)
301 add_theory "index.theory";
302 with Unresolvable_URI _ -> ());
305 raise (Invalid_URI uri_prefix)
307 and is_empty_theory ~local uri_prefix =
308 (* prerr_endline ("is_empty_theory " ^ uri_prefix); *)
309 not (contains_object (dumb_ls ~local uri_prefix))
311 (* handle simple regular expressions of the form "...(..|..|..)..." on cic
312 * uris, not meant to be a real implementation of regexp. The only we use is
313 * "(cic|theory):/..." *)
314 let explode_ls_regexp regexp =
316 let len = String.length regexp in
317 let lparen_idx = String.index regexp '(' in
318 let rparen_idx = String.index_from regexp lparen_idx ')' in
319 let choices_str = (* substring between parens, parens excluded *)
320 String.sub regexp (lparen_idx + 1) (rparen_idx - lparen_idx - 1)
322 let choices = Pcre.split ~rex:pipe_RE choices_str in
323 let prefix = String.sub regexp 0 lparen_idx in
324 let suffix = String.sub regexp (rparen_idx + 1) (len - (rparen_idx + 1)) in
325 List.map (fun choice -> prefix ^ choice ^ suffix) choices
326 with Not_found -> [regexp]
328 let merge_results results =
329 let rec aux objects_acc dirs_acc = function
330 | [] -> dirs_acc @ objects_acc
331 | Ls_object _ as obj :: tl -> aux (obj :: objects_acc) dirs_acc tl
332 | Ls_section _ as dir :: tl ->
333 if List.mem dir dirs_acc then (* filters out dir duplicates *)
334 aux objects_acc dirs_acc tl
336 aux objects_acc (dir :: dirs_acc) tl
338 aux [] [] (List.concat results)
340 let ls ~local regexp =
344 let prefixes = explode_ls_regexp regexp in
345 merge_results (List.map (dumb_ls ~local) prefixes)
348 let rec aux acc = function
353 (fun (acc, subdirs) result ->
355 | Ls_object obj -> (dir ^ obj.uri) :: acc, subdirs
356 | Ls_section sect -> acc, (dir ^ sect ^ "/") :: subdirs)
358 (dumb_ls ~local:false dir)
362 aux [] ["cic:/"] (* trailing slash required *)
364 (* Shorthands from now on *)
366 let getxml' uri = getxml (UriManager.string_of_uri uri)
367 let resolve' ~local ~writable uri =
368 resolve ~local ~writable (UriManager.string_of_uri uri)
370 let exists' ~local uri = exists ~local (UriManager.string_of_uri uri)
371 let filename' ~local ~writable uri =
372 filename ~local ~writable (UriManager.string_of_uri uri)
375 let tilde_expand_key k =
377 Helm_registry.set k (HExtlib.tilde_expand (Helm_registry.get k))
378 with Helm_registry.Key_not_found _ -> ()
381 List.iter tilde_expand_key ["getter.cache_dir"; "getter.dtd_dir"];
382 Http_getter_logger.set_log_level
383 (Helm_registry.get_opt_default Helm_registry.int ~default:1
385 Http_getter_logger.set_log_file
386 (Helm_registry.get_opt Helm_registry.string "getter.log_file")