1 (* Copyright (C) 2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
28 open Http_getter_common
30 open Http_getter_types
32 exception Not_implemented of string
33 exception UnexpectedGetterOutput
40 type logger_callback = HelmLogger.html_tag -> unit
42 let stdout_logger tag = print_string (HelmLogger.string_of_html_tag tag)
44 let not_implemented s = raise (Not_implemented ("Http_getter." ^ s))
46 let index_line_sep_RE = Pcre.regexp "[ \t]+"
47 let index_sep_RE = Pcre.regexp "\r\n|\r|\n"
48 let trailing_types_RE = Pcre.regexp "\\.types$"
49 let heading_cic_RE = Pcre.regexp "^cic:"
50 let heading_theory_RE = Pcre.regexp "^theory:"
51 let heading_nuprl_RE = Pcre.regexp "^nuprl:"
52 let types_RE = Pcre.regexp "\\.types$"
53 let types_ann_RE = Pcre.regexp "\\.types\\.ann$"
54 let body_RE = Pcre.regexp "\\.body$"
55 let body_ann_RE = Pcre.regexp "\\.body\\.ann$"
56 let proof_tree_RE = Pcre.regexp "\\.proof_tree$"
57 let proof_tree_ann_RE = Pcre.regexp "\\.proof_tree\\.ann$"
58 let theory_RE = Pcre.regexp "\\.theory$"
59 let basepart_RE = Pcre.regexp
60 "^([^.]*\\.[^.]*)((\\.body)|(\\.proof_tree)|(\\.types))?(\\.ann)?$"
61 let slash_RE = Pcre.regexp "/"
62 let pipe_RE = Pcre.regexp "\\|"
63 let til_slash_RE = Pcre.regexp "^.*/"
64 let no_slashes_RE = Pcre.regexp "^[^/]*$"
65 let fix_regexp_RE = Pcre.regexp ("^" ^ (Pcre.quote "(cic|theory)"))
66 let showable_file_RE = Pcre.regexp "(\\.con|\\.ind|\\.var)$"
68 let xml_suffix = ".xml"
69 let theory_suffix = ".theory" ^ xml_suffix
71 (* global maps, shared by all threads *)
73 let ends_with_slash s =
75 s.[String.length s - 1] = '/'
76 with Invalid_argument _ -> false
78 (* should we use a remote getter or not *)
81 Helm_registry.get "getter.mode" = "remote"
82 with Helm_registry.Key_not_found _ -> false
84 let getter_url () = Helm_registry.get "getter.url"
86 (* Remote interface: getter methods implemented using a remote getter *)
89 let getxml_remote uri = not_implemented "getxml_remote"
90 let getxslt_remote ~patch_dtd uri = not_implemented "getxslt_remote"
91 let getdtd_remote uri = not_implemented "getdtd_remote"
92 let clean_cache_remote () = not_implemented "clean_cache_remote"
93 let list_servers_remote () = not_implemented "list_servers_remote"
94 let add_server_remote ~logger ~position name =
95 not_implemented "add_server_remote"
96 let remove_server_remote ~logger position =
97 not_implemented "remove_server_remote"
98 let getalluris_remote () = not_implemented "getalluris_remote"
99 let ls_remote lsuri = not_implemented "ls_remote"
100 let exists_remote uri = not_implemented "exists_remote"
103 let resolve_remote uri =
104 (* deliver resolve request to http_getter *)
106 Http_getter_wget.get (sprintf "%sresolve?uri=%s" (getter_url ()) uri)
108 let res = ref Unknown in
109 let start_element tag attrs =
113 res := Resolved (List.assoc "value" attrs)
114 with Not_found -> ())
115 | "unresolvable" -> res := Exception (Unresolvable_URI uri)
116 | "not_found" -> res := Exception (Key_not_found uri)
120 XmlPushParser.default_callbacks with
121 XmlPushParser.start_element = Some start_element
123 let xml_parser = XmlPushParser.create_parser callbacks in
124 XmlPushParser.parse xml_parser (`String doc);
125 XmlPushParser.final xml_parser;
127 | Unknown -> raise UnexpectedGetterOutput
128 | Exception e -> raise e
129 | Resolved url -> url
133 let help () = Http_getter_const.usage_string (Http_getter_env.env_to_string ())
139 Http_getter_storage.exists (uri ^ xml_suffix)
146 Http_getter_storage.resolve (uri ^ xml_suffix)
147 with Http_getter_storage.Resource_not_found _ -> raise (Key_not_found uri)
154 Http_getter_storage.filename (uri ^ xml_suffix)
155 with Http_getter_storage.Resource_not_found _ -> raise (Key_not_found uri)
158 let getxslt ?(patch_dtd = true) uri = assert false
159 (* let getxslt ?(patch_dtd = true) uri =
161 getxslt_remote ~patch_dtd uri
163 let url = resolve uri in
164 let (fname, outchan) = temp_file_of_uri uri in
165 Http_getter_cache.respond_xsl ~via_http:false ~url ~patch:patch_dtd outchan;
174 let fname = Lazy.force Http_getter_env.dtd_dir ^ "/" ^ uri in
175 if not (Sys.file_exists fname) then raise (Dtd_not_found uri);
181 clean_cache_remote ()
183 Http_getter_storage.clean_cache ()
185 let (++) (oldann, oldtypes, oldbody, oldtree)
186 (newann, newtypes, newbody, newtree) =
187 ((if newann > oldann then newann else oldann),
188 (if newtypes > oldtypes then newtypes else oldtypes),
189 (if newbody > oldbody then newbody else oldbody),
190 (if newtree > oldtree then newtree else oldtree))
192 let store_obj tbl o =
193 if Pcre.pmatch ~rex:showable_file_RE o then begin
194 let basepart = Pcre.replace ~rex:basepart_RE ~templ:"$1" o in
195 let no_flags = false, No, No, No in
198 Hashtbl.find tbl basepart
199 with Not_found -> (* no ann, no types, no body, no proof tree *)
204 | s when Pcre.pmatch ~rex:types_RE s -> (false, Yes, No, No)
205 | s when Pcre.pmatch ~rex:types_ann_RE s -> (true, Ann, No, No)
206 | s when Pcre.pmatch ~rex:body_RE s -> (false, No, Yes, No)
207 | s when Pcre.pmatch ~rex:body_ann_RE s -> (true, No, Ann, No)
208 | s when Pcre.pmatch ~rex:proof_tree_RE s -> (false, No, No, Yes)
209 | s when Pcre.pmatch ~rex:proof_tree_ann_RE s -> (true, No, No, Ann)
212 Hashtbl.replace tbl basepart (oldflags ++ newflags)
215 let store_dir set_ref d =
216 set_ref := StringSet.add (List.hd (Pcre.split ~rex:slash_RE d)) !set_ref
218 let collect_ls_items dirs_set objs_tbl =
219 let items = ref [] in
220 StringSet.iter (fun dir -> items := Ls_section dir :: !items) dirs_set;
221 Http_getter_misc.hashtbl_sorted_iter
222 (fun uri (annflag, typesflag, bodyflag, treeflag) ->
225 uri = uri; ann = annflag;
226 types = typesflag; body = bodyflag; proof_tree = treeflag
231 (** non regexp-aware version of ls *)
232 let dumb_ls uri_prefix =
233 if is_cic_obj_uri uri_prefix then begin
234 let dirs = ref StringSet.empty in
235 let objs = Hashtbl.create 17 in
238 if ends_with_slash fname then
242 store_obj objs (strip_suffix ~suffix:xml_suffix fname)
243 with Invalid_argument _ -> ())
244 (Http_getter_storage.ls uri_prefix);
245 collect_ls_items !dirs objs
246 end else if is_theory_uri uri_prefix then begin
247 let items = ref [] in
250 if ends_with_slash fname then
251 items := Ls_section (strip_trailing_slash fname) :: !items
255 { uri = strip_suffix theory_suffix fname;
256 ann = false; types = No; body = No; proof_tree = No }
258 items := Ls_object obj :: !items
259 with Invalid_argument _ -> ())
260 (Http_getter_storage.ls uri_prefix);
263 raise (Invalid_URI uri_prefix)
265 (* handle simple regular expressions of the form "...(..|..|..)..." on cic
266 * uris, not meant to be a real implementation of regexp. The only we use is
267 * "(cic|theory):/..." *)
268 let explode_ls_regexp regexp =
270 let len = String.length regexp in
271 let lparen_idx = String.index regexp '(' in
272 let rparen_idx = String.index_from regexp lparen_idx ')' in
273 let choices_str = (* substring between parens, parens excluded *)
274 String.sub regexp (lparen_idx + 1) (rparen_idx - lparen_idx - 1)
276 let choices = Pcre.split ~rex:pipe_RE choices_str in
277 let prefix = String.sub regexp 0 lparen_idx in
278 let suffix = String.sub regexp (rparen_idx + 1) (len - (rparen_idx + 1)) in
279 List.map (fun choice -> prefix ^ choice ^ suffix) choices
280 with Not_found -> [regexp]
282 let merge_results results =
283 let rec aux objects_acc dirs_acc = function
284 | [] -> dirs_acc @ objects_acc
285 | Ls_object _ as obj :: tl -> aux (obj :: objects_acc) dirs_acc tl
286 | Ls_section _ as dir :: tl ->
287 if List.mem dir dirs_acc then (* filters out dir duplicates *)
288 aux objects_acc dirs_acc tl
290 aux objects_acc (dir :: dirs_acc) tl
292 aux [] [] (List.concat results)
298 let prefixes = explode_ls_regexp regexp in
299 merge_results (List.map dumb_ls prefixes)
302 let rec aux acc = function
307 (fun (acc, subdirs) result ->
309 | Ls_object obj -> (dir ^ obj.uri) :: acc, subdirs
310 | Ls_section sect -> acc, (dir ^ sect ^ "/") :: subdirs)
316 aux [] ["cic:/"] (* trailing slash required *)
318 (* Shorthands from now on *)
320 let getxml' uri = getxml (UriManager.string_of_uri uri)
321 let resolve' uri = resolve (UriManager.string_of_uri uri)
322 let exists' uri = exists (UriManager.string_of_uri uri)
325 Http_getter_logger.set_log_level
326 (Helm_registry.get_opt_default Helm_registry.int ~default:1
328 Http_getter_logger.set_log_file
329 (Helm_registry.get_opt Helm_registry.string "getter.log_file")