1 (* Copyright (C) 2004-2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
31 open Http_getter_types
34 exception Resource_not_found of string * string (** method, uri *)
(* Name of the index file; ls_http_single appends it to a normalized
 * directory url to obtain the listing to download. *)
36 let index_fname = "INDEX"
38 (******************************* HELPERS **************************************)
(* Matches a slash at the end of the subject string. *)
40 let trailing_slash_RE = Pcre.regexp "/$"
(* Pattern source for relative paths: one or more non-empty segments
 * separated by single slashes, with no leading slash and an optional
 * trailing slash. Kept as a raw string so that file_scheme_RE can embed it. *)
41 let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)"
42 let relative_RE = Pcre.regexp relative_RE_raw
(* Pattern source matching a leading file:// scheme. *)
43 let file_scheme_RE_raw = "(^file://)"
(* Matches a leading file: followed by one or more slashes. *)
44 let extended_file_scheme_RE = Pcre.regexp "(^file:/+)"
(* Accepts either a relative path or a url that starts with file://. *)
45 let file_scheme_RE = Pcre.regexp (relative_RE_raw ^ "|" ^ file_scheme_RE_raw)
(* Matches a leading http:// scheme. *)
46 let http_scheme_RE = Pcre.regexp "^http://"
(* Matches a newline; ls_http_single splits a downloaded index on it. *)
47 let newline_RE = Pcre.regexp "\\n"
(* Matches the :/ separator; get_http splits a uri on it to obtain the
 * scheme and the path. *)
48 let cic_scheme_sep_RE = Pcre.regexp ":/"
(* Length of gz_suffix, used by strip_gz_suffix. *)
50 let gz_suffix_len = String.length gz_suffix
52 (* file:///bla -> bla, bla -> bla *)
(* Converts a file url into a filesystem path. The argument must match
 * file_scheme_RE; this is enforced with an assertion. *)
53 let path_of_file_url url =
54 assert (Pcre.pmatch ~rex:file_scheme_RE url);
55 if Pcre.pmatch ~rex:relative_RE url then
(* NOTE(review): the value returned for a relative url is not visible in
 * this listing; going by the comment above it is presumably url itself
 * -- confirm. *)
57 else (* absolute path, add heading "/" if missing *)
(* The leading file:/+ match is replaced (assuming the default template of
 * Pcre.replace is the empty string -- confirm) and a single slash is
 * prepended, so the result of this branch starts with a slash. *)
58 "/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url)
(* Drops the trailing gz_suffix from fname when the extension of fname, as
 * computed by extension, equals gz_suffix.
 * NOTE(review): the else branch is not visible in this listing; presumably
 * fname is returned unchanged -- confirm. *)
60 let strip_gz_suffix fname =
61 if extension fname = gz_suffix then
62 String.sub fname 0 (String.length fname - gz_suffix_len)
(* Appends a slash to a uri whose last character is a colon.
 * NOTE(review): the opening try and the else branch are not visible in
 * this listing; presumably other uris are returned unchanged -- confirm. *)
66 let normalize_root uri = (* add trailing slash to roots *)
68 if uri.[String.length uri - 1] = ':' then uri ^ "/"
(* Indexing an empty string raises Invalid_argument; in that case the uri
 * is returned as is. *)
70 with Invalid_argument _ -> uri
(** [remove_duplicates l] sorts [l] with the polymorphic structural ordering
    and passes the sorted list to [Http_getter_misc.list_uniq] (presumably
    collapsing adjacent equal elements -- confirm against that module).

    The unqualified [compare] is used instead of [Pervasives.compare]: the
    [Pervasives] module is deprecated since OCaml 4.08 and absent from
    OCaml 5, while the unqualified name resolves to the same function on
    every compiler version. *)
let remove_duplicates l =
  Http_getter_misc.list_uniq (List.stable_sort compare l)
(** [has_rdonly l] is [true] iff the attribute list [l] contains
    [`Read_only]. *)
let has_rdonly l = List.exists (fun attr -> attr = `Read_only) l

(** [has_legacy l] is [true] iff the attribute list [l] contains
    [`Legacy]. *)
let has_legacy l = List.exists (fun attr -> attr = `Legacy) l

(** [is_readwrite attrs] is [true] iff [attrs] contains neither [`Legacy]
    nor [`Read_only]. *)
let is_readwrite attrs = not (has_legacy attrs || has_rdonly attrs)
(** [is_file_schema url] tells whether [url] matches [file_scheme_RE]. *)
let is_file_schema url =
  let rex = file_scheme_RE in
  Pcre.pmatch ~rex url

(** [is_http_schema url] tells whether [url] matches [http_scheme_RE]. *)
let is_http_schema url =
  let rex = http_scheme_RE in
  Pcre.pmatch ~rex url
(* Classifies a listing (a list of entry names) as empty or not.
 * NOTE(review): the list combinator that applies the predicate below to
 * files is not visible in this listing -- confirm which one is used. *)
82 let is_empty_listing files =
85 let len = String.length s in
(* The predicate holds for names shorter than four characters and for names
 * whose last four characters are not .xml. *)
86 len < 4 || String.sub s (len - 4) 4 <> ".xml") files
88 (************************* GLOBALS PREFIXES **********************************)
90 (** associative list regular expressions -> url prefixes
91 * sorted with longest prefixes first *)
92 let prefix_map_ref = ref (lazy (
(* NOTE(review): the call that applies the function below to the list is
 * not visible in this listing. Each configured (uri_prefix, (url_prefix,
 * attrs)) pair becomes a 4-tuple:
 *   anchored regexp matching the quoted uri prefix,
 *   uri prefix without its trailing slash,
 *   normalized url prefix,
 *   attributes. *)
94 (fun (uri_prefix, (url_prefix, attrs)) ->
95 let uri_prefix = normalize_dir uri_prefix in
96 let url_prefix = normalize_dir url_prefix in
97 let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in
98 regexp, strip_trailing_slash uri_prefix, url_prefix, attrs)
(* The configured prefixes are forced and processed in reverse order. *)
99 (List.rev (Lazy.force Http_getter_env.prefixes))))
(** [prefix_map ()] returns the lazy prefix map currently stored in
    [prefix_map_ref]; the caller is responsible for forcing it. *)
let prefix_map () = prefix_map_ref.contents
(* NOTE(review): the header of this definition and some match arms are not
 * visible in this listing; lookup calls it as keep_first. *)
(* Two pairs are considered equal when their second components are equal. *)
104 let cmp (_,x) (_,y) = x = y in
105 let rec aux prev = function
(* Keeps hd while it is equal (by cmp) to prev and drops everything from
 * the first differing element onwards. *)
107 | hd::tl -> if cmp prev hd then hd :: aux prev tl else []
(* Keeps hd and continues with hd as the reference element. *)
110 | hd :: tl -> hd :: aux hd tl
114 (** given an uri returns the prefixes for it *)
(* NOTE(review): the header of this definition (called lookup by get_attrs)
 * and the filtering call are not visible in this listing. *)
(* For every prefix map entry whose regexp matches uri, pairs the entry
 * with the length of the matched text; non matching entries give None. *)
118 (fun (rex, _, l, _ as entry) ->
120 let got = Pcre.extract ~full_match:true ~rex uri in
121 Some (entry, String.length got.(0))
122 with Not_found -> None)
123 (Lazy.force (prefix_map ()))
(* No matching prefix at all: the uri cannot be resolved. *)
125 if matches = [] then raise (Unresolvable_URI uri);
(* Sorts by decreasing match length, filters through keep_first and
 * returns the entries without their lengths. *)
126 List.map fst (keep_first (List.sort (fun (_,l1) (_,l2) -> l2 - l1) matches))
(** [get_attrs uri] returns, in order, the attribute list of every prefix
    entry that [lookup] yields for [uri]. Exceptions raised by [lookup]
    propagate unchanged. *)
let get_attrs uri =
  let attrs_of (_, _, _, attrs) = attrs in
  List.map attrs_of (lookup uri)
131 (*************************** ACTIONS ******************************************)
(** [exists_http ~local _ url] is [false] when [local] is set; otherwise it
    probes the gzipped url first and then the plain one with
    [Http_getter_wget.exists], stopping at the first hit. The second
    argument is ignored. *)
let exists_http ~local _ url =
  (not local)
  && List.exists Http_getter_wget.exists [ url ^ gz_suffix; url ]
(** [exists_file _ fname] tells whether the gzipped variant of [fname] or
    [fname] itself exists on disk, checked in that order. The first
    argument is ignored. *)
let exists_file _ fname =
  List.exists Sys.file_exists [ fname ^ gz_suffix; fname ]
(* Resolves an http url to the variant that is actually available.
 * NOTE(review): the lines using must_exists are not visible in this
 * listing. *)
140 let resolve_http ~must_exists ~local _ url =
(* Remote resources are never resolved in local mode. *)
141 if local then raise Not_found' else
(* Prefers the gzipped url over the plain one. *)
144 List.find Http_getter_wget.exists [ url ^ gz_suffix; url ]
(* Translates the failure of List.find into the module level Not_found'. *)
147 with Not_found -> raise Not_found'
(* Resolves a file name to the variant that exists on disk.
 * NOTE(review): the lines using must_exists are not visible in this
 * listing. *)
149 let resolve_file ~must_exists _ fname =
(* Prefers the gzipped file over the plain one. *)
152 List.find Sys.file_exists [ fname ^ gz_suffix; fname ]
(* Translates the failure of List.find into the module level Not_found'. *)
155 with Not_found -> raise Not_found'
(* Lists the entries of the directory path_prefix; the first argument is
 * ignored. NOTE(review): the loop construct and some branches are not
 * visible in this listing. *)
157 let ls_file_single _ path_prefix =
158 let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in
(* Entries whose name starts with a dot are flagged as useless; any
 * exception raised by the test (e.g. on an empty name) counts as false. *)
159 let is_useless dir = try dir.[0] = '.' with _ -> false in
160 let entries = ref [] in
162 let dir_handle = Unix.opendir path_prefix in
165 let entry = Unix.readdir dir_handle in
166 if is_useless entry then
(* Sub-directories are recorded through normalize_dir. *)
168 else if is_dir (path_prefix ^ "/" ^ entry) then
169 entries := normalize_dir entry :: !entries
(* Other entries are recorded without their gz suffix. *)
171 entries := strip_gz_suffix entry :: !entries
(* Unix.readdir signals the end of the directory with End_of_file; the
 * handle is closed at that point. *)
173 with End_of_file -> Unix.closedir dir_handle);
174 remove_duplicates !entries
(* A failing opendir yields an empty listing. *)
175 with Unix.Unix_error (_, "opendir", _) -> []
(* Lists a remote directory by downloading its index file; the second
 * argument is ignored. Raises Resource_not_found in local mode and when
 * the download fails with Http_client_error. *)
177 let ls_http_single ~local _ url_prefix =
178 if local then raise (Resource_not_found ("get","")) else
(* The index lives at normalized directory url followed by index_fname. *)
179 let url = normalize_dir url_prefix ^ index_fname in
181 let index = Http_getter_wget.get url in
(* The index content is split on newlines to obtain the entries. *)
182 Pcre.split ~rex:newline_RE index
183 with Http_client_error _ -> raise (Resource_not_found ("get",url))
(* Chooses between the gzipped and the plain variant of path, testing the
 * gzipped one first; the first argument is ignored.
 * NOTE(review): the value of each branch, and what happens when neither
 * file exists, are not visible in this listing. *)
186 let get_file _ path =
187 if Sys.file_exists (path ^ gz_suffix) then
189 else if Sys.file_exists path then
(* Returns the name of a local cache file holding the remote resource,
 * downloading it when it is not cached yet.
 * NOTE(review): several lines of this definition are not visible in this
 * listing, including the fallback for malformed uris and the final error
 * handler. *)
194 let get_http ~local uri url =
(* Remote resources are never fetched in local mode. *)
195 if local then raise Not_found' else
(* The uri is split at the :/ separator into scheme and path. *)
197 match Pcre.split ~rex:cic_scheme_sep_RE uri with
198 | [scheme; path] -> scheme, path
(* The cache file name is the cache directory followed by scheme, a slash
 * and path. *)
202 sprintf "%s%s/%s" (Lazy.force Http_getter_env.cache_dir) scheme path
(* Cache hit: the gzipped copy is preferred over the plain one. *)
204 if Sys.file_exists (cache_name ^ gz_suffix) then
205 cache_name ^ gz_suffix
206 else if Sys.file_exists cache_name then
208 else begin (* fill cache *)
209 Http_getter_misc.mkdir ~parents:true (Filename.dirname cache_name);
(* First attempt: download the gzipped resource. *)
211 Http_getter_wget.get_and_save (url ^ gz_suffix) (cache_name ^ gz_suffix);
212 cache_name ^ gz_suffix
(* Second attempt, after an http error: download the plain resource. *)
213 with Http_client_error _ ->
215 Http_getter_wget.get_and_save url cache_name;
217 with Http_client_error _ ->
(** [remove_file _ path] deletes the gzipped variant of [path] and then
    [path] itself, each only if it exists at the time of the check. The
    first argument is ignored. *)
let remove_file _ path =
  let remove_if_present fname =
    if Sys.file_exists fname then Sys.remove fname
  in
  List.iter remove_if_present [ path ^ gz_suffix; path ]
(* Removal is not supported for http resources: both arguments are ignored
 * and a diagnostic is printed on standard error.
 * NOTE(review): the expression following the message is not visible in
 * this listing. *)
225 let remove_http _ _ =
226 prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme";
229 (**************************** RESOLUTION OF PREFIXES ************************)
(* Rewrites uri into concrete urls using prefix map entries, producing at
 * most n results.
 * NOTE(review): some match patterns and the list passed to aux are not
 * visible in this listing. *)
231 let resolve_prefixes n local write exists uri =
(* Existence check dispatched on the scheme of the rewritten url. *)
232 let exists_test new_uri =
233 if is_file_schema new_uri then
234 exists_file () (path_of_file_url new_uri)
235 else if is_http_schema new_uri then
236 exists_http ~local () new_uri
(* n counts the results that may still be produced. *)
239 let rec aux n = function
240 | (rex, _, url_prefix, attrs) :: tl when n > 0->
241 (match write, is_readwrite attrs, exists with
(* Write access requested on an entry that is not read-write: skip it. *)
242 | true ,false, _ -> aux n tl
(* In this arm the rewritten url is kept only when it passes exists_test. *)
245 let new_uri = (Pcre.replace_first ~rex ~templ:url_prefix uri) in
246 if exists_test new_uri then new_uri::aux (n-1) tl else aux n tl
(* In this arm the rewritten url is kept unconditionally. *)
249 (Pcre.replace_first ~rex ~templ:url_prefix uri) :: (aux (n-1) tl))
(* Single result variant of resolve_prefixes: asks for at most one url.
 * NOTE(review): the match arms are not visible in this listing; the
 * visible fragment builds a message from the write and exists flags,
 * paired with the uri. *)
254 let resolve_prefix l w e u =
255 match resolve_prefixes 1 l w e u with
260 (Printf.sprintf "resolve_prefix write:%b exists:%b" w e,u))
262 (* uncomment to debug prefix resolution *)
264 let resolve_prefix w e u =
266 ("XXX w=" ^ string_of_bool w ^ " e=" ^ string_of_bool e ^" :" ^ u);
267 let rc = resolve_prefix w e u in
268 prerr_endline ("YYY :" ^ rc ^ "\n");
272 (************************* DISPATCHERS ***************************************)
(* Description of an operation over the storage, with one handler per url
 * scheme.
 * NOTE(review): only the two handler fields are visible in this listing;
 * other code in this file also reads the fields name, local, write and
 * exists. *)
274 type 'a storage_method = {
279 file: string -> string -> 'a; (* unresolved uri, resolved uri *)
280 http: string -> string -> 'a; (* unresolved uri, resolved uri *)
(* Runs the handler of storage_method that fits the scheme of url.
 * Raises Unsupported_scheme for a url that is neither file nor http, and
 * Resource_not_found (tagged with the method name and the uri) when the
 * handler raises Not_found'. *)
283 let invoke_method storage_method uri url =
(* File handlers receive the filesystem path rather than the url. *)
285 if is_file_schema url then
286 storage_method.file uri (path_of_file_url url)
287 else if is_http_schema url then
288 storage_method.http uri url
290 raise (Unsupported_scheme url)
291 with Not_found' -> raise (Resource_not_found (storage_method.name, uri))
(* Resolves uri to one url and invokes the storage method on it.
 * NOTE(review): the resolving call itself is not visible in this listing;
 * only its arguments are. *)
293 let dispatch_single storage_method uri =
(* Callers must pass uris without the gz suffix. *)
294 assert (extension uri <> gz_suffix);
295 let uri = normalize_root uri in
298 storage_method.local storage_method.write storage_method.exists uri
300 invoke_method storage_method uri url
(* Tries the storage method on every url that uri resolves to, in order,
 * and stops at the first one that does not raise Resource_not_found.
 * Raises Resource_not_found when the candidates are exhausted. *)
302 let dispatch_multi storage_method uri =
(* No limit on the number of resolved urls. *)
304 resolve_prefixes max_int
305 storage_method.local storage_method.write storage_method.exists uri
307 let rec aux = function
308 | [] -> raise (Resource_not_found (storage_method.name, uri))
311 invoke_method storage_method uri url
312 with Resource_not_found _ -> aux tl)
(* Invokes the storage method on every url that uri resolves to and
 * returns the list of results; an exception raised by any invocation
 * propagates. *)
316 let dispatch_all storage_method uri =
318 resolve_prefixes max_int
319 storage_method.local storage_method.write storage_method.exists uri
321 List.map (fun url -> invoke_method storage_method uri url) urls
323 (******************************** EXPORTED FUNCTIONS *************************)
(* Tells whether the resource s exists, using exists_file and exists_http
 * as handlers.
 * NOTE(review): the dispatcher and the other record fields are not
 * visible in this listing. *)
325 let exists ~local s =
332 file = exists_file; http = exists_http ~local; } s
(* An unresolvable resource counts as not existing. *)
333 with Resource_not_found _ -> false
(* Resolves a uri to a url, using resolve_file and resolve_http as
 * handlers; must_exists defaults to true and also fills the exists field
 * of the method record.
 * NOTE(review): the dispatcher, the use of writable and the other record
 * fields are not visible in this listing. *)
335 let resolve ~local ?(must_exists=true) ~writable =
342 exists = must_exists;
344 file = resolve_file ~must_exists;
345 http = resolve_http ~local ~must_exists; }
(* Handler fields of a storage method record built from remove_file and
 * remove_http. NOTE(review): the definition that owns this record is not
 * visible in this listing. *)
353 file = remove_file; http = remove_http; }
(* Returns a local file name for a resource, using get_file and get_http
 * as handlers.
 * NOTE(review): the other record fields are not visible in this
 * listing. *)
355 let filename ~local ?(find = false) =
(* With find set, every resolved url is tried in turn; otherwise only one
 * url is resolved. *)
356 (if find then dispatch_multi else dispatch_single)
361 file = get_file; http = get_http ~local ; }
(* Lists the entries found below uri_prefix.
 * NOTE(review): several lines are not visible in this listing, including
 * the definition of ls_all, the fold call and the final combination of
 * the results. *)
363 let ls ~local uri_prefix =
371 file = ls_file_single; http = ls_http_single ~local; } s
(* A location that cannot be listed contributes no entries. *)
372 with Resource_not_found _ -> []
374 let direct_results = List.flatten (ls_all uri_prefix) in
(* Configured uri prefixes whose parent directory is uri_prefix are added
 * as directory entries (their base name followed by a slash). *)
376 (fun results (_, uri_prefix', _, _) ->
377 if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then
378 (Filename.basename uri_prefix' ^ "/") :: results
382 (Lazy.force (prefix_map ()))
(* Builds a shell command that recursively removes the cache directory.
 * NOTE(review): the enclosing definition and the call that runs the
 * command are not visible in this listing. The directory name is
 * interpolated into the command without quoting -- confirm that
 * Http_getter_env.cache_dir can be trusted. *)
386 (sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir)))
(* Walks the prefix map and selects the entries whose attributes make them
 * read-write; the argument is ignored.
 * NOTE(review): the traversal function and the values produced for each
 * entry are not visible in this listing; presumably the url of each
 * read-write entry is returned -- confirm. *)
388 let list_writable_prefixes _ =
390 (fun (_,_,url,attrs) ->
391 if is_readwrite attrs then
395 (Lazy.force (prefix_map ()))
(** [is_legacy uri] is [true] iff every prefix entry matching [uri] carries
    the [`Legacy] attribute. Exceptions raised by [get_attrs] propagate
    unchanged. *)
let is_legacy uri =
  let attr_lists = get_attrs uri in
  not (List.exists (fun attrs -> not (has_legacy attrs)) attr_lists)
(* implement this in a fast way! *)
(** [is_empty ~local buri] lists [buri], forced to end with exactly the
    slash appended here after [strip_trailing_slash], and tells whether
    [is_empty_listing] classifies that listing as empty. *)
let is_empty ~local buri =
  is_empty_listing (ls ~local (strip_trailing_slash buri ^ "/"))
(* Decides whether uri must be treated as read only, by inspecting every
 * prefix entry that matches it.
 * NOTE(review): a few lines of is_empty_dir are not visible in this
 * listing. *)
405 let is_read_only uri =
(* Lists path through the handler for its scheme (never in local mode for
 * http) and tells whether the listing counts as empty. *)
406 let is_empty_dir path =
409 if is_file_schema path then
410 ls_file_single () (path_of_file_url path)
411 else if is_http_schema path then
412 ls_http_single ~local:false () path
415 with Resource_not_found _ -> []
417 is_empty_listing files
(* found_writable records whether a writable entry has been seen so far. *)
419 let rec aux found_writable = function
420 | (rex, _, url_prefix, attrs)::tl ->
421 let new_url = (Pcre.replace_first ~rex ~templ:url_prefix uri) in
422 let rdonly = has_legacy attrs || has_rdonly attrs in
423 (match rdonly, is_empty_dir new_url, found_writable with
(* A non-empty read only location settles the answer immediately. *)
424 | true, false, _ -> true
(* An empty read only location is skipped. *)
425 | true, true, _ -> aux found_writable tl
(* A writable location is remembered and the scan goes on. *)
426 | false, _, _ -> aux true tl)
427 | [] -> not found_writable (* if found_writable then false else true *)
429 aux false (lookup uri)
(* Replaces the global prefix map with a version derived from the current
 * one.
 * NOTE(review): the traversal function and the results for legacy and for
 * plain entries are not visible in this listing. *)
431 let activate_system_mode () =
(* The current map is forced before it is transformed. *)
432 let map = Lazy.force (prefix_map ()) in
435 (fun ((rex, urip, urlp, attrs) as entry) ->
436 if has_legacy attrs then
(* Read only entries are kept with the Read_only attribute removed. *)
438 else if has_rdonly attrs then
439 Some (rex, urip, urlp, List.filter ((<>) `Read_only) attrs)
444 let map = map in (* just to remember that ocamlc 'lazy' is a ... *)
(* The new map is stored wrapped in a lazy value. *)
445 prefix_map_ref := (lazy map)