1 (* Copyright (C) 2004-2005, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://helm.cs.unibo.it/
29 open Http_getter_types
(* Raised by [invoke_method] and [dispatch_multi] when a storage method cannot
 * find the requested resource; carries the storage method name and the
 * unresolved uri. *)
32 exception Resource_not_found of string * string (** method, uri *)
(* Name of the index file fetched by [ls_http_single] to list a remote
 * directory. *)
34 let index_fname = "INDEX"
(* Matches a single "/" at the very end of the subject. *)
36 let trailing_slash_RE = Pcre.regexp "/$"
(* Pattern source for relative paths: "/"-separated non-empty components, not
 * starting with "/", with an optional trailing "/". Kept as a raw string so
 * that it can be reused inside [file_scheme_RE]. *)
37 let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)"
38 let relative_RE = Pcre.regexp relative_RE_raw
(* Pattern source for the "file://" scheme prefix. *)
39 let file_scheme_RE_raw = "(^file://)"
(* Matches "file:" together with the whole run of slashes following it; used
 * by [path_of_file_url] to strip the scheme. *)
40 let extended_file_scheme_RE = Pcre.regexp "(^file:/+)"
(* Matches either a relative path or a "file://" url: both are routed to the
 * file implementation of a storage method by [invoke_method]. *)
41 let file_scheme_RE = Pcre.regexp (relative_RE_raw ^ "|" ^ file_scheme_RE_raw)
(* Matches urls starting with "http://". *)
42 let http_scheme_RE = Pcre.regexp "^http://"
(* Matches a newline; used to split the contents of a fetched index file. *)
43 let newline_RE = Pcre.regexp "\\n"
(* Matches the ":/" separator; used by [get_http] to split a uri into scheme
 * and path. *)
44 let cic_scheme_sep_RE = Pcre.regexp ":/"
(* Length of [gz_suffix], used by [strip_gz_suffix]. *)
46 let gz_suffix_len = String.length gz_suffix
(** Turn a url accepted by [file_scheme_RE] (a relative path or a "file://"
 * url) into a filesystem path.
 * Fails with an assertion if [url] does not match [file_scheme_RE]. *)
48 let path_of_file_url url =
49 assert (Pcre.pmatch ~rex:file_scheme_RE url);
50 if Pcre.pmatch ~rex:relative_RE url then
52 else (* absolute path, add heading "/" if missing *)
(* The match of [extended_file_scheme_RE] ("file:" plus every slash following
 * it) is replaced and exactly one "/" is prepended. No ~templ is passed:
 * presumably Pcre substitutes the empty string by default -- TODO confirm. *)
53 "/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url)
55 (** associative list regular expressions -> url prefixes
56 * sorted with longest prefixes first *)
(* Lazily computed from [Http_getter_env.prefixes]. Each resulting entry is a
 * triple (regexp, uri_prefix, url_prefix):
 * - regexp matches the normalized uri prefix, quoted literally, at the start
 *   of a uri and captures it as a group;
 * - uri_prefix is the normalized uri prefix with its trailing slash stripped;
 * - url_prefix is the normalized url prefix. *)
57 let prefix_map = lazy (
60 (fun (uri_prefix, url_prefix) ->
61 let uri_prefix = normalize_dir uri_prefix in
62 let url_prefix = normalize_dir url_prefix in
63 let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in
(* the length of the normalized uri prefix is carried along only as sort key *)
64 (regexp, String.length uri_prefix, uri_prefix, url_prefix))
65 (Lazy.force Http_getter_env.prefixes)
(* arguments of [compare] are swapped on purpose: longer prefixes sort first,
 * so that [resolve_prefix] picks the longest matching prefix *)
67 let decreasing_length (_, len1, _, _) (_, len2, _, _) = compare len2 len1 in
69 (fun (regexp, len, uri_prefix, url_prefix) ->
70 (regexp, strip_trailing_slash uri_prefix, url_prefix))
71 (List.fast_sort decreasing_length map_w_length))
(** Resolve [uri] to a url through [prefix_map]: among the entries whose
 * regexp matches [uri], the first one (i.e. the longest prefix, given the
 * ordering of [prefix_map]) is used, and the matched uri prefix is replaced
 * by that entry's url prefix.
 * @raise Unresolvable_URI if no entry matches [uri] *)
73 let resolve_prefix uri =
75 List.filter (fun (rex, _, _) -> Pcre.pmatch ~rex uri)
76 (Lazy.force prefix_map)
(* NOTE(review): url_prefix is passed as ~templ, i.e. as a Pcre substitution
 * template; presumably configured url prefixes never contain "$" sequences
 * -- confirm. *)
79 | (rex, _, url_prefix) :: _ -> Pcre.replace_first ~rex ~templ:url_prefix uri
80 | [] -> raise (Unresolvable_URI uri)
(** Variant of [resolve_prefix] that works on all the entries of [prefix_map]
 * matching [uri] instead of the first one only; [dispatch_multi] consumes
 * its result as a list of candidate urls.
 * @raise Unresolvable_URI if no entry matches [uri] *)
82 let resolve_prefixes uri =
84 List.filter (fun (rex, _, _) -> Pcre.pmatch ~rex uri)
85 (Lazy.force prefix_map)
87 if matches = [] then raise (Unresolvable_URI uri);
(* same rewriting as in [resolve_prefix]: the matched uri prefix is replaced
 * by the url prefix of the entry *)
89 (fun (rex, _, url_prefix) -> Pcre.replace_first ~rex ~templ:url_prefix uri)
(** [exists_http _ url] tells whether the resource is available over HTTP,
 * either compressed ([url ^ gz_suffix], probed first) or as is ([url]).
 * The first argument (the unresolved uri) is ignored. *)
let exists_http _ url =
  (* List.exists stops at the first hit, so the plain url is probed only when
   * the compressed one is not there *)
  List.exists Http_getter_wget.exists [ url ^ gz_suffix; url ]
(** [exists_file _ fname] tells whether the resource is available on the local
 * filesystem, either compressed ([fname ^ gz_suffix], checked first) or as is
 * ([fname]). The first argument (the unresolved uri) is ignored. *)
let exists_file _ fname =
  (* List.exists stops at the first hit, so the plain name is checked only
   * when the compressed one is not there *)
  List.exists Sys.file_exists [ fname ^ gz_suffix; fname ]
(** Return the first url among [url ^ gz_suffix] and [url] (tried in this
 * order) for which [Http_getter_wget.exists] holds. The first argument (the
 * unresolved uri) is ignored.
 * @raise Not_found' if neither candidate exists *)
98 let resolve_http _ url =
100 List.find Http_getter_wget.exists [ url ^ gz_suffix; url ]
(* translate the stdlib exception into the one [invoke_method] handles *)
101 with Not_found -> raise Not_found'
(** Return the first file name among [fname ^ gz_suffix] and [fname] (tried in
 * this order) for which [Sys.file_exists] holds. The first argument (the
 * unresolved uri) is ignored.
 * @raise Not_found' if neither candidate exists *)
103 let resolve_file _ fname =
105 List.find Sys.file_exists [ fname ^ gz_suffix; fname ]
(* translate the stdlib exception into the one [invoke_method] handles *)
106 with Not_found -> raise Not_found'
(** Remove a trailing [gz_suffix] from [fname]: when [extension fname] equals
 * [gz_suffix], the last [gz_suffix_len] characters are dropped. *)
108 let strip_gz_suffix fname =
109 if extension fname = gz_suffix then
110 String.sub fname 0 (String.length fname - gz_suffix_len)
(** [remove_duplicates l] sorts [l] with the polymorphic structural ordering
 * and hands the sorted list to [Http_getter_misc.list_uniq].
 * NOTE(review): presumably [list_uniq] drops adjacent equal elements, which
 * is why the list is sorted first -- confirm against [Http_getter_misc].
 *
 * The ordering is the unqualified [compare], as already used elsewhere in
 * this file: the [Pervasives] module is deprecated since OCaml 4.08 and no
 * longer exists in OCaml 5, while the unqualified name resolves to the same
 * function on every compiler version. *)
let remove_duplicates l =
  Http_getter_misc.list_uniq (List.fast_sort compare l)
(** List the entries of the local directory [path_prefix]; the first argument
 * (the unresolved uri) is ignored.
 * Sub-directories are reported through [normalize_dir], any other entry
 * through [strip_gz_suffix]; the collected names go through
 * [remove_duplicates] (a file available both plain and compressed yields the
 * same name twice). Returns the empty list when the directory cannot be
 * opened. *)
117 let ls_file_single _ path_prefix =
(* NOTE(review): Unix.stat follows symbolic links and raises Unix_error on a
 * dangling one; only "opendir" errors are caught below, so such an error
 * would presumably propagate and leave dir_handle open -- confirm. *)
118 let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in
(* entries whose name starts with '.'; the handler makes the empty name
 * count as not useless *)
119 let is_useless dir = try dir.[0] = '.' with _ -> false in
120 let entries = ref [] in
122 let dir_handle = Unix.opendir path_prefix in
125 let entry = Unix.readdir dir_handle in
126 if is_useless entry then
128 else if is_dir (path_prefix ^ "/" ^ entry) then
129 entries := normalize_dir entry :: !entries
131 entries := strip_gz_suffix entry :: !entries
(* Unix.readdir signals the end of the directory with End_of_file: this is
 * the regular exit path, where the handle gets closed *)
133 with End_of_file -> Unix.closedir dir_handle);
134 remove_duplicates !entries
(* a directory that cannot be opened is listed as empty *)
135 with Unix.Unix_error (_, "opendir", _) -> []
(** List the entries of the remote directory [url_prefix]: the file named
 * [index_fname] inside that directory is downloaded and split into lines.
 * The first argument (the unresolved uri) is ignored.
 * @raise Not_found' if the download fails with [Http_client_error] *)
137 let ls_http_single _ url_prefix =
139 let index = Http_getter_wget.get (normalize_dir url_prefix ^ index_fname) in
140 Pcre.split ~rex:newline_RE index
141 with Http_client_error _ -> raise Not_found'
(** File implementation of the "filename" storage method (see [filename]).
 * The compressed name [path ^ gz_suffix] is checked before the plain [path];
 * the first argument (the unresolved uri) is ignored. *)
143 let get_file _ path =
144 if Sys.file_exists (path ^ gz_suffix) then
146 else if Sys.file_exists path then
(** HTTP implementation of the "filename" storage method (see [filename]):
 * return the name of a local cached copy of the resource, downloading it
 * from [url] when it is not in the cache yet. Compressed versions (names
 * ending with [gz_suffix]) are preferred, both in the cache and remotely. *)
151 let get_http uri url =
(* [uri] is split at ":/" into scheme and path *)
153 match Pcre.split ~rex:cic_scheme_sep_RE uri with
154 | [scheme; path] -> scheme, path
(* presumably the value bound to cache_name, i.e. <cache_dir><scheme>/<path>
 * -- confirm; no separator is added after the cache directory *)
158 sprintf "%s%s/%s" (Lazy.force Http_getter_env.cache_dir) scheme path
160 if Sys.file_exists (cache_name ^ gz_suffix) then
161 cache_name ^ gz_suffix
162 else if Sys.file_exists cache_name then
164 else begin (* fill cache *)
(* make sure the directory that will hold the cached file exists *)
165 Http_getter_misc.mkdir ~parents:true (Filename.dirname cache_name);
(* first attempt: the compressed resource *)
167 Http_getter_wget.get_and_save (url ^ gz_suffix) (cache_name ^ gz_suffix);
168 cache_name ^ gz_suffix
(* the compressed resource could not be fetched: fall back to the plain one *)
169 with Http_client_error _ ->
171 Http_getter_wget.get_and_save url cache_name;
173 with Http_client_error _ ->
(** [remove_file _ path] deletes the local resource [path] in both its
 * compressed ([path ^ gz_suffix]) and plain forms, whichever exist.
 * Missing files are silently skipped; the first argument (the unresolved
 * uri) is ignored. *)
let remove_file _ path =
  let remove_if_present fname =
    if Sys.file_exists fname then Sys.remove fname
  in
  (* same order as before: compressed version first, then the plain one *)
  List.iter remove_if_present [ path ^ gz_suffix; path ]
(** HTTP implementation of the "remove" storage method: removal of remote
 * resources is not supported, both arguments are ignored and a diagnostic is
 * printed on standard error. *)
181 let remove_http _ _ =
182 prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme";
(** A storage operation returning ['a], with one implementation per supported
 * url scheme; [invoke_method] picks the implementation matching the resolved
 * url. Values of this type built in this file also carry a [name], which is
 * reported in [Resource_not_found]. *)
185 type 'a storage_method = {
(* used for relative paths and "file://" urls; the second argument is the
 * filesystem path computed by [path_of_file_url] *)
187 file: string -> string -> 'a; (* unresolved uri, resolved uri *)
(* used for "http://" urls; the second argument is the url itself *)
188 http: string -> string -> 'a; (* unresolved uri, resolved uri *)
(** Complete a root uri, i.e. one whose last character is ':', with a
 * trailing "/". *)
191 let normalize_root uri = (* add trailing slash to roots *)
193 if uri.[String.length uri - 1] = ':' then uri ^ "/"
(* the empty string makes the index above invalid: it is returned as is *)
195 with Invalid_argument _ -> uri
(** Run [storage_method] on the resolved [url], choosing the implementation
 * from the url scheme: [file] for urls matching [file_scheme_RE] (which
 * receive the path computed by [path_of_file_url]), [http] for urls matching
 * [http_scheme_RE]. Both implementations receive the unresolved [uri] as
 * first argument.
 * @raise Unsupported_scheme if [url] matches neither scheme
 * @raise Resource_not_found if the chosen implementation raises [Not_found'];
 *   the exception carries the method name and [uri] *)
197 let invoke_method storage_method uri url =
(* the file scheme is tested first; note that it includes relative paths *)
199 if Pcre.pmatch ~rex:file_scheme_RE url then
200 storage_method.file uri (path_of_file_url url)
201 else if Pcre.pmatch ~rex:http_scheme_RE url then
202 storage_method.http uri url
204 raise (Unsupported_scheme url)
205 with Not_found' -> raise (Resource_not_found (storage_method.name, uri))
(** [dispatch_single storage_method uri] runs [storage_method] on the single
 * url obtained by resolving [uri] with [resolve_prefix], after [uri] has been
 * completed by [normalize_root].
 * Fails with an assertion if the extension of [uri] is [gz_suffix].
 * Exceptions raised by [resolve_prefix] and [invoke_method] are not caught
 * here. *)
let dispatch_single storage_method uri =
  assert (extension uri <> gz_suffix);
  let rooted_uri = normalize_root uri in
  (* the normalized uri is used both for resolution and as the "unresolved
   * uri" handed to the storage method *)
  invoke_method storage_method rooted_uri (resolve_prefix rooted_uri)
(** Run [storage_method] on the urls returned by [resolve_prefixes uri], one
 * after the other, moving on to the next url whenever the current one yields
 * [Resource_not_found].
 * @raise Resource_not_found once no candidate url is left; the exception
 *   carries the method name and [uri] *)
213 let dispatch_multi storage_method uri =
214 let urls = resolve_prefixes uri in
215 let rec aux = function
216 | [] -> raise (Resource_not_found (storage_method.name, uri))
219 invoke_method storage_method uri url
(* only [Resource_not_found] triggers the fallback to the remaining urls *)
220 with Resource_not_found _ -> aux tl)
(* Each of the following applications of [dispatch_single] pairs the file and
 * HTTP implementations of one storage operation; the [name] field is what
 * gets reported in [Resource_not_found]. *)
(* "exists": is the resource available, compressed or plain? *)
225 dispatch_single { name = "exists"; file = exists_file; http = exists_http }
(* "resolve": which of the compressed/plain candidates is available? *)
228 dispatch_single { name = "resolve"; file = resolve_file; http = resolve_http }
(* "ls": list a directory below a single prefix *)
231 dispatch_single { name = "ls"; file = ls_file_single; http = ls_http_single }
(* "remove": delete the resource; only implemented for local files *)
234 dispatch_single { name = "remove"; file = remove_file; http = remove_http }
(** "filename" storage operation, implemented by [get_file] for local
 * resources and by [get_http] for remote ones.
 * The optional [find] argument (default: [false]) chooses between the two
 * dispatchers below.
 * NOTE(review): presumably [~find:true] selects [dispatch_multi], i.e. the
 * search through every matching prefix -- confirm. *)
236 let filename ?(find = false) =
238 dispatch_multi { name = "filename"; file = get_file; http = get_http }
240 dispatch_single { name = "filename"; file = get_file; http = get_http }
242 (* ls_single performs ls only below a single prefix, but prefixes which have
243 * common prefix (sorry) with a given one may need to be considered as well
244 * for example: when doing "ls cic:/" we would like to see the "cic:/matita"
(* Multi-prefix listing: the entries found by [ls_single] below [uri_prefix]
 * are combined with the entries contributed by the prefixes of [prefix_map]
 * that sit directly below [uri_prefix]. *)
247 (* prerr_endline ("Http_getter_storage.ls " ^ uri_prefix); *)
248 let direct_results = ls_single uri_prefix in
250 (fun results (_, uri_prefix', _) ->
(* a configured prefix whose parent is the listed uri shows up as a directory
 * entry: its last component followed by "/" *)
251 if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then
252 (Filename.basename uri_prefix' ^ "/") :: results
256 (Lazy.force prefix_map)
(* Shell command that recursively deletes the cache directory.
 * NOTE(review): the directory name is interpolated unquoted; presumably
 * [Http_getter_env.cache_dir] never contains spaces or shell metacharacters
 * -- confirm. *)
260 (sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir)))