X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2Focaml%2Fgetter%2Fhttp_getter_storage.ml;fp=helm%2Focaml%2Fgetter%2Fhttp_getter_storage.ml;h=fc6f415ac31ce68c0897113c0db026e018e6bb40;hb=792b5d29ebae8f917043d9dd226692919b5d6ca1;hp=0000000000000000000000000000000000000000;hpb=a14a8c7637fd0b95e9d4deccb20c6abc98e8f953;p=helm.git diff --git a/helm/ocaml/getter/http_getter_storage.ml b/helm/ocaml/getter/http_getter_storage.ml new file mode 100644 index 000000000..fc6f415ac --- /dev/null +++ b/helm/ocaml/getter/http_getter_storage.ml @@ -0,0 +1,275 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +(* $Id$ *) + +open Printf + +open Http_getter_misc +open Http_getter_types + +exception Not_found' +exception Resource_not_found of string * string (** method, uri *) + +let index_fname = "INDEX" + +let trailing_slash_RE = Pcre.regexp "/$" +let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)" +let relative_RE = Pcre.regexp relative_RE_raw +let file_scheme_RE_raw = "(^file://)" +let extended_file_scheme_RE = Pcre.regexp "(^file:/+)" +let file_scheme_RE = Pcre.regexp (relative_RE_raw ^ "|" ^ file_scheme_RE_raw) +let http_scheme_RE = Pcre.regexp "^http://" +let newline_RE = Pcre.regexp "\\n" +let cic_scheme_sep_RE = Pcre.regexp ":/" +let gz_suffix = ".gz" +let gz_suffix_len = String.length gz_suffix + +let path_of_file_url url = + assert (Pcre.pmatch ~rex:file_scheme_RE url); + if Pcre.pmatch ~rex:relative_RE url then + url + else (* absolute path, add heading "/" if missing *) + "/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url) + + (** associative list regular expressions -> url prefixes + * sorted with longest prefixes first *) +let prefix_map = lazy ( + let map_w_length = + List.map + (fun (uri_prefix, (url_prefix, attrs)) -> + let uri_prefix = normalize_dir uri_prefix in + let url_prefix = normalize_dir url_prefix in + let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in + (regexp, String.length uri_prefix, uri_prefix, url_prefix, attrs)) + (Lazy.force Http_getter_env.prefixes) + in + let decreasing_length (_, len1, _, _, _) (_, len2, _, _, _) = + compare len2 len1 in + List.map + (fun (regexp, len, uri_prefix, url_prefix, attrs) -> + (regexp, strip_trailing_slash uri_prefix, url_prefix, attrs)) + (List.fast_sort decreasing_length map_w_length)) + +let lookup uri = + let matches = + List.filter (fun (rex, _, _, _) -> Pcre.pmatch ~rex uri) + (Lazy.force prefix_map) in + if matches = [] then raise (Unresolvable_URI uri); + matches + +let resolve_prefix uri = + match lookup uri with + | (rex, _, url_prefix, _) :: _ -> + Pcre.replace_first ~rex ~templ:url_prefix uri + | [] -> assert false + +let resolve_prefixes uri = + let matches = lookup uri in + List.map + (fun (rex, _, url_prefix, _) -> + Pcre.replace_first ~rex ~templ:url_prefix uri) + matches + +let get_attrs uri = + match lookup uri with + | (_, _, _, attrs) :: _ -> attrs + | [] -> assert false + +let is_legacy uri = List.exists ((=) `Legacy) (get_attrs uri) + +let is_read_only uri = + is_legacy uri || List.exists ((=) `Read_only) (get_attrs uri) + +let exists_http _ url = + Http_getter_wget.exists (url ^ gz_suffix) || Http_getter_wget.exists url + +let exists_file _ fname = + Sys.file_exists (fname ^ gz_suffix) || Sys.file_exists fname + +let resolve_http _ url = + try + List.find Http_getter_wget.exists [ url ^ gz_suffix; url ] + with Not_found -> raise Not_found' + +let resolve_file _ fname = + try + List.find Sys.file_exists [ fname ^ gz_suffix; fname ] + with Not_found -> raise Not_found' + +let strip_gz_suffix fname = + if extension fname = gz_suffix then + String.sub fname 0 (String.length fname - gz_suffix_len) + else + fname + +let remove_duplicates l = + Http_getter_misc.list_uniq (List.fast_sort Pervasives.compare l) + +let ls_file_single _ path_prefix = + let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in + let is_useless dir = try dir.[0] = '.' with _ -> false in + let entries = ref [] in + try + let dir_handle = Unix.opendir path_prefix in + (try + while true do + let entry = Unix.readdir dir_handle in + if is_useless entry then + () + else if is_dir (path_prefix ^ "/" ^ entry) then + entries := normalize_dir entry :: !entries + else + entries := strip_gz_suffix entry :: !entries + done + with End_of_file -> Unix.closedir dir_handle); + remove_duplicates !entries + with Unix.Unix_error (_, "opendir", _) -> [] + +let ls_http_single _ url_prefix = + try + let index = Http_getter_wget.get (normalize_dir url_prefix ^ index_fname) in + Pcre.split ~rex:newline_RE index + with Http_client_error _ -> raise Not_found' + +let get_file _ path = + if Sys.file_exists (path ^ gz_suffix) then + path ^ gz_suffix + else if Sys.file_exists path then + path + else + raise Not_found' + +let get_http uri url = + let scheme, path = + match Pcre.split ~rex:cic_scheme_sep_RE uri with + | [scheme; path] -> scheme, path + | _ -> assert false + in + let cache_name = + sprintf "%s%s/%s" (Lazy.force Http_getter_env.cache_dir) scheme path + in + if Sys.file_exists (cache_name ^ gz_suffix) then + cache_name ^ gz_suffix + else if Sys.file_exists cache_name then + cache_name + else begin (* fill cache *) + Http_getter_misc.mkdir ~parents:true (Filename.dirname cache_name); + (try + Http_getter_wget.get_and_save (url ^ gz_suffix) (cache_name ^ gz_suffix); + cache_name ^ gz_suffix + with Http_client_error _ -> + (try + Http_getter_wget.get_and_save url cache_name; + cache_name + with Http_client_error _ -> + raise Not_found')) + end + +let remove_file _ path = + if Sys.file_exists (path ^ gz_suffix) then Sys.remove (path ^ gz_suffix); + if Sys.file_exists path then Sys.remove path + +let remove_http _ _ = + prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme"; + assert false + +type 'a storage_method = { + name: string; + file: string -> string -> 'a; (* unresolved uri, resolved uri *) + http: string -> string -> 'a; (* unresolved uri, resolved uri *) +} + +let normalize_root uri = (* add trailing slash to roots *) + try + if uri.[String.length uri - 1] = ':' then uri ^ "/" + else uri + with Invalid_argument _ -> uri + +let invoke_method storage_method uri url = + try + if Pcre.pmatch ~rex:file_scheme_RE url then + storage_method.file uri (path_of_file_url url) + else if Pcre.pmatch ~rex:http_scheme_RE url then + storage_method.http uri url + else + raise (Unsupported_scheme url) + with Not_found' -> raise (Resource_not_found (storage_method.name, uri)) + +let dispatch_single storage_method uri = + assert (extension uri <> gz_suffix); + let uri = normalize_root uri in + let url = resolve_prefix uri in + invoke_method storage_method uri url + +let dispatch_multi storage_method uri = + let urls = resolve_prefixes uri in + let rec aux = function + | [] -> raise (Resource_not_found (storage_method.name, uri)) + | url :: tl -> + (try + invoke_method storage_method uri url + with Resource_not_found _ -> aux tl) + in + aux urls + +let exists = + dispatch_single { name = "exists"; file = exists_file; http = exists_http } + +let resolve = + dispatch_single { name = "resolve"; file = resolve_file; http = resolve_http } + +let ls_single = + dispatch_single { name = "ls"; file = ls_file_single; http = ls_http_single } + +let remove = + dispatch_single { name = "remove"; file = remove_file; http = remove_http } + +let filename ?(find = false) = + if find then + dispatch_multi { name = "filename"; file = get_file; http = get_http } + else + dispatch_single { name = "filename"; file = get_file; http = get_http } + + (* ls_single performs ls only below a single prefix, but prefixes which have + * common prefix (sorry) with a given one may need to be considered as well + * for example: when doing "ls cic:/" we would like to see the "cic:/matita" + * directory *) +let ls uri_prefix = +(* prerr_endline ("Http_getter_storage.ls " ^ uri_prefix); *) + let direct_results = ls_single uri_prefix in + List.fold_left + (fun results (_, uri_prefix', _, _) -> + if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then + (Filename.basename uri_prefix' ^ "/") :: results + else + results) + direct_results + (Lazy.force prefix_map) + +let clean_cache () = + ignore (Sys.command + (sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir))) +