X-Git-Url: http://matita.cs.unibo.it/gitweb/?p=helm.git;a=blobdiff_plain;f=components%2Fgetter%2Fhttp_getter_storage.ml;fp=components%2Fgetter%2Fhttp_getter_storage.ml;h=c17435f6a25a4f5f7848fc9dcc2435a3e36d2911;hp=0000000000000000000000000000000000000000;hb=f61af501fb4608cc4fb062a0864c774e677f0d76;hpb=58ae1809c352e71e7b5530dc41e2bfc834e1aef1 diff --git a/components/getter/http_getter_storage.ml b/components/getter/http_getter_storage.ml new file mode 100644 index 000000000..c17435f6a --- /dev/null +++ b/components/getter/http_getter_storage.ml @@ -0,0 +1,447 @@ +(* Copyright (C) 2004-2005, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://helm.cs.unibo.it/ + *) + +(* $Id$ *) + +open Printf + +open Http_getter_misc +open Http_getter_types + +exception Not_found' +exception Resource_not_found of string * string (** method, uri *) + +let index_fname = "INDEX" + +(******************************* HELPERS **************************************) + +let trailing_slash_RE = Pcre.regexp "/$" +let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)" +let relative_RE = Pcre.regexp relative_RE_raw +let file_scheme_RE_raw = "(^file://)" +let extended_file_scheme_RE = Pcre.regexp "(^file:/+)" +let file_scheme_RE = Pcre.regexp (relative_RE_raw ^ "|" ^ file_scheme_RE_raw) +let http_scheme_RE = Pcre.regexp "^http://" +let newline_RE = Pcre.regexp "\\n" +let cic_scheme_sep_RE = Pcre.regexp ":/" +let gz_suffix = ".gz" +let gz_suffix_len = String.length gz_suffix + + (* file:///bla -> bla, bla -> bla *) +let path_of_file_url url = + assert (Pcre.pmatch ~rex:file_scheme_RE url); + if Pcre.pmatch ~rex:relative_RE url then + url + else (* absolute path, add heading "/" if missing *) + "/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url) + +let strip_gz_suffix fname = + if extension fname = gz_suffix then + String.sub fname 0 (String.length fname - gz_suffix_len) + else + fname + +let normalize_root uri = (* add trailing slash to roots *) + try + if uri.[String.length uri - 1] = ':' then uri ^ "/" + else uri + with Invalid_argument _ -> uri + +let remove_duplicates l = + Http_getter_misc.list_uniq (List.stable_sort Pervasives.compare l) + +let has_rdonly l = List.exists ((=) `Read_only) l +let has_legacy l = List.exists ((=) `Legacy) l +let is_readwrite attrs = (not (has_legacy attrs) && not (has_rdonly attrs)) + +let is_file_schema url = Pcre.pmatch ~rex:file_scheme_RE url +let is_http_schema url = Pcre.pmatch ~rex:http_scheme_RE url + +let is_empty_listing files = + List.for_all + (fun s -> + let len = String.length s in + len < 4 || String.sub s (len - 4) 4 <> ".xml") files + +(************************* GLOBALS PREFIXES **********************************) + + (** associative list regular expressions -> url prefixes + * sorted with longest prefixes first *) +let prefix_map_ref = ref (lazy ( + List.map + (fun (uri_prefix, (url_prefix, attrs)) -> + let uri_prefix = normalize_dir uri_prefix in + let url_prefix = normalize_dir url_prefix in + let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in + regexp, strip_trailing_slash uri_prefix, url_prefix, attrs) + (List.rev (Lazy.force Http_getter_env.prefixes)))) + +let prefix_map () = !prefix_map_ref + +let keep_first l = + let cmp (_,x) (_,y) = x = y in + let rec aux prev = function + | [] -> [] + | hd::tl -> if cmp prev hd then hd :: aux prev tl else [] + in + match l with + | hd :: tl -> hd :: aux hd tl + | _ -> assert false +;; + + (** given an uri returns the prefixes for it *) +let lookup uri = + let matches = + HExtlib.filter_map + (fun (rex, _, l, _ as entry) -> + try + let got = Pcre.extract ~full_match:true ~rex uri in + Some (entry, String.length got.(0)) + with Not_found -> None) + (Lazy.force (prefix_map ())) + in + if matches = [] then raise (Unresolvable_URI uri); + List.map fst (keep_first (List.sort (fun (_,l1) (_,l2) -> l2 - l1) matches)) +;; + +let get_attrs uri = List.map (fun (_, _, _, attrs) -> attrs) (lookup uri) + +(*************************** ACTIONS ******************************************) + +let exists_http ~local _ url = + if local then false else + Http_getter_wget.exists (url ^ gz_suffix) || Http_getter_wget.exists url + +let exists_file _ fname = + Sys.file_exists (fname ^ gz_suffix) || Sys.file_exists fname + +let resolve_http ~must_exists ~local _ url = + if local then raise Not_found' else + try + if must_exists then + List.find Http_getter_wget.exists [ url ^ gz_suffix; url ] + else + url + with Not_found -> raise Not_found' + +let resolve_file ~must_exists _ fname = + try + if must_exists then + List.find Sys.file_exists [ fname ^ gz_suffix; fname ] + else + fname + with Not_found -> raise Not_found' + +let ls_file_single _ path_prefix = + let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in + let is_useless dir = try dir.[0] = '.' with _ -> false in + let entries = ref [] in + try + let dir_handle = Unix.opendir path_prefix in + (try + while true do + let entry = Unix.readdir dir_handle in + if is_useless entry then + () + else if is_dir (path_prefix ^ "/" ^ entry) then + entries := normalize_dir entry :: !entries + else + entries := strip_gz_suffix entry :: !entries + done + with End_of_file -> Unix.closedir dir_handle); + remove_duplicates !entries + with Unix.Unix_error (_, "opendir", _) -> [] + +let ls_http_single ~local _ url_prefix = + if local then raise (Resource_not_found ("get","")) else + let url = normalize_dir url_prefix ^ index_fname in + try + let index = Http_getter_wget.get url in + Pcre.split ~rex:newline_RE index + with Http_client_error _ -> raise (Resource_not_found ("get",url)) +;; + +let get_file _ path = + if Sys.file_exists (path ^ gz_suffix) then + path ^ gz_suffix + else if Sys.file_exists path then + path + else + raise Not_found' + +let get_http ~local uri url = + if local then raise Not_found' else + let scheme, path = + match Pcre.split ~rex:cic_scheme_sep_RE uri with + | [scheme; path] -> scheme, path + | _ -> assert false + in + let cache_name = + sprintf "%s%s/%s" (Lazy.force Http_getter_env.cache_dir) scheme path + in + if Sys.file_exists (cache_name ^ gz_suffix) then + cache_name ^ gz_suffix + else if Sys.file_exists cache_name then + cache_name + else begin (* fill cache *) + Http_getter_misc.mkdir ~parents:true (Filename.dirname cache_name); + (try + Http_getter_wget.get_and_save (url ^ gz_suffix) (cache_name ^ gz_suffix); + cache_name ^ gz_suffix + with Http_client_error _ -> + (try + Http_getter_wget.get_and_save url cache_name; + cache_name + with Http_client_error _ -> + raise Not_found')) + end + +let remove_file _ path = + if Sys.file_exists (path ^ gz_suffix) then Sys.remove (path ^ gz_suffix); + if Sys.file_exists path then Sys.remove path + +let remove_http _ _ = + prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme"; + assert false + +(**************************** RESOLUTION OF PREFIXES ************************) + +let resolve_prefixes n local write exists uri = + let exists_test new_uri = + if is_file_schema new_uri then + exists_file () (path_of_file_url new_uri) + else if is_http_schema new_uri then + exists_http ~local () new_uri + else false + in + let rec aux n = function + | (rex, _, url_prefix, attrs) :: tl when n > 0-> + (match write, is_readwrite attrs, exists with + | true ,false, _ -> aux n tl + | true ,true ,true + | false,_ ,true -> + let new_uri = (Pcre.replace_first ~rex ~templ:url_prefix uri) in + if exists_test new_uri then new_uri::aux (n-1) tl else aux n tl + | true ,true ,false + | false,_ ,false -> + (Pcre.replace_first ~rex ~templ:url_prefix uri) :: (aux (n-1) tl)) + | _ -> [] + in + aux n (lookup uri) + +let resolve_prefix l w e u = + match resolve_prefixes 1 l w e u with + | hd :: _ -> hd + | [] -> + raise + (Resource_not_found + (Printf.sprintf "resolve_prefix write:%b exists:%b" w e,u)) + +(* uncomment to debug prefix resolution *) +(* +let resolve_prefix w e u = + prerr_endline + ("XXX w=" ^ string_of_bool w ^ " e=" ^ string_of_bool e ^" :" ^ u); + let rc = resolve_prefix w e u in + prerr_endline ("YYY :" ^ rc ^ "\n"); + rc +*) + +(************************* DISPATCHERS ***************************************) + +type 'a storage_method = { + name: string; + write: bool; + exists: bool; + local: bool; + file: string -> string -> 'a; (* unresolved uri, resolved uri *) + http: string -> string -> 'a; (* unresolved uri, resolved uri *) +} + +let invoke_method storage_method uri url = + try + if is_file_schema url then + storage_method.file uri (path_of_file_url url) + else if is_http_schema url then + storage_method.http uri url + else + raise (Unsupported_scheme url) + with Not_found' -> raise (Resource_not_found (storage_method.name, uri)) + +let dispatch_single storage_method uri = + assert (extension uri <> gz_suffix); + let uri = normalize_root uri in + let url = + resolve_prefix + storage_method.local storage_method.write storage_method.exists uri + in + invoke_method storage_method uri url + +let dispatch_multi storage_method uri = + let urls = + resolve_prefixes max_int + storage_method.local storage_method.write storage_method.exists uri + in + let rec aux = function + | [] -> raise (Resource_not_found (storage_method.name, uri)) + | url :: tl -> + (try + invoke_method storage_method uri url + with Resource_not_found _ -> aux tl) + in + aux urls + +let dispatch_all storage_method uri = + let urls = + resolve_prefixes max_int + storage_method.local storage_method.write storage_method.exists uri + in + List.map (fun url -> invoke_method storage_method uri url) urls + +(******************************** EXPORTED FUNCTIONS *************************) + +let exists ~local s = + try + dispatch_single + { write = false; + name = "exists"; + exists = true; + local=local; + file = exists_file; http = exists_http ~local; } s + with Resource_not_found _ -> false + +let resolve ~local ?(must_exists=true) ~writable = + (if must_exists then + dispatch_multi + else + dispatch_single) + { write = writable; + name="resolve"; + exists = must_exists; + local=local; + file = resolve_file ~must_exists; + http = resolve_http ~local ~must_exists; } + +let remove = + dispatch_single + { write = false; + name = "remove"; + exists=true; + local=false; + file = remove_file; http = remove_http; } + +let filename ~local ?(find = false) = + (if find then dispatch_multi else dispatch_single) + { write = false; + name = "filename"; + exists=true; + local=local; + file = get_file; http = get_http ~local ; } + +let ls ~local uri_prefix = + let ls_all s = + try + dispatch_all + { write=false; + name = "ls"; + exists=true; + local=local; + file = ls_file_single; http = ls_http_single ~local; } s + with Resource_not_found _ -> [] + in + let direct_results = List.flatten (ls_all uri_prefix) in + List.fold_left + (fun results (_, uri_prefix', _, _) -> + if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then + (Filename.basename uri_prefix' ^ "/") :: results + else + results) + direct_results + (Lazy.force (prefix_map ())) + +let clean_cache () = + ignore (Sys.command + (sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir))) + +let list_writable_prefixes _ = + HExtlib.filter_map + (fun (_,_,url,attrs) -> + if is_readwrite attrs then + Some url + else + None) + (Lazy.force (prefix_map ())) + +let is_legacy uri = List.for_all has_legacy (get_attrs uri) + +(* implement this in a fast way! *) +let is_empty ~local buri = + let buri = strip_trailing_slash buri ^ "/" in + let files = ls ~local buri in + is_empty_listing files + +let is_read_only uri = + let is_empty_dir path = + let files = + try + if is_file_schema path then + ls_file_single () (path_of_file_url path) + else if is_http_schema path then + ls_http_single ~local:false () path + else + assert false + with Resource_not_found _ -> [] + in + is_empty_listing files + in + let rec aux found_writable = function + | (rex, _, url_prefix, attrs)::tl -> + let new_url = (Pcre.replace_first ~rex ~templ:url_prefix uri) in + let rdonly = has_legacy attrs || has_rdonly attrs in + (match rdonly, is_empty_dir new_url, found_writable with + | true, false, _ -> true + | true, true, _ -> aux found_writable tl + | false, _, _ -> aux true tl) + | [] -> not found_writable (* if found_writable then false else true *) + in + aux false (lookup uri) + +let activate_system_mode () = + let map = Lazy.force (prefix_map ()) in + let map = + HExtlib.filter_map + (fun ((rex, urip, urlp, attrs) as entry) -> + if has_legacy attrs then + Some entry + else if has_rdonly attrs then + Some (rex, urip, urlp, List.filter ((<>) `Read_only) attrs) + else + None) + map + in + let map = map in (* just to remember that ocamlc 'lazy' is a ... *) + prefix_map_ref := (lazy map) + +(* eof *)