X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=components%2Fgetter%2Fhttp_getter_storage.ml;h=c17435f6a25a4f5f7848fc9dcc2435a3e36d2911;hb=a957099550619f87a58be467b9b11f2ad6501378;hp=fc6f415ac31ce68c0897113c0db026e018e6bb40;hpb=7f2444c2670cadafddd8785b687ef312158376b0;p=helm.git diff --git a/components/getter/http_getter_storage.ml b/components/getter/http_getter_storage.ml index fc6f415ac..c17435f6a 100644 --- a/components/getter/http_getter_storage.ml +++ b/components/getter/http_getter_storage.ml @@ -35,6 +35,8 @@ exception Resource_not_found of string * string (** method, uri *) let index_fname = "INDEX" +(******************************* HELPERS **************************************) + let trailing_slash_RE = Pcre.regexp "/$" let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)" let relative_RE = Pcre.regexp relative_RE_raw @@ -47,6 +49,7 @@ let cic_scheme_sep_RE = Pcre.regexp ":/" let gz_suffix = ".gz" let gz_suffix_len = String.length gz_suffix + (* file:///bla -> bla, bla -> bla *) let path_of_file_url url = assert (Pcre.pmatch ~rex:file_scheme_RE url); if Pcre.pmatch ~rex:relative_RE url then @@ -54,80 +57,103 @@ let path_of_file_url url = else (* absolute path, add heading "/" if missing *) "/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url) +let strip_gz_suffix fname = + if extension fname = gz_suffix then + String.sub fname 0 (String.length fname - gz_suffix_len) + else + fname + +let normalize_root uri = (* add trailing slash to roots *) + try + if uri.[String.length uri - 1] = ':' then uri ^ "/" + else uri + with Invalid_argument _ -> uri + +let remove_duplicates l = + Http_getter_misc.list_uniq (List.stable_sort Pervasives.compare l) + +let has_rdonly l = List.exists ((=) `Read_only) l +let has_legacy l = List.exists ((=) `Legacy) l +let is_readwrite attrs = (not (has_legacy attrs) && not (has_rdonly attrs)) + +let is_file_schema url = Pcre.pmatch ~rex:file_scheme_RE url +let is_http_schema url = Pcre.pmatch ~rex:http_scheme_RE url + +let is_empty_listing files = + List.for_all + (fun s -> + let len = String.length s in + len < 4 || String.sub s (len - 4) 4 <> ".xml") files + +(************************* GLOBALS PREFIXES **********************************) + (** associative list regular expressions -> url prefixes * sorted with longest prefixes first *) -let prefix_map = lazy ( - let map_w_length = - List.map - (fun (uri_prefix, (url_prefix, attrs)) -> - let uri_prefix = normalize_dir uri_prefix in - let url_prefix = normalize_dir url_prefix in - let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in - (regexp, String.length uri_prefix, uri_prefix, url_prefix, attrs)) - (Lazy.force Http_getter_env.prefixes) - in - let decreasing_length (_, len1, _, _, _) (_, len2, _, _, _) = - compare len2 len1 in +let prefix_map_ref = ref (lazy ( List.map - (fun (regexp, len, uri_prefix, url_prefix, attrs) -> - (regexp, strip_trailing_slash uri_prefix, url_prefix, attrs)) - (List.fast_sort decreasing_length map_w_length)) + (fun (uri_prefix, (url_prefix, attrs)) -> + let uri_prefix = normalize_dir uri_prefix in + let url_prefix = normalize_dir url_prefix in + let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in + regexp, strip_trailing_slash uri_prefix, url_prefix, attrs) + (List.rev (Lazy.force Http_getter_env.prefixes)))) + +let prefix_map () = !prefix_map_ref + +let keep_first l = + let cmp (_,x) (_,y) = x = y in + let rec aux prev = function + | [] -> [] + | hd::tl -> if cmp prev hd then hd :: aux prev tl else [] + in + match l with + | hd :: tl -> hd :: aux hd tl + | _ -> assert false +;; + (** given an uri returns the prefixes for it *) let lookup uri = let matches = - List.filter (fun (rex, _, _, _) -> Pcre.pmatch ~rex uri) - (Lazy.force prefix_map) in + HExtlib.filter_map + (fun (rex, _, l, _ as entry) -> + try + let got = Pcre.extract ~full_match:true ~rex uri in + Some (entry, String.length got.(0)) + with Not_found -> None) + (Lazy.force (prefix_map ())) + in if matches = [] then raise (Unresolvable_URI uri); - matches - -let resolve_prefix uri = - match lookup uri with - | (rex, _, url_prefix, _) :: _ -> - Pcre.replace_first ~rex ~templ:url_prefix uri - | [] -> assert false + List.map fst (keep_first (List.sort (fun (_,l1) (_,l2) -> l2 - l1) matches)) +;; -let resolve_prefixes uri = - let matches = lookup uri in - List.map - (fun (rex, _, url_prefix, _) -> - Pcre.replace_first ~rex ~templ:url_prefix uri) - matches - -let get_attrs uri = - match lookup uri with - | (_, _, _, attrs) :: _ -> attrs - | [] -> assert false - -let is_legacy uri = List.exists ((=) `Legacy) (get_attrs uri) - -let is_read_only uri = - is_legacy uri || List.exists ((=) `Read_only) (get_attrs uri) +let get_attrs uri = List.map (fun (_, _, _, attrs) -> attrs) (lookup uri) -let exists_http _ url = +(*************************** ACTIONS ******************************************) + +let exists_http ~local _ url = + if local then false else Http_getter_wget.exists (url ^ gz_suffix) || Http_getter_wget.exists url let exists_file _ fname = Sys.file_exists (fname ^ gz_suffix) || Sys.file_exists fname -let resolve_http _ url = +let resolve_http ~must_exists ~local _ url = + if local then raise Not_found' else try - List.find Http_getter_wget.exists [ url ^ gz_suffix; url ] + if must_exists then + List.find Http_getter_wget.exists [ url ^ gz_suffix; url ] + else + url with Not_found -> raise Not_found' -let resolve_file _ fname = +let resolve_file ~must_exists _ fname = try - List.find Sys.file_exists [ fname ^ gz_suffix; fname ] + if must_exists then + List.find Sys.file_exists [ fname ^ gz_suffix; fname ] + else + fname with Not_found -> raise Not_found' -let strip_gz_suffix fname = - if extension fname = gz_suffix then - String.sub fname 0 (String.length fname - gz_suffix_len) - else - fname - -let remove_duplicates l = - Http_getter_misc.list_uniq (List.fast_sort Pervasives.compare l) - let ls_file_single _ path_prefix = let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in let is_useless dir = try dir.[0] = '.' with _ -> false in @@ -148,11 +174,14 @@ let ls_file_single _ path_prefix = remove_duplicates !entries with Unix.Unix_error (_, "opendir", _) -> [] -let ls_http_single _ url_prefix = +let ls_http_single ~local _ url_prefix = + if local then raise (Resource_not_found ("get","")) else + let url = normalize_dir url_prefix ^ index_fname in try - let index = Http_getter_wget.get (normalize_dir url_prefix ^ index_fname) in + let index = Http_getter_wget.get url in Pcre.split ~rex:newline_RE index - with Http_client_error _ -> raise Not_found' + with Http_client_error _ -> raise (Resource_not_found ("get",url)) +;; let get_file _ path = if Sys.file_exists (path ^ gz_suffix) then @@ -162,7 +191,8 @@ let get_file _ path = else raise Not_found' -let get_http uri url = +let get_http ~local uri url = + if local then raise Not_found' else let scheme, path = match Pcre.split ~rex:cic_scheme_sep_RE uri with | [scheme; path] -> scheme, path @@ -196,36 +226,84 @@ let remove_http _ _ = prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme"; assert false +(**************************** RESOLUTION OF PREFIXES ************************) + +let resolve_prefixes n local write exists uri = + let exists_test new_uri = + if is_file_schema new_uri then + exists_file () (path_of_file_url new_uri) + else if is_http_schema new_uri then + exists_http ~local () new_uri + else false + in + let rec aux n = function + | (rex, _, url_prefix, attrs) :: tl when n > 0-> + (match write, is_readwrite attrs, exists with + | true ,false, _ -> aux n tl + | true ,true ,true + | false,_ ,true -> + let new_uri = (Pcre.replace_first ~rex ~templ:url_prefix uri) in + if exists_test new_uri then new_uri::aux (n-1) tl else aux n tl + | true ,true ,false + | false,_ ,false -> + (Pcre.replace_first ~rex ~templ:url_prefix uri) :: (aux (n-1) tl)) + | _ -> [] + in + aux n (lookup uri) + +let resolve_prefix l w e u = + match resolve_prefixes 1 l w e u with + | hd :: _ -> hd + | [] -> + raise + (Resource_not_found + (Printf.sprintf "resolve_prefix write:%b exists:%b" w e,u)) + +(* uncomment to debug prefix resolution *) +(* +let resolve_prefix w e u = + prerr_endline + ("XXX w=" ^ string_of_bool w ^ " e=" ^ string_of_bool e ^" :" ^ u); + let rc = resolve_prefix w e u in + prerr_endline ("YYY :" ^ rc ^ "\n"); + rc +*) + +(************************* DISPATCHERS ***************************************) + type 'a storage_method = { name: string; + write: bool; + exists: bool; + local: bool; file: string -> string -> 'a; (* unresolved uri, resolved uri *) http: string -> string -> 'a; (* unresolved uri, resolved uri *) } -let normalize_root uri = (* add trailing slash to roots *) - try - if uri.[String.length uri - 1] = ':' then uri ^ "/" - else uri - with Invalid_argument _ -> uri - let invoke_method storage_method uri url = try - if Pcre.pmatch ~rex:file_scheme_RE url then + if is_file_schema url then storage_method.file uri (path_of_file_url url) - else if Pcre.pmatch ~rex:http_scheme_RE url then + else if is_http_schema url then storage_method.http uri url else raise (Unsupported_scheme url) with Not_found' -> raise (Resource_not_found (storage_method.name, uri)) - + let dispatch_single storage_method uri = assert (extension uri <> gz_suffix); let uri = normalize_root uri in - let url = resolve_prefix uri in + let url = + resolve_prefix + storage_method.local storage_method.write storage_method.exists uri + in invoke_method storage_method uri url let dispatch_multi storage_method uri = - let urls = resolve_prefixes uri in + let urls = + resolve_prefixes max_int + storage_method.local storage_method.write storage_method.exists uri + in let rec aux = function | [] -> raise (Resource_not_found (storage_method.name, uri)) | url :: tl -> @@ -235,31 +313,65 @@ let dispatch_multi storage_method uri = in aux urls -let exists = - dispatch_single { name = "exists"; file = exists_file; http = exists_http } - -let resolve = - dispatch_single { name = "resolve"; file = resolve_file; http = resolve_http } - -let ls_single = - dispatch_single { name = "ls"; file = ls_file_single; http = ls_http_single } +let dispatch_all storage_method uri = + let urls = + resolve_prefixes max_int + storage_method.local storage_method.write storage_method.exists uri + in + List.map (fun url -> invoke_method storage_method uri url) urls + +(******************************** EXPORTED FUNCTIONS *************************) + +let exists ~local s = + try + dispatch_single + { write = false; + name = "exists"; + exists = true; + local=local; + file = exists_file; http = exists_http ~local; } s + with Resource_not_found _ -> false + +let resolve ~local ?(must_exists=true) ~writable = + (if must_exists then + dispatch_multi + else + dispatch_single) + { write = writable; + name="resolve"; + exists = must_exists; + local=local; + file = resolve_file ~must_exists; + http = resolve_http ~local ~must_exists; } let remove = - dispatch_single { name = "remove"; file = remove_file; http = remove_http } - -let filename ?(find = false) = - if find then - dispatch_multi { name = "filename"; file = get_file; http = get_http } - else - dispatch_single { name = "filename"; file = get_file; http = get_http } - - (* ls_single performs ls only below a single prefix, but prefixes which have - * common prefix (sorry) with a given one may need to be considered as well - * for example: when doing "ls cic:/" we would like to see the "cic:/matita" - * directory *) -let ls uri_prefix = -(* prerr_endline ("Http_getter_storage.ls " ^ uri_prefix); *) - let direct_results = ls_single uri_prefix in + dispatch_single + { write = false; + name = "remove"; + exists=true; + local=false; + file = remove_file; http = remove_http; } + +let filename ~local ?(find = false) = + (if find then dispatch_multi else dispatch_single) + { write = false; + name = "filename"; + exists=true; + local=local; + file = get_file; http = get_http ~local ; } + +let ls ~local uri_prefix = + let ls_all s = + try + dispatch_all + { write=false; + name = "ls"; + exists=true; + local=local; + file = ls_file_single; http = ls_http_single ~local; } s + with Resource_not_found _ -> [] + in + let direct_results = List.flatten (ls_all uri_prefix) in List.fold_left (fun results (_, uri_prefix', _, _) -> if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then @@ -267,9 +379,69 @@ let ls uri_prefix = else results) direct_results - (Lazy.force prefix_map) + (Lazy.force (prefix_map ())) let clean_cache () = ignore (Sys.command (sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir))) +let list_writable_prefixes _ = + HExtlib.filter_map + (fun (_,_,url,attrs) -> + if is_readwrite attrs then + Some url + else + None) + (Lazy.force (prefix_map ())) + +let is_legacy uri = List.for_all has_legacy (get_attrs uri) + +(* implement this in a fast way! *) +let is_empty ~local buri = + let buri = strip_trailing_slash buri ^ "/" in + let files = ls ~local buri in + is_empty_listing files + +let is_read_only uri = + let is_empty_dir path = + let files = + try + if is_file_schema path then + ls_file_single () (path_of_file_url path) + else if is_http_schema path then + ls_http_single ~local:false () path + else + assert false + with Resource_not_found _ -> [] + in + is_empty_listing files + in + let rec aux found_writable = function + | (rex, _, url_prefix, attrs)::tl -> + let new_url = (Pcre.replace_first ~rex ~templ:url_prefix uri) in + let rdonly = has_legacy attrs || has_rdonly attrs in + (match rdonly, is_empty_dir new_url, found_writable with + | true, false, _ -> true + | true, true, _ -> aux found_writable tl + | false, _, _ -> aux true tl) + | [] -> not found_writable (* if found_writable then false else true *) + in + aux false (lookup uri) + +let activate_system_mode () = + let map = Lazy.force (prefix_map ()) in + let map = + HExtlib.filter_map + (fun ((rex, urip, urlp, attrs) as entry) -> + if has_legacy attrs then + Some entry + else if has_rdonly attrs then + Some (rex, urip, urlp, List.filter ((<>) `Read_only) attrs) + else + None) + map + in + let map = map in (* just to remember that ocamlc 'lazy' is a ... *) + prefix_map_ref := (lazy map) + +(* eof *)