let index_fname = "INDEX"
+(******************************* HELPERS **************************************)
+
let trailing_slash_RE = Pcre.regexp "/$"
let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)"
let relative_RE = Pcre.regexp relative_RE_raw
let gz_suffix = ".gz"
let gz_suffix_len = String.length gz_suffix
+ (* file:///bla -> bla, bla -> bla *)
let path_of_file_url url =
assert (Pcre.pmatch ~rex:file_scheme_RE url);
if Pcre.pmatch ~rex:relative_RE url then
else (* absolute path, add heading "/" if missing *)
"/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url)
+let strip_gz_suffix fname =
+ if extension fname = gz_suffix then
+ String.sub fname 0 (String.length fname - gz_suffix_len)
+ else
+ fname
+
+let normalize_root uri = (* add trailing slash to roots *)
+ try
+ if uri.[String.length uri - 1] = ':' then uri ^ "/"
+ else uri
+ with Invalid_argument _ -> uri
+
+let remove_duplicates l =
+ Http_getter_misc.list_uniq (List.stable_sort Pervasives.compare l)
+
+let has_rdonly l = List.exists ((=) `Read_only) l
+let has_legacy l = List.exists ((=) `Legacy) l
+let is_readwrite attrs = (not (has_legacy attrs) && not (has_rdonly attrs))
+
+let is_file_schema url = Pcre.pmatch ~rex:file_scheme_RE url
+let is_http_schema url = Pcre.pmatch ~rex:http_scheme_RE url
+
+let is_empty_listing files =
+ List.for_all
+ (fun s ->
+ let len = String.length s in
+ len < 4 || String.sub s (len - 4) 4 <> ".xml") files
+
+(************************* GLOBALS PREFIXES **********************************)
+
(** associative list regular expressions -> url prefixes
* sorted with longest prefixes first *)
-let prefix_map = lazy (
- let map_w_length =
- List.map
- (fun (uri_prefix, (url_prefix, attrs)) ->
- let uri_prefix = normalize_dir uri_prefix in
- let url_prefix = normalize_dir url_prefix in
- let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in
- (regexp, String.length uri_prefix, uri_prefix, url_prefix, attrs))
- (Lazy.force Http_getter_env.prefixes)
- in
- let decreasing_length (_, len1, _, _, _) (_, len2, _, _, _) =
- compare len2 len1 in
+let prefix_map_ref = ref (lazy (
List.map
- (fun (regexp, len, uri_prefix, url_prefix, attrs) ->
- (regexp, strip_trailing_slash uri_prefix, url_prefix, attrs))
- (List.fast_sort decreasing_length map_w_length))
+ (fun (uri_prefix, (url_prefix, attrs)) ->
+ let uri_prefix = normalize_dir uri_prefix in
+ let url_prefix = normalize_dir url_prefix in
+ let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in
+ regexp, strip_trailing_slash uri_prefix, url_prefix, attrs)
+ (List.rev (Lazy.force Http_getter_env.prefixes))))
+
+let prefix_map () = !prefix_map_ref
+
+let keep_first l =
+ let cmp (_,x) (_,y) = x = y in
+ let rec aux prev = function
+ | [] -> []
+ | hd::tl -> if cmp prev hd then hd :: aux prev tl else []
+ in
+ match l with
+ | hd :: tl -> hd :: aux hd tl
+ | _ -> assert false
+;;
+ (** given an uri returns the prefixes for it *)
let lookup uri =
let matches =
- List.filter (fun (rex, _, _, _) -> Pcre.pmatch ~rex uri)
- (Lazy.force prefix_map) in
+ HExtlib.filter_map
+ (fun (rex, _, l, _ as entry) ->
+ try
+ let got = Pcre.extract ~full_match:true ~rex uri in
+ Some (entry, String.length got.(0))
+ with Not_found -> None)
+ (Lazy.force (prefix_map ()))
+ in
if matches = [] then raise (Unresolvable_URI uri);
- matches
-
-let resolve_prefix uri =
- match lookup uri with
- | (rex, _, url_prefix, _) :: _ ->
- Pcre.replace_first ~rex ~templ:url_prefix uri
- | [] -> assert false
+ List.map fst (keep_first (List.sort (fun (_,l1) (_,l2) -> l2 - l1) matches))
+;;
-let resolve_prefixes uri =
- let matches = lookup uri in
- List.map
- (fun (rex, _, url_prefix, _) ->
- Pcre.replace_first ~rex ~templ:url_prefix uri)
- matches
-
-let get_attrs uri =
- match lookup uri with
- | (_, _, _, attrs) :: _ -> attrs
- | [] -> assert false
-
-let is_legacy uri = List.exists ((=) `Legacy) (get_attrs uri)
-
-let is_read_only uri =
- is_legacy uri || List.exists ((=) `Read_only) (get_attrs uri)
+let get_attrs uri = List.map (fun (_, _, _, attrs) -> attrs) (lookup uri)
-let exists_http _ url =
+(*************************** ACTIONS ******************************************)
+
+let exists_http ~local _ url =
+ if local then false else
Http_getter_wget.exists (url ^ gz_suffix) || Http_getter_wget.exists url
let exists_file _ fname =
Sys.file_exists (fname ^ gz_suffix) || Sys.file_exists fname
-let resolve_http _ url =
+let resolve_http ~must_exists ~local _ url =
+ if local then raise Not_found' else
try
- List.find Http_getter_wget.exists [ url ^ gz_suffix; url ]
+ if must_exists then
+ List.find Http_getter_wget.exists [ url ^ gz_suffix; url ]
+ else
+ url
with Not_found -> raise Not_found'
-let resolve_file _ fname =
+let resolve_file ~must_exists _ fname =
try
- List.find Sys.file_exists [ fname ^ gz_suffix; fname ]
+ if must_exists then
+ List.find Sys.file_exists [ fname ^ gz_suffix; fname ]
+ else
+ fname
with Not_found -> raise Not_found'
-let strip_gz_suffix fname =
- if extension fname = gz_suffix then
- String.sub fname 0 (String.length fname - gz_suffix_len)
- else
- fname
-
-let remove_duplicates l =
- Http_getter_misc.list_uniq (List.fast_sort Pervasives.compare l)
-
let ls_file_single _ path_prefix =
let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in
let is_useless dir = try dir.[0] = '.' with _ -> false in
remove_duplicates !entries
with Unix.Unix_error (_, "opendir", _) -> []
-let ls_http_single _ url_prefix =
+let ls_http_single ~local _ url_prefix =
+ if local then raise (Resource_not_found ("get","")) else
+ let url = normalize_dir url_prefix ^ index_fname in
try
- let index = Http_getter_wget.get (normalize_dir url_prefix ^ index_fname) in
+ let index = Http_getter_wget.get url in
Pcre.split ~rex:newline_RE index
- with Http_client_error _ -> raise Not_found'
+ with Http_client_error _ -> raise (Resource_not_found ("get",url))
+;;
let get_file _ path =
if Sys.file_exists (path ^ gz_suffix) then
else
raise Not_found'
-let get_http uri url =
+let get_http ~local uri url =
+ if local then raise Not_found' else
let scheme, path =
match Pcre.split ~rex:cic_scheme_sep_RE uri with
| [scheme; path] -> scheme, path
prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme";
assert false
+(**************************** RESOLUTION OF PREFIXES ************************)
+
+let resolve_prefixes n local write exists uri =
+ let exists_test new_uri =
+ if is_file_schema new_uri then
+ exists_file () (path_of_file_url new_uri)
+ else if is_http_schema new_uri then
+ exists_http ~local () new_uri
+ else false
+ in
+ let rec aux n = function
+ | (rex, _, url_prefix, attrs) :: tl when n > 0->
+ (match write, is_readwrite attrs, exists with
+ | true ,false, _ -> aux n tl
+ | true ,true ,true
+ | false,_ ,true ->
+ let new_uri = (Pcre.replace_first ~rex ~templ:url_prefix uri) in
+ if exists_test new_uri then new_uri::aux (n-1) tl else aux n tl
+ | true ,true ,false
+ | false,_ ,false ->
+ (Pcre.replace_first ~rex ~templ:url_prefix uri) :: (aux (n-1) tl))
+ | _ -> []
+ in
+ aux n (lookup uri)
+
+let resolve_prefix l w e u =
+ match resolve_prefixes 1 l w e u with
+ | hd :: _ -> hd
+ | [] ->
+ raise
+ (Resource_not_found
+ (Printf.sprintf "resolve_prefix write:%b exists:%b" w e,u))
+
+(* uncomment to debug prefix resolution *)
+(*
+let resolve_prefix w e u =
+ prerr_endline
+ ("XXX w=" ^ string_of_bool w ^ " e=" ^ string_of_bool e ^" :" ^ u);
+ let rc = resolve_prefix w e u in
+ prerr_endline ("YYY :" ^ rc ^ "\n");
+ rc
+*)
+
+(************************* DISPATCHERS ***************************************)
+
type 'a storage_method = {
name: string;
+ write: bool;
+ exists: bool;
+ local: bool;
file: string -> string -> 'a; (* unresolved uri, resolved uri *)
http: string -> string -> 'a; (* unresolved uri, resolved uri *)
}
-let normalize_root uri = (* add trailing slash to roots *)
- try
- if uri.[String.length uri - 1] = ':' then uri ^ "/"
- else uri
- with Invalid_argument _ -> uri
-
let invoke_method storage_method uri url =
try
- if Pcre.pmatch ~rex:file_scheme_RE url then
+ if is_file_schema url then
storage_method.file uri (path_of_file_url url)
- else if Pcre.pmatch ~rex:http_scheme_RE url then
+ else if is_http_schema url then
storage_method.http uri url
else
raise (Unsupported_scheme url)
with Not_found' -> raise (Resource_not_found (storage_method.name, uri))
-
+
let dispatch_single storage_method uri =
assert (extension uri <> gz_suffix);
let uri = normalize_root uri in
- let url = resolve_prefix uri in
+ let url =
+ resolve_prefix
+ storage_method.local storage_method.write storage_method.exists uri
+ in
invoke_method storage_method uri url
let dispatch_multi storage_method uri =
- let urls = resolve_prefixes uri in
+ let urls =
+ resolve_prefixes max_int
+ storage_method.local storage_method.write storage_method.exists uri
+ in
let rec aux = function
| [] -> raise (Resource_not_found (storage_method.name, uri))
| url :: tl ->
in
aux urls
-let exists =
- dispatch_single { name = "exists"; file = exists_file; http = exists_http }
-
-let resolve =
- dispatch_single { name = "resolve"; file = resolve_file; http = resolve_http }
-
-let ls_single =
- dispatch_single { name = "ls"; file = ls_file_single; http = ls_http_single }
+let dispatch_all storage_method uri =
+ let urls =
+ resolve_prefixes max_int
+ storage_method.local storage_method.write storage_method.exists uri
+ in
+ List.map (fun url -> invoke_method storage_method uri url) urls
+
+(******************************** EXPORTED FUNCTIONS *************************)
+
+let exists ~local s =
+ try
+ dispatch_single
+ { write = false;
+ name = "exists";
+ exists = true;
+ local=local;
+ file = exists_file; http = exists_http ~local; } s
+ with Resource_not_found _ -> false
+
+let resolve ~local ?(must_exists=true) ~writable =
+ (if must_exists then
+ dispatch_multi
+ else
+ dispatch_single)
+ { write = writable;
+ name="resolve";
+ exists = must_exists;
+ local=local;
+ file = resolve_file ~must_exists;
+ http = resolve_http ~local ~must_exists; }
let remove =
- dispatch_single { name = "remove"; file = remove_file; http = remove_http }
-
-let filename ?(find = false) =
- if find then
- dispatch_multi { name = "filename"; file = get_file; http = get_http }
- else
- dispatch_single { name = "filename"; file = get_file; http = get_http }
-
- (* ls_single performs ls only below a single prefix, but prefixes which have
- * common prefix (sorry) with a given one may need to be considered as well
- * for example: when doing "ls cic:/" we would like to see the "cic:/matita"
- * directory *)
-let ls uri_prefix =
-(* prerr_endline ("Http_getter_storage.ls " ^ uri_prefix); *)
- let direct_results = ls_single uri_prefix in
+ dispatch_single
+ { write = false;
+ name = "remove";
+ exists=true;
+ local=false;
+ file = remove_file; http = remove_http; }
+
+let filename ~local ?(find = false) =
+ (if find then dispatch_multi else dispatch_single)
+ { write = false;
+ name = "filename";
+ exists=true;
+ local=local;
+ file = get_file; http = get_http ~local ; }
+
+let ls ~local uri_prefix =
+ let ls_all s =
+ try
+ dispatch_all
+ { write=false;
+ name = "ls";
+ exists=true;
+ local=local;
+ file = ls_file_single; http = ls_http_single ~local; } s
+ with Resource_not_found _ -> []
+ in
+ let direct_results = List.flatten (ls_all uri_prefix) in
List.fold_left
(fun results (_, uri_prefix', _, _) ->
if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then
else
results)
direct_results
- (Lazy.force prefix_map)
+ (Lazy.force (prefix_map ()))
let clean_cache () =
ignore (Sys.command
(sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir)))
+let list_writable_prefixes _ =
+ HExtlib.filter_map
+ (fun (_,_,url,attrs) ->
+ if is_readwrite attrs then
+ Some url
+ else
+ None)
+ (Lazy.force (prefix_map ()))
+
+let is_legacy uri = List.for_all has_legacy (get_attrs uri)
+
+(* implement this in a fast way! *)
+let is_empty ~local buri =
+ let buri = strip_trailing_slash buri ^ "/" in
+ let files = ls ~local buri in
+ is_empty_listing files
+
+let is_read_only uri =
+ let is_empty_dir path =
+ let files =
+ try
+ if is_file_schema path then
+ ls_file_single () (path_of_file_url path)
+ else if is_http_schema path then
+ ls_http_single ~local:false () path
+ else
+ assert false
+ with Resource_not_found _ -> []
+ in
+ is_empty_listing files
+ in
+ let rec aux found_writable = function
+ | (rex, _, url_prefix, attrs)::tl ->
+ let new_url = (Pcre.replace_first ~rex ~templ:url_prefix uri) in
+ let rdonly = has_legacy attrs || has_rdonly attrs in
+ (match rdonly, is_empty_dir new_url, found_writable with
+ | true, false, _ -> true
+ | true, true, _ -> aux found_writable tl
+ | false, _, _ -> aux true tl)
+ | [] -> not found_writable (* if found_writable then false else true *)
+ in
+ aux false (lookup uri)
+
+let activate_system_mode () =
+ let map = Lazy.force (prefix_map ()) in
+ let map =
+ HExtlib.filter_map
+ (fun ((rex, urip, urlp, attrs) as entry) ->
+ if has_legacy attrs then
+ Some entry
+ else if has_rdonly attrs then
+ Some (rex, urip, urlp, List.filter ((<>) `Read_only) attrs)
+ else
+ None)
+ map
+ in
+ let map = map in (* just to remember that ocamlc 'lazy' is a ... *)
+ prefix_map_ref := (lazy map)
+
+(* eof *)