(* Copyright (C) 2004-2005, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * HELM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HELM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, * http://helm.cs.unibo.it/ *) open Printf open Http_getter_misc open Http_getter_types exception Not_found' exception Resource_not_found of string * string (** method, uri *) let index_fname = "INDEX" let trailing_slash_RE = Pcre.regexp "/$" let relative_RE_raw = "(^[^/]+(/[^/]+)*/?$)" let relative_RE = Pcre.regexp relative_RE_raw let file_scheme_RE_raw = "(^file://)" let extended_file_scheme_RE = Pcre.regexp "(^file:/+)" let file_scheme_RE = Pcre.regexp (relative_RE_raw ^ "|" ^ file_scheme_RE_raw) let http_scheme_RE = Pcre.regexp "^http://" let newline_RE = Pcre.regexp "\\n" let cic_scheme_sep_RE = Pcre.regexp ":/" let gz_suffix = ".gz" let gz_suffix_len = String.length gz_suffix let path_of_file_url url = assert (Pcre.pmatch ~rex:file_scheme_RE url); if Pcre.pmatch ~rex:relative_RE url then url else (* absolute path, add heading "/" if missing *) "/" ^ (Pcre.replace ~rex:extended_file_scheme_RE url) (** associative list regular expressions -> url prefixes * sorted with longest prefixes first *) let prefix_map = lazy ( let map_w_length = List.map (fun (uri_prefix, url_prefix) -> let uri_prefix = normalize_dir uri_prefix in let url_prefix = normalize_dir url_prefix in let regexp = Pcre.regexp ("^(" ^ Pcre.quote uri_prefix ^ ")") in (regexp, String.length uri_prefix, uri_prefix, url_prefix)) (Lazy.force Http_getter_env.prefixes) in let decreasing_length (_, len1, _, _) (_, len2, _, _) = compare len2 len1 in List.map (fun (regexp, len, uri_prefix, url_prefix) -> (regexp, strip_trailing_slash uri_prefix, url_prefix)) (List.fast_sort decreasing_length map_w_length)) let resolve_prefix uri = let matches = List.filter (fun (rex, _, _) -> Pcre.pmatch ~rex uri) (Lazy.force prefix_map) in match matches with | (rex, _, url_prefix) :: _ -> Pcre.replace_first ~rex ~templ:url_prefix uri | [] -> raise (Unresolvable_URI uri) let resolve_prefixes uri = let matches = List.filter (fun (rex, _, _) -> Pcre.pmatch ~rex uri) (Lazy.force prefix_map) in if matches = [] then raise (Unresolvable_URI uri); List.map (fun (rex, _, url_prefix) -> Pcre.replace_first ~rex ~templ:url_prefix uri) matches let exists_http _ url = Http_getter_wget.exists (url ^ gz_suffix) || Http_getter_wget.exists url let exists_file _ fname = Sys.file_exists (fname ^ gz_suffix) || Sys.file_exists fname let resolve_http _ url = try List.find Http_getter_wget.exists [ url ^ gz_suffix; url ] with Not_found -> raise Not_found' let resolve_file _ fname = try List.find Sys.file_exists [ fname ^ gz_suffix; fname ] with Not_found -> raise Not_found' let strip_gz_suffix fname = if extension fname = gz_suffix then String.sub fname 0 (String.length fname - gz_suffix_len) else fname let remove_duplicates l = Http_getter_misc.list_uniq (List.fast_sort Pervasives.compare l) let ls_file_single _ path_prefix = let is_dir fname = (Unix.stat fname).Unix.st_kind = Unix.S_DIR in let is_useless dir = try dir.[0] = '.' with _ -> false in let entries = ref [] in try let dir_handle = Unix.opendir path_prefix in (try while true do let entry = Unix.readdir dir_handle in if is_useless entry then () else if is_dir (path_prefix ^ "/" ^ entry) then entries := normalize_dir entry :: !entries else entries := strip_gz_suffix entry :: !entries done with End_of_file -> Unix.closedir dir_handle); remove_duplicates !entries with Unix.Unix_error (_, "opendir", _) -> [] let ls_http_single _ url_prefix = try let index = Http_getter_wget.get (normalize_dir url_prefix ^ index_fname) in Pcre.split ~rex:newline_RE index with Http_client_error _ -> raise Not_found' let get_file _ path = if Sys.file_exists (path ^ gz_suffix) then path ^ gz_suffix else if Sys.file_exists path then path else raise Not_found' let get_http uri url = let scheme, path = match Pcre.split ~rex:cic_scheme_sep_RE uri with | [scheme; path] -> scheme, path | _ -> assert false in let cache_name = sprintf "%s%s/%s" (Lazy.force Http_getter_env.cache_dir) scheme path in if Sys.file_exists (cache_name ^ gz_suffix) then cache_name ^ gz_suffix else if Sys.file_exists cache_name then cache_name else begin (* fill cache *) Http_getter_misc.mkdir ~parents:true (Filename.dirname cache_name); (try Http_getter_wget.get_and_save (url ^ gz_suffix) (cache_name ^ gz_suffix); cache_name ^ gz_suffix with Http_client_error _ -> (try Http_getter_wget.get_and_save url cache_name; cache_name with Http_client_error _ -> raise Not_found')) end let remove_file _ path = if Sys.file_exists (path ^ gz_suffix) then Sys.remove (path ^ gz_suffix); if Sys.file_exists path then Sys.remove path let remove_http _ _ = prerr_endline "Http_getter_storage.remove: not implemented for HTTP scheme"; assert false type 'a storage_method = { name: string; file: string -> string -> 'a; (* unresolved uri, resolved uri *) http: string -> string -> 'a; (* unresolved uri, resolved uri *) } let normalize_root uri = (* add trailing slash to roots *) try if uri.[String.length uri - 1] = ':' then uri ^ "/" else uri with Invalid_argument _ -> uri let invoke_method storage_method uri url = try if Pcre.pmatch ~rex:file_scheme_RE url then storage_method.file uri (path_of_file_url url) else if Pcre.pmatch ~rex:http_scheme_RE url then storage_method.http uri url else raise (Unsupported_scheme url) with Not_found' -> raise (Resource_not_found (storage_method.name, uri)) let dispatch_single storage_method uri = assert (extension uri <> gz_suffix); let uri = normalize_root uri in let url = resolve_prefix uri in invoke_method storage_method uri url let dispatch_multi storage_method uri = let urls = resolve_prefixes uri in let rec aux = function | [] -> raise (Resource_not_found (storage_method.name, uri)) | url :: tl -> (try invoke_method storage_method uri url with Resource_not_found _ -> aux tl) in aux urls let exists = dispatch_single { name = "exists"; file = exists_file; http = exists_http } let resolve = dispatch_single { name = "resolve"; file = resolve_file; http = resolve_http } let ls_single = dispatch_single { name = "ls"; file = ls_file_single; http = ls_http_single } let remove = dispatch_single { name = "remove"; file = remove_file; http = remove_http } let filename ?(find = false) = if find then dispatch_multi { name = "filename"; file = get_file; http = get_http } else dispatch_single { name = "filename"; file = get_file; http = get_http } (* ls_single performs ls only below a single prefix, but prefixes which have * common prefix (sorry) with a given one may need to be considered as well * for example: when doing "ls cic:/" we would like to see the "cic:/matita" * directory *) let ls uri_prefix = (* prerr_endline ("Http_getter_storage.ls " ^ uri_prefix); *) let direct_results = ls_single uri_prefix in List.fold_left (fun results (_, uri_prefix', _) -> if Filename.dirname uri_prefix' = strip_trailing_slash uri_prefix then (Filename.basename uri_prefix' ^ "/") :: results else results) direct_results (Lazy.force prefix_map) let clean_cache () = ignore (Sys.command (sprintf "rm -rf %s/" (Lazy.force Http_getter_env.cache_dir)))