-let cic_map =
- lazy (new Http_getter_map.map (Lazy.force Http_getter_env.cic_dbm))
-let nuprl_map =
- lazy (new Http_getter_map.map (Lazy.force Http_getter_env.nuprl_dbm))
-let rdf_map =
- lazy (new Http_getter_map.map (Lazy.force Http_getter_env.rdf_dbm))
-let xsl_map =
- lazy (new Http_getter_map.map (Lazy.force Http_getter_env.xsl_dbm))
-
-let uri_tree = ref None
-let deref_if_some r =
- match !r with
- | None -> assert false
- | Some x -> x
-let is_prefetch_on () =
- match !uri_tree with None -> false | Some _ -> true
-
-let dump_tree () =
- let path = Lazy.force Http_getter_env.dump_file in
- Tree.save_to_disk path (deref_if_some uri_tree);
- Http_getter_md5.create_hash [
- (Lazy.force Http_getter_env.cic_dbm_real);
- path ]
-
-let load_tree () =
- if not (Http_getter_md5.check_hash ()) then
- assert false
- else
- uri_tree := Some (Tree.load_from_disk
- (Lazy.force Http_getter_env.dump_file))
-
-let sync_with_map () =
- if not (Http_getter_md5.check_hash ()) then begin
- let tree = ref (Some Tree.empty_tree) in
- Http_getter_logger.log "Updating cic map dump...";
- let t = Unix.time () in
- (Lazy.force cic_map)#iter
- (fun k _ ->
- tree := Some (Tree.add_uri k (deref_if_some tree)));
- uri_tree := !tree;
- Http_getter_logger.log
- (sprintf "done in %.0f sec" (Unix.time () -. t));
- dump_tree ()
- end else begin
- Http_getter_logger.log "Cic map dump is up to date!";
- load_tree () (* XXX TASSI: race condition here *)
- end
-
-let maps = [ cic_map; nuprl_map; rdf_map; xsl_map ]
-let close_maps () = List.iter (fun m -> (Lazy.force m) # close) maps
-let clear_maps () = List.iter (fun m -> (Lazy.force m) # clear) maps
-let sync_maps () =
- List.iter (fun m -> (Lazy.force m) # sync) maps;
- sync_with_map ()
-
-let map_of_uri = function
- | uri when is_cic_uri uri -> Lazy.force cic_map
- | uri when is_nuprl_uri uri -> Lazy.force nuprl_map
- | uri when is_rdf_uri uri -> Lazy.force rdf_map
- | uri when is_xsl_uri uri -> Lazy.force xsl_map
- | uri -> raise (Unresolvable_URI uri)
-
-let update_from_server logger server_url = (* use global maps *)
- Http_getter_logger.log ("Updating information from " ^ server_url);
- let xml_url_of_uri = function
- (* TODO missing sanity checks on server_url, e.g. it can contains $1 *)
- | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) ->
- Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri
- | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) ->
- Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri
- | uri when (Pcre.pmatch ~rex:heading_nuprl_RE uri) ->
- Pcre.replace ~rex:heading_nuprl_RE ~templ:server_url uri
- | uri -> raise (Invalid_URI uri)
- in
- let rdf_url_of_uri = function (* TODO as above *)
- | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) ->
- Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri
- | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) ->
- Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri
- | uri -> raise (Invalid_URI uri)
- in
- logger (`T ("Processing server: " ^ server_url));
- logger `BR;
- let (xml_index, rdf_index, xsl_index) =
- (* TODO keeps index in memory, is better to keep them on temp files? *)
- (http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xml_index)),
- http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.rdf_index)),
- http_get (server_url ^ "/" ^ (Lazy.force Http_getter_env.xsl_index)))
- in
- if (xml_index = None && rdf_index = None && xsl_index = None) then
- Http_getter_logger.log (sprintf "Warning: useless server %s" server_url);
- (match xml_index with
- | Some xml_index ->
- logger (`T "- Updating XML db ...");
-(* logger `BR; *)
- List.iter
- (function
- | l when is_blank_line l -> () (* skip blank and commented lines *)
- | l ->
- (try
- (match Pcre.split ~rex:index_line_sep_RE l with
- | [uri; "gz"] ->
- assert (is_cic_uri uri || is_nuprl_uri uri) ;
- (map_of_uri uri)#replace
- uri ((xml_url_of_uri uri) ^ ".xml.gz")
- | [uri] ->
- assert (is_cic_uri uri || is_nuprl_uri uri) ;
- (map_of_uri uri)#replace
- uri ((xml_url_of_uri uri) ^ ".xml")
- | _ ->
- logger (`T ("Ignoring invalid line: '" ^ l));
- logger `BR)
- with Invalid_URI uri ->
- logger (`T ("Ignoring invalid XML URI: '" ^ l));
- logger `BR))
- (Pcre.split ~rex:index_sep_RE xml_index); (* xml_index lines *)
- logger (`T "All done");
- logger `BR
- | None -> ());
- (match rdf_index with
- | Some rdf_index ->
- logger (`T "- Updating RDF db ...");
-(* logger `BR; *)
- List.iter
- (fun l ->
- try
- (match Pcre.split ~rex:index_line_sep_RE l with
- | [uri; "gz"] ->
- (Lazy.force rdf_map) # replace uri
- ((rdf_url_of_uri uri) ^ ".xml.gz")
- | [uri] ->
- (Lazy.force rdf_map) # replace uri
- ((rdf_url_of_uri uri) ^ ".xml")
- | _ ->
- logger (`T ("Ignoring invalid line: '" ^ l));
- logger `BR)
- with Invalid_URI uri ->
- logger (`T ("Ignoring invalid RDF URI: '" ^ l));
- logger `BR)
- (Pcre.split ~rex:index_sep_RE rdf_index); (* rdf_index lines *)
- logger (`T "All done");
- logger `BR
- | None -> ());
- (match xsl_index with
- | Some xsl_index ->
- logger (`T "- Updating XSLT db ...");
-(* logger `BR; *)
- List.iter
- (fun l -> (Lazy.force xsl_map) # replace l (server_url ^ "/" ^ l))
- (Pcre.split ~rex:index_sep_RE xsl_index);
- logger (`T "All done");
- logger `BR
- | None -> ());
- Http_getter_logger.log "done with this server"
-
-let update_from_all_servers logger () = (* use global maps *)
- clear_maps ();
- List.iter
- (update_from_server logger)
- (* reverse order: 1st server is the most important one *)
- (List.map snd (List.rev (Http_getter_env.servers ())));
- sync_maps ()
-
-let update_from_one_server ?(logger = fun _ -> ()) server_url =
- update_from_server logger server_url
-
-let temp_file_of_uri uri =
- let flat_string s s' c =
- let cs = String.copy s in
- for i = 0 to (String.length s) - 1 do
- if String.contains s' s.[i] then cs.[i] <- c
- done;
- cs
- in
- let user = try Unix.getlogin () with _ -> "" in
- Filename.open_temp_file (user ^ flat_string uri ".-=:;!?/&" '_') ""