From 2e41357d3d5cca73b3f91e83528d3758ca3e62db Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli Date: Tue, 7 Jan 2003 14:59:40 +0000 Subject: [PATCH] - optimization: precompile all precompilable regexps --- helm/http_getter/http_getter.ml | 56 +++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/helm/http_getter/http_getter.ml b/helm/http_getter/http_getter.ml index 04eaed9c4..1e04ae878 100644 --- a/helm/http_getter/http_getter.ml +++ b/helm/http_getter/http_getter.ml @@ -24,8 +24,6 @@ * http://cs.unibo.it/helm/. *) -(* TODO optimization: precompile regexps *) - open Http_getter_common;; open Http_getter_misc;; open Http_getter_types;; @@ -54,15 +52,19 @@ let parse_output_format (req: Http_types.request) = | s when String.lowercase s = "xml" -> Fmt_xml | s -> raise (Http_getter_bad_request ("Invalid /ls format: " ^ s)) ;; -let parse_ls_uri (req: Http_types.request) = - let baseuri = req#param "baseuri" in - let subs = - Pcre.extract ~pat:"^(\\w+):(.*)$" (Pcre.replace ~pat:"/+$" baseuri) - in - match (subs.(1), subs.(2)) with - | "cic", uri -> Cic uri - | "theory", uri -> Theory uri - | _ -> raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ baseuri)) +let parse_ls_uri = + let parse_ls_RE = Pcre.regexp "^(\\w+):(.*)$" in + let trailing_slash_RE = Pcre.regexp "/+$" in + fun (req: Http_types.request) -> + let baseuri = req#param "baseuri" in + let subs = + Pcre.extract ~rex:parse_ls_RE + (Pcre.replace ~rex:trailing_slash_RE baseuri) + in + match (subs.(1), subs.(2)) with + | "cic", uri -> Cic uri + | "theory", uri -> Theory uri + | _ -> raise (Http_getter_bad_request ("Invalid /ls baseuri: " ^ baseuri)) ;; (* global maps, shared by all threads *) @@ -81,6 +83,9 @@ in let resolve uri = (map_of_uri uri)#resolve uri in let register uri = (map_of_uri uri )#add uri in let return_all_foo_uris map doctype filter outchan = + (** return all URIs contained in 'map' which satisfy predicate 'filter'; URIs + are written in an XMLish format ('doctype' is the XML doctype) onto 'outchan' + *) Http_daemon.send_basic_headers ~code:200 outchan; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_CRLF outchan; @@ -211,25 +216,28 @@ let return_ls = Http_daemon.respond ~headers:["Content-Type", "text/xml"] ~body outchan in -let (index_line_sep_RE, index_sep_RE) = - (Pcre.regexp "[ \t]+", Pcre.regexp "\n+") +let (index_line_sep_RE, index_sep_RE, trailing_types_RE, + heading_cic_RE, heading_theory_RE, + heading_rdf_cic_RE, heading_rdf_theory_RE) = + (Pcre.regexp "[ \t]+", Pcre.regexp "\n+", Pcre.regexp "\\.types$", + Pcre.regexp "^cic:", Pcre.regexp "^theory:", + Pcre.regexp "^helm:rdf.*//cic:", Pcre.regexp "^helm:rdf.*//theory:") in - (* TODO support 'file://.*' servers *) let update_from_server logmsg server_url = (* use global maps *) debug_print ("Updating information from " ^ server_url); let xml_url_of_uri = function (* TODO missing sanity checks on server_url, e.g. it can contains $1 *) - | uri when (Pcre.pmatch ~pat:"^cic:" uri) -> - Pcre.replace ~pat:"^cic:" ~templ:server_url uri - | uri when (Pcre.pmatch ~pat:"^theory:" uri) -> - Pcre.replace ~pat:"^theory:" ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_cic_RE uri) -> + Pcre.replace ~rex:heading_cic_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_theory_RE uri) -> + Pcre.replace ~rex:heading_theory_RE ~templ:server_url uri | uri -> raise (Http_getter_invalid_URI uri) in let rdf_url_of_uri = function (* TODO as above *) - | uri when (Pcre.pmatch ~pat:"^helm:rdf.*//cic:" uri) -> - Pcre.replace ~pat:"^helm:rdf.*//cic:" ~templ:server_url uri - | uri when (Pcre.pmatch ~pat:"^helm:rdf.*//theory:" uri) -> - Pcre.replace ~pat:"^helm:rdf.*//theory:" ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_rdf_cic_RE uri) -> + Pcre.replace ~rex:heading_rdf_cic_RE ~templ:server_url uri + | uri when (Pcre.pmatch ~rex:heading_rdf_theory_RE uri) -> + Pcre.replace ~rex:heading_rdf_theory_RE ~templ:server_url uri | uri -> raise (Http_getter_invalid_URI uri) in let log = ref (logmsg ^ "Processing server: " ^ server_url ^ "
\n") in @@ -336,8 +344,8 @@ let callback (req: Http_types.request) outchan = | "/getalluris" -> return_all_xml_uris (fun uri -> - (Pcre.pmatch ~pat:"^cic:" uri) && - not (Pcre.pmatch ~pat:"\\.types$" uri)) + (Pcre.pmatch ~rex:heading_cic_RE uri) && + not (Pcre.pmatch ~rex:trailing_types_RE uri)) outchan | "/getallrdfuris" -> (let classs = req#param "class" in -- 2.39.2