X-Git-Url: http://matita.cs.unibo.it/gitweb/?a=blobdiff_plain;f=helm%2FsearchEngine%2FsearchEngine.ml;h=c42c01043f081b01251b8566c73caf009abf8c2f;hb=4167cea65ca58897d1a3dbb81ff95de5074700cc;hp=bc4cd729a67128de91304c2c5c100c272554e4c6;hpb=ffb569818f6e4d9723dbd2c1721bbe80e75278e9;p=helm.git diff --git a/helm/searchEngine/searchEngine.ml b/helm/searchEngine/searchEngine.ml index bc4cd729a..c42c01043 100644 --- a/helm/searchEngine/searchEngine.ml +++ b/helm/searchEngine/searchEngine.ml @@ -1,4 +1,4 @@ -(* Copyright (C) 2002-2004, HELM Team. +(* Copyright (C) 2002-2005, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science @@ -25,8 +25,6 @@ open Printf -module DB = Dbi_mysql - let debug = true let debug_print s = if debug then prerr_endline s let _ = Http_common.debug := false @@ -35,44 +33,44 @@ exception Chat_unfinished exception Unbound_identifier of string exception Invalid_action of string (* invalid action for "/search" method *) -let daemon_name = "Moogle" -let configuration_file = "searchEngine.conf.xml" + (** raised by elim when a MutInd is required but not found *) +exception Not_a_MutInd + +let daemon_name = "Whelp" +let configuration_file = "/projects/helm/etc/whelp.conf.xml" -let expression_tag_RE = Pcre.regexp "@EXPRESSION@" -let action_tag_RE = Pcre.regexp "@ACTION@" -let advanced_tag_RE = Pcre.regexp "@ADVANCED@" -let advanced_checked_RE = Pcre.regexp "@ADVANCED_CHECKED@" -let simple_checked_RE = Pcre.regexp "@SIMPLE_CHECKED@" -let title_tag_RE = Pcre.regexp "@TITLE@" -let no_choices_tag_RE = Pcre.regexp "@NO_CHOICES@" -let current_choices_tag_RE = Pcre.regexp "@CURRENT_CHOICES@" -let choices_tag_RE = Pcre.regexp "@CHOICES@" -let msg_tag_RE = Pcre.regexp "@MSG@" -let id_to_uris_RE = Pcre.regexp "@ID_TO_URIS@" -let id_RE = Pcre.regexp "@ID@" -let iden_tag_RE = Pcre.regexp "@IDEN@" -let interpretations_RE = Pcre.regexp "@INTERPRETATIONS@" -let interpretations_labels_RE = Pcre.regexp "@INTERPRETATIONS_LABELS@" -let results_RE = Pcre.regexp "@RESULTS@" -let new_aliases_RE = Pcre.regexp "@NEW_ALIASES@" -let form_RE = Pcre.regexp "@FORM@" -let variables_initialization_RE = Pcre.regexp "@VARIABLES_INITIALIZATION@" -let search_engine_url_RE = Pcre.regexp "@SEARCH_ENGINE_URL@" -let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$" +let placeholders = [ + "ACTION"; "ADVANCED"; "ADVANCED_CHECKED"; "CHOICES"; "CURRENT_CHOICES"; + "EXPRESSION"; "ID"; "IDEN"; "ID_TO_URIS"; "INTERPRETATIONS"; + "INTERPRETATIONS_LABELS"; "MSG"; "NEW_ALIASES"; "NEXT_LINK"; "NO_CHOICES"; + "PAGE"; "PAGES"; "PAGELIST"; "PREV_LINK"; "QUERY_KIND"; "QUERY_SUMMARY"; "RESULTS"; + "SEARCH_ENGINE_URL"; "SIMPLE_CHECKED"; "TITLE"; +] + +let tag = + let regexps = Hashtbl.create 25 in + List.iter + (fun tag -> Hashtbl.add regexps tag (Pcre.regexp (sprintf "@%s@" tag))) + placeholders; + fun name -> + try + Hashtbl.find regexps name + with Not_found -> assert false (* First of all we load the configuration *) let _ = Helm_registry.load_from configuration_file let port = Helm_registry.get_int "search_engine.port" let pages_dir = Helm_registry.get "search_engine.html_dir" -let interactive_interpretation_choice_TPL = pages_dir ^ "/moogle_chat2.html" let moogle_TPL = pages_dir ^ "/moogle.html" +let choices_TPL = pages_dir ^ "/moogle_chat.html" let my_own_url = let ic = Unix.open_process_in "hostname -f" in let hostname = input_line ic in ignore (Unix.close_process_in ic); sprintf "http://%s:%d" hostname port +let _ = Helm_registry.set "search_engine.my_own_url" my_own_url let bad_request body outchan = Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) ~body @@ -105,6 +103,11 @@ let javascript_quote s = let string_tail s = let len = String.length s in String.sub s 1 (len-1) +let nonvar uri = + let s = UriManager.string_of_uri uri in + let len = String.length s in + let suffix = String.sub s (len-4) 4 in + not (suffix = ".var") let add_param_substs params = List.map @@ -115,56 +118,163 @@ let add_param_substs params = (fun ((key,_) as p) -> Pcre.pmatch ~pat:"^param\\." key) params) +let page_RE = Pcre.regexp "¶m\\.page=\\d+" +let identifier_RE = Pcre.regexp "^\\s*(\\w|')+\\s*$" +let qualified_mutind_RE = + Pcre.regexp "^\\s*cic:(/(\\w|')+)+\\.ind#xpointer\\(1/\\d+\\)\\s*$" + +let query_kind_of_req (req: Http_types.request) = + match req#path with + | "/match" -> "Match" + | "/hint" -> "Hint" + | "/locate" -> "Locate" + | "/elim" -> "Elim" + | "/instance" -> "Instance" + | _ -> "" + + (* given a uri with a query part in input try to find in it a string + * "¶m_name=..." (where param_name is given). If found its value will be + * set to param_value. If not, a trailing "¶m_name=param_value" (where + * both are given) is added to the input string *) +let patch_param param_name param_value url = + let rex = Pcre.regexp (sprintf "&%s=[^&]*" (Pcre.quote param_name)) in + if Pcre.pmatch ~rex url then + Pcre.replace ~rex ~templ:(sprintf "%s=%s" param_name param_value) url + else + sprintf "%s&%s=%s" url param_name param_value + + (** HTML encoding, e.g.: "<" -> "<" *) +let html_encode = Netencoding.Html.encode_from_latin1 + +let fold_n_to_m f n m acc = + let rec aux acc = + function + i when i <= m -> aux (f i acc) (i + 1) + | _ -> acc + in + aux acc n + let send_results results - ?(id_to_uris = CicTextualParser2.EnvironmentP3.of_string "") + ?(id_to_uris = DisambiguateTypes.empty_environment) (req: Http_types.request) outchan = + let query_kind = query_kind_of_req req in + let interp = try req#param "interp" with Http_types.Param_not_found _ -> "" in + let page_link anchor page = + try + let this = req#param "this" in + let target = + (patch_param "param.interp" interp + (patch_param "param.page" (string_of_int page) + this)) + in + let target = Pcre.replace ~pat:"&" ~templ:"&" target in + sprintf "%s" target anchor + with Http_types.Param_not_found _ -> "" + in Http_daemon.send_basic_headers ~code:(`Code 200) outchan ; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_CRLF outchan ; - let results_string = + let subst = match results with - | `Results r -> MooglePp.theory_of_result req r - | `Error msg -> msg + | `Results results -> + let page = try int_of_string (req#param "page") with _ -> 1 in + let results_no = List.length results in + let results_per_page = + Helm_registry.get_int "search_engine.results_per_page" + in + let pages = + if results_no mod results_per_page = 0 then + results_no / results_per_page + else + results_no / results_per_page + 1 + in + let pages = if pages = 0 then 1 else pages in + let additional_pages = 3 in + let (summary, results) = MooglePp.theory_of_result page results in + [ tag "PAGE", string_of_int page; + tag "PAGES", string_of_int pages ^ " Pages"; + tag "PAGELIST", + (let inf = page - additional_pages in + let sup = page + additional_pages in + let superinf = inf - (sup - pages) in + let supersup = sup + (1 - inf) in + let n,m = + if inf >= 1 && sup <= pages then + inf,sup + else if inf < 1 then + 1, (if supersup <= pages then supersup else pages) + else (* sup > pages *) + (if superinf >= 1 then superinf else 1),pages + in + fold_n_to_m + (fun n acc -> acc ^ " " ^ + (if n = page then string_of_int n + else page_link (string_of_int n) n)) + n m ""); + tag "PREV_LINK", (if page > 1 then page_link "Prev" (page-1) else ""); + tag "NEXT_LINK", + (if page < pages then page_link "Next" (page+1) else ""); + tag "QUERY_KIND", query_kind; + tag "QUERY_SUMMARY", summary; + tag "RESULTS", results ] + | `Error msg -> + [ tag "PAGE", "1"; + tag "PAGES", "1 Page"; + tag "PAGELIST", ""; + tag "PREV_LINK", ""; + tag "NEXT_LINK", ""; + tag "QUERY_KIND", query_kind; + tag "QUERY_SUMMARY", "error"; + tag "RESULTS", msg ] + in + let advanced = + try + req#param "advanced" + with Http_types.Param_not_found _ -> "no" in let subst = - (search_engine_url_RE, my_own_url) :: - (results_RE, results_string):: - (advanced_tag_RE, req#param "advanced"):: - (expression_tag_RE, req#param "expression"):: + (tag "SEARCH_ENGINE_URL", my_own_url) :: + (tag "ADVANCED", advanced) :: + (tag "EXPRESSION", html_encode (req#param "expression")) :: add_param_substs req#params @ - (if req#param "advanced" = "no" then - [ simple_checked_RE, "checked='true'"; - advanced_checked_RE, "" ] + (if advanced = "no" then + [ tag "SIMPLE_CHECKED", "checked='true'"; + tag "ADVANCED_CHECKED", "" ] else - [ simple_checked_RE, ""; - advanced_checked_RE, "checked='true'" ]) + [ tag "SIMPLE_CHECKED", ""; + tag "ADVANCED_CHECKED", "checked='true'" ]) @ + subst in iter_file (fun line -> - let new_aliases = - CicTextualParser2.EnvironmentP3.to_string id_to_uris - in + let new_aliases = DisambiguatePp.pp_environment id_to_uris in let processed_line = apply_substs (* CSC: Bug here: this is a string, not an array! *) - ((new_aliases_RE, "'" ^ javascript_quote new_aliases ^ "'")::subst) + ((tag "NEW_ALIASES", "'" ^ javascript_quote new_aliases ^ "'") :: + subst) line in output_string outchan (processed_line ^ "\n")) moogle_TPL -let exec_action dbh (req: Http_types.request) outchan = +let exec_action dbd (req: Http_types.request) outchan = let term_str = req#param "expression" in - let (context, metasenv) = ([], []) in - let id_to_uris_raw = - try req#param "aliases" - with Http_types.Param_not_found _ -> "" - in - let parse_interpretation_choices choices = - List.map int_of_string (Pcre.split ~pat:" " choices) in - let parse_choices choices_raw = - let choices = Pcre.split ~pat:";" choices_raw in + try + if req#path = "/elim" && + not (Pcre.pmatch ~rex:identifier_RE term_str || + Pcre.pmatch ~rex:qualified_mutind_RE term_str) then + raise Not_a_MutInd; + let (context, metasenv) = ([], []) in + let id_to_uris_raw = + try req#param "aliases" + with Http_types.Param_not_found _ -> "" + in + let parse_interpretation_choices choices = + List.map int_of_string (Pcre.split ~pat:" " choices) in + let parse_choices choices_raw = + let choices = Pcre.split ~pat:";" choices_raw in List.fold_left (fun f x -> match Pcre.split ~pat:"\\s" x with @@ -172,116 +282,136 @@ let exec_action dbh (req: Http_types.request) outchan = | id::tail when id<>"" -> (fun id' -> if id = id' then - Some (List.map (fun u -> Netencoding.Url.decode u) tail) + Some (List.map + (fun u -> UriManager.uri_of_string + (Netencoding.Url.decode u)) + tail) else f id') | _ -> failwith "Can't parse choices") (fun _ -> None) choices - in - let id_to_uris = CicTextualParser2.EnvironmentP3.of_string id_to_uris_raw in - let id_to_choices = - try - parse_choices (req#param "choices") - with Http_types.Param_not_found _ -> (fun _ -> None) - in - let interpretation_choices = - try - let choices_raw = req#param "interpretation_choices" in - if choices_raw = "" then None - else Some (parse_interpretation_choices choices_raw) - with Http_types.Param_not_found _ -> None - in - let module Chat: DisambiguateTypes.Callbacks = - struct - let interactive_user_uri_choice ~selection_mode ?ok - ?enable_button_for_non_vars ~(title: string) ~(msg: string) - ~(id: string) (choices: string list) - = - match id_to_choices id with - | Some choices -> choices - | None -> assert false + in + let id_to_uris = DisambiguatePp.parse_environment id_to_uris_raw in + let id_to_choices = + try + parse_choices (req#param "choices") + with Http_types.Param_not_found _ -> (fun _ -> None) + in + let interpretation_choices = + try + let choices_raw = req#param "interpretation_choices" in + if choices_raw = "" then None + else Some (parse_interpretation_choices choices_raw) + with Http_types.Param_not_found _ -> None + in + let module Chat: DisambiguateTypes.Callbacks = + struct + let interactive_user_uri_choice ~selection_mode ?ok + ?enable_button_for_non_vars ~(title: string) ~(msg: string) + ~(id: string) (choices: UriManager.uri list) + = + match id_to_choices id with + | Some choices -> choices + | None -> List.filter nonvar choices - let interactive_interpretation_choice interpretations = - match interpretation_choices with - | Some l -> prerr_endline "CARRAMBA" ; l - | None -> - let html_interpretations = - MooglePp.html_of_interpretations interpretations - in - Http_daemon.send_basic_headers ~code:(`Code 200) outchan ; - Http_daemon.send_CRLF outchan ; - iter_file - (fun line -> - let processed_line = - apply_substs - [advanced_tag_RE, req#param "advanced"; - interpretations_RE, html_interpretations; - current_choices_tag_RE, req#param "choices"; - expression_tag_RE, req#param "expression"; - action_tag_RE, string_tail req#path ] - line - in - output_string outchan (processed_line ^ "\n")) - interactive_interpretation_choice_TPL; - raise Chat_unfinished + let interactive_interpretation_choice interpretations = + match interpretation_choices with + | Some l -> l + | None -> + let html_interpretations = + MooglePp.html_of_interpretations interpretations + in + Http_daemon.send_basic_headers ~code:(`Code 200) outchan ; + Http_daemon.send_CRLF outchan ; + let advanced = + try + req#param "advanced" + with Http_types.Param_not_found _ -> "no" + in + let query_kind = query_kind_of_req req in + iter_file + (fun line -> + let processed_line = + apply_substs + [ tag "SEARCH_ENGINE_URL", my_own_url; + tag "ADVANCED", advanced; + tag "INTERPRETATIONS", html_interpretations; + tag "CURRENT_CHOICES", req#param "choices"; + tag "EXPRESSION", html_encode (req#param "expression"); + tag "QUERY_KIND", query_kind; + tag "QUERY_SUMMARY", "disambiguation"; + tag "ACTION", string_tail req#path ] + line + in + output_string outchan (processed_line ^ "\n")) + choices_TPL; + raise Chat_unfinished - let input_or_locate_uri ~title ?id () = - match id with - | Some id -> raise (Unbound_identifier id) - | None -> assert false - end - in - let module Disambiguate' = Disambiguate.Make(Chat) in - let ast = CicTextualParser2.parse_term (Stream.of_string term_str) in - let (id_to_uris, metasenv, term) = - match - Disambiguate'.disambiguate_term dbh context metasenv ast id_to_uris - with - | [id_to_uris,metasenv,term] -> id_to_uris,metasenv,term - | _ -> assert false - in - let uris = - match req#path with - | "/match" -> MetadataQuery.match_term ~dbh term - | "/hint" -> - let status = ProofEngineTypes.initial_status term [] in - let intros = PrimitiveTactics.intros_tac () in - let subgoals = ProofEngineTypes.apply_tactic intros status in - (match subgoals with - | proof, [goal] -> - let (uri,metasenv,bo,ty) = proof in - List.map fst (MetadataQuery.hint ~dbh (proof, goal)) - | _ -> assert false) - | "/elim" -> - let uri = - match term with - | Cic.MutInd (uri, typeno, _) -> - UriManager.string_of_uriref (uri, [typeno]) - | _ -> assert false - in - MetadataQuery.elim ~dbh uri - | _ -> assert false - in - send_results ~id_to_uris (`Results uris) req outchan + let input_or_locate_uri ~title ?id () = + match id with + | Some id -> raise (Unbound_identifier id) + | None -> assert false + end + in + let module Disambiguate' = Disambiguate.Make(Chat) in + let ast = Grammar.Entry.parse CicNotationParser.term (Stream.of_string term_str) in + let (id_to_uris, metasenv, term) = + match + Disambiguate'.disambiguate_term ~dbd ~context ~metasenv + ~aliases:id_to_uris ast + with + | [id_to_uris,metasenv,term,_] -> id_to_uris,metasenv,term + | _ -> assert false + in + let uris = + match req#path with + | "/match" -> MetadataQuery.match_term ~dbd term + | "/instance" -> MetadataQuery.instance ~dbd term + | "/hint" -> + let status = ProofEngineTypes.initial_status term metasenv in + let intros = PrimitiveTactics.intros_tac () in + let subgoals = ProofEngineTypes.apply_tactic intros status in + (match subgoals with + | proof, [goal] -> + let (uri,metasenv,bo,ty) = proof in + List.map fst (MetadataQuery.experimental_hint ~dbd (proof, goal)) + | _ -> assert false) + | "/elim" -> + let uri = + match term with + | Cic.MutInd (uri, typeno, _) -> + UriManager.uri_of_uriref uri typeno None + | _ -> raise Not_a_MutInd + in + MetadataQuery.elim ~dbd uri + | _ -> assert false + in + let uris = List.map UriManager.string_of_uri uris in + send_results ~id_to_uris (`Results uris) req outchan + with + | Not_a_MutInd -> + send_results (`Error (MooglePp.pp_error "Not an inductive type" + ("elim requires as input an identifier corresponding to an inductive" + ^ " type"))) + req outchan -let callback dbh (req: Http_types.request) outchan = +let callback dbd (req: Http_types.request) outchan = try debug_print (sprintf "Received request: %s" req#path); (match req#path with | "/getpage" -> (* TODO implement "is_permitted" *) - (let is_permitted _ = true in - let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in - let page = remove_fragment (req#param "url") in + (let is_permitted page = not (Pcre.pmatch ~pat:"/" page) in + let page = req#param "url" in + let fname = sprintf "%s/%s" pages_dir page in let preprocess = (try bool_of_string (req#param "preprocess") with Invalid_argument _ | Http_types.Param_not_found _ -> false) in (match page with - | page when is_permitted page -> - (let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in + | page when is_permitted page && Sys.file_exists fname -> Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_header "Content-Type" "text/html" outchan; Http_daemon.send_CRLF outchan; @@ -290,15 +420,15 @@ let callback dbh (req: Http_types.request) outchan = (fun line -> output_string outchan ((apply_substs - ((search_engine_url_RE, my_own_url) :: - (advanced_tag_RE, "no") :: - (results_RE, "") :: + ((tag "SEARCH_ENGINE_URL", my_own_url) :: + (tag "ADVANCED", "no") :: + (tag "RESULTS", "") :: add_param_substs req#params) line) ^ "\n")) fname end else - Http_daemon.send_file ~src:(Http_types.FileSrc fname) outchan) + Http_daemon.send_file ~src:(Http_types.FileSrc fname) outchan | page -> Http_daemon.respond_forbidden ~url:page outchan)) | "/help" -> Http_daemon.respond ~body:daemon_name outchan | "/locate" -> @@ -311,21 +441,25 @@ let callback dbh (req: Http_types.request) outchan = in if expression = "" then send_results (`Results []) req outchan - else - let results = MetadataQuery.locate ~dbh expression in + else begin + let results = MetadataQuery.locate ~dbd expression in + let results = List.map UriManager.string_of_uri results in send_results (`Results results) req outchan + end | "/hint" | "/elim" - | "/match" -> exec_action dbh req outchan + | "/instance" + | "/match" -> exec_action dbd req outchan | invalid_request -> Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) outchan); debug_print (sprintf "%s done!" req#path) with + | Chat_unfinished -> () | Http_types.Param_not_found attr_name -> bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan - | CicTextualParser2.Parse_error msg -> - send_results (`Error (MooglePp.pp_error "Parse_error" msg)) req outchan + | CicNotationParser.Parse_error (_, msg) -> + send_results (`Error (MooglePp.pp_error "Parse error" msg)) req outchan | Unbound_identifier id -> send_results (`Error (MooglePp.pp_error "Unbound identifier" id)) req outchan @@ -335,16 +469,37 @@ let callback dbh (req: Http_types.request) outchan = let msg = MooglePp.pp_error "Uncaught exception" exn_string in send_results (`Error msg) req outchan +let restore_environment () = + match + Helm_registry.get_opt Helm_registry.string "search_engine.environment_dump" + with + | None -> () + | Some fname -> + printf "Restoring Cic environment from %s ... " fname; flush stdout; + let ic = open_in fname in + CicEnvironment.restore_from_channel ic; + close_in ic; + printf "done!\n"; flush stdout + +let read_notation () = + CicNotation.load_notation (Helm_registry.get "search_engine.notations"); + CicNotation.load_notation (Helm_registry.get "search_engine.interpretations") + let _ = printf "%s started and listening on port %d\n" daemon_name port; printf "Current directory is %s\n" (Sys.getcwd ()); printf "HTML directory is %s\n" pages_dir; flush stdout; Unix.putenv "http_proxy" ""; - let dbh = - new DB.connection ~host:(Helm_registry.get "db.host") - ~user:(Helm_registry.get "db.user") (Helm_registry.get "db.database") + let dbd = + Mysql.quick_connect + ~host:(Helm_registry.get "db.host") + ~database:(Helm_registry.get "db.database") + ~user:(Helm_registry.get "db.user") + () in - Http_daemon.start' ~port (callback dbh); + restore_environment (); + read_notation (); + Http_daemon.start' ~port (callback dbd); printf "%s is terminating, bye!\n" daemon_name