From 3c7ca719c304eb7de7d8d4e9a90ebe0db8d8ecab Mon Sep 17 00:00:00 2001 From: Claudio Sacerdoti Coen Date: Mon, 23 Jun 2003 16:10:58 +0000 Subject: [PATCH] Branch V7_3_new_exportation merged. --- helm/searchEngine/.cvsignore | 1 + helm/searchEngine/Makefile | 28 ++ helm/searchEngine/searchEngine.ml | 674 ++++++++++++++++++++++++++++++ 3 files changed, 703 insertions(+) create mode 100644 helm/searchEngine/.cvsignore create mode 100644 helm/searchEngine/Makefile create mode 100644 helm/searchEngine/searchEngine.ml diff --git a/helm/searchEngine/.cvsignore b/helm/searchEngine/.cvsignore new file mode 100644 index 000000000..6ecb17c4d --- /dev/null +++ b/helm/searchEngine/.cvsignore @@ -0,0 +1 @@ +*.cm[ioxa] *.o searchEngine searchEngine.opt diff --git a/helm/searchEngine/Makefile b/helm/searchEngine/Makefile new file mode 100644 index 000000000..a2287e77d --- /dev/null +++ b/helm/searchEngine/Makefile @@ -0,0 +1,28 @@ +REQUIRES = http helm-cic_textual_parser helm-cic_proof_checking \ + helm-xml gdome2-xslt helm-cic_unification helm-mathql \ + helm-mathql_interpreter helm-mathql_generator netclient +OCAMLOPTIONS = -package "$(REQUIRES)" -pp camlp4o -I ../gTopLevel +OCAMLC = ocamlfind ocamlc $(OCAMLOPTIONS) +OCAMLOPT = ocamlfind ocamlopt $(OCAMLOPTIONS) + +GTOPLEVEL_MODULES = disambiguate +MODULES = +CMOS = $(patsubst %,%.cmo,$(MODULES)) +CMXS = $(patsubst %,%.cmx,$(MODULES)) +LIBRARIES = $(patsubst %,../gTopLevel/%.cmo,$(GTOPLEVEL_MODULES)) +LIBRARIES_OPT = $(patsubst %,../gTopLevel/%.cmx,$(GTOPLEVEL_MODULES)) + +all: byte +world: byte opt +byte: searchEngine +opt: searchEngine.opt + +searchEngine: $(LIBRARIES) $(CMOS) searchEngine.ml + $(OCAMLC) -linkpkg -o $@ $^ +searchEngine.opt: $(LIBRARIES_OPT) $(CMXS) searchEngine.ml + $(OCAMLOPT) -linkpkg -o $@ $^ + +clean: + rm -f *.cm[aiox] *.o searchEngine{,.opt} + +.PHONY: all opt clean diff --git a/helm/searchEngine/searchEngine.ml b/helm/searchEngine/searchEngine.ml new file mode 100644 index 000000000..13331c27b --- /dev/null +++ b/helm/searchEngine/searchEngine.ml @@ -0,0 +1,674 @@ +(* Copyright (C) 2002, HELM Team. + * + * This file is part of HELM, an Hypertextual, Electronic + * Library of Mathematics, developed at the Computer Science + * Department, University of Bologna, Italy. + * + * HELM is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * HELM is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with HELM; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + * + * For details, see the HELM World-Wide-Web page, + * http://cs.unibo.it/helm/. + *) + +open Http_types ;; + +let debug = true;; +let debug_print s = if debug then prerr_endline s;; +Http_common.debug := true;; +(* Http_common.debug := true;; *) + + (** accepted HTTP servers for ask_uwobo method forwarding *) +let valid_servers = + [ "mowgli.cs.unibo.it:58080" ; "mowgli.cs.unibo.it" ; "localhost:58080" ];; + +let mqi_flags = [] (* default MathQL interpreter options *) + +open Printf;; + +let daemon_name = "Search Engine";; +let default_port = 58085;; +let port_env_var = "SEARCH_ENGINE_PORT";; + +let pages_dir = + try + Sys.getenv "SEARCH_ENGINE_HTML_DIR" + with Not_found -> "html" (* relative to searchEngine's document root *) +;; +let interactive_user_uri_choice_TPL = pages_dir ^ "/templateambigpdq1.html";; +let interactive_interpretation_choice_TPL = + pages_dir ^ "/templateambigpdq2.html";; +let constraints_choice_TPL = pages_dir ^ "/constraints_choice_template.html";; +let final_results_TPL = pages_dir ^ "/templateambigpdq3.html";; + +exception Chat_unfinished + + (* build a bool from a 1-character-string *) +let bool_of_string' = function + | "0" -> false + | "1" -> true + | s -> failwith ("Can't parse a boolean from string: " ^ s) +;; + + (* build an int option from a string *) +let int_of_string' = function + | "_" -> None + | s -> + try + Some (int_of_string s) + with Failure "int_of_string" -> + failwith ("Can't parse an int option from string: " ^ s) +;; + +let is_concl_pos pos = + pos = "http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion" + or + pos = "http://www.cs.unibo.it/helm/schemas/schema-helm#InConclusion" +;; + +let is_main_pos pos = + pos = "http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion" + or + pos = "http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis" +;; + + (* HTML pretty printers for mquery_generator types *) + +let html_of_r_obj (uri, pos, depth) = + sprintf + "%s%s%s" + uri (Str.string_after pos ((String.rindex pos '#') + 1)) + (if is_main_pos pos then + sprintf "" + (match depth with Some i -> string_of_int i | None -> "") + else + "") +;; + +let html_of_r_rel (pos, depth) = + sprintf + "%s" + pos (match depth with Some i -> string_of_int i | None -> "") +;; + +let html_of_r_sort (pos, depth, sort) = + sprintf + "%s%s" + sort pos (match depth with Some i -> string_of_int i | None -> "") +;; + + (** pretty print a MathQL query result to an HELM theory file *) +let theory_of_result result = + let results_no = List.length result in + if results_no > 0 then + let mode = if results_no > 10 then "linkonly" else "typeonly" in + let results = + let idx = ref (results_no + 1) in + List.fold_right + (fun (uri,attrs) i -> + decr idx ; + "" ^ string_of_int !idx ^ "." ^ i + ) result "" + in + "

Query Results:

" ^ results ^ "
" + else + "

Query Results:

No results found!

" +;; + +let pp_result result = + "\nQuery Results\n" ^ theory_of_result result ^ "" +;; + + (** chain application of Pcre substitutions *) +let rec apply_substs substs line = + match substs with + | [] -> line + | (rex, templ) :: rest -> apply_substs rest (Pcre.replace ~rex ~templ line) + (** fold like function on files *) +let fold_file f init fname = + let inchan = open_in fname in + let rec fold_lines' value = + try + let line = input_line inchan in + fold_lines' (f value line) + with End_of_file -> value + in + let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in + close_in inchan; + res + (** iter like function on files *) +let iter_file f = fold_file (fun _ line -> f line) () + +let (title_tag_RE, choices_tag_RE, msg_tag_RE, id_to_uris_RE, id_RE, + interpretations_RE, interpretations_labels_RE, results_RE, new_aliases_RE, + form_RE, variables_initialization_RE) + = + (Pcre.regexp "@TITLE@", Pcre.regexp "@CHOICES@", Pcre.regexp "@MSG@", + Pcre.regexp "@ID_TO_URIS@", Pcre.regexp "@ID@", + Pcre.regexp "@INTERPRETATIONS@", Pcre.regexp "@INTERPRETATIONS_LABELS@", + Pcre.regexp "@RESULTS@", Pcre.regexp "@NEW_ALIASES@", Pcre.regexp "@FORM@", + Pcre.regexp "@VARIABLES_INITIALIZATION@") +let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$" + +exception NotAnInductiveDefinition + +let port = + try + int_of_string (Sys.getenv port_env_var) + with + | Not_found -> default_port + | Failure "int_of_string" -> + prerr_endline "Warning: invalid port, reverting to default"; + default_port +;; + +let pp_error = sprintf "

Error: %s

";; + +let bad_request body outchan = + Http_daemon.respond_error ~status:(`Client_error `Bad_request) ~body outchan +;; + +let contype = "Content-Type", "text/html";; + +(* SEARCH ENGINE functions *) + +let whole_statement_universe = + ["http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion" ; + "http://www.cs.unibo.it/helm/schemas/schema-helm#InConclusion" ; + "http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis" ; + "http://www.cs.unibo.it/helm/schemas/schema-helm#InHypothesis"] +;; + +let only_conclusion_universe = + ["http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion" ; + "http://www.cs.unibo.it/helm/schemas/schema-helm#InConclusion"] +;; + +let refine_constraints (constr_obj, constr_rel, constr_sort) = + function + "/searchPattern" -> + whole_statement_universe, + (constr_obj, constr_rel, constr_sort), + (Some constr_obj, Some constr_rel, Some constr_sort) + | "/matchConclusion" -> + let constr_obj' = + List.map + (function (uri,pos,_) -> (uri,pos,None)) + (List.filter + (function (uri,pos,depth) as constr -> is_concl_pos pos) + constr_obj) + in + only_conclusion_universe, + (*CSC: we must select the must constraints here!!! *) + (constr_obj',[],[]),(Some constr_obj', None, None) + | _ -> assert false +in + +let get_constraints term = + function + "/locateInductivePrinciple" -> + let uri = + match term with + Cic.MutInd (uri,t,_) -> MQueryUtil.string_of_uriref (uri,[t]) + | _ -> raise NotAnInductiveDefinition + in + let constr_obj = + [uri,"http://www.cs.unibo.it/helm/schemas/schema-helm#InHypothesis", + None ; + uri,"http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis", + Some 0 + ] + in + let constr_rel = + ["http://www.cs.unibo.it/helm/schemas/schema-helm#MainConclusion", + None] in + let constr_sort = + ["http://www.cs.unibo.it/helm/schemas/schema-helm#MainHypothesis", + Some 1, "http://www.cs.unibo.it/helm/schemas/schema-helm#Prop"] + in + whole_statement_universe, + (constr_obj, constr_rel, constr_sort), (None,None,None) + | req_path -> + let must = MQueryLevels2.get_constraints term in + refine_constraints must req_path +in + +(* + format: + ':' ':' ':' ':' ':' + + ::= ('0'|'1') ('_'|) (',' ('0'|'1') ('_'|))* + ::= '0'|'1' +*) +let add_user_constraints ~constraints + ((obj, rel, sort), (only_obj, only_rel, only_sort)) += + let parse_must s = + let l = Pcre.split ~pat:"," s in + (try + List.map + (fun s -> + let subs = Pcre.extract ~pat:"^(.)(\\d+|_)$" s in + (bool_of_string' subs.(1), int_of_string' subs.(2))) + l + with + Not_found -> failwith ("Can't parse constraint string: " ^ constraints) + ) + in + (* to be used on "obj" *) + let add_user_must33 user_must must = + List.map2 + (fun (b, i) (p1, p2, p3) -> if b then (p1, p2, i) else (p1, p2, None)) + user_must must + in + (* to be used on "rel" *) + let add_user_must22 user_must must = + List.map2 + (fun (b, i) (p1, p2) -> if b then (p1, i) else (p1, None)) + user_must must + in + (* to be used on "sort" *) + let add_user_must32 user_must must = + List.map2 + (fun (b, i) (p1, p2, p3) -> if b then (p1, i, p3) else (p1, None, p3)) + user_must must + in + match Pcre.split ~pat:":" constraints with + | [user_obj;user_rel;user_sort;user_only_obj;user_only_rel;user_only_sort] -> + let + (user_obj,user_rel,user_sort,user_only_obj,user_only_rel,user_only_sort) + = + (parse_must user_obj, + parse_must user_rel, + parse_must user_sort, + bool_of_string' user_only_obj, + bool_of_string' user_only_rel, + bool_of_string' user_only_sort) + in + let only' = + (if user_only_obj then only_obj else None), + (if user_only_rel then only_rel else None), + (if user_only_sort then only_sort else None) + in + let must' = + add_user_must33 user_obj obj, + add_user_must22 user_rel rel, + add_user_must32 user_sort sort + in + (must', only') + | _ -> failwith ("Can't parse constraint string: " ^ constraints) +in + +(* HTTP DAEMON CALLBACK *) + +let callback (req: Http_types.request) outchan = + try + debug_print (sprintf "Received request: %s" req#path); + (match req#path with + | "/execute" -> + let mqi_handle = MQIConn.init mqi_flags debug_print in + let query_string = req#param "query" in + let lexbuf = Lexing.from_string query_string in + let query = MQueryUtil.query_of_text lexbuf in + let result = MQueryInterpreter.execute mqi_handle query in + let result_string = pp_result result in + MQIConn.close mqi_handle; + Http_daemon.respond ~body:result_string ~headers:[contype] outchan + | "/locate" -> + let mqi_handle = MQIConn.init mqi_flags debug_print in + let id = req#param "id" in + let query = MQueryGenerator.locate id in + let result = MQueryInterpreter.execute mqi_handle query in + MQIConn.close mqi_handle; + Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan + | "/getpage" -> + (* TODO implement "is_permitted" *) + (let is_permitted _ = true in + let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in + let page = remove_fragment (req#param "url") in + let preprocess = + (try + bool_of_string (req#param "preprocess") + with Invalid_argument _ | Http_types.Param_not_found _ -> false) + in + (match page with + | page when is_permitted page -> + (let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in + Http_daemon.send_basic_headers ~code:200 outchan; + Http_daemon.send_header "Content-Type" "text/html" outchan; + Http_daemon.send_CRLF outchan; + if preprocess then begin + iter_file + (fun line -> + output_string outchan + ((apply_substs + (List.map + (function (key,value) -> + let key' = + (Pcre.extract ~pat:"param\\.(.*)" key).(1) + in + Pcre.regexp ("@" ^ key' ^ "@"), value + ) + (List.filter + (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key) + req#params) + ) + line) ^ + "\n")) + fname + end else + Http_daemon.send_file ~src:(FileSrc fname) outchan) + | page -> Http_daemon.respond_forbidden ~url:page outchan)) + | "/ask_uwobo" -> + let url = req#param "url" in + let server_and_port = + (Pcre.extract ~rex:server_and_port_url_RE url).(1) + in + if List.mem server_and_port valid_servers then + Http_daemon.respond + ~headers:["Content-Type", "text/html"] + ~body:(Http_client.Convenience.http_get url) + outchan + else + Http_daemon.respond + ~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port)) + outchan + | "/searchPattern" + | "/matchConclusion" + | "/locateInductivePrinciple" -> + let mqi_handle = MQIConn.init mqi_flags debug_print in + let term_string = req#param "term" in + let lexbuf = Lexing.from_string term_string in + let (context, metasenv) = ([], []) in + let (dom, mk_metasenv_and_expr) = + CicTextualParserContext.main + ~context ~metasenv CicTextualLexer.token lexbuf + in + let id_to_uris_raw = req#param "aliases" in + let tokens = Pcre.split ~pat:"\\s" id_to_uris_raw in + let rec parse_tokens keys lookup = function (* TODO spostarla fuori *) + | [] -> keys, lookup + | "alias" :: key :: value :: rest -> + let key' = CicTextualParser0.Id key in + parse_tokens + (key'::keys) + (fun id -> + if id = key' then + Some + (CicTextualParser0.Uri (MQueryMisc.cic_textual_parser_uri_of_string value)) + else lookup id) + rest + | _ -> failwith "Can't parse aliases" + in + let parse_choices choices_raw = + let choices = Pcre.split ~pat:";" choices_raw in + List.fold_left + (fun f x -> + match Pcre.split ~pat:"\\s" x with + | ""::id::tail + | id::tail when id<>"" -> + (fun id' -> +prerr_endline ("#### " ^ id ^ " :="); +List.iter (fun u -> prerr_endline ("<" ^ Netencoding.Url.decode u ^ ">")) tail; + if id = id' then + Some (List.map (fun u -> Netencoding.Url.decode u) tail) + else + f id') + | _ -> failwith "Can't parse choices") + (fun _ -> None) + choices + in + let (id_to_uris : Disambiguate.domain_and_interpretation) = + parse_tokens [] (fun _ -> None) tokens in + let id_to_choices = + try + let choices_raw = req#param "choices" in + parse_choices choices_raw + with Http_types.Param_not_found _ -> (fun _ -> None) + in + let module Chat: Disambiguate.Callbacks = + struct + + let get_metasenv () = + !CicTextualParser0.metasenv + + let set_metasenv metasenv = + CicTextualParser0.metasenv := metasenv + + let output_html = prerr_endline + + let interactive_user_uri_choice + ~selection_mode ?ok + ?enable_button_for_non_vars ~(title: string) ~(msg: string) + ~(id: string) (choices: string list) + = + (match id_to_choices id with + | Some choices -> choices + | None -> + let msg = Pcre.replace ~pat:"\'" ~templ:"\\\'" msg in + (match selection_mode with + | `SINGLE -> assert false + | `EXTENDED -> + Http_daemon.send_basic_headers ~code:200 outchan ; + Http_daemon.send_CRLF outchan ; + iter_file + (fun line -> + let formatted_choices = + String.concat "," + (List.map (fun uri -> sprintf "\'%s\'" uri) choices) + in + let processed_line = + apply_substs + [title_tag_RE, title; + choices_tag_RE, formatted_choices; + msg_tag_RE, msg; + id_to_uris_RE, id_to_uris_raw; + id_RE, id] + line + in + output_string outchan (processed_line ^ "\n")) + interactive_user_uri_choice_TPL; + raise Chat_unfinished)) + + let interactive_interpretation_choice interpretations = + let html_interpretations_labels = + String.concat ", " + (List.map + (fun l -> + "\'" ^ + (String.concat "
" + (List.map + (fun (id, value) -> + (sprintf "alias %s %s" id value)) + l)) ^ + "\'") + interpretations) + in + let html_interpretations = + String.concat ", " + (List.map + (fun l -> + "\'" ^ + (String.concat " " + (List.map + (fun (id, value) -> + (sprintf "alias %s %s" + id + (MQueryMisc.wrong_xpointer_format_from_wrong_xpointer_format' + value))) + l)) ^ + "\'") + interpretations) + in + Http_daemon.send_basic_headers ~code:200 outchan ; + Http_daemon.send_CRLF outchan ; + iter_file + (fun line -> + let processed_line = + apply_substs + [interpretations_RE, html_interpretations; + interpretations_labels_RE, html_interpretations_labels] + line + in + output_string outchan (processed_line ^ "\n")) + interactive_interpretation_choice_TPL; + raise Chat_unfinished + + let input_or_locate_uri ~title = + UriManager.uri_of_string "cic:/Coq/Init/DataTypes/nat_ind.con" + + end + in + let module Disambiguate' = Disambiguate.Make (Chat) in + let (id_to_uris', metasenv', term') = + Disambiguate'.disambiguate_input mqi_handle + context metasenv dom mk_metasenv_and_expr id_to_uris + in + (match metasenv' with + | [] -> + let universe, + ((must_obj, must_rel, must_sort) as must'), + ((only_obj, only_rel, only_sort) as only) = + get_constraints term' req#path + in + let must'', only' = + (try + add_user_constraints + ~constraints:(req#param "constraints") + (must', only) + with Http_types.Param_not_found _ -> + let variables = + "var aliases = '" ^ id_to_uris_raw ^ "';\n" ^ + "var constr_obj_len = " ^ + string_of_int (List.length must_obj) ^ ";\n" ^ + "var constr_rel_len = " ^ + string_of_int (List.length must_rel) ^ ";\n" ^ + "var constr_sort_len = " ^ + string_of_int (List.length must_sort) ^ ";\n" in + let form = + (if must_obj = [] then "" else + "

Obj constraints

" ^ + "" ^ + (String.concat "\n" (List.map html_of_r_obj must_obj)) ^ + "
" ^ + (* The following three lines to make Javascript create *) + (* the constr_obj[] and obj_depth[] arrays even if we *) + (* have only one real entry. *) + "" ^ + "") ^ + (if must_rel = [] then "" else + "

Rel constraints

" ^ + "" ^ + (String.concat "\n" (List.map html_of_r_rel must_rel)) ^ + "
" ^ + (* The following two lines to make Javascript create *) + (* the constr_rel[] and rel_depth[] arrays even if *) + (* we have only one real entry. *) + "" ^ + "") ^ + (if must_sort = [] then "" else + "

Sort constraints

" ^ + "" ^ + (String.concat "\n" (List.map html_of_r_sort must_sort)) ^ + "
" ^ + (* The following two lines to make Javascript create *) + (* the constr_sort[] and sort_depth[] arrays even if *) + (* we have only one real entry. *) + "" ^ + "") ^ + "

Only constraints

" ^ + "Enforce Only constraints for objects: " ^ + "
" ^ + "Enforce Rel constraints for objects: " ^ + "
" ^ + "Enforce Sort constraints for objects: " ^ + "
" + in + Http_daemon.send_basic_headers ~code:200 outchan ; + Http_daemon.send_CRLF outchan ; + iter_file + (fun line -> + let processed_line = + apply_substs + [form_RE, form ; + variables_initialization_RE, variables] line + in + output_string outchan (processed_line ^ "\n")) + constraints_choice_TPL; + raise Chat_unfinished) + in + let query = + MQueryGenerator.query_of_constraints (Some universe) must'' only' + in + let results = MQueryInterpreter.execute mqi_handle query in + Http_daemon.send_basic_headers ~code:200 outchan ; + Http_daemon.send_CRLF outchan ; + iter_file + (fun line -> + let new_aliases = + match id_to_uris' with + | (domain, f) -> + String.concat ", " + (List.map + (fun name -> + sprintf "\'alias %s cic:%s\'" + (match name with + CicTextualParser0.Id name -> name + | _ -> assert false (*CSC: completare *)) + (match f name with + | None -> assert false + | Some (CicTextualParser0.Uri t) -> + MQueryMisc.string_of_cic_textual_parser_uri + t + | _ -> assert false (*CSC: completare *))) + domain) + in + let processed_line = + apply_substs + [results_RE, theory_of_result results ; + new_aliases_RE, new_aliases] + line + in + output_string outchan (processed_line ^ "\n")) + final_results_TPL + | _ -> (* unable to instantiate some implicit variable *) + Http_daemon.respond + ~headers:[contype] + ~body:"some implicit variables are still unistantiated :-(" + outchan); + MQIConn.close mqi_handle + | invalid_request -> + Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan); + debug_print (sprintf "%s done!" req#path) + with + | Chat_unfinished -> prerr_endline "Chat unfinished, Try again!" + | Http_types.Param_not_found attr_name -> + bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan + | exc -> + Http_daemon.respond + ~body:(pp_error ("Uncaught exception: " ^ (Printexc.to_string exc))) + outchan +in +printf "%s started and listening on port %d\n" daemon_name port; +printf "Current directory is %s\n" (Sys.getcwd ()); +printf "HTML directory is %s\n" pages_dir; +flush stdout; +Unix.putenv "http_proxy" ""; +Http_daemon.start' ~port callback; +printf "%s is terminating, bye!\n" daemon_name + -- 2.39.2