(* Copyright (C) 2002, HELM Team.
*
* This file is part of HELM, an Hypertextual, Electronic
* Library of Mathematics, developed at the Computer Science
* Department, University of Bologna, Italy.
*
* HELM is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* HELM is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HELM; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston,
* MA 02111-1307, USA.
*
* For details, see the HELM World-Wide-Web page,
* http://cs.unibo.it/helm/.
*)
module T = MQGTypes
module U = MQGUtil
module G = MQueryGenerator
module C = MQIConn
open Http_types ;;
let debug = true;;
let debug_print s = if debug then prerr_endline s;;
Http_common.debug := true;;
(* Http_common.debug := true;; *)
open Printf;;
let daemon_name = "Search Engine";;
let string_tail s =
let len = String.length s in
String.sub s 1 (len-1)
(* First of all we load the configuration *)
let _ =
let configuration_file = "/projects/helm/etc/searchEngine.conf.xml" in
Helm_registry.load_from configuration_file
;;
let port = Helm_registry.get_int "search_engine.port";;
let pages_dir = Helm_registry.get "search_engine.html_dir";;
(** accepted HTTP servers for ask_uwobo method forwarding *)
let valid_servers= Helm_registry.get_string_list "search_engine.valid_servers";;
let interactive_user_uri_choice_TPL = pages_dir ^ "/moogle_chat1.html";;
let interactive_interpretation_choice_TPL = pages_dir ^ "/moogle_chat2.html";;
let constraints_choice_TPL = pages_dir ^ "/moogle_constraints_choice.html";;
(* let final_results_TPL = pages_dir ^ "/templateambigpdq3.html";; *)
let start_TPL = pages_dir ^ "/moogle.html";;
let final_results_TPL = pages_dir ^ "/moogle.html";;
let my_own_url =
let ic = Unix.open_process_in "hostname -f" in
let hostname = input_line ic in
ignore (Unix.close_process_in ic);
sprintf "http://%s:%d" hostname port
;;
exception Chat_unfinished
exception Invalid_action of string (* invalid action for "/search" method *)
let javascript_quote s =
let rex = Pcre.regexp "'" in
let rex' = Pcre.regexp "\"" in
Pcre.replace ~rex ~templ:"\\'"
(Pcre.replace ~rex:rex' ~templ:"\\\"" s)
;;
(* build a bool from a 1-character-string *)
let bool_of_string' = function
| "0" -> false
| "1" -> true
| s -> failwith ("Can't parse a boolean from string: " ^ s)
;;
(* build an int option from a string *)
let int_of_string' = function
| "_" -> None
| s ->
try
Some (int_of_string s)
with Failure "int_of_string" ->
failwith ("Can't parse an int option from string: " ^ s)
;;
(* HTML pretty printers for mquery_generator types *)
let html_of_r_obj (pos, uri) =
sprintf
"
%s
%s
%s
"
uri (U.text_of_position pos)
(if U.is_main_position pos then
sprintf ""
(U.text_of_depth pos "")
else
"")
;;
let html_of_r_rel pos =
sprintf
"
"
(U.text_of_sort sort) (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "")
;;
(** pretty print a MathQL query result to an HELM theory file *)
let theory_of_result result =
let results_no = List.length result in
if results_no > 0 then
let mode = if results_no > 10 then "linkonly" else "typeonly" in
let results =
let idx = ref (results_no + 1) in
List.fold_right
(fun uri i ->
decr idx ;
"
" ^ string_of_int !idx ^ ".
" ^ i
) result ""
in
"Query Results:
" ^ results ^ "
"
else
"Query Results:
No results found!
"
;;
let pp_result result =
"\nQuery Results\n" ^ theory_of_result result ^ ""
;;
(** chain application of Pcre substitutions *)
let rec apply_substs substs line =
match substs with
| [] -> line
| (rex, templ) :: rest -> apply_substs rest (Pcre.replace ~rex ~templ line)
(** fold like function on files *)
let fold_file f init fname =
let inchan = open_in fname in
let rec fold_lines' value =
try
let line = input_line inchan in
fold_lines' (f value line)
with End_of_file -> value
in
let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in
close_in inchan;
res
(** iter like function on files *)
let iter_file f = fold_file (fun _ line -> f line) ()
let (expression_tag_RE,
action_tag_RE,
advanced_tag_RE,
title_tag_RE, no_choices_tag_RE, current_choices_tag_RE,
choices_tag_RE, msg_tag_RE, id_to_uris_RE, id_RE, iden_tag_RE,
interpretations_RE, interpretations_labels_RE, results_RE, new_aliases_RE,
form_RE, variables_initialization_RE, search_engine_url_RE)
=
(Pcre.regexp "@EXPRESSION@",
Pcre.regexp "@ACTION@",
Pcre.regexp "@ADVANCED@",
Pcre.regexp "@TITLE@", Pcre.regexp "@NO_CHOICES@",
Pcre.regexp "@CURRENT_CHOICES@",
Pcre.regexp "@CHOICES@", Pcre.regexp "@MSG@",
Pcre.regexp "@ID_TO_URIS@", Pcre.regexp "@ID@", Pcre.regexp "@IDEN@",
Pcre.regexp "@INTERPRETATIONS@", Pcre.regexp "@INTERPRETATIONS_LABELS@",
Pcre.regexp "@RESULTS@", Pcre.regexp "@NEW_ALIASES@", Pcre.regexp "@FORM@",
Pcre.regexp "@VARIABLES_INITIALIZATION@", Pcre.regexp "@SEARCH_ENGINE_URL@")
let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$"
let pp_error = sprintf "
Error: %s
";;
let bad_request body outchan =
Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) ~body
outchan
;;
let contype = "Content-Type", "text/html";;
(* SEARCH ENGINE functions *)
let get_constraints term =
function
| "/elim" ->
None,
(CGLocateInductive.get_constraints term),
(None,None,None)
| "/match" ->
let constr_obj, constr_rel, constr_sort =
CGSearchPattern.get_constraints term in
(Some CGSearchPattern.universe),
(constr_obj, constr_rel, constr_sort),
(Some constr_obj, Some constr_rel, Some constr_sort)
| "/hint" ->
let list_of_must, only = CGMatchConclusion.get_constraints [] [] term in
(* FG: there is no way to choose the block number ***************************)
let block = pred (List.length list_of_must) in
(Some CGMatchConclusion.universe),
(List.nth list_of_must block, [], []), (Some only, None, None)
| _ -> assert false
;;
(*
format:
':' ':' ':' ':' ':' ::= ('0'|'1') ('_'|) (',' ('0'|'1') ('_'|))*
::= '0'|'1'
*)
let add_user_constraints ~constraints
((obj, rel, sort), (only_obj, only_rel, only_sort))
=
let parse_must s =
let l = Pcre.split ~pat:"," s in
(try
List.map
(fun s ->
let subs = Pcre.extract ~pat:"^(.)(\\d+|_)$" s in
(bool_of_string' subs.(1), int_of_string' subs.(2)))
l
with
Not_found -> failwith ("Can't parse constraint string: " ^ constraints)
)
in
(* to be used on "obj" *)
let add_user_must33 user_must must =
List.map2
(fun (b, i) (p, u) ->
if b then Some (U.set_full_position p i, u) else None)
user_must must
in
(* to be used on "rel" *)
let add_user_must22 user_must must =
List.map2
(fun (b, i) p -> if b then Some (U.set_main_position p i) else None)
user_must must
in
(* to be used on "sort" *)
let add_user_must32 user_must must =
List.map2
(fun (b, i) (p, s)-> if b then Some (U.set_main_position p i, s) else None)
user_must must
in
match Pcre.split ~pat:":" constraints with
| [user_obj;user_rel;user_sort;user_only_obj;user_only_rel;user_only_sort] ->
let
(user_obj,user_rel,user_sort,user_only_obj,user_only_rel,user_only_sort)
=
(parse_must user_obj,
parse_must user_rel,
parse_must user_sort,
bool_of_string' user_only_obj,
bool_of_string' user_only_rel,
bool_of_string' user_only_sort)
in
let only' =
(if user_only_obj then only_obj else None),
(if user_only_rel then only_rel else None),
(if user_only_sort then only_sort else None)
in
let must' =
let rec filter_some =
function
[] -> []
| None::tl -> filter_some tl
| (Some x)::tl -> x::(filter_some tl)
in
filter_some (add_user_must33 user_obj obj),
filter_some (add_user_must22 user_rel rel),
filter_some (add_user_must32 user_sort sort)
in
(must', only')
| _ -> failwith ("Can't parse constraint string: " ^ constraints)
in
let send_results results
?(id_to_uris = DisambiguatingParser.EnvironmentP3.of_string "")
(req: Http_types.request) outchan
=
Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
Http_daemon.send_header "Content-Type" "text/xml" outchan;
Http_daemon.send_CRLF outchan ;
let subst =
(search_engine_url_RE, my_own_url) ::
(results_RE, theory_of_result results)::
(advanced_tag_RE, req#param "advanced")::
(expression_tag_RE, req#param "expression")::
(List.map
(function (key,value) ->
let key' = (Pcre.extract ~pat:"param\\.(.*)" key).(1) in
Pcre.regexp ("@" ^ key' ^ "@"), value)
(List.filter
(fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key)
req#params))
in
iter_file
(fun line ->
let new_aliases =
DisambiguatingParser.EnvironmentP3.to_string id_to_uris
in
let processed_line =
apply_substs
(* CSC: Bug here: this is a string, not an array! *)
((new_aliases_RE, "'" ^ javascript_quote new_aliases ^ "'")::subst)
line
in
output_string outchan (processed_line ^ "\n"))
final_results_TPL
in
let exec_action mqi_handle (req: Http_types.request) outchan =
let term_string = req#param "expression" in
let (context, metasenv) = ([], []) in
let id_to_uris_raw =
try req#param "aliases"
with Http_types.Param_not_found _ -> ""
in
let parse_interpretation_choices choices =
List.map int_of_string (Pcre.split ~pat:" " choices) in
let parse_choices choices_raw =
let choices = Pcre.split ~pat:";" choices_raw in
List.fold_left
(fun f x ->
match Pcre.split ~pat:"\\s" x with
| ""::id::tail
| id::tail when id<>"" ->
(fun id' ->
if id = id' then
Some (List.map (fun u -> Netencoding.Url.decode u) tail)
else
f id')
| _ -> failwith "Can't parse choices")
(fun _ -> None)
choices
in
let id_to_uris =
DisambiguatingParser.EnvironmentP3.of_string id_to_uris_raw in
let id_to_choices =
try
let choices_raw = req#param "choices" in
parse_choices choices_raw
with Http_types.Param_not_found _ -> (fun _ -> None)
in
let interpretation_choices =
try
let choices_raw = req#param "interpretation_choices" in
if choices_raw = "" then None
else Some (parse_interpretation_choices choices_raw)
with Http_types.Param_not_found _ -> None
in
let module Chat: DisambiguateTypes.Callbacks =
struct
let interactive_user_uri_choice
~selection_mode ?ok
?enable_button_for_non_vars ~(title: string) ~(msg: string)
~(id: string) (choices: string list)
=
(match id_to_choices id with
| Some choices -> choices
| None ->
if req#param "advanced" = "no" then
let isvar s =
let len = String.length s in
let suffix = String.sub s (len-4) 4 in
not (suffix = ".var") in
List.filter isvar choices
else
let msg = Pcre.replace ~pat:"\'" ~templ:"\\\'" msg in
(match selection_mode with
| `SINGLE -> assert false
| `MULTIPLE ->
Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
Http_daemon.send_CRLF outchan ;
let check_box uri =
"" ^ "" ^ uri ^ "" in
(* aggiungere gli hyperlinks? *)
let check_boxes =
String.concat " "
(List.map check_box choices) in
iter_file
(fun line ->
let processed_line =
apply_substs
[advanced_tag_RE, req#param "advanced";
choices_tag_RE, check_boxes;
no_choices_tag_RE,
string_of_int (List.length choices);
iden_tag_RE, id;
current_choices_tag_RE, req#param "choices";
expression_tag_RE, req#param "expression";
action_tag_RE, string_tail req#path ]
line
in
output_string outchan (processed_line ^ "\n"))
interactive_user_uri_choice_TPL;
raise Chat_unfinished))
let interactive_interpretation_choice interpretations =
match interpretation_choices with
Some l -> prerr_endline "CARRAMBA" ; l
| None ->
let html_interpretations =
let radio_button n =
"" in
let text interp =
String.concat " "
(List.map
(fun (id, value) ->
sprintf "%s = %s" id value)
interp) in
let rec aux n =
function
[] -> []
| interp::tl ->
((radio_button n)^(text interp))::(aux (n+1) tl) in
String.concat " " (aux 0 interpretations)
in
Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
Http_daemon.send_CRLF outchan ;
iter_file
(fun line ->
let processed_line =
apply_substs
[advanced_tag_RE, req#param "advanced";
interpretations_RE, html_interpretations;
current_choices_tag_RE, req#param "choices";
expression_tag_RE, req#param "expression";
action_tag_RE, string_tail req#path ]
line
in
output_string outchan (processed_line ^ "\n"))
interactive_interpretation_choice_TPL;
raise Chat_unfinished
let input_or_locate_uri ~title ?id () =
assert false
end
in
let module Disambiguate' = DisambiguatingParser.Make(Chat) in
let (id_to_uris', metasenv', term') =
match
Disambiguate'.disambiguate_term mqi_handle
context metasenv term_string id_to_uris
with
[id_to_uris',metasenv',term'] -> id_to_uris',metasenv',term'
| _ -> assert false
in
let universe,
((must_obj, must_rel, must_sort) as must'),
((only_obj, only_rel, only_sort) as only) =
get_constraints term' req#path
in
if
(try ignore (req#param "constraints"); false
with Http_types.Param_not_found _ -> true) &&
(req#param "advanced" = "no") && (req#path = "/hint")
then
let dbd =
match mqi_handle.MQIConn.pgc with
| MQIConn.MySQL_C conn -> conn
| _ -> assert false
in
let results = List.map snd (Match_concl.cmatch dbd term') in
send_results results ~id_to_uris:id_to_uris' req outchan
else
let must'', only' =
(try
add_user_constraints
~constraints:(req#param "constraints")
(must', only)
with Http_types.Param_not_found _ ->
if req#param "advanced" = "no" then
(must',only)
else
let variables =
"var aliases = '" ^ id_to_uris_raw ^ "';\n" ^
"var constr_obj_len = " ^
string_of_int (List.length must_obj) ^ ";\n" ^
"var constr_rel_len = " ^
string_of_int (List.length must_rel) ^ ";\n" ^
"var constr_sort_len = " ^
string_of_int (List.length must_sort) ^ ";\n" in
let form =
(if must_obj = [] then "" else
"
" ^
(* The following three lines to make Javascript create *)
(* the constr_obj[] and obj_depth[] arrays even if we *)
(* have only one real entry. *)
"" ^
"") ^
(if must_rel = [] then "" else
"
" ^
(* The following two lines to make Javascript create *)
(* the constr_rel[] and rel_depth[] arrays even if *)
(* we have only one real entry. *)
"" ^
"") ^
(if must_sort = [] then "" else
"
" ^
(* The following two lines to make Javascript create *)
(* the constr_sort[] and sort_depth[] arrays even if *)
(* we have only one real entry. *)
"" ^
"") ^
"
Only constraints
" ^
"Enforce Only constraints for objects: " ^
" " ^
"Enforce Rel constraints for objects: " ^
" " ^
"Enforce Sort constraints for objects: " ^
" "
in
Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
Http_daemon.send_CRLF outchan ;
iter_file
(fun line ->
let processed_line =
apply_substs
[form_RE, form ;
variables_initialization_RE, variables;
advanced_tag_RE, req#param "advanced";
current_choices_tag_RE, req#param "choices";
interpretations_RE, req#param "interpretation_choices";
expression_tag_RE, req#param "expression";
action_tag_RE, string_tail req#path] line
in
output_string outchan (processed_line ^ "\n"))
constraints_choice_TPL;
raise Chat_unfinished)
in
let query =
G.query_of_constraints universe must'' only'
in
let results = MQueryInterpreter.execute mqi_handle query in
send_results (List.map fst results) ~id_to_uris:id_to_uris' req outchan
in
(* HTTP DAEMON CALLBACK *)
let build_dynamic_uri url params =
let p =
String.concat "&" (List.map (fun (key,value) -> (key ^ "=" ^ (Netencoding.Url.encode value))) params) in
url ^ "?" ^ p
in
let build_uwobo_request (req: Http_types.request) outchan =
prerr_endline ("ECCOLO: " ^ req#param "param.SEARCH_ENGINE_URL");
let xmluri = build_dynamic_uri ((req#param "param.SEARCH_ENGINE_URL") ^ "/search") req#params in
prerr_endline ("xmluri: " ^ xmluri);
(*let xmluri = Netencoding.Url.encode xmluri in*)
let server_and_port = req#param "param.processorURL" in
let newreq =
build_dynamic_uri
(server_and_port ^ "apply")
(("xmluri",xmluri)::("keys",(req#param "param.thkeys"))::req#params) in
(* if List.mem server_and_port valid_servers then *)
prerr_endline newreq;
if true then
Http_daemon.respond
~headers:["Content-Type", "text/html"]
~body:(Http_client.http_get newreq)
outchan
else
Http_daemon.respond
~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port ^
(String.concat "\n" valid_servers)))
outchan
in
let proxy url outchan =
let server_and_port =
(Pcre.extract ~rex:server_and_port_url_RE url).(1)
in
if List.mem server_and_port valid_servers then
Http_daemon.respond
~headers:["Content-Type", "text/html"]
~body:(Http_client.http_get url)
outchan
else
Http_daemon.respond
~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port))
outchan
in
let callback mqi_handle (req: Http_types.request) outchan =
try
debug_print (sprintf "Received request: %s" req#path);
(match req#path with
| "/help" -> Http_daemon.respond ~body:"HELM Search Engine" outchan
| "/locate" ->
let initial_expression =
try req#param "expression" with Http_types.Param_not_found _ -> ""
in
let expression =
Pcre.replace ~pat:"\\s*$"
(Pcre.replace ~pat:"^\\s*" initial_expression)
in
if expression = "" then
send_results [] req outchan
else
let results =
let query = G.locate expression in
MQueryInterpreter.execute mqi_handle query
in
send_results (List.map fst results) req outchan
| "/execute" ->
let query_string = req#param "query" in
let lexbuf = Lexing.from_string query_string in
let query = MQueryUtil.query_of_text lexbuf in
let result = MQueryInterpreter.execute mqi_handle query in
let result_string = pp_result (List.map fst result) in
Http_daemon.respond ~body:result_string ~headers:[contype] outchan
(* Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan *)
| "/unreferred" ->
let target = req#param "target" in
let source = req#param "source" in
let query = G.unreferred target source in
let result = MQueryInterpreter.execute mqi_handle query in
Http_daemon.respond ~headers:[contype]
~body:(pp_result (List.map fst result)) outchan
| "/getpage" ->
(* TODO implement "is_permitted" *)
let _ = prerr_endline
(Netencoding.Url.encode "http://mowgli.cs.unibo.it:38080/") in
(let is_permitted _ = true in
let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in
let page = remove_fragment (req#param "url") in
let preprocess =
(try
bool_of_string (req#param "preprocess")
with Invalid_argument _ | Http_types.Param_not_found _ -> false)
in
(match page with
| page when is_permitted page ->
(let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in
Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
Http_daemon.send_header "Content-Type" "text/html" outchan;
Http_daemon.send_CRLF outchan;
if preprocess then begin
iter_file
(fun line ->
output_string outchan
((apply_substs
((search_engine_url_RE, my_own_url) ::
(advanced_tag_RE, "no") ::
(results_RE, "") ::
(List.map
(function (key,value) ->
let key' =
(Pcre.extract ~pat:"param\\.(.*)" key).(1)
in
Pcre.regexp ("@" ^ key' ^ "@"), value
)
(List.filter
(fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key)
req#params)
))
line) ^
"\n"))
fname
end else
Http_daemon.send_file ~src:(FileSrc fname) outchan)
| page -> Http_daemon.respond_forbidden ~url:page outchan))
(* OLD | "/ask_uwobo" -> proxy (req#param "url") outchan *)
| "/ask_uwobo" -> build_uwobo_request req outchan
| "/hint"
| "/match"
| "/elim" ->
exec_action mqi_handle req outchan
| invalid_request ->
Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request))
outchan);
debug_print (sprintf "%s done!" req#path)
with
| Chat_unfinished -> prerr_endline "Chat unfinished, Try again!"
| Http_types.Param_not_found attr_name ->
bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan
| exc ->
let msg = sprintf "Uncaught exception: %s" (Printexc.to_string exc) in
debug_print msg ;
Http_daemon.respond ~body:(pp_error msg) outchan
in
printf "%s started and listening on port %d\n" daemon_name port;
printf "Current directory is %s\n" (Sys.getcwd ());
printf "HTML directory is %s\n" pages_dir;
flush stdout;
Unix.putenv "http_proxy" "";
let mqi_handle = C.init ~log:debug_print () in
Http_daemon.start' ~port (callback mqi_handle);
C.close mqi_handle;
printf "%s is terminating, bye!\n" daemon_name