1 (* Copyright (C) 2002, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20 * MA 02111-1307, USA.
22 * For details, see the HELM World-Wide-Web page,
23 * http://cs.unibo.it/helm/.
24 *)
(* Short alias for the MathQL query generator; used further down as *)
(* G.locate, G.unreferred and G.query_of_constraints. *)
28 module G = MQueryGenerator
(* Writes s to stderr when the debug flag is set. *)
(* NOTE(review): the binding of debug is not visible in this listing. *)
34 let debug_print s = if debug then prerr_endline s;;
(* Sets the debug flag of Http_common (presumably enabling the HTTP *)
(* library diagnostics -- confirm against that library). *)
35 Http_common.debug := true;;
36 (* Http_common.debug := true;; *)
(* Human-readable name printed in the start-up and termination banners. *)
40 let daemon_name = "Search Engine";;
(** [string_tail s] is [s] without its first character.  It is applied to the
    request path to obtain the action name written into the HTML templates.
    @raise Invalid_argument if [s] is empty *)
let string_tail s =
  String.sub s 1 (String.length s - 1)
;;
(* Loads the service configuration into Helm_registry from a hard-coded XML *)
(* file.  The registry is queried right below for the port, the HTML *)
(* directory and the list of accepted servers. *)
(* NOTE(review): the line that opens this top-level binding is missing from *)
(* this listing. *)
46 (* First of all we load the configuration *)
48 let configuration_file = "/projects/helm/etc/searchEngine.conf.xml" in
49 Helm_registry.load_from configuration_file
(* Settings read from the registry, followed by the HTML template paths *)
(* derived from pages_dir. *)
(* TCP port: handed to the HTTP daemon at start-up and embedded in the URL *)
(* this server advertises for itself. *)
52 let port = Helm_registry.get_int "search_engine.port";;
(* Directory holding the HTML templates and the pages served by the callback. *)
54 let pages_dir = Helm_registry.get "search_engine.html_dir";;
56 (** accepted HTTP servers for ask_uwobo method forwarding *)
57 let valid_servers= Helm_registry.get_string_list "search_engine.valid_servers";;
(* Templates of the intermediate pages: URI choice, interpretation choice *)
(* and constraints choice. *)
59 let interactive_user_uri_choice_TPL = pages_dir ^ "/moogle_chat1.html";;
60 let interactive_interpretation_choice_TPL = pages_dir ^ "/moogle_chat2.html";;
61 let constraints_choice_TPL = pages_dir ^ "/moogle_constraints_choice.html";;
62 (* let final_results_TPL = pages_dir ^ "/templateambigpdq3.html";; *)
(* NOTE(review): start_TPL and final_results_TPL point at the same file. *)
63 let start_TPL = pages_dir ^ "/moogle.html";;
64 let final_results_TPL = pages_dir ^ "/moogle.html";;
(* Computes the base URL of this server: runs hostname -f, reads the first *)
(* line of its output, closes the pipe (ignoring the exit status) and formats *)
(* the result together with the configured port. *)
(* NOTE(review): the let line naming this value is missing from this listing; *)
(* presumably it binds my_own_url, which is used below -- confirm. *)
(* NOTE(review): input_line raises End_of_file when the command prints *)
(* nothing, and in that case the pipe is never closed. *)
67 let ic = Unix.open_process_in "hostname -f" in
68 let hostname = input_line ic in
69 ignore (Unix.close_process_in ic);
70 sprintf "http://%s:%d" hostname port
(* Raised after an intermediate HTML page has been written to the client, to *)
(* abandon the rest of the request; the callback handler only logs it. *)
73 exception Chat_unfinished
(* NOTE(review): no raise or handler for Invalid_action is visible in this *)
(* listing. *)
74 exception Invalid_action of string (* invalid action for "/search" method *)
(** [javascript_quote s] returns [s] with a backslash inserted in front of
    every single-quote and every double-quote character, so that the result
    can be placed between quotes in generated Javascript.

    The string is scanned once into a [Buffer]; no regular expression is
    compiled per call.

    NOTE(review): backslashes and line breaks are passed through unchanged,
    exactly as before; confirm that callers never pass such input. *)
let javascript_quote s =
  let buf = Buffer.create (String.length s + 8) in
  String.iter
    (fun c ->
      (match c with
       | '\'' | '"' -> Buffer.add_char buf '\\'
       | _ -> ());
      Buffer.add_char buf c)
    s;
  Buffer.contents buf
;;
(* Parses a flag from a string; any string that is not recognised is *)
(* reported with Failure. *)
(* NOTE(review): the arms for the accepted values are missing from this *)
(* listing.  The constraint grammar further down writes these flags as 0 and *)
(* 1, so presumably those map to false and true -- confirm. *)
83 (* build a bool from a 1-character-string *)
84 let bool_of_string' = function
87 | s -> failwith ("Can't parse a boolean from string: " ^ s)
(* Parses an optional integer: a string accepted by int_of_string yields *)
(* Some of its value, a malformed number is reported with Failure. *)
(* NOTE(review): lines are missing here; presumably an underscore maps to *)
(* None, as in the constraint grammar further down -- confirm. *)
(* NOTE(review): the handler matches the exact message carried by Failure, *)
(* so it depends on the wording used by int_of_string. *)
90 (* build an int option from a string *)
91 let int_of_string' = function
95 Some (int_of_string s)
96 with Failure "int_of_string" ->
97 failwith ("Can't parse an int option from string: " ^ s)
(* Renders one object constraint (pos, uri) as an HTML table row: a *)
(* pre-checked constr_obj checkbox, the uri, the textual position, and a *)
(* depth cell.  The depth cell is an editable obj_depth text field when pos *)
(* is a main position, and a hidden obj_depth field in the other case. *)
(* NOTE(review): the sprintf call line and the else keyword are missing from *)
(* this listing. *)
(* NOTE(review): uri is inserted into the markup without escaping. *)
100 (* HTML pretty printers for mquery_generator types *)
102 let html_of_r_obj (pos, uri) =
104 "<tr><td><input type='checkbox' name='constr_obj' checked='on'/></td><td>%s</td><td>%s</td><td>%s</td></tr>"
105 uri (U.text_of_position pos)
106 (if U.is_main_position pos then
107 sprintf "<input name='obj_depth' size='2' type='text' value='%s' />"
108 (U.text_of_depth pos "")
110 "<input type=\"hidden\" name=\"obj_depth\" />")
(* Renders one rel constraint as an HTML table row: a pre-checked constr_rel *)
(* checkbox, the textual position and an editable rel_depth field.  pos is *)
(* coerced to T.full_position before being printed. *)
(* NOTE(review): the sprintf call line is missing from this listing. *)
113 let html_of_r_rel pos =
115 "<tr><td><input type='checkbox' name='constr_rel' checked='on'/></td><td>%s</td><td><input name='rel_depth' size='2' type='text' value='%s' /></td></tr>"
116 (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "")
(* Renders one sort constraint (pos, sort) as an HTML table row: a *)
(* pre-checked constr_sort checkbox, the textual sort, the textual position *)
(* and an editable sort_depth field.  pos is coerced to T.full_position *)
(* before being printed. *)
(* NOTE(review): the sprintf call line is missing from this listing. *)
119 let html_of_r_sort (pos, sort) =
121 "<tr><td><input type='checkbox' name='constr_sort' checked='on'/></td><td>%s</td><td>%s</td><td><input name='sort_depth' size='2' type='text' value='%s'/></td></tr>"
122 (U.text_of_sort sort) (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "")
(* Builds the HTML fragment that lists the URIs in result: a numbered table *)
(* with one ht:OBJECT row per URI, or a no-results paragraph when the list *)
(* is empty.  Objects are rendered in linkonly mode when there are more than *)
(* 10 results and in typeonly mode otherwise. *)
(* NOTE(review): the fold that builds the rows and updates idx is missing *)
(* from this listing, so the numbering direction cannot be checked here. *)
125 (** pretty print a MathQL query result to an HELM theory file *)
126 let theory_of_result result =
127 let results_no = List.length result in
128 if results_no > 0 then
129 let mode = if results_no > 10 then "linkonly" else "typeonly" in
131 let idx = ref (results_no + 1) in
135 "<tr><td valign=\"top\">" ^ string_of_int !idx ^ ".</td><td><ht:OBJECT uri=\"" ^ uri ^ "\" mode=\"" ^ mode ^ "\"/></td></tr>" ^ i
138 "<b><font size=\"+1\">Query Results:</font></b><table xmlns:ht=\"http://www.cs.unibo.it/helm/namespaces/helm-theory\">" ^ results ^ "</table>"
140 "<b><font size=\"+1\">Query Results:</font></b><p>No results found!</p>"
(** [pp_result result] wraps the fragment produced by [theory_of_result] in a
    complete HTML document that declares the helm-theory namespace. *)
let pp_result result =
  String.concat ""
    [ "<html xmlns:ht=\"http://www.cs.unibo.it/helm/namespaces/helm-theory\">\n<head><title>Query Results</title><style> A { text-decoration: none } </style></head>\n<body>";
      theory_of_result result;
      "</body></html>" ]
;;
(** [apply_substs substs line] applies every [(rex, templ)] pair of [substs]
    to [line] with [Pcre.replace], from left to right; each substitution works
    on the output of the previous one.  With an empty list, [line] is returned
    unchanged. *)
let apply_substs substs line =
  List.fold_left
    (fun acc (rex, templ) -> Pcre.replace ~rex ~templ acc)
    line substs
(** [fold_file f init fname] folds [f] over the lines of the file [fname],
    from first to last, starting from [init]: the result is
    [f (... (f (f init line1) line2) ...) lineN].

    The channel is closed before returning, and also when [f] or the read
    raises; the exception is then re-raised.  The loop is tail-recursive:
    the end-of-file handler wraps only the single read, not the recursive
    call, so stack use does not grow with the number of lines.  As a
    consequence an [End_of_file] raised by [f] itself is propagated instead
    of being mistaken for the end of the file.

    @raise Sys_error if [fname] cannot be opened *)
let fold_file f init fname =
  let inchan = open_in fname in
  let next_line () =
    try Some (input_line inchan) with End_of_file -> None
  in
  let rec loop acc =
    match next_line () with
    | Some line -> loop (f acc line)
    | None -> acc
  in
  let res = (try loop init with e -> (close_in inchan; raise e)) in
  close_in inchan;
  res
(** [iter_file f fname] calls [f] on every line of the file [fname], from
    first to last.  It is [fold_file] with a unit accumulator. *)
let iter_file f fname = fold_file (fun () line -> f line) () fname
(* Precompiled regexps for the @NAME@ placeholders that are replaced in the *)
(* HTML templates, all bound through one tuple pattern. *)
(* NOTE(review): the pattern names for @ACTION@ and @ADVANCED@ and the equals *)
(* sign of the binding are missing from this listing; action_tag_RE and *)
(* advanced_tag_RE are the names used further down. *)
167 let (expression_tag_RE,
170 title_tag_RE, no_choices_tag_RE, current_choices_tag_RE,
171 choices_tag_RE, msg_tag_RE, id_to_uris_RE, id_RE, iden_tag_RE,
172 interpretations_RE, interpretations_labels_RE, results_RE, new_aliases_RE,
173 form_RE, variables_initialization_RE, search_engine_url_RE)
175 (Pcre.regexp "@EXPRESSION@",
176 Pcre.regexp "@ACTION@",
177 Pcre.regexp "@ADVANCED@",
178 Pcre.regexp "@TITLE@", Pcre.regexp "@NO_CHOICES@",
179 Pcre.regexp "@CURRENT_CHOICES@",
180 Pcre.regexp "@CHOICES@", Pcre.regexp "@MSG@",
181 Pcre.regexp "@ID_TO_URIS@", Pcre.regexp "@ID@", Pcre.regexp "@IDEN@",
182 Pcre.regexp "@INTERPRETATIONS@", Pcre.regexp "@INTERPRETATIONS_LABELS@",
183 Pcre.regexp "@RESULTS@", Pcre.regexp "@NEW_ALIASES@", Pcre.regexp "@FORM@",
184 Pcre.regexp "@VARIABLES_INITIALIZATION@", Pcre.regexp "@SEARCH_ENGINE_URL@")
(* Captures, as group 1, everything between the http scheme prefix and the *)
(* next slash of an absolute URL; proxy compares that group to valid_servers. *)
185 let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$"
(** [pp_error msg] returns a minimal HTML page that shows [msg] as an error
    heading.

    Ampersand, less-than and greater-than characters of [msg] are replaced by
    the corresponding HTML entities, because callers pass text derived from
    the request (server names, exception messages) that must not be
    interpreted as markup.  Messages without those characters produce the
    same page as before. *)
let pp_error msg =
  let buf = Buffer.create (String.length msg + 16) in
  String.iter
    (function
      | '&' -> Buffer.add_string buf "&amp;"
      | '<' -> Buffer.add_string buf "&lt;"
      | '>' -> Buffer.add_string buf "&gt;"
      | c -> Buffer.add_char buf c)
    msg;
  Printf.sprintf "<html><body><h1>Error: %s</h1></body></html>"
    (Buffer.contents buf)
;;
(* Replies with an HTTP 400 Bad Request error whose body is the given text. *)
(* NOTE(review): the final argument of respond_error (presumably outchan) is *)
(* on a line missing from this listing. *)
189 let bad_request body outchan =
190 Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) ~body
(* Content-Type header pair passed to Http_daemon.respond for HTML bodies. *)
194 let contype = "Content-Type", "text/html";;
(* Computes the query constraints for term.  It is called further down as *)
(* get_constraints term path, and three variants are visible, built on *)
(* CGLocateInductive, CGSearchPattern and CGMatchConclusion.  The result is *)
(* a triple: an optional universe, the must constraints (obj, rel, sort) and *)
(* the optional only constraints (obj, rel, sort). *)
(* NOTE(review): the match on the request path and its string patterns are *)
(* missing from this listing, so which path selects which variant cannot be *)
(* confirmed here. *)
(* NOTE(review): in the CGMatchConclusion variant block is the index of the *)
(* last element of list_of_must; if that list can be empty the index is -1 *)
(* and List.nth raises Invalid_argument -- confirm it is never empty. *)
196 (* SEARCH ENGINE functions *)
198 let get_constraints term =
202 (CGLocateInductive.get_constraints term),
205 let constr_obj, constr_rel, constr_sort =
206 CGSearchPattern.get_constraints term in
207 (Some CGSearchPattern.universe),
208 (constr_obj, constr_rel, constr_sort),
209 (Some constr_obj, Some constr_rel, Some constr_sort)
211 let list_of_must, only = CGMatchConclusion.get_constraints [] [] term in
212 (* FG: there is no way to choose the block number ***************************)
213 let block = pred (List.length list_of_must) in
214 (Some CGMatchConclusion.universe),
215 (List.nth list_of_must block, [], []), (Some only, None, None)
221 <must_obj> ':' <must_rel> ':' <must_sort> ':' <only_obj> ':' <only_rel> ':' <only_sort>
223 <must_*> ::= ('0'|'1') ('_'|<int>) (',' ('0'|'1') ('_'|<int>))*
(* Restricts the automatically generated constraints with the choices the *)
(* user encoded in the constraints string. *)
(* The string is split on colons and must give exactly six fields: the obj, *)
(* rel and sort must fields followed by the three only flags; any other *)
(* field count fails with Failure.  A must field is a comma-separated list *)
(* of items, each one character (the flag) followed by digits or an *)
(* underscore (the depth); an item of any other shape fails with Failure. *)
(* Entries whose flag is false are dropped; entries that are kept get the *)
(* user depth through U.set_full_position (obj) or U.set_main_position (rel *)
(* and sort).  Each only constraint is kept only when its flag is true. *)
(* NOTE(review): several lines are missing from this listing, among them *)
(* the function that pairs user_must with must and the parsing of user_rel. *)
226 let add_user_constraints ~constraints
227 ((obj, rel, sort), (only_obj, only_rel, only_sort))
230 let l = Pcre.split ~pat:"," s in
234 let subs = Pcre.extract ~pat:"^(.)(\\d+|_)$" s in
235 (bool_of_string' subs.(1), int_of_string' subs.(2)))
238 Not_found -> failwith ("Can't parse constraint string: " ^ constraints)
241 (* to be used on "obj" *)
242 let add_user_must33 user_must must =
244 (fun (b, i) (p, u) ->
245 if b then Some (U.set_full_position p i, u) else None)
248 (* to be used on "rel" *)
249 let add_user_must22 user_must must =
251 (fun (b, i) p -> if b then Some (U.set_main_position p i) else None)
254 (* to be used on "sort" *)
255 let add_user_must32 user_must must =
257 (fun (b, i) (p, s)-> if b then Some (U.set_main_position p i, s) else None)
260 match Pcre.split ~pat:":" constraints with
261 | [user_obj;user_rel;user_sort;user_only_obj;user_only_rel;user_only_sort] ->
263 (user_obj,user_rel,user_sort,user_only_obj,user_only_rel,user_only_sort)
265 (parse_must user_obj,
267 parse_must user_sort,
268 bool_of_string' user_only_obj,
269 bool_of_string' user_only_rel,
270 bool_of_string' user_only_sort)
273 (if user_only_obj then only_obj else None),
274 (if user_only_rel then only_rel else None),
275 (if user_only_sort then only_sort else None)
(* Drops the None entries and unwraps the Some entries, keeping the order. *)
278 let rec filter_some =
281 | None::tl -> filter_some tl
282 | (Some x)::tl -> x::(filter_some tl)
284 filter_some (add_user_must33 user_obj obj),
285 filter_some (add_user_must22 user_rel rel),
286 filter_some (add_user_must32 user_sort sort)
289 | _ -> failwith ("Can't parse constraint string: " ^ constraints)
(* Sends the final results page to outchan: an HTTP 200 response with *)
(* Content-Type text/xml, followed by template lines with their placeholders *)
(* substituted, one output line per template line. *)
(* The substitutions are: the URL of this server, the rendered results, the *)
(* advanced and expression request parameters, one @KEY@ placeholder for *)
(* each request parameter named param.KEY, and the aliases of id_to_uris as *)
(* a quoted Javascript string.  id_to_uris defaults to the environment *)
(* parsed from the empty string. *)
(* Raises Http_types.Param_not_found (caught in callback) when the advanced *)
(* or expression parameter is absent. *)
(* NOTE(review): the lines naming the template file and driving the *)
(* iteration are missing from this listing. *)
292 let send_results results
293 ?(id_to_uris = DisambiguatingParser.EnvironmentP3.of_string "")
294 (req: Http_types.request) outchan
296 Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
297 Http_daemon.send_header "Content-Type" "text/xml" outchan;
298 Http_daemon.send_CRLF outchan ;
300 (search_engine_url_RE, my_own_url) ::
301 (results_RE, theory_of_result results)::
302 (advanced_tag_RE, req#param "advanced")::
303 (expression_tag_RE, req#param "expression")::
305 (function (key,value) ->
306 let key' = (Pcre.extract ~pat:"param\\.(.*)" key).(1) in
307 Pcre.regexp ("@" ^ key' ^ "@"), value)
309 (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key)
315 DisambiguatingParser.EnvironmentP3.to_string id_to_uris
319 (* CSC: Bug here: this is a string, not an array! *)
320 ((new_aliases_RE, "'" ^ javascript_quote new_aliases ^ "'")::subst)
323 output_string outchan (processed_line ^ "\n"))
(* Serves one search request. *)
(* Steps visible in this listing: *)
(* 1. read the expression parameter and the optional aliases, choices and *)
(*    interpretation_choices parameters; *)
(* 2. disambiguate the expression with DisambiguatingParser.Make applied to *)
(*    the local Chat module.  When a Chat callback has no stored answer it *)
(*    writes an intermediate HTML page to outchan and raises *)
(*    Chat_unfinished, which ends the request; *)
(* 3. compute the constraints with get_constraints on the disambiguated *)
(*    term and the request path; *)
(* 4. when there is no constraints parameter, advanced is no and the path is *)
(*    /hint, answer directly through Match_concl.cmatch; *)
(* 5. otherwise apply the user constraints when the constraints parameter is *)
(*    present; when it is absent and advanced is not no, send the *)
(*    constraints choice page and raise Chat_unfinished; *)
(* 6. build the query with G.query_of_constraints, run it and send the *)
(*    results. *)
(* Http_types.Param_not_found escapes for mandatory parameters and is *)
(* handled in callback. *)
(* NOTE(review): many lines are missing from this listing (binding names, *)
(* match headers, else branches), so the exact branch structure of steps 4 *)
(* and 5 should be confirmed against the complete file. *)
327 let exec_action mqi_handle (req: Http_types.request) outchan =
328 let term_string = req#param "expression" in
329 let (context, metasenv) = ([], []) in
331 try req#param "aliases"
332 with Http_types.Param_not_found _ -> ""
(* Interpretation choices arrive as space-separated integers. *)
334 let parse_interpretation_choices choices =
335 List.map int_of_string (Pcre.split ~pat:" " choices) in
(* URI choices arrive as semicolon-separated groups; each group is an *)
(* identifier followed by URL-encoded URIs, separated by whitespace. *)
336 let parse_choices choices_raw =
337 let choices = Pcre.split ~pat:";" choices_raw in
340 match Pcre.split ~pat:"\\s" x with
342 | id::tail when id<>"" ->
345 Some (List.map (fun u -> Netencoding.Url.decode u) tail)
348 | _ -> failwith "Can't parse choices")
353 DisambiguatingParser.EnvironmentP3.of_string id_to_uris_raw in
356 let choices_raw = req#param "choices" in
357 parse_choices choices_raw
358 with Http_types.Param_not_found _ -> (fun _ -> None)
(* None when the parameter is absent or empty. *)
360 let interpretation_choices =
362 let choices_raw = req#param "interpretation_choices" in
363 if choices_raw = "" then None
364 else Some (parse_interpretation_choices choices_raw)
365 with Http_types.Param_not_found _ -> None
(* Callbacks handed to the disambiguating parser. *)
367 let module Chat: DisambiguateTypes.Callbacks =
370 let interactive_user_uri_choice
372 ?enable_button_for_non_vars ~(title: string) ~(msg: string)
373 ~(id: string) (choices: string list)
375 (match id_to_choices id with
376 | Some choices -> choices
378 if req#param "advanced" = "no" then
380 let len = String.length s in
381 let suffix = String.sub s (len-4) 4 in
382 not (suffix = ".var") in
383 List.filter isvar choices
385 let msg = Pcre.replace ~pat:"\'" ~templ:"\\\'" msg in
386 (match selection_mode with
387 | `SINGLE -> assert false
389 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
390 Http_daemon.send_CRLF outchan ;
392 "<input type=\"checkbox\" name=\"param.choices\"
394 uri ^ "\" />" ^ "<b>" ^ uri ^ "</b>" in
395 (* add hyperlinks? *)
397 String.concat "<br />"
398 (List.map check_box choices) in
403 [advanced_tag_RE, req#param "advanced";
404 choices_tag_RE, check_boxes;
406 string_of_int (List.length choices);
408 current_choices_tag_RE, req#param "choices";
409 expression_tag_RE, req#param "expression";
410 action_tag_RE, string_tail req#path ]
413 output_string outchan (processed_line ^ "\n"))
414 interactive_user_uri_choice_TPL;
415 raise Chat_unfinished))
417 let interactive_interpretation_choice interpretations =
418 match interpretation_choices with
419 Some l -> prerr_endline "CARRAMBA" ; l
421 let html_interpretations =
423 "<input type=\"radio\" name=\"param.interp\" value=\"" ^
424 (string_of_int n) ^ "\" />" in
426 String.concat "<br />"
429 sprintf "<span>%s = %s</span>" id value)
435 ((radio_button n)^(text interp))::(aux (n+1) tl) in
436 String.concat "<br />" (aux 0 interpretations)
438 Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
439 Http_daemon.send_CRLF outchan ;
444 [advanced_tag_RE, req#param "advanced";
445 interpretations_RE, html_interpretations;
446 current_choices_tag_RE, req#param "choices";
447 expression_tag_RE, req#param "expression";
448 action_tag_RE, string_tail req#path ]
451 output_string outchan (processed_line ^ "\n"))
452 interactive_interpretation_choice_TPL;
453 raise Chat_unfinished
455 let input_or_locate_uri ~title ?id () =
(* Disambiguation proper; a single resulting interpretation is expected. *)
460 let module Disambiguate' = DisambiguatingParser.Make(Chat) in
461 let (id_to_uris', metasenv', term') =
463 Disambiguate'.disambiguate_term mqi_handle
464 context metasenv term_string id_to_uris
466 [id_to_uris',metasenv',term'] -> id_to_uris',metasenv',term'
470 ((must_obj, must_rel, must_sort) as must'),
471 ((only_obj, only_rel, only_sort) as only) =
472 get_constraints term' req#path
475 (try ignore (req#param "constraints"); false
476 with Http_types.Param_not_found _ -> true) &&
477 (req#param "advanced" = "no") && (req#path = "/hint")
480 match mqi_handle.MQIConn.pgc with
481 | MQIConn.MySQL_C conn -> conn
484 let results = List.map snd (Match_concl.cmatch dbd term') in
485 send_results results ~id_to_uris:id_to_uris' req outchan
490 ~constraints:(req#param "constraints")
492 with Http_types.Param_not_found _ ->
493 if req#param "advanced" = "no" then
497 "var aliases = '" ^ id_to_uris_raw ^ "';\n" ^
498 "var constr_obj_len = " ^
499 string_of_int (List.length must_obj) ^ ";\n" ^
500 "var constr_rel_len = " ^
501 string_of_int (List.length must_rel) ^ ";\n" ^
502 "var constr_sort_len = " ^
503 string_of_int (List.length must_sort) ^ ";\n" in
505 (if must_obj = [] then "" else
506 "<h4>Obj constraints</h4>" ^
508 (String.concat "\n" (List.map html_of_r_obj must_obj)) ^
510 (* The following three lines to make Javascript create *)
511 (* the constr_obj[] and obj_depth[] arrays even if we *)
512 (* have only one real entry. *)
513 "<input type=\"hidden\" name=\"constr_obj\" />" ^
514 "<input type=\"hidden\" name=\"obj_depth\" />") ^
515 (if must_rel = [] then "" else
516 "<h4>Rel constraints</h4>" ^
518 (String.concat "\n" (List.map html_of_r_rel must_rel)) ^
520 (* The following two lines to make Javascript create *)
521 (* the constr_rel[] and rel_depth[] arrays even if *)
522 (* we have only one real entry. *)
523 "<input type=\"hidden\" name=\"constr_rel\" />" ^
524 "<input type=\"hidden\" name=\"rel_depth\" />") ^
525 (if must_sort = [] then "" else
526 "<h4>Sort constraints</h4>" ^
528 (String.concat "\n" (List.map html_of_r_sort must_sort)) ^
530 (* The following two lines to make Javascript create *)
531 (* the constr_sort[] and sort_depth[] arrays even if *)
532 (* we have only one real entry. *)
533 "<input type=\"hidden\" name=\"constr_sort\" />" ^
534 "<input type=\"hidden\" name=\"sort_depth\" />") ^
535 "<h4>Only constraints</h4>" ^
536 "Enforce Only constraints for objects: " ^
537 "<input type='checkbox' name='only_obj'" ^
538 (if only_obj = None then "" else " checked='yes'") ^
540 "Enforce Rel constraints for objects: " ^
541 "<input type='checkbox' name='only_rel'" ^
542 (if only_rel = None then "" else " checked='yes'") ^
544 "Enforce Sort constraints for objects: " ^
545 "<input type='checkbox' name='only_sort'" ^
546 (if only_sort = None then "" else " checked='yes'") ^
549 Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
550 Http_daemon.send_CRLF outchan ;
556 variables_initialization_RE, variables;
557 advanced_tag_RE, req#param "advanced";
558 current_choices_tag_RE, req#param "choices";
559 interpretations_RE, req#param "interpretation_choices";
560 expression_tag_RE, req#param "expression";
561 action_tag_RE, string_tail req#path] line
563 output_string outchan (processed_line ^ "\n"))
564 constraints_choice_TPL;
565 raise Chat_unfinished)
(* Final step: build the MathQL query, run it and send the URIs found. *)
568 G.query_of_constraints universe must'' only'
570 let results = MQueryInterpreter.execute mqi_handle query in
571 send_results (List.map fst results) ~id_to_uris:id_to_uris' req outchan
(* Builds a URL from url and params: the parameters become key=value pairs *)
(* joined with ampersands.  Values are URL-encoded with *)
(* Netencoding.Url.encode; keys are inserted as they are. *)
(* NOTE(review): the lines that bind the query string and attach it to url *)
(* are missing from this listing. *)
574 (* HTTP DAEMON CALLBACK *)
576 let build_dynamic_uri url params =
578 String.concat "&" (List.map (fun (key,value) -> (key ^ "=" ^ (Netencoding.Url.encode value))) params) in
(* Forwards a request to a UWOBO processor and relays the answer as *)
(* text/html.  xmluri is the value of param.SEARCH_ENGINE_URL followed by *)
(* /search, with every request parameter appended.  The forwarded URL is the *)
(* value of param.processorURL followed by apply, with xmluri, keys (taken *)
(* from param.thkeys) and every request parameter appended.  Intermediate *)
(* URLs are logged to stderr. *)
(* NOTE(review): the membership test against valid_servers is commented out *)
(* below, and the condition that selects between the two visible responses *)
(* is on a line missing from this listing.  Both URLs come from request *)
(* parameters, so confirm that the target server is really validated before *)
(* it is fetched. *)
582 let build_uwobo_request (req: Http_types.request) outchan =
583 prerr_endline ("ECCOLO: " ^ req#param "param.SEARCH_ENGINE_URL");
584 let xmluri = build_dynamic_uri ((req#param "param.SEARCH_ENGINE_URL") ^ "/search") req#params in
585 prerr_endline ("xmluri: " ^ xmluri);
586 (*let xmluri = Netencoding.Url.encode xmluri in*)
587 let server_and_port = req#param "param.processorURL" in
590 (server_and_port ^ "apply")
591 (("xmluri",xmluri)::("keys",(req#param "param.thkeys"))::req#params) in
592 (* if List.mem server_and_port valid_servers then *)
593 prerr_endline newreq;
596 ~headers:["Content-Type", "text/html"]
597 ~body:(Http_client.http_get newreq)
601 ~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port ^
602 (String.concat "\n" valid_servers)))
(* Fetches url on behalf of the client when its server part (group 1 of *)
(* server_and_port_url_RE) is listed in valid_servers, and relays the body *)
(* as text/html; for any other server an error page naming it is sent. *)
(* NOTE(review): Pcre.extract presumably raises Not_found when url does not *)
(* match the regexp; no handler is visible here -- confirm. *)
(* NOTE(review): the only visible reference to proxy is in a commented-out *)
(* line of callback. *)
606 let proxy url outchan =
607 let server_and_port =
608 (Pcre.extract ~rex:server_and_port_url_RE url).(1)
610 if List.mem server_and_port valid_servers then
612 ~headers:["Content-Type", "text/html"]
613 ~body:(Http_client.http_get url)
617 ~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port))
(* HTTP daemon callback: dispatches on req#path and writes the response to *)
(* outchan.  Branches visible in this listing: *)
(* - /help answers with a fixed string; *)
(* - a branch trims the expression parameter, answers with empty results *)
(*   when it is blank, and otherwise runs the query built by G.locate; *)
(* - a branch parses the query parameter as MathQL text, executes it and *)
(*   answers with pp_result; *)
(* - a branch runs G.unreferred on the target and source parameters; *)
(* - a branch serves a file from pages_dir named by the url parameter *)
(*   (fragment removed), optionally substituting placeholders when the *)
(*   preprocess parameter parses as true; *)
(* - /ask_uwobo is handled by build_uwobo_request; *)
(* - a branch delegates to exec_action; *)
(* - any other path gets a 400 Bad Request. *)
(* Chat_unfinished is only logged, a missing parameter is reported with *)
(* bad_request, and any other exception is reported in an error page. *)
(* NOTE(review): most path patterns are on lines missing from this listing. *)
(* NOTE(review): is_permitted accepts every page and fname is built from a *)
(* request parameter, so paths outside pages_dir may be reachable -- confirm *)
(* and implement the check announced by the TODO. *)
621 let callback mqi_handle (req: Http_types.request) outchan =
623 debug_print (sprintf "Received request: %s" req#path);
625 | "/help" -> Http_daemon.respond ~body:"HELM Search Engine" outchan
627 let initial_expression =
628 try req#param "expression" with Http_types.Param_not_found _ -> ""
(* Strip leading and trailing whitespace from the expression. *)
631 Pcre.replace ~pat:"\\s*$"
632 (Pcre.replace ~pat:"^\\s*" initial_expression)
634 if expression = "" then
635 send_results [] req outchan
638 let query = G.locate expression in
639 MQueryInterpreter.execute mqi_handle query
641 send_results (List.map fst results) req outchan
643 let query_string = req#param "query" in
644 let lexbuf = Lexing.from_string query_string in
645 let query = MQueryUtil.query_of_text lexbuf in
646 let result = MQueryInterpreter.execute mqi_handle query in
647 let result_string = pp_result (List.map fst result) in
648 Http_daemon.respond ~body:result_string ~headers:[contype] outchan
649 (* Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan *)
651 let target = req#param "target" in
652 let source = req#param "source" in
653 let query = G.unreferred target source in
654 let result = MQueryInterpreter.execute mqi_handle query in
655 Http_daemon.respond ~headers:[contype]
656 ~body:(pp_result (List.map fst result)) outchan
658 (* TODO implement "is_permitted" *)
659 let _ = prerr_endline
660 (Netencoding.Url.encode "http://mowgli.cs.unibo.it:38080/") in
661 (let is_permitted _ = true in
662 let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in
663 let page = remove_fragment (req#param "url") in
666 bool_of_string (req#param "preprocess")
667 with Invalid_argument _ | Http_types.Param_not_found _ -> false)
670 | page when is_permitted page ->
671 (let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in
672 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
673 Http_daemon.send_header "Content-Type" "text/html" outchan;
674 Http_daemon.send_CRLF outchan;
675 if preprocess then begin
678 output_string outchan
680 ((search_engine_url_RE, my_own_url) ::
681 (advanced_tag_RE, "no") ::
684 (function (key,value) ->
686 (Pcre.extract ~pat:"param\\.(.*)" key).(1)
688 Pcre.regexp ("@" ^ key' ^ "@"), value
691 (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key)
698 Http_daemon.send_file ~src:(FileSrc fname) outchan)
699 | page -> Http_daemon.respond_forbidden ~url:page outchan))
700 (* OLD | "/ask_uwobo" -> proxy (req#param "url") outchan *)
701 | "/ask_uwobo" -> build_uwobo_request req outchan
705 exec_action mqi_handle req outchan
707 Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request))
709 debug_print (sprintf "%s done!" req#path)
711 | Chat_unfinished -> prerr_endline "Chat unfinished, Try again!"
712 | Http_types.Param_not_found attr_name ->
713 bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan
715 let msg = sprintf "Uncaught exception: %s" (Printexc.to_string exc) in
717 Http_daemon.respond ~body:(pp_error msg) outchan
(* Daemon entry point: prints the start-up banner (name, port, current and *)
(* HTML directories), sets the http_proxy environment variable to the empty *)
(* string, opens the query handle with C.init using debug_print as logger, *)
(* runs the HTTP daemon on the configured port with callback, and prints a *)
(* farewell message when the daemon returns. *)
(* NOTE(review): the binding of the module alias C and some lines of this *)
(* sequence are missing from this listing; no release of mqi_handle is *)
(* visible. *)
719 printf "%s started and listening on port %d\n" daemon_name port;
720 printf "Current directory is %s\n" (Sys.getcwd ());
721 printf "HTML directory is %s\n" pages_dir;
723 Unix.putenv "http_proxy" "";
724 let mqi_handle = C.init ~log:debug_print () in
725 Http_daemon.start' ~port (callback mqi_handle);
727 printf "%s is terminating, bye!\n" daemon_name