1 (* Copyright (C) 2002, HELM Team.
3 * This file is part of HELM, an Hypertextual, Electronic
4 * Library of Mathematics, developed at the Computer Science
5 * Department, University of Bologna, Italy.
7 * HELM is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * HELM is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with HELM; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22 * For details, see the HELM World-Wide-Web page,
23 * http://cs.unibo.it/helm/.
28 module G = MQueryGenerator
(** [debug_print s] writes [s] and a newline to standard error when the
    file-level [debug] flag is set; otherwise it does nothing. *)
let debug_print s =
  match debug with
  | true -> prerr_endline s
  | false -> ()
;;
(* Set the Http_common.debug flag to true unconditionally: the assignment does
   not consult the [debug] value tested by [debug_print]. What the flag enables
   is decided by the HTTP library and is not visible here. The commented-out
   line below is an exact copy of the active one. *)
35 Http_common.debug := true;;
36 (* Http_common.debug := true;; *)
(* Name of the daemon, printed in the start-up and termination messages. *)
40 let daemon_name = "Search Engine";;
42 (* First of all we load the configuration *)
(* NOTE(review): the binding that encloses the next two lines is not visible in
   this listing. The registry is loaded from a hard-coded absolute path. *)
44 let configuration_file = "/projects/helm/etc/searchEngine.conf.xml" in
45 Helm_registry.load_from configuration_file
(* Directory of the HTML pages and templates, taken from the registry key
   search_engine.html_dir. *)
48 let pages_dir = Helm_registry.get "search_engine.html_dir";;
50 (** accepted HTTP servers for ask_uwobo method forwarding *)
51 let valid_servers= Helm_registry.get_string_list "search_engine.valid_servers";;
(* Paths of the HTML templates, all located under [pages_dir]. Those whose use
   is visible in [callback] are written to the client line by line after their
   @TAG@ placeholders have been substituted. *)
54 let interactive_user_uri_choice_TPL = pages_dir ^ "/templateambigpdq1.html";;
55 let interactive_interpretation_choice_TPL =
56 pages_dir ^ "/templateambigpdq2.html";;
57 let constraints_choice_TPL = pages_dir ^ "/constraints_choice_template.html";;
58 let final_results_TPL = pages_dir ^ "/templateambigpdq3.html";;
(** Raised after an intermediate HTML page (a choice page or the constraints
    form) has been written to the client, in order to abandon the rest of the
    request. [callback] catches it and only prints a message. *)
60 exception Chat_unfinished
(** [javascript_quote s] escapes [s] so that it can be placed inside a
    JavaScript string literal delimited by single or by double quotes.

    Every backslash, single quote and double quote is prefixed with a
    backslash; all other characters are copied unchanged. Backslashes have to
    be escaped too: otherwise an input made of a backslash followed by a quote
    would come out as an escaped backslash followed by a bare quote, which
    terminates the literal early.

    The string is processed in a single pass, so characters added by the
    escaping are never escaped again. *)
let javascript_quote s =
  let buf = Buffer.create (String.length s + 8) in
  String.iter
    (fun c ->
      (match c with
       | '\\' | '\'' | '"' -> Buffer.add_char buf '\\'
       | _ -> ());
      Buffer.add_char buf c)
    s;
  Buffer.contents buf
69 (* build a bool from a 1-character-string *)
(* NOTE(review): only the fallback case is visible in this listing: any string
   not accepted by the preceding cases raises Failure with a message quoting
   it. The constraint grammar documented near [add_user_constraints] uses the
   one-character flags 0 and 1 -- presumably those are the accepted inputs. *)
70 let bool_of_string' = function
73 | s -> failwith ("Can't parse a boolean from string: " ^ s)
76 (* build an int option from a string *)
(* Visible behaviour: a string accepted by int_of_string yields Some of its
   value; a conversion failure is turned into Failure with a message quoting
   the string. The cases in front of the conversion are not visible in this
   listing (the constraint grammar allows an underscore in place of the
   number, so presumably one of them maps it to None).
   NOTE(review): the handler matches the exact message carried by Failure, so
   it depends on the wording used by the standard library. *)
77 let int_of_string' = function
81 Some (int_of_string s)
82 with Failure "int_of_string" ->
83 failwith ("Can't parse an int option from string: " ^ s)
86 (* HTML pretty printers for mquery_generator types *)
(* [html_of_r_obj (pos, uri)] builds one HTML table row for an object
   constraint: a pre-checked constr_obj checkbox, then [uri], the text of
   [pos], and a depth cell. When [U.is_main_position pos] holds, the depth cell
   is a text input pre-filled with the result of [U.text_of_depth]; the other
   branch (its keyword is not visible in this listing) supplies a hidden
   obj_depth input instead.
   NOTE(review): [uri] is interpolated with %s and no escaping is visible. *)
88 let html_of_r_obj (pos, uri) =
90 "<tr><td><input type='checkbox' name='constr_obj' checked='on'/></td><td>%s</td><td>%s</td><td>%s</td></tr>"
91 uri (U.text_of_position pos)
92 (if U.is_main_position pos then
93 sprintf "<input name='obj_depth' size='2' type='text' value='%s' />"
94 (U.text_of_depth pos "")
96 "<input type=\"hidden\" name=\"obj_depth\" />")
(* [html_of_r_rel pos] builds one HTML table row for a rel constraint: a
   pre-checked constr_rel checkbox, the text of [pos], and a rel_depth text
   input pre-filled with its depth. [pos] is coerced to T.full_position before
   being passed to the [U] helpers. The formatting call that consumes the
   format string is not visible in this listing. *)
99 let html_of_r_rel pos =
101 "<tr><td><input type='checkbox' name='constr_rel' checked='on'/></td><td>%s</td><td><input name='rel_depth' size='2' type='text' value='%s' /></td></tr>"
102 (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "")
(* [html_of_r_sort (pos, sort)] builds one HTML table row for a sort
   constraint: a pre-checked constr_sort checkbox, the text of [sort], the text
   of [pos], and a sort_depth text input pre-filled with the depth of [pos].
   [pos] is coerced to T.full_position before being passed to the [U] helpers.
   The formatting call that consumes the format string is not visible in this
   listing. *)
105 let html_of_r_sort (pos, sort) =
107 "<tr><td><input type='checkbox' name='constr_sort' checked='on'/></td><td>%s</td><td>%s</td><td><input name='sort_depth' size='2' type='text' value='%s'/></td></tr>"
108 (U.text_of_sort sort) (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "")
111 (** pretty print a MathQL query result to an HELM theory file *)
(* Visible behaviour: an empty result yields the heading followed by a
   paragraph saying that nothing was found. Otherwise every (uri, attrs) entry
   becomes a table row holding a running index and an ht:OBJECT element for
   [uri]; the rows use mode linkonly when there are more than 10 results and
   typeonly otherwise. The counter starts at results_no + 1; the lines that
   iterate over the results and update the counter are not visible in this
   listing.
   NOTE(review): [attrs] is not used by the visible row-building code, and
   [uri] is concatenated into the markup without any visible escaping. *)
112 let theory_of_result result =
113 let results_no = List.length result in
114 if results_no > 0 then
115 let mode = if results_no > 10 then "linkonly" else "typeonly" in
117 let idx = ref (results_no + 1) in
119 (fun (uri,attrs) i ->
121 "<tr><td valign=\"top\">" ^ string_of_int !idx ^ ".</td><td><ht:OBJECT uri=\"" ^ uri ^ "\" mode=\"" ^ mode ^ "\"/></td></tr>" ^ i
124 "<h1>Query Results:</h1><table xmlns:ht=\"http://www.cs.unibo.it/helm/namespaces/helm-theory\">" ^ results ^ "</table>"
126 "<h1>Query Results:</h1><p>No results found!</p>"
(** [pp_result result] wraps the markup produced by [theory_of_result result]
    in a complete HTML document (head with title and style, then the body). *)
let pp_result result =
  String.concat ""
    [ "<html xmlns:ht=\"http://www.cs.unibo.it/helm/namespaces/helm-theory\">\n<head><title>Query Results</title><style> A { text-decoration: none } </style></head>\n<body>";
      theory_of_result result;
      "</body></html>" ]
133 (** chain application of Pcre substitutions *)
(* Visible case: the first (rex, templ) pair is applied to [line] with
   Pcre.replace and the remaining pairs are then applied to the result, so the
   substitutions take effect in list order. The case for an exhausted list is
   not visible in this listing. *)
134 let rec apply_substs substs line =
137 | (rex, templ) :: rest -> apply_substs rest (Pcre.replace ~rex ~templ line)
138 (** fold like function on files *)
(* [fold_file f init fname] opens [fname] for reading and threads an
   accumulator, starting at [init], through [f] for each line returned by
   input_line; End_of_file ends the loop and yields the accumulator. If the
   loop raises, the channel is closed and the exception is re-raised. The lines
   that finish the function on success are not visible in this listing.
   NOTE(review): the recursive call is evaluated under the End_of_file handler,
   so it is not a tail call, and an End_of_file raised by [f] itself would also
   end the loop. *)
139 let fold_file f init fname =
140 let inchan = open_in fname in
141 let rec fold_lines' value =
143 let line = input_line inchan in
144 fold_lines' (f value line)
145 with End_of_file -> value
147 let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in
(** [iter_file f fname] calls [f] on every line of the file [fname], one after
    the other. It is [fold_file] run with a unit accumulator, so the value
    returned by [f] must be unit. *)
let iter_file f fname = fold_file (fun () line -> f line) () fname
(* Precompiled regular expressions, one per @TAG@ placeholder; each matches its
   placeholder text literally. [callback] pairs them with replacement strings
   and applies them to template lines. The line joining the pattern tuple to
   the tuple of expressions is not visible in this listing. *)
153 let (title_tag_RE, choices_tag_RE, msg_tag_RE, id_to_uris_RE, id_RE,
154 interpretations_RE, interpretations_labels_RE, results_RE, new_aliases_RE,
155 form_RE, variables_initialization_RE)
157 (Pcre.regexp "@TITLE@", Pcre.regexp "@CHOICES@", Pcre.regexp "@MSG@",
158 Pcre.regexp "@ID_TO_URIS@", Pcre.regexp "@ID@",
159 Pcre.regexp "@INTERPRETATIONS@", Pcre.regexp "@INTERPRETATIONS_LABELS@",
160 Pcre.regexp "@RESULTS@", Pcre.regexp "@NEW_ALIASES@", Pcre.regexp "@FORM@",
161 Pcre.regexp "@VARIABLES_INITIALIZATION@")
(* Captures, as group 1, the part of an http URL that lies between the scheme
   and the first following slash (host, optionally with a port). The URL must
   contain that slash for the expression to match. *)
162 let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$"
(* Port the daemon listens on, read from the registry key
   search_engine.port. *)
164 let port = Helm_registry.get_int "search_engine.port";;
(** [pp_error msg] returns a minimal HTML page that reports [msg] as an error.

    [msg] is escaped before being embedded, because callers build it from
    request data and from exception text: the characters &, < and > and the
    double quote are replaced by the corresponding HTML entities, so the
    message is always displayed as text and never interpreted as markup. *)
let pp_error msg =
  let buf = Buffer.create (String.length msg + 16) in
  String.iter
    (function
      | '&' -> Buffer.add_string buf "&amp;"
      | '<' -> Buffer.add_string buf "&lt;"
      | '>' -> Buffer.add_string buf "&gt;"
      | '"' -> Buffer.add_string buf "&quot;"
      | c -> Buffer.add_char buf c)
    msg;
  Printf.sprintf "<html><body><h1>Error: %s</h1></body></html>"
    (Buffer.contents buf)
;;
(* [bad_request body outchan] answers with an HTTP Bad_request client error
   whose body is [body]. The rest of the call, presumably passing [outchan],
   is not visible in this listing. *)
168 let bad_request body outchan =
169 Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) ~body
(* Header pair (name, value) declaring an HTML body; [callback] passes it in
   the headers list of its responses. *)
173 let contype = "Content-Type", "text/html";;
175 (* SEARCH ENGINE functions *)
(* [get_constraints term] selects a constraint generator according to a request
   path, which [callback] supplies as a second argument; the line introducing
   the match on that path is not visible in this listing.
   Each visible arm yields three components: an optional universe, the must
   constraints as an (obj, rel, sort) triple, and the only constraints as a
   triple of options.
   - /locateInductivePrinciple delegates to CGLocateInductive; most of that arm
     is not visible here.
   - /searchPattern uses the constraints from CGSearchPattern both as must and
     as only constraints.
   - /matchConclusion takes the last block of the must list returned by
     CGMatchConclusion as the obj constraints, with no rel or sort constraints,
     and restricts only the obj component.
   NOTE(review): in the /matchConclusion arm an empty must list gives a block
   index of -1, which List.nth rejects. *)
177 let get_constraints term =
179 | "/locateInductivePrinciple" ->
181 (CGLocateInductive.get_constraints term),
183 | "/searchPattern" ->
184 let constr_obj, constr_rel, constr_sort =
185 CGSearchPattern.get_constraints term in
186 (Some CGSearchPattern.universe),
187 (constr_obj, constr_rel, constr_sort),
188 (Some constr_obj, Some constr_rel, Some constr_sort)
189 | "/matchConclusion" ->
190 let list_of_must, only = CGMatchConclusion.get_constraints [] [] term in
191 (* FG: there is no way to choose the block number ***************************)
192 let block = pred (List.length list_of_must) in
193 (Some CGMatchConclusion.universe),
194 (List.nth list_of_must block, [], []), (Some only, None, None)
200 <must_obj> ':' <must_rel> ':' <must_sort> ':' <only_obj> ':' <only_rel> ':' <only_sort>
202 <must_*> ::= ('0'|'1') ('_'|<int>) (',' ('0'|'1') ('_'|<int>))*
(* [add_user_constraints ~constraints ((obj, rel, sort), (only_obj, only_rel,
   only_sort))] applies the user choices encoded in the string [constraints] to
   the computed must and only constraints.
   The string must split on colons into exactly six fields: three must fields
   followed by three only flags; any other shape raises Failure. A must field
   is a comma-separated list of items, each made of one flag character followed
   by digits or an underscore; an item that does not have this shape raises
   Failure as well.
   A must constraint is kept only when its flag is true, with its position
   updated through U.set_full_position (obj) or U.set_main_position (rel and
   sort). An only constraint is kept only when its flag is true.
   Several lines are not visible in this listing, among them the header of the
   must-field parser and the function that pairs user items with constraints
   inside the add_user_must helpers. *)
205 let add_user_constraints ~constraints
206 ((obj, rel, sort), (only_obj, only_rel, only_sort))
209 let l = Pcre.split ~pat:"," s in
213 let subs = Pcre.extract ~pat:"^(.)(\\d+|_)$" s in
214 (bool_of_string' subs.(1), int_of_string' subs.(2)))
217 Not_found -> failwith ("Can't parse constraint string: " ^ constraints)
220 (* to be used on "obj" *)
221 let add_user_must33 user_must must =
223 (fun (b, i) (p, u) ->
224 if b then Some (U.set_full_position p i, u) else None)
227 (* to be used on "rel" *)
228 let add_user_must22 user_must must =
230 (fun (b, i) p -> if b then Some (U.set_main_position p i) else None)
233 (* to be used on "sort" *)
234 let add_user_must32 user_must must =
236 (fun (b, i) (p, s)-> if b then Some (U.set_main_position p i, s) else None)
(* Split the whole string into its six fields. *)
239 match Pcre.split ~pat:":" constraints with
240 | [user_obj;user_rel;user_sort;user_only_obj;user_only_rel;user_only_sort] ->
242 (user_obj,user_rel,user_sort,user_only_obj,user_only_rel,user_only_sort)
244 (parse_must user_obj,
246 parse_must user_sort,
247 bool_of_string' user_only_obj,
248 bool_of_string' user_only_rel,
249 bool_of_string' user_only_sort)
(* An only constraint survives only when the user kept it enabled. *)
252 (if user_only_obj then only_obj else None),
253 (if user_only_rel then only_rel else None),
254 (if user_only_sort then only_sort else None)
(* Drops the None entries and unwraps the remaining values. *)
257 let rec filter_some =
260 | None::tl -> filter_some tl
261 | (Some x)::tl -> x::(filter_some tl)
263 filter_some (add_user_must33 user_obj obj),
264 filter_some (add_user_must22 user_rel rel),
265 filter_some (add_user_must32 user_sort sort)
268 | _ -> failwith ("Can't parse constraint string: " ^ constraints)
271 (* HTTP DAEMON CALLBACK *)
(* [callback mqi_handle req outchan] serves one HTTP request. It logs the
   request path through [debug_print], dispatches on that path and writes the
   response to [outchan]. Several arm headers of the dispatch are not visible
   in this listing, so the comments below describe each arm by what its body
   does.
   Failures are handled at the end: [Chat_unfinished] only prints a message (a
   page has already been sent by then), a missing request parameter is answered
   through [bad_request], and any other exception is reported in a page built
   with [pp_error]. *)
273 let callback mqi_handle (req: Http_types.request) outchan =
275 debug_print (sprintf "Received request: %s" req#path);
277 | "/help" -> Http_daemon.respond ~body:"HELM Search Engine" outchan
(* Parses the text of the query parameter, executes the query and returns the
   results as an HTML page. *)
279 let query_string = req#param "query" in
280 let lexbuf = Lexing.from_string query_string in
281 let query = MQueryUtil.query_of_text lexbuf in
282 let result = MQueryInterpreter.execute mqi_handle query in
283 let result_string = pp_result result in
284 Http_daemon.respond ~body:result_string ~headers:[contype] outchan
(* Executes the query that G.locate builds from the id parameter. *)
286 let id = req#param "id" in
287 let query = G.locate id in
288 let result = MQueryInterpreter.execute mqi_handle query in
289 Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan
(* Executes the query that G.unreferred builds from the target and source
   parameters. *)
291 let target = req#param "target" in
292 let source = req#param "source" in
293 let query = G.unreferred target source in
294 let result = MQueryInterpreter.execute mqi_handle query in
295 Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan
(* Serves the page named by the url parameter (fragment removed) from
   [pages_dir]. The preprocess parameter defaults to false when it is absent or
   not a boolean; when it is true, request parameters whose name starts with
   the param. prefix are turned into @NAME@ substitutions. The unprocessed path
   sends the file as it is.
   NOTE(review): [is_permitted] accepts every page and the file name is built
   directly from the request parameter, so nothing visible here keeps the path
   inside [pages_dir]. *)
297 (* TODO implement "is_permitted" *)
298 (let is_permitted _ = true in
299 let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in
300 let page = remove_fragment (req#param "url") in
303 bool_of_string (req#param "preprocess")
304 with Invalid_argument _ | Http_types.Param_not_found _ -> false)
307 | page when is_permitted page ->
308 (let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in
309 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
310 Http_daemon.send_header "Content-Type" "text/html" outchan;
311 Http_daemon.send_CRLF outchan;
312 if preprocess then begin
315 output_string outchan
318 (function (key,value) ->
320 (Pcre.extract ~pat:"param\\.(.*)" key).(1)
322 Pcre.regexp ("@" ^ key' ^ "@"), value
325 (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key)
332 Http_daemon.send_file ~src:(FileSrc fname) outchan)
333 | page -> Http_daemon.respond_forbidden ~url:page outchan))
(* Fetches the URL given in the url parameter and relays its body, but only
   when the host part of the URL (group 1 of [server_and_port_url_RE]) is
   listed in [valid_servers]; otherwise the reply names the untrusted
   server. *)
335 let url = req#param "url" in
336 let server_and_port =
337 (Pcre.extract ~rex:server_and_port_url_RE url).(1)
339 if List.mem server_and_port valid_servers then
341 ~headers:["Content-Type", "text/html"]
342 ~body:(Http_client.http_get url)
346 ~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port))
(* Term-based search. The term parameter is disambiguated, constraints are
   computed for it, and the generated query is executed. Whenever more input is
   needed from the user, an HTML page is sent and [Chat_unfinished] is raised.
   [get_constraints] also handles /searchPattern and /matchConclusion, so
   presumably this arm is shared with those paths; only one pattern is visible
   in this listing. *)
350 | "/locateInductivePrinciple" ->
351 let term_string = req#param "term" in
352 let (context, metasenv) = ([], []) in
353 let id_to_uris_raw = req#param "aliases" in
354 let parse_interpretation_choices choices =
355 List.map int_of_string (Pcre.split ~pat:" " choices) in
356 let parse_choices choices_raw =
357 let choices = Pcre.split ~pat:";" choices_raw in
360 match Pcre.split ~pat:"\\s" x with
362 | id::tail when id<>"" ->
365 Some (List.map (fun u -> Netencoding.Url.decode u) tail)
368 | _ -> failwith "Can't parse choices")
373 DisambiguatingParser.EnvironmentP3.of_string id_to_uris_raw in
(* Without a choices parameter no identifier has a preselected choice. *)
376 let choices_raw = req#param "choices" in
377 parse_choices choices_raw
378 with Http_types.Param_not_found _ -> (fun _ -> None)
380 let interpretation_choices =
382 let choices_raw = req#param "interpretation_choices" in
383 Some (parse_interpretation_choices choices_raw)
384 with Http_types.Param_not_found _ -> None
(* Callbacks handed to the disambiguating parser. The two whose bodies are
   visible first look for an answer already supplied with the request and
   otherwise send a choice page and raise [Chat_unfinished]. *)
386 let module Chat: DisambiguateTypes.Callbacks =
389 let interactive_user_uri_choice
391 ?enable_button_for_non_vars ~(title: string) ~(msg: string)
392 ~(id: string) (choices: string list)
394 (match id_to_choices id with
395 | Some choices -> choices
397 let msg = Pcre.replace ~pat:"\'" ~templ:"\\\'" msg in
398 (match selection_mode with
399 | `SINGLE -> assert false
401 Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
402 Http_daemon.send_CRLF outchan ;
405 let formatted_choices =
407 (List.map (fun uri -> sprintf "\'%s\'" uri)
412 [title_tag_RE, title;
413 choices_tag_RE, formatted_choices;
415 id_to_uris_RE, id_to_uris_raw;
419 output_string outchan (processed_line ^ "\n"))
420 interactive_user_uri_choice_TPL;
421 raise Chat_unfinished))
423 let interactive_interpretation_choice interpretations =
424 match interpretation_choices with
(* NOTE(review): the message printed on the next line looks like leftover
   debugging output. *)
425 Some l -> prerr_endline "CARRAMBA" ; l
427 let html_interpretations_labels =
432 (String.concat "<br />"
435 let id = javascript_quote id in
436 let value = javascript_quote value in
437 sprintf "%s = %s" id value)
442 let html_interpretations =
446 | _::tl -> ("'" ^ string_of_int n ^ "'")::(aux (n+1) tl)
448 String.concat ", " (aux 0 interpretations)
450 Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
451 Http_daemon.send_CRLF outchan ;
456 [interpretations_RE, html_interpretations;
457 interpretations_labels_RE, html_interpretations_labels]
460 output_string outchan (processed_line ^ "\n"))
461 interactive_interpretation_choice_TPL;
462 raise Chat_unfinished
464 let input_or_locate_uri ~title ?id () =
(* Disambiguates [term_string]; the visible pattern accepts a result list that
   holds exactly one interpretation. *)
469 let module Disambiguate' = DisambiguatingParser.Make(Chat) in
470 let (id_to_uris', metasenv', term') =
472 Disambiguate'.disambiguate_term mqi_handle
473 context metasenv term_string id_to_uris
475 [id_to_uris',metasenv',term'] -> id_to_uris',metasenv',term'
(* Constraints computed for the disambiguated term and the request path. *)
479 ((must_obj, must_rel, must_sort) as must'),
480 ((only_obj, only_rel, only_sort) as only) =
481 get_constraints term' req#path
(* When the constraints parameter is present the user choices are combined
   with the computed constraints (presumably through [add_user_constraints];
   the call itself is not visible). When it is absent, the handler below sends
   a form listing the computed constraints and ends the request with
   [Chat_unfinished]. *)
486 ~constraints:(req#param "constraints")
488 with Http_types.Param_not_found _ ->
490 "var aliases = '" ^ id_to_uris_raw ^ "';\n" ^
491 "var constr_obj_len = " ^
492 string_of_int (List.length must_obj) ^ ";\n" ^
493 "var constr_rel_len = " ^
494 string_of_int (List.length must_rel) ^ ";\n" ^
495 "var constr_sort_len = " ^
496 string_of_int (List.length must_sort) ^ ";\n" in
498 (if must_obj = [] then "" else
499 "<h4>Obj constraints</h4>" ^
501 (String.concat "\n" (List.map html_of_r_obj must_obj)) ^
503 (* The following three lines to make Javascript create *)
504 (* the constr_obj[] and obj_depth[] arrays even if we *)
505 (* have only one real entry. *)
506 "<input type=\"hidden\" name=\"constr_obj\" />" ^
507 "<input type=\"hidden\" name=\"obj_depth\" />") ^
508 (if must_rel = [] then "" else
509 "<h4>Rel constraints</h4>" ^
511 (String.concat "\n" (List.map html_of_r_rel must_rel)) ^
513 (* The following two lines to make Javascript create *)
514 (* the constr_rel[] and rel_depth[] arrays even if *)
515 (* we have only one real entry. *)
516 "<input type=\"hidden\" name=\"constr_rel\" />" ^
517 "<input type=\"hidden\" name=\"rel_depth\" />") ^
518 (if must_sort = [] then "" else
519 "<h4>Sort constraints</h4>" ^
521 (String.concat "\n" (List.map html_of_r_sort must_sort)) ^
523 (* The following two lines to make Javascript create *)
524 (* the constr_sort[] and sort_depth[] arrays even if *)
525 (* we have only one real entry. *)
526 "<input type=\"hidden\" name=\"constr_sort\" />" ^
527 "<input type=\"hidden\" name=\"sort_depth\" />") ^
528 "<h4>Only constraints</h4>" ^
529 "Enforce Only constraints for objects: " ^
530 "<input type='checkbox' name='only_obj'" ^
531 (if only_obj = None then "" else " checked='yes'") ^
533 "Enforce Rel constraints for objects: " ^
534 "<input type='checkbox' name='only_rel'" ^
535 (if only_rel = None then "" else " checked='yes'") ^
537 "Enforce Sort constraints for objects: " ^
538 "<input type='checkbox' name='only_sort'" ^
539 (if only_sort = None then "" else " checked='yes'") ^
542 Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
543 Http_daemon.send_CRLF outchan ;
549 variables_initialization_RE, variables] line
551 output_string outchan (processed_line ^ "\n"))
552 constraints_choice_TPL;
553 raise Chat_unfinished)
(* Builds the final query from the universe and the constraints, executes it
   and sends the results together with the aliases produced by the
   disambiguation. *)
556 G.query_of_constraints universe must'' only'
558 let results = MQueryInterpreter.execute mqi_handle query in
559 Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
560 Http_daemon.send_CRLF outchan ;
564 DisambiguatingParser.EnvironmentP3.to_string id_to_uris' in
567 [results_RE, theory_of_result results ;
568 (* CSC: Bug here: this is a string, not an array! *)
569 new_aliases_RE, "'" ^ javascript_quote new_aliases ^ "'"]
572 output_string outchan (processed_line ^ "\n"))
(* Bad_request reply; presumably the arm for a path that none of the patterns
   above accepts (its pattern is not visible in this listing). *)
575 Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request))
577 debug_print (sprintf "%s done!" req#path)
579 | Chat_unfinished -> prerr_endline "Chat unfinished, Try again!"
580 | Http_types.Param_not_found attr_name ->
581 bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan
583 let msg = sprintf "Uncaught exception: %s" (Printexc.to_string exc) in
585 Http_daemon.respond ~body:(pp_error msg) outchan
(* Start-up sequence; the binding that encloses it is not visible in this
   listing. It prints the daemon name, the port, the current directory and the
   HTML directory, sets the http_proxy environment variable to the empty
   string, creates the query interpreter handle with [debug_print] as its
   logger, and starts the HTTP daemon on [port] with [callback] as request
   handler. The farewell message is printed once the daemon call returns. *)
587 printf "%s started and listening on port %d\n" daemon_name port;
588 printf "Current directory is %s\n" (Sys.getcwd ());
589 printf "HTML directory is %s\n" pages_dir;
591 Unix.putenv "http_proxy" "";
592 let mqi_handle = C.init ~log:debug_print () in
593 Http_daemon.start' ~port (callback mqi_handle);
595 printf "%s is terminating, bye!\n" daemon_name