(* Copyright (C) 2002, HELM Team. * * This file is part of HELM, an Hypertextual, Electronic * Library of Mathematics, developed at the Computer Science * Department, University of Bologna, Italy. * * HELM is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * HELM is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HELM; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, * MA 02111-1307, USA. * * For details, see the HELM World-Wide-Web page, * http://cs.unibo.it/helm/. *) module T = MQGTypes module U = MQGUtil module G = MQueryGenerator module C = MQIConn open Http_types ;; let debug = true;; let debug_print s = if debug then prerr_endline s;; Http_common.debug := true;; (* Http_common.debug := true;; *) open Printf;; let daemon_name = "Search Engine";; let string_tail s = let len = String.length s in String.sub s 1 (len-1) (* First of all we load the configuration *) let _ = let configuration_file = "searchEngine.conf.xml" in Helm_registry.load_from configuration_file ;; let port = Helm_registry.get_int "search_engine.port";; let pages_dir = Helm_registry.get "search_engine.html_dir";; (** accepted HTTP servers for ask_uwobo method forwarding *) let valid_servers= Helm_registry.get_string_list "search_engine.valid_servers";; let interactive_user_uri_choice_TPL = pages_dir ^ "/moogle_chat1.html";; let interactive_interpretation_choice_TPL = pages_dir ^ "/moogle_chat2.html";; let constraints_choice_TPL = pages_dir ^ "/moogle_constraints_choice.html";; (* let final_results_TPL = pages_dir ^ "/templateambigpdq3.html";; *) let start_TPL = pages_dir ^ "/moogle.html";; let final_results_TPL = pages_dir ^ "/moogle.html";; let my_own_url = let ic = Unix.open_process_in "hostname -f" in let hostname = input_line ic in ignore (Unix.close_process_in ic); sprintf "http://%s:%d" hostname port ;; exception Chat_unfinished exception Invalid_action of string (* invalid action for "/search" method *) let javascript_quote s = let rex = Pcre.regexp "'" in let rex' = Pcre.regexp "\"" in Pcre.replace ~rex ~templ:"\\'" (Pcre.replace ~rex:rex' ~templ:"\\\"" s) ;; (* build a bool from a 1-character-string *) let bool_of_string' = function | "0" -> false | "1" -> true | s -> failwith ("Can't parse a boolean from string: " ^ s) ;; (* build an int option from a string *) let int_of_string' = function | "_" -> None | s -> try Some (int_of_string s) with Failure "int_of_string" -> failwith ("Can't parse an int option from string: " ^ s) ;; (* HTML pretty printers for mquery_generator types *) let html_of_r_obj (pos, uri) = sprintf "%s%s%s" uri (U.text_of_position pos) (if U.is_main_position pos then sprintf "" (U.text_of_depth pos "") else "") ;; let html_of_r_rel pos = sprintf "%s" (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "") ;; let html_of_r_sort (pos, sort) = sprintf "%s%s" (U.text_of_sort sort) (U.text_of_position (pos:>T.full_position)) (U.text_of_depth (pos:>T.full_position) "") ;; (** pretty print a MathQL query result to an HELM theory file *) let theory_of_result result = let results_no = List.length result in if results_no > 0 then let mode = if results_no > 10 then "linkonly" else "typeonly" in let results = let idx = ref (results_no + 1) in List.fold_right (fun (uri,attrs) i -> decr idx ; "" ^ string_of_int !idx ^ "." ^ i ) result "" in "Query Results:" ^ results ^ "
" else "Query Results:

No results found!

" ;; let pp_result result = "\nQuery Results\n" ^ theory_of_result result ^ "" ;; (** chain application of Pcre substitutions *) let rec apply_substs substs line = match substs with | [] -> line | (rex, templ) :: rest -> apply_substs rest (Pcre.replace ~rex ~templ line) (** fold like function on files *) let fold_file f init fname = let inchan = open_in fname in let rec fold_lines' value = try let line = input_line inchan in fold_lines' (f value line) with End_of_file -> value in let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in close_in inchan; res (** iter like function on files *) let iter_file f = fold_file (fun _ line -> f line) () let (expression_tag_RE, action_tag_RE, advanced_tag_RE, title_tag_RE, no_choices_tag_RE, current_choices_tag_RE, choices_tag_RE, msg_tag_RE, id_to_uris_RE, id_RE, iden_tag_RE, interpretations_RE, interpretations_labels_RE, results_RE, new_aliases_RE, form_RE, variables_initialization_RE, search_engine_url_RE) = (Pcre.regexp "@EXPRESSION@", Pcre.regexp "@ACTION@", Pcre.regexp "@ADVANCED@", Pcre.regexp "@TITLE@", Pcre.regexp "@NO_CHOICES@", Pcre.regexp "@CURRENT_CHOICES@", Pcre.regexp "@CHOICES@", Pcre.regexp "@MSG@", Pcre.regexp "@ID_TO_URIS@", Pcre.regexp "@ID@", Pcre.regexp "@IDEN@", Pcre.regexp "@INTERPRETATIONS@", Pcre.regexp "@INTERPRETATIONS_LABELS@", Pcre.regexp "@RESULTS@", Pcre.regexp "@NEW_ALIASES@", Pcre.regexp "@FORM@", Pcre.regexp "@VARIABLES_INITIALIZATION@", Pcre.regexp "@SEARCH_ENGINE_URL@") let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$" let pp_error = sprintf "

Error: %s

";; let bad_request body outchan = Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) ~body outchan ;; let contype = "Content-Type", "text/html";; (* SEARCH ENGINE functions *) let get_constraints term = function | "/elim" -> None, (CGLocateInductive.get_constraints term), (None,None,None) | "/match" -> let constr_obj, constr_rel, constr_sort = CGSearchPattern.get_constraints term in (Some CGSearchPattern.universe), (constr_obj, constr_rel, constr_sort), (Some constr_obj, Some constr_rel, Some constr_sort) | "/hint" -> let list_of_must, only = CGMatchConclusion.get_constraints [] [] term in (* FG: there is no way to choose the block number ***************************) let block = pred (List.length list_of_must) in (Some CGMatchConclusion.universe), (List.nth list_of_must block, [], []), (Some only, None, None) | _ -> assert false ;; (* format: ':' ':' ':' ':' ':' ::= ('0'|'1') ('_'|) (',' ('0'|'1') ('_'|))* ::= '0'|'1' *) let add_user_constraints ~constraints ((obj, rel, sort), (only_obj, only_rel, only_sort)) = let parse_must s = let l = Pcre.split ~pat:"," s in (try List.map (fun s -> let subs = Pcre.extract ~pat:"^(.)(\\d+|_)$" s in (bool_of_string' subs.(1), int_of_string' subs.(2))) l with Not_found -> failwith ("Can't parse constraint string: " ^ constraints) ) in (* to be used on "obj" *) let add_user_must33 user_must must = List.map2 (fun (b, i) (p, u) -> if b then Some (U.set_full_position p i, u) else None) user_must must in (* to be used on "rel" *) let add_user_must22 user_must must = List.map2 (fun (b, i) p -> if b then Some (U.set_main_position p i) else None) user_must must in (* to be used on "sort" *) let add_user_must32 user_must must = List.map2 (fun (b, i) (p, s)-> if b then Some (U.set_main_position p i, s) else None) user_must must in match Pcre.split ~pat:":" constraints with | [user_obj;user_rel;user_sort;user_only_obj;user_only_rel;user_only_sort] -> let (user_obj,user_rel,user_sort,user_only_obj,user_only_rel,user_only_sort) = (parse_must user_obj, parse_must user_rel, parse_must user_sort, bool_of_string' user_only_obj, bool_of_string' user_only_rel, bool_of_string' user_only_sort) in let only' = (if user_only_obj then only_obj else None), (if user_only_rel then only_rel else None), (if user_only_sort then only_sort else None) in let must' = let rec filter_some = function [] -> [] | None::tl -> filter_some tl | (Some x)::tl -> x::(filter_some tl) in filter_some (add_user_must33 user_obj obj), filter_some (add_user_must22 user_rel rel), filter_some (add_user_must32 user_sort sort) in (must', only') | _ -> failwith ("Can't parse constraint string: " ^ constraints) in let send_results results ?(id_to_uris = DisambiguatingParser.EnvironmentP3.of_string "") (req: Http_types.request) outchan = Http_daemon.send_basic_headers ~code:(`Code 200) outchan ; Http_daemon.send_header "Content-Type" "text/xml" outchan; Http_daemon.send_CRLF outchan ; let subst = (search_engine_url_RE, my_own_url) :: (results_RE, theory_of_result results):: (advanced_tag_RE, req#param "advanced"):: (expression_tag_RE, req#param "expression"):: (List.map (function (key,value) -> let key' = (Pcre.extract ~pat:"param\\.(.*)" key).(1) in Pcre.regexp ("@" ^ key' ^ "@"), value) (List.filter (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key) req#params)) in iter_file (fun line -> let new_aliases = DisambiguatingParser.EnvironmentP3.to_string id_to_uris in let processed_line = apply_substs (* CSC: Bug here: this is a string, not an array! *) ((new_aliases_RE, "'" ^ javascript_quote new_aliases ^ "'")::subst) line in output_string outchan (processed_line ^ "\n")) final_results_TPL in let exec_action mqi_handle (req: Http_types.request) outchan = let term_string = req#param "expression" in let (context, metasenv) = ([], []) in let id_to_uris_raw = try req#param "aliases" with Http_types.Param_not_found _ -> "" in let parse_interpretation_choices choices = List.map int_of_string (Pcre.split ~pat:" " choices) in let parse_choices choices_raw = let choices = Pcre.split ~pat:";" choices_raw in List.fold_left (fun f x -> match Pcre.split ~pat:"\\s" x with | ""::id::tail | id::tail when id<>"" -> (fun id' -> if id = id' then Some (List.map (fun u -> Netencoding.Url.decode u) tail) else f id') | _ -> failwith "Can't parse choices") (fun _ -> None) choices in let id_to_uris = DisambiguatingParser.EnvironmentP3.of_string id_to_uris_raw in let id_to_choices = try let choices_raw = req#param "choices" in parse_choices choices_raw with Http_types.Param_not_found _ -> (fun _ -> None) in let interpretation_choices = try let choices_raw = req#param "interpretation_choices" in if choices_raw = "" then None else Some (parse_interpretation_choices choices_raw) with Http_types.Param_not_found _ -> None in let module Chat: DisambiguateTypes.Callbacks = struct let interactive_user_uri_choice ~selection_mode ?ok ?enable_button_for_non_vars ~(title: string) ~(msg: string) ~(id: string) (choices: string list) = (match id_to_choices id with | Some choices -> choices | None -> if req#param "advanced" = "no" then let isvar s = let len = String.length s in let suffix = String.sub s (len-4) 4 in not (suffix = ".var") in List.filter isvar choices else let msg = Pcre.replace ~pat:"\'" ~templ:"\\\'" msg in (match selection_mode with | `SINGLE -> assert false | `MULTIPLE -> Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_CRLF outchan ; let check_box uri = "" ^ "" ^ uri ^ "" in (* aggiungere gli hyperlinks? *) let check_boxes = String.concat "
" (List.map check_box choices) in iter_file (fun line -> let processed_line = apply_substs [advanced_tag_RE, req#param "advanced"; choices_tag_RE, check_boxes; no_choices_tag_RE, string_of_int (List.length choices); iden_tag_RE, id; current_choices_tag_RE, req#param "choices"; expression_tag_RE, req#param "expression"; action_tag_RE, string_tail req#path ] line in output_string outchan (processed_line ^ "\n")) interactive_user_uri_choice_TPL; raise Chat_unfinished)) let interactive_interpretation_choice interpretations = match interpretation_choices with Some l -> prerr_endline "CARRAMBA" ; l | None -> let html_interpretations = let radio_button n = "" in let text interp = String.concat "
" (List.map (fun (id, value) -> sprintf "%s = %s" id value) interp) in let rec aux n = function [] -> [] | interp::tl -> ((radio_button n)^(text interp))::(aux (n+1) tl) in String.concat "
" (aux 0 interpretations) in Http_daemon.send_basic_headers ~code:(`Code 200) outchan ; Http_daemon.send_CRLF outchan ; iter_file (fun line -> let processed_line = apply_substs [advanced_tag_RE, req#param "advanced"; interpretations_RE, html_interpretations; current_choices_tag_RE, req#param "choices"; expression_tag_RE, req#param "expression"; action_tag_RE, string_tail req#path ] line in output_string outchan (processed_line ^ "\n")) interactive_interpretation_choice_TPL; raise Chat_unfinished let input_or_locate_uri ~title ?id () = assert false end in let module Disambiguate' = DisambiguatingParser.Make(Chat) in let (id_to_uris', metasenv', term') = match Disambiguate'.disambiguate_term mqi_handle context metasenv term_string id_to_uris with [id_to_uris',metasenv',term'] -> id_to_uris',metasenv',term' | _ -> assert false in let universe, ((must_obj, must_rel, must_sort) as must'), ((only_obj, only_rel, only_sort) as only) = get_constraints term' req#path in let must'', only' = (try add_user_constraints ~constraints:(req#param "constraints") (must', only) with Http_types.Param_not_found _ -> if req#param "advanced" = "no" then (must',only) else let variables = "var aliases = '" ^ id_to_uris_raw ^ "';\n" ^ "var constr_obj_len = " ^ string_of_int (List.length must_obj) ^ ";\n" ^ "var constr_rel_len = " ^ string_of_int (List.length must_rel) ^ ";\n" ^ "var constr_sort_len = " ^ string_of_int (List.length must_sort) ^ ";\n" in let form = (if must_obj = [] then "" else "

Obj constraints

" ^ "" ^ (String.concat "\n" (List.map html_of_r_obj must_obj)) ^ "
" ^ (* The following three lines to make Javascript create *) (* the constr_obj[] and obj_depth[] arrays even if we *) (* have only one real entry. *) "" ^ "") ^ (if must_rel = [] then "" else "

Rel constraints

" ^ "" ^ (String.concat "\n" (List.map html_of_r_rel must_rel)) ^ "
" ^ (* The following two lines to make Javascript create *) (* the constr_rel[] and rel_depth[] arrays even if *) (* we have only one real entry. *) "" ^ "") ^ (if must_sort = [] then "" else "

Sort constraints

" ^ "" ^ (String.concat "\n" (List.map html_of_r_sort must_sort)) ^ "
" ^ (* The following two lines to make Javascript create *) (* the constr_sort[] and sort_depth[] arrays even if *) (* we have only one real entry. *) "" ^ "") ^ "

Only constraints

" ^ "Enforce Only constraints for objects: " ^ "
" ^ "Enforce Rel constraints for objects: " ^ "
" ^ "Enforce Sort constraints for objects: " ^ "
" in Http_daemon.send_basic_headers ~code:(`Code 200) outchan ; Http_daemon.send_CRLF outchan ; iter_file (fun line -> let processed_line = apply_substs [form_RE, form ; variables_initialization_RE, variables; advanced_tag_RE, req#param "advanced"; current_choices_tag_RE, req#param "choices"; interpretations_RE, req#param "interpretation_choices"; expression_tag_RE, req#param "expression"; action_tag_RE, string_tail req#path] line in output_string outchan (processed_line ^ "\n")) constraints_choice_TPL; raise Chat_unfinished) in let query = G.query_of_constraints universe must'' only' in let results = MQueryInterpreter.execute mqi_handle query in send_results results ~id_to_uris:id_to_uris' req outchan in (* HTTP DAEMON CALLBACK *) let build_dynamic_uri url params = let p = String.concat "&" (List.map (fun (key,value) -> (key ^ "=" ^ (Netencoding.Url.encode value))) params) in url ^ "?" ^ p in let build_uwobo_request (req: Http_types.request) outchan = prerr_endline ("ECCOLO: " ^ req#param "param.SEARCH_ENGINE_URL"); let xmluri = build_dynamic_uri ((req#param "param.SEARCH_ENGINE_URL") ^ "/search") req#params in prerr_endline ("xmluri: " ^ xmluri); (*let xmluri = Netencoding.Url.encode xmluri in*) let server_and_port = req#param "param.processorURL" in let newreq = build_dynamic_uri (server_and_port ^ "apply") (("xmluri",xmluri)::("keys",(req#param "param.thkeys"))::req#params) in (* if List.mem server_and_port valid_servers then *) prerr_endline newreq; if true then Http_daemon.respond ~headers:["Content-Type", "text/html"] ~body:(Http_client.http_get newreq) outchan else Http_daemon.respond ~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port ^ (String.concat "\n" valid_servers))) outchan in let proxy url outchan = let server_and_port = (Pcre.extract ~rex:server_and_port_url_RE url).(1) in if List.mem server_and_port valid_servers then Http_daemon.respond ~headers:["Content-Type", "text/html"] ~body:(Http_client.http_get url) outchan else Http_daemon.respond ~body:(pp_error ("Untrusted UWOBO server: " ^ server_and_port)) outchan in let callback mqi_handle (req: Http_types.request) outchan = try debug_print (sprintf "Received request: %s" req#path); (match req#path with | "/help" -> Http_daemon.respond ~body:"HELM Search Engine" outchan | "/locate" -> let initial_expression = try req#param "expression" with Http_types.Param_not_found _ -> "" in let expression = Pcre.replace ~pat:"\\s*$" (Pcre.replace ~pat:"^\\s*" initial_expression) in if expression = "" then send_results [] req outchan else let results = let query = G.locate expression in MQueryInterpreter.execute mqi_handle query in send_results results req outchan | "/execute" -> let query_string = req#param "query" in let lexbuf = Lexing.from_string query_string in let query = MQueryUtil.query_of_text lexbuf in let result = MQueryInterpreter.execute mqi_handle query in let result_string = pp_result result in Http_daemon.respond ~body:result_string ~headers:[contype] outchan (* Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan *) | "/unreferred" -> let target = req#param "target" in let source = req#param "source" in let query = G.unreferred target source in let result = MQueryInterpreter.execute mqi_handle query in Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan | "/getpage" -> (* TODO implement "is_permitted" *) let _ = prerr_endline (Netencoding.Url.encode "http://mowgli.cs.unibo.it:38080/") in (let is_permitted _ = true in let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in let page = remove_fragment (req#param "url") in let preprocess = (try bool_of_string (req#param "preprocess") with Invalid_argument _ | Http_types.Param_not_found _ -> false) in (match page with | page when is_permitted page -> (let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in Http_daemon.send_basic_headers ~code:(`Code 200) outchan; Http_daemon.send_header "Content-Type" "text/html" outchan; Http_daemon.send_CRLF outchan; if preprocess then begin iter_file (fun line -> output_string outchan ((apply_substs ((search_engine_url_RE, my_own_url) :: (advanced_tag_RE, "no") :: (results_RE, "") :: (List.map (function (key,value) -> let key' = (Pcre.extract ~pat:"param\\.(.*)" key).(1) in Pcre.regexp ("@" ^ key' ^ "@"), value ) (List.filter (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key) req#params) )) line) ^ "\n")) fname end else Http_daemon.send_file ~src:(FileSrc fname) outchan) | page -> Http_daemon.respond_forbidden ~url:page outchan)) (* OLD | "/ask_uwobo" -> proxy (req#param "url") outchan *) | "/ask_uwobo" -> build_uwobo_request req outchan | "/hint" | "/match" | "/elim" -> exec_action mqi_handle req outchan | invalid_request -> Http_daemon.respond_error ~code:(`Status (`Client_error `Bad_request)) outchan); debug_print (sprintf "%s done!" req#path) with | Chat_unfinished -> prerr_endline "Chat unfinished, Try again!" | Http_types.Param_not_found attr_name -> bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan | exc -> let msg = sprintf "Uncaught exception: %s" (Printexc.to_string exc) in debug_print msg ; Http_daemon.respond ~body:(pp_error msg) outchan in printf "%s started and listening on port %d\n" daemon_name port; printf "Current directory is %s\n" (Sys.getcwd ()); printf "HTML directory is %s\n" pages_dir; flush stdout; Unix.putenv "http_proxy" ""; let mqi_handle = C.init ~log:debug_print () in Http_daemon.start' ~port (callback mqi_handle); C.close mqi_handle; printf "%s is terminating, bye!\n" daemon_name