matita.cs.unibo.it Git - helm.git/blobdiff - helm/searchEngine/searchEngine.ml
1. param.name=value parameters added to the getpage method
[helm.git] / helm / searchEngine / searchEngine.ml
index 38b92fd48717ea8ab6852b80668c03e0265f839b..77280d77a32aa63f3df6973a95f71fbc017007c8 100644 (file)
@@ -1,4 +1,3 @@
-
 (* Copyright (C) 2002, HELM Team.
  * 
  * This file is part of HELM, an Hypertextual, Electronic
  * http://cs.unibo.it/helm/.
  *)
 
-let debug = false;;
+let debug = true;;
 let debug_print s = if debug then prerr_endline s;;
-Http_common.debug := debug;;
+Http_common.debug := true;;
+(* Http_common.debug := true;; *)
+
+  (** whitelist of host:port HTTP servers to which the ask_uwobo method is
+      allowed to forward a request *)
+let valid_servers =
+  [ "mowgli.cs.unibo.it:58080";
+    "localhost:58080" ]
+;;
 
 open Printf;;
 
+(** PostgreSQL connection string: the value of the POSTGRESQL_CONNECTION_STRING
+    environment variable when it is set, a built-in default otherwise *)
+let postgresConnectionString =
+  let fallback =
+    "host=mowgli.cs.unibo.it dbname=helm_mowgli_new_schema user=helm"
+  in
+  try Sys.getenv "POSTGRESQL_CONNECTION_STRING" with Not_found -> fallback
+;;
+
 let daemon_name = "Search Engine";;  (* name printed in the startup and termination messages *)
 let default_port = 58085;;  (* fallback port, used e.g. when the configured one is invalid *)
 let port_env_var = "SEARCH_ENGINE_PORT";;  (* environment variable from which the port is read *)
 
+(** directory holding the HTML pages and templates; it can be overridden
+    through the SEARCH_ENGINE_HTML_DIR environment variable *)
+let pages_dir =
+  let default_dir = "html" in  (* relative to searchEngine's document root *)
+  try Sys.getenv "SEARCH_ENGINE_HTML_DIR" with Not_found -> default_dir
+;;
+(** paths of the three HTML templates, all located inside [pages_dir] *)
+let (interactive_user_uri_choice_TPL,
+     interactive_interpretation_choice_TPL,
+     final_results_TPL) =
+  let in_pages_dir suffix = pages_dir ^ suffix in
+  (in_pages_dir "/templateambigpdq1.html",
+   in_pages_dir "/templateambigpdq2.html",
+   in_pages_dir "/templateambigpdq3.html")
+;;
+
+exception Chat_unfinished (* raised by the interactive disambiguation
+                             callbacks after a template page has been written
+                             to the client; the HTTP callback catches it and
+                             only logs a message on stderr *)
+
+  (** pretty print a MathQL query result to an HELM theory file.
+      [result] is a list of (uri, attributes) pairs; only the URIs are used.
+      Returns an HTML fragment: a heading followed by a table with one
+      numbered <ht:OBJECT> row per URI (numbering starts at 1 and follows the
+      list order), or by a paragraph saying that nothing was found when
+      [result] is empty. Rows use mode linkonly when there are more than 10
+      results and typeonly otherwise.
+      The fragment is accumulated in a Buffer while iterating forward over
+      the list, so that long result lists neither grow the stack nor copy
+      the partial output again and again. *)
+let theory_of_result result =
+  match result with
+  | [] -> "<h1>Query Results:</h1><p>No results found!</p>"
+  | _ ->
+      let mode = if List.length result > 10 then "linkonly" else "typeonly" in
+      let buf = Buffer.create 1024 in
+      Buffer.add_string buf
+        "<h1>Query Results:</h1><table xmlns:ht=\"http://www.cs.unibo.it/helm/namespaces/helm-theory\">";
+      let idx = ref 0 in
+      List.iter
+        (fun (uri, _) ->
+          incr idx;
+          Buffer.add_string buf "<tr><td valign=\"top\">";
+          Buffer.add_string buf (string_of_int !idx);
+          Buffer.add_string buf ".</td><td><ht:OBJECT uri=\"";
+          Buffer.add_string buf uri;
+          Buffer.add_string buf "\" mode=\"";
+          Buffer.add_string buf mode;
+          Buffer.add_string buf "\"/></td></tr>")
+        result;
+      Buffer.add_string buf "</table>";
+      Buffer.contents buf
+;;
+
+(** wrap the rendering of a query result, as produced by [theory_of_result],
+    into a complete HTML page *)
+let pp_result result =
+  String.concat ""
+    [ "<html xmlns:ht=\"http://www.cs.unibo.it/helm/namespaces/helm-theory\">\n<head><title>Query Results</title><style> A { text-decoration: none } </style></head>\n<body>";
+      theory_of_result result;
+      "</body></html>" ]
+;;
+
+  (** chain application of Pcre substitutions: every (regexp, template) pair
+      of [substs] is applied in list order, each one working on the text
+      produced by the previous one; [line] is returned unchanged when
+      [substs] is empty *)
+let apply_substs substs line =
+  List.fold_left
+    (fun text (rex, templ) -> Pcre.replace ~rex ~templ text)
+    line
+    substs
+  (** fold like function on files: [fold_file f init fname] opens [fname],
+      threads an accumulator (starting from [init]) through [f acc line] for
+      every line of the file, in order, and returns the final accumulator.
+      The channel is closed both on normal termination and when [f] (or the
+      reading itself) raises; in the latter case the exception is re-raised.
+      End of input is detected around [input_line] only: the loop stays tail
+      recursive (no stack growth on long files) and an End_of_file raised by
+      [f] itself is propagated instead of being mistaken for the end of the
+      file. *)
+let fold_file f init fname =
+  let inchan = open_in fname in
+  let read_line () =
+    try Some (input_line inchan) with End_of_file -> None
+  in
+  let rec fold_lines' value =
+    match read_line () with
+    | Some line -> fold_lines' (f value line)
+    | None -> value
+  in
+  let res = (try fold_lines' init with e -> (close_in inchan; raise e)) in
+  close_in inchan;
+  res
+  (** iter like function on files: call [f] on every line of the file
+      [fname], in order, discarding the results *)
+let iter_file f fname = fold_file (fun () line -> f line) () fname
+
+(* regular expressions matching the placeholders of the HTML templates *)
+let title_tag_RE = Pcre.regexp "@TITLE@"
+let choices_tag_RE = Pcre.regexp "@CHOICES@"
+let msg_tag_RE = Pcre.regexp "@MSG@"
+let id_to_uris_RE = Pcre.regexp "@ID_TO_URIS@"
+let id_RE = Pcre.regexp "@ID@"
+let interpretations_RE = Pcre.regexp "@INTERPRETATIONS@"
+let interpretations_labels_RE = Pcre.regexp "@INTERPRETATIONS_LABELS@"
+let results_RE = Pcre.regexp "@RESULTS@"
+let new_aliases_RE = Pcre.regexp "@NEW_ALIASES@"
+(* captures, as group 1, what lies between http:// and the first following
+   slash of an URL *)
+let server_and_port_url_RE = Pcre.regexp "^http://([^/]+)/.*$"
+
 let port =
   try
     int_of_string (Sys.getenv port_env_var)
@@ -43,67 +115,294 @@ let port =
       prerr_endline "Warning: invalid port, reverting to default";
       default_port
 in
-let pp_result result =
-  let result_string = MQueryUtil.text_of_result result "\n" in
-  (sprintf "<html><body><pre>%s</pre></body></html>" result_string)
-in
 (* build a minimal HTML page whose heading carries the given error message *)
 let pp_error = sprintf "<html><body><h1>Error: %s</h1></body></html>" in
 (* answer on [outchan] with a `Bad_request client error whose body is [body] *)
 let bad_request body outchan =
   Http_daemon.respond_error ~status:(`Client_error `Bad_request) ~body outchan
 in
-let callback req outchan =
+let contype = ("Content-Type", "text/html") in (* header passed along with the HTML responses *)
+
+(* SEARCH ENGINE functions *)
+
+(* pair the given triple of constraints, left untouched, with a second triple
+   holding each of its components wrapped in Some *)
+let refine_constraints ((x, y, z) as must) =
+  must, (Some x, Some y, Some z)
+in
+
+(* HTTP DAEMON CALLBACK *)
+
+let callback (req: Http_types.request) outchan =
   try
+    debug_print (sprintf "Received request: %s" req#path);
+    if req#path <> "/getpage" then
+      Mqint.init postgresConnectionString;
     (match req#path with
     | "/execute" ->
         let query_string = req#param "query" in
         let lexbuf = Lexing.from_string query_string in
-        let query = MQueryTParser.query MQueryTLexer.query_token lexbuf in
+        let query = MQueryUtil.query_of_text lexbuf in
         let result = MQueryGenerator.execute_query query in
-        let result_string = MQueryUtil.text_of_result result "\n" in
-        Http_daemon.respond
-          ~body:
-            (sprintf "<html><body><pre>%s</pre></body></html>" result_string)
-          outchan
+        let result_string = pp_result result in
+        Http_daemon.respond ~body:result_string ~headers:[contype] outchan
     | "/locate" ->
         let id = req#param "id" in
         let result = MQueryGenerator.locate id in
-        Http_daemon.respond ~body:(pp_result result) outchan
+        Http_daemon.respond ~headers:[contype] ~body:(pp_result result) outchan
+    | "/getpage" ->
+        (* TODO implement "is_permitted" *)
+        (let is_permitted _ = true in
+        let remove_fragment uri = Pcre.replace ~pat:"#.*" uri in
+        let page = remove_fragment (req#param "url") in
+        let preprocess =
+          (try
+            bool_of_string (req#param "preprocess")
+          with Invalid_argument _ | Http_types.Param_not_found _ -> false)
+        in
+        (match page with
+        | page when is_permitted page ->
+            let fname = sprintf "%s/%s" pages_dir (remove_fragment page) in
+            if preprocess then begin
+              Http_daemon.send_basic_headers ~code:200 outchan;
+              Http_daemon.send_CRLF outchan;
+              iter_file
+                (fun line ->
+                  output_string outchan
+                    ((apply_substs
+                       (List.map
+                         (function (key,value) ->
+                           let key' =
+                            (Pcre.extract ~pat:"param\\.(.*)" key).(1)
+                           in
+                            Pcre.regexp ("@" ^ key' ^ "@"), value
+                         )
+                         (List.filter
+                           (fun (key,_) as p-> Pcre.pmatch ~pat:"^param\\." key)
+                           req#params)
+                       )
+                       line) ^
+                    "\n"))
+                fname
+            end else
+              Http_daemon.respond_file ~fname outchan
+        | page -> Http_daemon.respond_forbidden ~url:page outchan))
+    | "/ask_uwobo" ->
+      let url = req#param "url" in
+      let server_and_port =
+        (Pcre.extract ~rex:server_and_port_url_RE url).(1)
+      in
+      if List.mem server_and_port valid_servers then
+        Http_daemon.respond
+          ~body:(Http_client.Convenience.http_get url)
+          outchan
+      else
+        Http_daemon.respond
+          ~body:(pp_error ("Invalid UWOBO server: " ^ server_and_port))
+          outchan
     | "/searchPattern" ->
         let term_string = req#param "term" in
-        let precision = int_of_string (req#param "precision") in
         let lexbuf = Lexing.from_string term_string in
-        let (dom, mkterm) =
-          CicTextualParser.main CicTextualLexer.token lexbuf
+        let (context, metasenv) = ([], []) in
+        let (dom, mk_metasenv_and_expr) =
+          CicTextualParserContext.main
+            ~context ~metasenv CicTextualLexer.token lexbuf
         in
-        (match dom with
-        | [] -> (* no free variables *)
-            let term = mkterm (fun _ -> None) in
-           prerr_endline (CicPp.ppterm term);
-            let result = MQueryGenerator.searchPattern [] [] term precision in
-            Http_daemon.respond ~body:(pp_result result) outchan
-        | _ ->
+        let id_to_uris_raw = req#param "aliases" in
+        let tokens = Pcre.split ~pat:"\\s" id_to_uris_raw in
+        (* TODO move it outside.
+           Consume a token list made of consecutive groups of three tokens
+           (the alias keyword, an identifier, an URI), adding every identifier
+           in front of [keys] and extending [lookup] so that it maps the
+           identifier to its URI; any other shape of the list is rejected
+           with a Failure *)
+        let rec parse_tokens keys lookup toks =
+          match toks with
+          | [] -> (keys, lookup)
+          | "alias" :: name :: uri :: remaining ->
+              let ident = CicTextualParser0.Id name in
+              let lookup' id =
+                if id <> ident then lookup id
+                else
+                  Some
+                    (CicTextualParser0.Uri
+                      (MQueryMisc.cic_textual_parser_uri_of_string uri))
+              in
+              parse_tokens (ident :: keys) lookup' remaining
+          | _ -> failwith "Can't parse aliases"
+        in
+        (* parse the raw value of the choices parameter: a list of entries
+           separated by semicolons, each entry being an identifier followed
+           by its URL-encoded URIs, separated by whitespace (an empty token
+           in front of the identifier is tolerated).
+           Returns a lookup function mapping an identifier to Some list of
+           its decoded URIs, or to None when no entry mentions it; when an
+           identifier occurs in several entries the last one wins.
+           Raises Failure on an entry without a non-empty identifier. *)
+        let parse_choices choices_raw =
+          let choices = Pcre.split ~pat:";" choices_raw in
+          List.fold_left
+            (fun f x ->
+              match Pcre.split ~pat:"\\s" x with
+              | ""::id::tail
+              | id::tail when id<>"" ->
+                  (fun id' ->
+                    (* trace the entry only when debugging is enabled, rather
+                       than writing to stderr at every single lookup *)
+                    if debug then begin
+                      prerr_endline ("#### " ^ id ^ " :=");
+                      List.iter
+                        (fun u ->
+                          prerr_endline ("<" ^ Netencoding.Url.decode u ^ ">"))
+                        tail
+                    end;
+                    if id = id' then
+                      Some (List.map (fun u -> Netencoding.Url.decode u) tail)
+                    else
+                      f id')
+              | _ -> failwith "Can't parse choices")
+            (fun _ -> None)
+            choices
+        in
+        let (id_to_uris : Disambiguate.domain_and_interpretation) =
+         parse_tokens [] (fun _ -> None) tokens in
+        let id_to_choices =
+          try
+            let choices_raw = req#param "choices" in
+            parse_choices choices_raw
+          with Http_types.Param_not_found _ -> (fun _ -> None)
+        in
+        let module Chat: Disambiguate.Callbacks =
+          struct
+            (* callbacks handed to Disambiguate.Make: interaction with the
+               user is obtained by writing an HTML template on [outchan] and
+               then raising Chat_unfinished to abandon the current request.
+
+               get_metasenv / set_metasenv read and write the
+               CicTextualParser0.metasenv reference *)
+            let get_metasenv () =
+             !CicTextualParser0.metasenv
+
+            let set_metasenv metasenv =
+              CicTextualParser0.metasenv := metasenv
+            (* messages meant for the user are simply printed on stderr *)
+            let output_html = prerr_endline
+            (* choice of the URIs for identifier [id]: when the request
+               already carries choices for [id] (see id_to_choices) they are
+               returned as they are; otherwise the first template is sent,
+               filled with title, candidate URIs, message, raw aliases and
+               identifier, and Chat_unfinished is raised *)
+            let interactive_user_uri_choice
+              ~selection_mode ?ok
+              ?enable_button_for_non_vars ~(title: string) ~(msg: string)
+              ~(id: string) (choices: string list)
+              =
+                (match id_to_choices id with
+                | Some choices -> choices
+                | None ->
+                  let msg = Pcre.replace ~pat:"\"" ~templ:"\\\"" msg in (* backslash-escape the double quotes of msg *)
+                  (match selection_mode with
+                  | `SINGLE -> assert false (* single selection mode is not handled *)
+                  | `EXTENDED ->
+                      Http_daemon.send_basic_headers ~code:200 outchan ;
+                      Http_daemon.send_CRLF outchan ;
+                      iter_file
+                        (fun line ->
+                          let formatted_choices =
+                            String.concat ","
+                              (List.map (fun uri -> sprintf "\"%s\"" uri) choices)
+                          in
+                          let processed_line =
+                            apply_substs
+                              [title_tag_RE, title;
+                               choices_tag_RE, formatted_choices;
+                               msg_tag_RE, msg;
+                               id_to_uris_RE, id_to_uris_raw;
+                               id_RE, id]
+                              line
+                          in
+                          output_string outchan (processed_line ^ "\n"))
+                        interactive_user_uri_choice_TPL;
+                      raise Chat_unfinished))
+            (* choice among several interpretations: each interpretation (a
+               list of identifier/value pairs) is rendered twice as a quoted
+               string of alias declarations, once as a label and once as a
+               value; the second template is then sent and Chat_unfinished
+               is raised, so this function never returns normally *)
+            let interactive_interpretation_choice interpretations =
+              let html_interpretations_labels =
+                String.concat ", "
+                  (List.map
+                    (fun l ->
+                      "\"" ^
+                      (String.concat "<br />"
+                        (List.map
+                          (fun (id, value) ->
+                            (sprintf "alias %s %s" id value))
+                          l)) ^
+                      "\"")
+                  interpretations)
+              in
+              let html_interpretations =
+                String.concat ", "
+                  (List.map
+                    (fun l ->
+                      "\"" ^
+                      (String.concat " "
+                        (List.map
+                          (fun (id, value) ->
+                            (sprintf "alias %s %s"
+                              id
+                              (MQueryMisc.wrong_xpointer_format_from_wrong_xpointer_format'
+                                value)))
+                          l)) ^
+                      "\"")
+                    interpretations)
+              in
+              Http_daemon.send_basic_headers ~code:200 outchan ;
+              Http_daemon.send_CRLF outchan ;
+              iter_file
+                (fun line ->
+                  let processed_line =
+                    apply_substs
+                      [interpretations_RE, html_interpretations;
+                       interpretations_labels_RE, html_interpretations_labels]
+                      line
+                  in
+                  output_string outchan (processed_line ^ "\n"))
+                interactive_interpretation_choice_TPL;
+              raise Chat_unfinished
+            (* placeholder: [title] is ignored and a fixed URI is returned *)
+            let input_or_locate_uri ~title =
+              UriManager.uri_of_string "cic:/Coq/Init/DataTypes/nat_ind.con"
+
+          end
+        in
+        let module Disambiguate' = Disambiguate.Make (Chat) in
+        let (id_to_uris', metasenv', term') =
+          Disambiguate'.disambiguate_input
+            context metasenv dom mk_metasenv_and_expr id_to_uris
+        in
+        (match metasenv' with
+        | [] ->
+            let must = MQueryLevels2.get_constraints term' in
+            let must',only = refine_constraints must in
+            let results = MQueryGenerator.searchPattern must' only in 
+            Http_daemon.send_basic_headers ~code:200 outchan ;
+            Http_daemon.send_CRLF outchan ;
+            iter_file
+              (fun line ->
+                let new_aliases =
+                  match id_to_uris' with
+                  | (domain, f) ->
+                      String.concat ", "
+                        (List.map
+                          (fun name ->
+                            sprintf "\"alias %s cic:%s\""
+                              (match name with
+                                  CicTextualParser0.Id name -> name
+                                | _ -> assert false (*CSC: completare *))
+                              (match f name with
+                              | None -> assert false
+                              | Some (CicTextualParser0.Uri t) ->
+                                  MQueryMisc.string_of_cic_textual_parser_uri
+                                    t
+                              | _ -> assert false (*CSC: completare *)))
+                          domain)
+                in
+                let processed_line =
+                  apply_substs
+                    [results_RE, theory_of_result results ;
+                     new_aliases_RE, new_aliases]
+                    line
+                in
+                output_string outchan (processed_line ^ "\n"))
+              final_results_TPL
+        | _ -> (* unable to instantiate some implicit variable *)
             Http_daemon.respond
-              ~body:(pp_error
-                "identifiers resolution in the environment not yet implemented")
+              ~headers:[contype]
+              ~body:"some implicit variables are still unistantiated :-("
               outchan)
+
     | invalid_request ->
-        Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan)
+        Http_daemon.respond_error ~status:(`Client_error `Bad_request) outchan);
+    if req#path <> "/getpage" then
+      Mqint.close ();
+    debug_print (sprintf "%s done!" req#path)
   with
-  | Http_request.Param_not_found attr_name ->
+  | Chat_unfinished -> prerr_endline "Chat unfinished, Try again!"
+  | Http_types.Param_not_found attr_name ->
       bad_request (sprintf "Parameter '%s' is missing" attr_name) outchan
-  | Failure "int_of_string" ->
-      bad_request "Invalid 'precision' value, must be an integer" outchan
   | exc ->
       Http_daemon.respond
         ~body:(pp_error ("Uncaught exception: " ^ (Printexc.to_string exc)))
-       outchan
+        outchan
 in
 printf "%s started and listening on port %d\n" daemon_name port;
-printf "current directory is %s\n" (Sys.getcwd ());
+printf "Current directory is %s\n" (Sys.getcwd ());
+printf "HTML directory is %s\n" pages_dir;
 flush stdout;
+Unix.putenv "http_proxy" "";
 Mqint.set_database Mqint.postgres_db;
-Mqint.init "host=mowgli.cs.unibo.it dbname=helm_mowgli user=helm";
 Http_daemon.start' ~port callback;
-Mqint.close ();
 printf "%s is terminating, bye!\n" daemon_name