]> matita.cs.unibo.it Git - helm.git/blobdiff - helm/searchEngine/searchEngine.ml
debian: rebuilt against ocaml 3.08.3
[helm.git] / helm / searchEngine / searchEngine.ml
index 57c90f068272516aa6dc9889644500153194ea28..d262029ed55f5d2d6fe1293f8653631953e0588d 100644 (file)
@@ -1,4 +1,4 @@
-(* Copyright (C) 2002-2004, HELM Team.
+(* Copyright (C) 2002-2005, HELM Team.
  * 
  * This file is part of HELM, an Hypertextual, Electronic
  * Library of Mathematics, developed at the Computer Science
@@ -25,8 +25,6 @@
 
 open Printf
 
-module DB = Dbi_mysql
-
 let debug = true
 let debug_print s = if debug then prerr_endline s
 let _ = Http_common.debug := false
@@ -36,13 +34,14 @@ exception Unbound_identifier of string
 exception Invalid_action of string  (* invalid action for "/search" method *)
 
 let daemon_name = "Moogle"
-let configuration_file = "/projects/helm/etc/moogle.conf.xml" 
+let configuration_file = "/projects/helm/etc/moogle.conf.xml"
 
 let placeholders = [
-  "EXPRESSION"; "ACTION"; "ADVANCED"; "ADVANCED_CHECKED"; "SIMPLE_CHECKED";
-  "TITLE"; "NO_CHOICES"; "CURRENT_CHOICES"; "CHOICES"; "MSG"; "ID_TO_URIS";
-  "ID"; "IDEN"; "INTERPRETATIONS"; "INTERPRETATIONS_LABELS"; "RESULTS";
-  "NEW_ALIASES"; "SEARCH_ENGINE_URL"; "PREV_LINK"; "PAGE"; "PAGES"; "NEXT_LINK"
+  "ACTION"; "ADVANCED"; "ADVANCED_CHECKED"; "CHOICES"; "CURRENT_CHOICES";
+  "EXPRESSION"; "ID"; "IDEN"; "ID_TO_URIS"; "INTERPRETATIONS";
+  "INTERPRETATIONS_LABELS"; "MSG"; "NEW_ALIASES"; "NEXT_LINK"; "NO_CHOICES";
+  "PAGE"; "PAGES"; "PREV_LINK"; "QUERY_KIND"; "QUERY_SUMMARY"; "RESULTS";
+  "SEARCH_ENGINE_URL"; "SIMPLE_CHECKED"; "TITLE";
 ]
 
 let tag =
@@ -50,15 +49,18 @@ let tag =
   List.iter
     (fun tag -> Hashtbl.add regexps tag (Pcre.regexp (sprintf "@%s@" tag)))
     placeholders;
-  Hashtbl.find regexps
+  fun name ->
+    try
+      Hashtbl.find regexps name
+    with Not_found -> assert false
 
   (* First of all we load the configuration *)
 let _ = Helm_registry.load_from configuration_file
 let port = Helm_registry.get_int "search_engine.port"
 let pages_dir = Helm_registry.get "search_engine.html_dir"
 
-let interactive_interpretation_choice_TPL = pages_dir ^ "/moogle_chat2.html"
 let moogle_TPL = pages_dir ^ "/moogle.html"
+let choices_TPL = pages_dir ^ "/moogle_chat.html"
 
 let my_own_url =
  let ic = Unix.open_process_in "hostname -f" in
@@ -114,19 +116,38 @@ let add_param_substs params =
 
 let page_RE = Pcre.regexp "&param\\.page=\\d+"
 
+let query_kind_of_req (req: Http_types.request) =
+  match req#path with
+  | "/match" -> "Match"
+  | "/hint" -> "Hint"
+  | "/locate" -> "Locate"
+  | "/elim" -> "Elim"
+  | _ -> ""
+
+  (* given a uri with a query part in input try to find in it a string
+   * "&param_name=..." (where param_name is given). If found its value will be
+   * set to param_value. If not, a trailing "&param_name=param_value" (where
+   * both are given) is added to the input string *)
+let patch_param param_name param_value url =
+  let rex = Pcre.regexp (sprintf "&%s=[^&]*" (Pcre.quote param_name)) in
+  if Pcre.pmatch ~rex url then
+    Pcre.replace ~rex ~templ:(sprintf "%s=%s" param_name param_value) url
+  else
+    sprintf "%s&%s=%s" url param_name param_value
+
 let send_results results
   ?(id_to_uris = CicTextualParser2.EnvironmentP3.of_string "") 
    (req: Http_types.request) outchan
   =
+  let query_kind = query_kind_of_req req in
+  let interp = try req#param "interp" with Http_types.Param_not_found _ -> "" in
   let page_link anchor page =
     try
       let this = req#param "this" in
       let target =
-        if Pcre.pmatch ~rex:page_RE this then
-          Pcre.replace ~rex:page_RE ~templ:(sprintf "&param.page=%d" page)
-            this
-        else
-          sprintf "%s&param.page=%d" this page
+        (patch_param "param.interp" interp
+           (patch_param "param.page" (string_of_int page)
+              this))
       in
       let target = Pcre.replace ~pat:"&" ~templ:"&" target in
       sprintf "<a href=\"%s\">%s</a>" target anchor
@@ -135,7 +156,7 @@ let send_results results
   Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
   Http_daemon.send_header "Content-Type" "text/xml" outchan;
   Http_daemon.send_CRLF outchan ;
-  let subst, results_string =
+  let subst =
     match results with
     | `Results results ->
         let page = try int_of_string (req#param "page") with _ -> 1 in
@@ -149,24 +170,36 @@ let send_results results
           else
             results_no / results_per_page + 1
         in
-        ([ tag "PAGE", string_of_int page; tag "PAGES", string_of_int pages ] @
-         [ tag "PREV_LINK",
-           if page > 1 then page_link "Prev" (page-1) else "" ] @
-         [ tag "NEXT_LINK",
-           if page < pages then page_link "Next" (page+1) else "" ]),
-        MooglePp.theory_of_result req page results
+        let pages = if pages = 0 then 1 else pages in
+        let (summary, results) = MooglePp.theory_of_result page results in
+        [ tag "PAGE", string_of_int page;
+          tag "PAGES", string_of_int pages;
+          tag "PREV_LINK", (if page > 1 then page_link "Prev" (page-1) else "");
+          tag "NEXT_LINK",
+            (if page < pages then page_link "Next" (page+1) else "");
+          tag "QUERY_KIND", query_kind;
+          tag "QUERY_SUMMARY", summary;
+          tag "RESULTS", results ]
     | `Error msg ->
-        [ tag "PAGE", ""; tag "PAGES", "";
-          tag "PREV_LINK", ""; tag "NEXT_LINK", "" ],
-        msg
+        [ tag "PAGE", "1";
+          tag "PAGES", "1";
+          tag "PREV_LINK", "";
+          tag "NEXT_LINK", "";
+          tag "QUERY_KIND", query_kind;
+          tag "QUERY_SUMMARY", "error";
+          tag "RESULTS", msg ]
+  in
+  let advanced =
+    try
+      req#param "advanced"
+    with Http_types.Param_not_found _ -> "no"
   in
   let subst =
     (tag "SEARCH_ENGINE_URL", my_own_url) ::
-    (tag "RESULTS", results_string) ::
-    (tag "ADVANCED", req#param "advanced") ::
+    (tag "ADVANCED", advanced) ::
     (tag "EXPRESSION", req#param "expression") ::
     add_param_substs req#params @
-    (if req#param "advanced" = "no" then
+    (if advanced = "no" then
       [ tag "SIMPLE_CHECKED", "checked='true'";
         tag "ADVANCED_CHECKED", "" ]
     else
@@ -246,19 +279,28 @@ let exec_action dbd (req: Http_types.request) outchan =
             in
             Http_daemon.send_basic_headers ~code:(`Code 200) outchan ;
             Http_daemon.send_CRLF outchan ;
+            let advanced =
+              try
+                req#param "advanced"
+              with Http_types.Param_not_found _ -> "no"
+            in
+            let query_kind = query_kind_of_req req in
             iter_file
               (fun line ->
                  let processed_line =
                    apply_substs
-                     [tag "ADVANCED", req#param "advanced";
-                      tag "INTERPRETATIONS", html_interpretations;
-                      tag "CURRENT_CHOICES", req#param "choices";
-                      tag "EXPRESSION", req#param "expression";
-                      tag "ACTION", string_tail req#path ]
-                      line
+                     [ tag "SEARCH_ENGINE_URL", my_own_url;
+                       tag "ADVANCED", advanced;
+                       tag "INTERPRETATIONS", html_interpretations;
+                       tag "CURRENT_CHOICES", req#param "choices";
+                       tag "EXPRESSION", req#param "expression";
+                       tag "QUERY_KIND", query_kind;
+                       tag "QUERY_SUMMARY", "disambiguation";
+                       tag "ACTION", string_tail req#path ]
+                     line
                  in
                  output_string outchan (processed_line ^ "\n"))
-              interactive_interpretation_choice_TPL;
+              choices_TPL;
             raise Chat_unfinished
 
       let input_or_locate_uri ~title ?id () =
@@ -306,16 +348,16 @@ let callback dbd (req: Http_types.request) outchan =
     (match req#path with
     | "/getpage" ->
           (* TODO implement "is_permitted" *)
-        (let is_permitted _ = true in
+        (let is_permitted page = not (Pcre.pmatch ~pat:"/" page) in
         let page = req#param "url" in
+        let fname = sprintf "%s/%s" pages_dir page in
         let preprocess =
           (try
             bool_of_string (req#param "preprocess")
           with Invalid_argument _ | Http_types.Param_not_found _ -> false)
         in
         (match page with
-        | page when is_permitted page ->
-            (let fname = sprintf "%s/%s" pages_dir page in
+        | page when is_permitted page && Sys.file_exists fname ->
             Http_daemon.send_basic_headers ~code:(`Code 200) outchan;
             Http_daemon.send_header "Content-Type" "text/html" outchan;
             Http_daemon.send_CRLF outchan;
@@ -332,7 +374,7 @@ let callback dbd (req: Http_types.request) outchan =
                     "\n"))
                 fname
             end else
-              Http_daemon.send_file ~src:(Http_types.FileSrc fname) outchan)
+              Http_daemon.send_file ~src:(Http_types.FileSrc fname) outchan
         | page -> Http_daemon.respond_forbidden ~url:page outchan))
     | "/help" -> Http_daemon.respond ~body:daemon_name outchan
     | "/locate" ->
@@ -345,9 +387,10 @@ let callback dbd (req: Http_types.request) outchan =
         in
         if expression = "" then
           send_results (`Results []) req outchan
-        else
+        else begin
           let results = MetadataQuery.locate ~dbd expression in
           send_results (`Results results) req outchan
+        end
     | "/hint"
     | "/elim"
     | "/match" -> exec_action dbd req outchan