Spaces:

opensearch-project
/

OpenSearch-AI

Running on CPU Upgrade

App Files

prasadnu committed on Jun 24

Commit

ef1e3b1

1 Parent(s): 3cdaefa

updated ML models

Browse files

Files changed (2) hide show

pages/Semantic_Search.py +14 -1
semantic_search/all_search_execute.py +38 -13

pages/Semantic_Search.py CHANGED Viewed

@@ -126,7 +126,11 @@ if "questions" not in st.session_state:
     st.session_state.questions = []
 if "input_mvector_rerank" not in st.session_state:
-    st.session_state.input_colBert_rerank = False
 if "clear_" not in st.session_state:
     st.session_state.clear_ = False
@@ -685,14 +689,23 @@ if(search_all_type == True or 1==1):
         ########################## enable for query_rewrite ########################
         if rewrite_query:
             st.session_state.input_is_rewrite_query = 'enabled'
         st.subheader(':blue[Vector Search]')
         mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
         if(mvector_rerank):
             st.session_state.input_mvector_rerank = True
         else:
             st.session_state.input_mvector_rerank = False
         st.subheader(':blue[Hybrid Search]')
         with st.expander("Set query Weightage:"):
             st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5,  key='input_Keyword-weight', help=None)

     st.session_state.questions = []
 if "input_mvector_rerank" not in st.session_state:
+    st.session_state.input_colBert_rerank = False
+if "input_multilingual" not in st.session_state:
+    st.session_state.input_multilingual = False
 if "clear_" not in st.session_state:
     st.session_state.clear_ = False
         ########################## enable for query_rewrite ########################
         if rewrite_query:
             st.session_state.input_is_rewrite_query = 'enabled'
         st.subheader(':blue[Vector Search]')
         mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
+        multilingual = st.checkbox("Enable multilingual mode",key = 'multilingual',help = "Enabling this option uses titan model's multilingual embeddings to retrieve documents and haike model to translate the product descriptions to the query language.")
         if(mvector_rerank):
             st.session_state.input_mvector_rerank = True
         else:
             st.session_state.input_mvector_rerank = False
+        if(multilingual):
+            st.session_state.input_multilingual = True
+        else:
+            st.session_state.input_multilingual = False
         st.subheader(':blue[Hybrid Search]')
         with st.expander("Set query Weightage:"):
             st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5,  key='input_Keyword-weight', help=None)

semantic_search/all_search_execute.py CHANGED Viewed

@@ -215,6 +215,7 @@ def handler(input_,session_id):
         hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
     if('Vector Search' in search_types):
         if(st.session_state.input_mvector_rerank):
             query_vector = cb.vectorise(query,False)
             vector_field = "description_vector"
@@ -253,15 +254,27 @@ def handler(input_,session_id):
         #using neural query
         else:
-            vector_payload = {
-                        "neural": {
                         "product_description_vector": {
                             "query_text": query,
                             "model_id": BEDROCK_TEXT_MODEL_ID,
                             "k": k_
                         }
-                        }
-                    }
         ###### start of efficient filter applying #####
         if(st.session_state.input_rewritten_query!=""):
@@ -412,14 +425,22 @@ def handler(input_,session_id):
             single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
             del hybrid_payload["query"]["hybrid"]
             hybrid_payload["query"] = single_query
-            if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
-                path = "demostore-search-index-reindex-new/_search?search_pipeline=rerank_pipeline"
-                url = host + path
-                hybrid_payload["ext"] = {"rerank": {
-                                            "query_context": {
-                                                "query_text": query
-                                            }
-                                            }}
             r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
             response_ = json.loads(r.text)
@@ -488,8 +509,12 @@ def handler(input_,session_id):
     doc_ids = []
     for doc in docs:
         if(doc['_source']['image_url'] not in dup):
             res_ = {
-                "desc":doc['_source']['product_description'],
                "caption":doc['_source']['caption'],
                 "image_url":doc['_source']['image_url'],
                "category":doc['_source']['category'],

         hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
     if('Vector Search' in search_types):
         if(st.session_state.input_mvector_rerank):
             query_vector = cb.vectorise(query,False)
             vector_field = "description_vector"
         #using neural query
         else:
+            if(st.session_state.input_multilingual):
+                vector_payload = {
+                          "term": {
+                            "product_description": {
+                            "value": query
+                            }
+                            }}
+            else:
+                vector_payload {"neural": {
                         "product_description_vector": {
                             "query_text": query,
                             "model_id": BEDROCK_TEXT_MODEL_ID,
                             "k": k_
                         }
+                        }}
         ###### start of efficient filter applying #####
         if(st.session_state.input_rewritten_query!=""):
             single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
             del hybrid_payload["query"]["hybrid"]
             hybrid_payload["query"] = single_query
+            if(st.session_state.input_multilingual):
+                if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
+                    path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation_with_rerank"
+                    url = host + path
+                else:
+                    path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation"
+                    url = host + path
+            else:
+                if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
+                    path = "demostore-search-index-reindex-new/_search?search_pipeline=rerank_pipeline"
+                    url = host + path
+                    hybrid_payload["ext"] = {"rerank": {
+                                                "query_context": {
+                                                    "query_text": query
+                                                }
+                                                }}
             r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
             response_ = json.loads(r.text)
     doc_ids = []
     for doc in docs:
         if(doc['_source']['image_url'] not in dup):
+            if("product_description_translated" in doc['_source'].keys()):
+                desc = doc['_source']['product_description_translated']
+            else:
+                desc = doc['_source']['product_description']
             res_ = {
+                "desc":desc,
                "caption":doc['_source']['caption'],
                 "image_url":doc['_source']['image_url'],
                "category":doc['_source']['category'],