Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
updated ML models
Browse files
- pages/Semantic_Search.py +14 -1
- semantic_search/all_search_execute.py +38 -13
pages/Semantic_Search.py
CHANGED
|
@@ -126,7 +126,11 @@ if "questions" not in st.session_state:
|
|
| 126 |
st.session_state.questions = []
|
| 127 |
|
| 128 |
if "input_mvector_rerank" not in st.session_state:
|
| 129 |
-
st.session_state.input_colBert_rerank = False
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
if "clear_" not in st.session_state:
|
| 132 |
st.session_state.clear_ = False
|
|
@@ -685,14 +689,23 @@ if(search_all_type == True or 1==1):
|
|
| 685 |
########################## enable for query_rewrite ########################
|
| 686 |
if rewrite_query:
|
| 687 |
st.session_state.input_is_rewrite_query = 'enabled'
|
|
|
|
| 688 |
st.subheader(':blue[Vector Search]')
|
| 689 |
|
| 690 |
mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
|
| 691 |
|
|
|
|
|
|
|
| 692 |
if(mvector_rerank):
|
| 693 |
st.session_state.input_mvector_rerank = True
|
| 694 |
else:
|
| 695 |
st.session_state.input_mvector_rerank = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
st.subheader(':blue[Hybrid Search]')
|
| 697 |
with st.expander("Set query Weightage:"):
|
| 698 |
st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
|
|
|
|
| 126 |
st.session_state.questions = []
|
| 127 |
|
| 128 |
if "input_mvector_rerank" not in st.session_state:
|
| 129 |
+
st.session_state.input_colBert_rerank = False
|
| 130 |
+
|
| 131 |
+
if "input_multilingual" not in st.session_state:
|
| 132 |
+
st.session_state.input_multilingual = False
|
| 133 |
+
|
| 134 |
|
| 135 |
if "clear_" not in st.session_state:
|
| 136 |
st.session_state.clear_ = False
|
|
|
|
| 689 |
########################## enable for query_rewrite ########################
|
| 690 |
if rewrite_query:
|
| 691 |
st.session_state.input_is_rewrite_query = 'enabled'
|
| 692 |
+
|
| 693 |
st.subheader(':blue[Vector Search]')
|
| 694 |
|
| 695 |
mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
|
| 696 |
|
| 697 |
+
multilingual = st.checkbox("Enable multilingual mode",key = 'multilingual',help = "Enabling this option uses titan model's multilingual embeddings to retrieve documents and haike model to translate the product descriptions to the query language.")
|
| 698 |
+
|
| 699 |
if(mvector_rerank):
|
| 700 |
st.session_state.input_mvector_rerank = True
|
| 701 |
else:
|
| 702 |
st.session_state.input_mvector_rerank = False
|
| 703 |
+
|
| 704 |
+
if(multilingual):
|
| 705 |
+
st.session_state.input_multilingual = True
|
| 706 |
+
else:
|
| 707 |
+
st.session_state.input_multilingual = False
|
| 708 |
+
|
| 709 |
st.subheader(':blue[Hybrid Search]')
|
| 710 |
with st.expander("Set query Weightage:"):
|
| 711 |
st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
|
semantic_search/all_search_execute.py
CHANGED
|
@@ -215,6 +215,7 @@ def handler(input_,session_id):
|
|
| 215 |
hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
|
| 216 |
|
| 217 |
if('Vector Search' in search_types):
|
|
|
|
| 218 |
if(st.session_state.input_mvector_rerank):
|
| 219 |
query_vector = cb.vectorise(query,False)
|
| 220 |
vector_field = "description_vector"
|
|
@@ -253,15 +254,27 @@ def handler(input_,session_id):
|
|
| 253 |
|
| 254 |
#using neural query
|
| 255 |
else:
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
"product_description_vector": {
|
| 259 |
"query_text": query,
|
| 260 |
"model_id": BEDROCK_TEXT_MODEL_ID,
|
| 261 |
"k": k_
|
| 262 |
}
|
| 263 |
-
}
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
###### start of efficient filter applying #####
|
| 267 |
if(st.session_state.input_rewritten_query!=""):
|
|
@@ -412,14 +425,22 @@ def handler(input_,session_id):
|
|
| 412 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
| 413 |
del hybrid_payload["query"]["hybrid"]
|
| 414 |
hybrid_payload["query"] = single_query
|
| 415 |
-
if(st.session_state.
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
|
| 424 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
| 425 |
response_ = json.loads(r.text)
|
|
@@ -488,8 +509,12 @@ def handler(input_,session_id):
|
|
| 488 |
doc_ids = []
|
| 489 |
for doc in docs:
|
| 490 |
if(doc['_source']['image_url'] not in dup):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
res_ = {
|
| 492 |
-
"desc":
|
| 493 |
"caption":doc['_source']['caption'],
|
| 494 |
"image_url":doc['_source']['image_url'],
|
| 495 |
"category":doc['_source']['category'],
|
|
|
|
| 215 |
hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
|
| 216 |
|
| 217 |
if('Vector Search' in search_types):
|
| 218 |
+
|
| 219 |
if(st.session_state.input_mvector_rerank):
|
| 220 |
query_vector = cb.vectorise(query,False)
|
| 221 |
vector_field = "description_vector"
|
|
|
|
| 254 |
|
| 255 |
#using neural query
|
| 256 |
else:
|
| 257 |
+
if(st.session_state.input_multilingual):
|
| 258 |
+
vector_payload = {
|
| 259 |
+
"term": {
|
| 260 |
+
"product_description": {
|
| 261 |
+
"value": query
|
| 262 |
+
}
|
| 263 |
+
}}
|
| 264 |
+
else:
|
| 265 |
+
vector_payload = {"neural": {
|
| 266 |
"product_description_vector": {
|
| 267 |
"query_text": query,
|
| 268 |
"model_id": BEDROCK_TEXT_MODEL_ID,
|
| 269 |
"k": k_
|
| 270 |
}
|
| 271 |
+
}}
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
|
| 279 |
###### start of efficient filter applying #####
|
| 280 |
if(st.session_state.input_rewritten_query!=""):
|
|
|
|
| 425 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
| 426 |
del hybrid_payload["query"]["hybrid"]
|
| 427 |
hybrid_payload["query"] = single_query
|
| 428 |
+
if(st.session_state.input_multilingual):
|
| 429 |
+
if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
|
| 430 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation_with_rerank"
|
| 431 |
+
url = host + path
|
| 432 |
+
else:
|
| 433 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation"
|
| 434 |
+
url = host + path
|
| 435 |
+
else:
|
| 436 |
+
if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
|
| 437 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=rerank_pipeline"
|
| 438 |
+
url = host + path
|
| 439 |
+
hybrid_payload["ext"] = {"rerank": {
|
| 440 |
+
"query_context": {
|
| 441 |
+
"query_text": query
|
| 442 |
+
}
|
| 443 |
+
}}
|
| 444 |
|
| 445 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
| 446 |
response_ = json.loads(r.text)
|
|
|
|
| 509 |
doc_ids = []
|
| 510 |
for doc in docs:
|
| 511 |
if(doc['_source']['image_url'] not in dup):
|
| 512 |
+
if("product_description_translated" in doc['_source'].keys()):
|
| 513 |
+
desc = doc['_source']['product_description_translated']
|
| 514 |
+
else:
|
| 515 |
+
desc = doc['_source']['product_description']
|
| 516 |
res_ = {
|
| 517 |
+
"desc":desc,
|
| 518 |
"caption":doc['_source']['caption'],
|
| 519 |
"image_url":doc['_source']['image_url'],
|
| 520 |
"category":doc['_source']['category'],
|