Spaces:
Sleeping
Sleeping
Update src/rag_engine.py
Browse files
Expanded the number of retrieved documents from 10 to 25 and the number of output documents from 3 to 6.
- src/rag_engine.py +4 -3
src/rag_engine.py
CHANGED
|
@@ -101,7 +101,7 @@ def process_and_add_document(file_path, username, strategy="paragraph"):
|
|
| 101 |
return False, str(e)
|
| 102 |
|
| 103 |
# --- RETRIEVAL ENGINE ---
|
| 104 |
-
def search_knowledge_base(query, username, k=3):
|
| 105 |
"""
|
| 106 |
Two-Stage Retrieval System (RAG):
|
| 107 |
1. Retrieval: Get 10 candidates via fast Vector Search.
|
|
@@ -120,7 +120,7 @@ def search_knowledge_base(query, username, k=3):
|
|
| 120 |
reranker = get_reranker_model()
|
| 121 |
|
| 122 |
# 1. Broad Search
|
| 123 |
-
results = db.similarity_search(query, k=10)
|
| 124 |
|
| 125 |
if not results:
|
| 126 |
return []
|
|
@@ -131,7 +131,8 @@ def search_knowledge_base(query, username, k=3):
|
|
| 131 |
|
| 132 |
top_results = []
|
| 133 |
sorted_ranks = sorted(ranks, key=lambda x: x['score'], reverse=True)
|
| 134 |
-
|
|
|
|
| 135 |
for rank in sorted_ranks[:k]:
|
| 136 |
doc_index = rank['corpus_id']
|
| 137 |
doc = results[doc_index]
|
|
|
|
| 101 |
return False, str(e)
|
| 102 |
|
| 103 |
# --- RETRIEVAL ENGINE ---
|
| 104 |
+
def search_knowledge_base(query, username, k=6):
|
| 105 |
"""
|
| 106 |
Two-Stage Retrieval System (RAG):
|
| 107 |
1. Retrieval: Get 10 candidates via fast Vector Search.
|
|
|
|
| 120 |
reranker = get_reranker_model()
|
| 121 |
|
| 122 |
# 1. Broad Search
|
| 123 |
+
results = db.similarity_search(query, k=25)
|
| 124 |
|
| 125 |
if not results:
|
| 126 |
return []
|
|
|
|
| 131 |
|
| 132 |
top_results = []
|
| 133 |
sorted_ranks = sorted(ranks, key=lambda x: x['score'], reverse=True)
|
| 134 |
+
|
| 135 |
+
# Return the top k results
|
| 136 |
for rank in sorted_ranks[:k]:
|
| 137 |
doc_index = rank['corpus_id']
|
| 138 |
doc = results[doc_index]
|