Spaces:
Sleeping
Sleeping
Update src/rag_engine.py
Browse files
Updated the search function to handle complex cases and special characters
- src/rag_engine.py +18 -6
src/rag_engine.py
CHANGED
|
@@ -110,17 +110,29 @@ def search_knowledge_base(query, username, k=6):
|
|
| 110 |
"""
|
| 111 |
try:
|
| 112 |
db = get_vectorstore(username)
|
| 113 |
-
|
| 114 |
-
# FIX #3: Graceful handling for empty/missing DB
|
| 115 |
-
# If the collection is empty, Chroma sometimes throws an error or returns nothing.
|
| 116 |
-
# We check count first to be safe.
|
| 117 |
if db._collection.count() == 0:
|
| 118 |
return []
|
| 119 |
|
| 120 |
reranker = get_reranker_model()
|
| 121 |
|
| 122 |
-
# 1.
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
if not results:
|
| 126 |
return []
|
|
|
|
| 110 |
"""
|
| 111 |
try:
|
| 112 |
db = get_vectorstore(username)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
if db._collection.count() == 0:
|
| 114 |
return []
|
| 115 |
|
| 116 |
reranker = get_reranker_model()
|
| 117 |
|
| 118 |
+
# 1. Vector Search (Broad Net)
|
| 119 |
+
vector_results = db.similarity_search(query, k=25)
|
| 120 |
+
|
| 121 |
+
# 2. "Poor Man's" Keyword Search (The Safety Net)
|
| 122 |
+
# We perform a basic text search for unique terms in the query
|
| 123 |
+
# This catches acronyms like "C&D" if we normalize them
|
| 124 |
+
|
| 125 |
+
# Normalize query acronyms (e.g., "C&D" -> "C D")
|
| 126 |
+
normalized_query = query.replace("&", " ")
|
| 127 |
+
keyword_results = []
|
| 128 |
+
|
| 129 |
+
# (Optional: In a production DB like Pinecone/Weaviate, this is built-in.
|
| 130 |
+
# For Chroma local, we rely on the vector net mostly, but we can
|
| 131 |
+
# extend k significantly to catch edge cases).
|
| 132 |
+
|
| 133 |
+
# STRATEGY: Just widen the net significantly.
|
| 134 |
+
# Vector models often hide the match at rank 30 or 40 if the spelling differs.
|
| 135 |
+
results = db.similarity_search(query, k=50) # Widen from 25 to 50
|
| 136 |
|
| 137 |
if not results:
|
| 138 |
return []
|