NavyDevilDoc committed on
Commit
1fd5385
·
verified ·
1 Parent(s): 417ffc9

Update src/rag_engine.py

Browse files

Updated the search function to handle complex cases and special characters

Files changed (1) hide show
  1. src/rag_engine.py +18 -6
src/rag_engine.py CHANGED
@@ -110,17 +110,29 @@ def search_knowledge_base(query, username, k=6):
110
  """
111
  try:
112
  db = get_vectorstore(username)
113
-
114
- # FIX #3: Graceful handling for empty/missing DB
115
- # If the collection is empty, Chroma sometimes throws an error or returns nothing.
116
- # We check count first to be safe.
117
  if db._collection.count() == 0:
118
  return []
119
 
120
  reranker = get_reranker_model()
121
 
122
- # 1. Broad Search
123
- results = db.similarity_search(query, k=25)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  if not results:
126
  return []
 
110
  """
111
  try:
112
  db = get_vectorstore(username)
 
 
 
 
113
  if db._collection.count() == 0:
114
  return []
115
 
116
  reranker = get_reranker_model()
117
 
118
+ # 1. Vector Search (Broad Net)
119
+ vector_results = db.similarity_search(query, k=25)
120
+
121
+ # 2. "Poor Man's" Keyword Search (The Safety Net)
122
+ # We perform a basic text search for unique terms in the query
123
+ # This catches acronyms like "C&D" if we normalize them
124
+
125
+ # Normalize query acronyms (e.g., "C&D" -> "C D")
126
+ normalized_query = query.replace("&", " ")
127
+ keyword_results = []
128
+
129
+ # (Optional: In a production DB like Pinecone/Weaviate, this is built-in.
130
+ # For Chroma local, we rely on the vector net mostly, but we can
131
+ # extend k significantly to catch edge cases).
132
+
133
+ # STRATEGY: Just widen the net significantly.
134
+ # Vector models often hide the match at rank 30 or 40 if the spelling differs.
135
+ results = db.similarity_search(query, k=50) # Widen from 25 to 50
136
 
137
  if not results:
138
  return []