Adoption committed on
Commit
7a274fe
·
verified ·
1 Parent(s): 5cf6fe6

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +31 -0
src/app.py CHANGED
@@ -107,7 +107,38 @@ def get_rag_chain():
107
  )
108
  return chain
109
 
 
 
def search_archives(query):
    """Look up *query* in the archives.

    Obtains the retriever from ``get_retriever()`` and returns whatever its
    ``invoke`` call yields for the query.
    """
    return get_retriever().invoke(query)
 
107
  )
108
  return chain
109
 
110
+ # In app.py
111
+
def _keyword_search(query, k=15):
    """Return up to *k* BM25-ranked chunks that share a token with *query*.

    Loads the pickled chunks from ``CHUNKS_FILE``, builds a temporary
    ``BM25Retriever`` over them and ranks them against the query. Returns an
    empty list when the chunk file holds no chunks, the query has no tokens,
    or no ranked chunk contains any query token.
    """
    with open(CHUNKS_FILE, "rb") as f:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. CHUNKS_FILE must only ever be written by this application.
        chunks = pickle.load(f)

    if not chunks:
        # Nothing to index; building a BM25 index over an empty corpus fails.
        return []

    # Temporary keyword retriever just for this search.
    keyword_retriever = BM25Retriever.from_documents(chunks)
    keyword_retriever.k = k

    # Tokenize the query the same way the retriever tokenized the chunks.
    tokenize = keyword_retriever.preprocess_func
    query_tokens = set(tokenize(query))
    if not query_tokens:
        return []

    # BM25 ranking hands back the top-k chunks even when none of them contains
    # a query term, so a non-empty result does not by itself mean "match".
    # Keep only chunks that really share a token with the query; the BM25
    # ranking order is preserved.
    ranked_docs = keyword_retriever.invoke(query)
    return [
        doc
        for doc in ranked_docs
        if query_tokens.intersection(tokenize(doc.page_content))
    ]


def search_archives(query):
    """Search the archives, preferring keyword matches over vector search.

    STRICT SEARCH LOGIC:
    1. Runs a pure keyword (BM25) search over the chunks in ``CHUNKS_FILE``.
    2. If any ranked chunk actually contains a query term, those chunks are
       returned immediately (ignoring vector noise).
    3. Otherwise -- no chunk file, no matching chunk, or the keyword search
       raised -- falls back to the vector retriever from ``get_retriever()``.

    Args:
        query: The search string.

    Returns:
        The matching keyword documents, or the result of the vector
        retriever's ``invoke(query)`` when the keyword phase finds nothing.
    """
    # --- PHASE 1: PRECISE KEYWORD SEARCH ---
    if os.path.exists(CHUNKS_FILE):
        try:
            keyword_docs = _keyword_search(query)
        except Exception as e:
            # Best effort: a broken keyword index must not break search,
            # so report the problem and continue with the vector fallback.
            print(f"⚠️ Keyword Search failed: {e}")
        else:
            if keyword_docs:
                print(f"✅ Found {len(keyword_docs)} matches via Keywords.")
                return keyword_docs

    # --- PHASE 2: FALLBACK VECTOR SEARCH ---
    # Only runs if Phase 1 returned nothing.
    print("⚠️ No keywords found. Falling back to Vector Search...")
    return get_retriever().invoke(query)