Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files- src/app.py +31 -0
src/app.py
CHANGED
|
@@ -107,7 +107,38 @@ def get_rag_chain():
|
|
| 107 |
)
|
| 108 |
return chain
|
| 109 |
|
|
|
|
|
|
|
| 110 |
def search_archives(query):
    """Return the documents the shared retriever finds for *query*.

    Obtains a retriever from ``get_retriever()`` and hands back whatever its
    ``invoke`` call yields, unmodified.
    """
    return get_retriever().invoke(query)
|
|
|
|
| 107 |
)
|
| 108 |
return chain
|
| 109 |
|
| 110 |
+
# In app.py
|
| 111 |
+
|
| 112 |
def _keyword_matches(query, k=15):
    """Return up to *k* BM25-ranked chunks that share a token with *query*.

    Loads the pickled chunks from ``CHUNKS_FILE``, builds a throwaway BM25
    retriever over them, and keeps only the ranked chunks that contain at
    least one query token. Returns an empty list when there are no chunks
    or no chunk contains any query token.
    """
    with open(CHUNKS_FILE, "rb") as f:
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # CHUNKS_FILE must only ever be written by this application.
        chunks = pickle.load(f)

    if not chunks:
        # Nothing to index; let the caller fall back to vector search.
        return []

    # NOTE(review): the index is rebuilt on every call; consider caching it
    # if the chunk file is large.
    keyword_retriever = BM25Retriever.from_documents(chunks)
    keyword_retriever.k = k
    ranked = keyword_retriever.invoke(query)

    # BM25 ranking hands back the top-k chunks by score even when every
    # score is zero, so a non-empty result does not prove a keyword hit.
    # Keep only chunks that really contain a query token, tokenizing the
    # same way the retriever does so the filter agrees with the scoring.
    tokenize = getattr(keyword_retriever, "preprocess_func", str.split)
    query_terms = set(tokenize(query))
    return [
        doc
        for doc in ranked
        if query_terms.intersection(tokenize(doc.page_content))
    ]


def search_archives(query):
    """
    STRICT SEARCH LOGIC:
    1. Runs a Pure Keyword (BM25) search first.
    2. If it finds exact matches, it returns them immediately (ignoring Vector noise).
    3. Only falls back to Vector search if Keywords find nothing.

    The fallback is also taken when ``CHUNKS_FILE`` does not exist or the
    keyword phase raises; in the latter case the error is printed, not raised.

    Args:
        query: The search string passed to both retrievers.

    Returns:
        The keyword-matched chunks when there are any, otherwise whatever
        ``get_retriever().invoke(query)`` returns.
    """
    # --- PHASE 1: PRECISE KEYWORD SEARCH ---
    if os.path.exists(CHUNKS_FILE):
        try:
            keyword_docs = _keyword_matches(query)
        except Exception as e:
            # Deliberate best-effort: any keyword-phase failure (unreadable
            # or corrupt chunk file, indexing error) degrades to vector search.
            print(f"⚠️ Keyword Search failed: {e}")
        else:
            if keyword_docs:
                print(f"✅ Found {len(keyword_docs)} matches via Keywords.")
                return keyword_docs

    # --- PHASE 2: FALLBACK VECTOR SEARCH ---
    # Only runs if Phase 1 returned nothing.
    print("⚠️ No keywords found. Falling back to Vector Search...")
    return get_retriever().invoke(query)
|