Spaces:
Sleeping
Sleeping
Commit ·
48d1e8f
1
Parent(s): 8e21b3a
Clean up comparison table headers
Browse files - agents/nodes.py +19 -15
agents/nodes.py
CHANGED
|
@@ -430,32 +430,33 @@ class AgentNodes:
|
|
| 430 |
# This helps the retriever find relevant feature chunks for the specific plan
|
| 431 |
focused_query = f"features, benefits, eligibility and exclusions of {matched}"
|
| 432 |
|
| 433 |
-
#
|
| 434 |
-
# IMPORTANT: Search by insurer and manually filter by product_name
|
| 435 |
-
# This is more robust than passing a combined filter to the vector store
|
| 436 |
matched_insurer = None
|
| 437 |
if hasattr(self, "_cached_plans") and self._cached_plans:
|
| 438 |
for p_meta in self._cached_plans:
|
| 439 |
if p_meta["product_name"] == matched:
|
| 440 |
matched_insurer = p_meta.get("insurer")
|
| 441 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
# Search only by insurer and then manually filter by product_name
|
| 446 |
-
# This is more robust than passing a combined filter to the vector store
|
| 447 |
-
docs = retriever.search(focused_query, filters=search_filters, k=50)
|
| 448 |
|
| 449 |
plan_chunks = []
|
| 450 |
for doc in docs:
|
| 451 |
doc_product = doc.metadata.get("product_name", "")
|
| 452 |
-
#
|
| 453 |
if self._find_closest_plan_name(doc_product, [matched]) == matched:
|
| 454 |
plan_chunks.append(doc)
|
| 455 |
|
| 456 |
-
for doc in plan_chunks[:
|
| 457 |
-
|
| 458 |
-
|
|
|
|
| 459 |
"content": doc.page_content,
|
| 460 |
"product_name": doc.metadata.get("product_name"),
|
| 461 |
"document_type": doc.metadata.get("document_type", "brochure"),
|
|
@@ -468,8 +469,9 @@ class AgentNodes:
|
|
| 468 |
"""Group retrieved documents by plan_id."""
|
| 469 |
grouped = defaultdict(list)
|
| 470 |
for doc in docs:
|
| 471 |
-
|
| 472 |
-
|
|
|
|
| 473 |
"content": doc.page_content,
|
| 474 |
"product_name": doc.metadata.get("product_name"),
|
| 475 |
"document_type": doc.metadata.get("document_type", "brochure"),
|
|
@@ -859,7 +861,9 @@ For general insurance terminology questions:
|
|
| 859 |
|
| 860 |
# 3. Word overlap (Lower Confidence fallback)
|
| 861 |
query_words = set(query_plan.lower().split())
|
| 862 |
-
|
|
|
|
|
|
|
| 863 |
query_significant = query_words - stop_words
|
| 864 |
|
| 865 |
best_match = None
|
|
|
|
| 430 |
# This helps the retriever find relevant feature chunks for the specific plan
|
| 431 |
focused_query = f"features, benefits, eligibility and exclusions of {matched}"
|
| 432 |
|
| 433 |
+
# Find the insurer for this product from cache for better filtering
|
|
|
|
|
|
|
| 434 |
matched_insurer = None
|
| 435 |
if hasattr(self, "_cached_plans") and self._cached_plans:
|
| 436 |
for p_meta in self._cached_plans:
|
| 437 |
if p_meta["product_name"] == matched:
|
| 438 |
matched_insurer = p_meta.get("insurer")
|
| 439 |
break
|
| 440 |
+
|
| 441 |
+
# IMPORTANT: Search by product_name directly if possible
|
| 442 |
+
search_filters = {"product_name": matched}
|
| 443 |
+
if matched_insurer:
|
| 444 |
+
search_filters["insurer"] = matched_insurer
|
| 445 |
|
| 446 |
+
# Use a slightly lower k because we are being very specific with the filter
|
| 447 |
+
docs = retriever.search(focused_query, filters=search_filters, k=20)
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
plan_chunks = []
|
| 450 |
for doc in docs:
|
| 451 |
doc_product = doc.metadata.get("product_name", "")
|
| 452 |
+
# Final check for safety, but with accurate fuzzy matching
|
| 453 |
if self._find_closest_plan_name(doc_product, [matched]) == matched:
|
| 454 |
plan_chunks.append(doc)
|
| 455 |
|
| 456 |
+
for doc in plan_chunks[:8]:
|
| 457 |
+
# Use product_name for the key instead of plan_id to ensure clean table headers
|
| 458 |
+
plan_name = doc.metadata.get("product_name", matched)
|
| 459 |
+
chunks_by_plan[plan_name].append({
|
| 460 |
"content": doc.page_content,
|
| 461 |
"product_name": doc.metadata.get("product_name"),
|
| 462 |
"document_type": doc.metadata.get("document_type", "brochure"),
|
|
|
|
| 469 |
"""Group retrieved documents by plan_id."""
|
| 470 |
grouped = defaultdict(list)
|
| 471 |
for doc in docs:
|
| 472 |
+
# Prefer product_name for display keys
|
| 473 |
+
plan_name = doc.metadata.get("product_name", doc.metadata.get("plan_id", "unknown"))
|
| 474 |
+
grouped[plan_name].append({
|
| 475 |
"content": doc.page_content,
|
| 476 |
"product_name": doc.metadata.get("product_name"),
|
| 477 |
"document_type": doc.metadata.get("document_type", "brochure"),
|
|
|
|
| 861 |
|
| 862 |
# 3. Word overlap (Lower Confidence fallback)
|
| 863 |
query_words = set(query_plan.lower().split())
|
| 864 |
+
# REMOVED insurer names from stop_words because they are critical for distinguishing
|
| 865 |
+
# similar plan names (like 'Saral Jeevan Bima') across different companies.
|
| 866 |
+
stop_words = {"plan", "insurance", "the", "a", "of", "with", "compare", "is", "between"}
|
| 867 |
query_significant = query_words - stop_words
|
| 868 |
|
| 869 |
best_match = None
|