DeltaVenom committed on
Commit
48d1e8f
·
1 Parent(s): 8e21b3a

Clean up comparison table headers

Browse files
Files changed (1) hide show
  1. agents/nodes.py +19 -15
agents/nodes.py CHANGED
@@ -430,32 +430,33 @@ class AgentNodes:
430
  # This helps the retriever find relevant feature chunks for the specific plan
431
  focused_query = f"features, benefits, eligibility and exclusions of {matched}"
432
 
433
- # Use a fresh, strictly focused filter for each plan
434
- # IMPORTANT: Search by insurer and manually filter by product_name
435
- # This is more robust than passing a combined filter to the vector store
436
  matched_insurer = None
437
  if hasattr(self, "_cached_plans") and self._cached_plans:
438
  for p_meta in self._cached_plans:
439
  if p_meta["product_name"] == matched:
440
  matched_insurer = p_meta.get("insurer")
441
  break
 
 
 
 
 
442
 
443
- search_filters = {"insurer": matched_insurer} if matched_insurer else {}
444
-
445
- # Search only by insurer and then manually filter by product_name
446
- # This is more robust than passing a combined filter to the vector store
447
- docs = retriever.search(focused_query, filters=search_filters, k=50)
448
 
449
  plan_chunks = []
450
  for doc in docs:
451
  doc_product = doc.metadata.get("product_name", "")
452
- # Use fuzzy match for manual filter consistency
453
  if self._find_closest_plan_name(doc_product, [matched]) == matched:
454
  plan_chunks.append(doc)
455
 
456
- for doc in plan_chunks[:10]:
457
- plan_id = doc.metadata.get("plan_id", matched)
458
- chunks_by_plan[plan_id].append({
 
459
  "content": doc.page_content,
460
  "product_name": doc.metadata.get("product_name"),
461
  "document_type": doc.metadata.get("document_type", "brochure"),
@@ -468,8 +469,9 @@ class AgentNodes:
468
  """Group retrieved documents by plan_id."""
469
  grouped = defaultdict(list)
470
  for doc in docs:
471
- plan_id = doc.metadata.get("plan_id", doc.metadata.get("product_name", "unknown"))
472
- grouped[plan_id].append({
 
473
  "content": doc.page_content,
474
  "product_name": doc.metadata.get("product_name"),
475
  "document_type": doc.metadata.get("document_type", "brochure"),
@@ -859,7 +861,9 @@ For general insurance terminology questions:
859
 
860
  # 3. Word overlap (Lower Confidence fallback)
861
  query_words = set(query_plan.lower().split())
862
- stop_words = {"tata", "aia", "edelweiss", "life", "generali", "central", "plan", "insurance", "the", "a", "of", "with", "compare"}
 
 
863
  query_significant = query_words - stop_words
864
 
865
  best_match = None
 
430
  # This helps the retriever find relevant feature chunks for the specific plan
431
  focused_query = f"features, benefits, eligibility and exclusions of {matched}"
432
 
433
+ # Find the insurer for this product from cache for better filtering
 
 
434
  matched_insurer = None
435
  if hasattr(self, "_cached_plans") and self._cached_plans:
436
  for p_meta in self._cached_plans:
437
  if p_meta["product_name"] == matched:
438
  matched_insurer = p_meta.get("insurer")
439
  break
440
+
441
+ # IMPORTANT: Search by product_name directly if possible
442
+ search_filters = {"product_name": matched}
443
+ if matched_insurer:
444
+ search_filters["insurer"] = matched_insurer
445
 
446
+ # Use a slightly lower k because we are being very specific with the filter
447
+ docs = retriever.search(focused_query, filters=search_filters, k=20)
 
 
 
448
 
449
  plan_chunks = []
450
  for doc in docs:
451
  doc_product = doc.metadata.get("product_name", "")
452
+ # Final check for safety, but with accurate fuzzy matching
453
  if self._find_closest_plan_name(doc_product, [matched]) == matched:
454
  plan_chunks.append(doc)
455
 
456
+ for doc in plan_chunks[:8]:
457
+ # Use product_name for the key instead of plan_id to ensure clean table headers
458
+ plan_name = doc.metadata.get("product_name", matched)
459
+ chunks_by_plan[plan_name].append({
460
  "content": doc.page_content,
461
  "product_name": doc.metadata.get("product_name"),
462
  "document_type": doc.metadata.get("document_type", "brochure"),
 
469
  """Group retrieved documents by plan_id."""
470
  grouped = defaultdict(list)
471
  for doc in docs:
472
+ # Prefer product_name for display keys
473
+ plan_name = doc.metadata.get("product_name", doc.metadata.get("plan_id", "unknown"))
474
+ grouped[plan_name].append({
475
  "content": doc.page_content,
476
  "product_name": doc.metadata.get("product_name"),
477
  "document_type": doc.metadata.get("document_type", "brochure"),
 
861
 
862
  # 3. Word overlap (Lower Confidence fallback)
863
  query_words = set(query_plan.lower().split())
864
+ # REMOVED insurer names from stop_words because they are critical for distinguishing
865
+ # similar plan names (like 'Saral Jeevan Bima') across different companies.
866
+ stop_words = {"plan", "insurance", "the", "a", "of", "with", "compare", "is", "between"}
867
  query_significant = query_words - stop_words
868
 
869
  best_match = None