DeltaVenom committed on
Commit
72bff80
·
1 Parent(s): 48d1e8f

Update app code and initialize runtime databases

Browse files
.gitignore CHANGED
@@ -1,10 +1,35 @@
1
- .venv/
2
- .env
3
  __pycache__/
4
- *.pyc
5
- .streamlit/
6
- temp_docs/
7
- temp_faiss_index/
8
- debug_*.txt
9
- test_*.txt
 
 
 
10
  *.log
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python Caches
 
2
  __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # SQLite Databases (Recreated at runtime)
7
+ *.db
8
+ *.sqlite3
9
+
10
+ # Logs
11
+ logs/
12
  *.log
13
+ extraction_debug.log
14
+
15
+ # Environment
16
+ .env
17
+ env/
18
+ venv/
19
+ .venv/
20
+ .python-version
21
+
22
+ # Temporary Test Scripts
23
+ verify_fix.py
24
+ test_llm.py
25
+ test_classifier_fix.py
26
+ test_api_logic.py
27
+ verify_extraction.py
28
+
29
+ # RAG specific
30
+ rag/faiss_index/
31
+ rag/embeddings_cache.json
32
+
33
+ # OS files
34
+ .DS_Store
35
+ Thumbs.db
agents/graph.py CHANGED
@@ -44,7 +44,6 @@ def build_rag_workflow() -> StateGraph:
44
  # Agent nodes
45
  workflow.add_node("listing_agent", nodes.listing_agent)
46
  workflow.add_node("retrieval_agent", nodes.retrieval_agent)
47
- workflow.add_node("comparison_agent", nodes.comparison_agent)
48
  workflow.add_node("advisory_agent", nodes.advisory_agent)
49
  workflow.add_node("faq_agent", nodes.faq_agent)
50
 
@@ -69,10 +68,8 @@ def build_rag_workflow() -> StateGraph:
69
  intent = state.get("intent", "plan_details")
70
 
71
  if intent == "list_plans":
72
- # Listing doesn't need retrieval, goes direct to listing agent
73
  return "listing_agent"
74
  else:
75
- # All other intents go through retrieval first
76
  return "retriever"
77
 
78
  workflow.add_conditional_edges(
@@ -97,7 +94,6 @@ def build_rag_workflow() -> StateGraph:
97
 
98
  route_map = {
99
  "plan_details": "retrieval_agent",
100
- "compare_plans": "comparison_agent",
101
  "recommendation": "advisory_agent",
102
  "general_query": "faq_agent"
103
  }
@@ -109,7 +105,6 @@ def build_rag_workflow() -> StateGraph:
109
  route_to_agent,
110
  {
111
  "retrieval_agent": "retrieval_agent",
112
- "comparison_agent": "comparison_agent",
113
  "advisory_agent": "advisory_agent",
114
  "faq_agent": "faq_agent"
115
  }
@@ -117,7 +112,6 @@ def build_rag_workflow() -> StateGraph:
117
 
118
  # All agents end at guardrail
119
  workflow.add_edge("retrieval_agent", "guardrail")
120
- workflow.add_edge("comparison_agent", "guardrail")
121
  workflow.add_edge("advisory_agent", "guardrail")
122
  workflow.add_edge("faq_agent", "guardrail")
123
 
 
44
  # Agent nodes
45
  workflow.add_node("listing_agent", nodes.listing_agent)
46
  workflow.add_node("retrieval_agent", nodes.retrieval_agent)
 
47
  workflow.add_node("advisory_agent", nodes.advisory_agent)
48
  workflow.add_node("faq_agent", nodes.faq_agent)
49
 
 
68
  intent = state.get("intent", "plan_details")
69
 
70
  if intent == "list_plans":
 
71
  return "listing_agent"
72
  else:
 
73
  return "retriever"
74
 
75
  workflow.add_conditional_edges(
 
94
 
95
  route_map = {
96
  "plan_details": "retrieval_agent",
 
97
  "recommendation": "advisory_agent",
98
  "general_query": "faq_agent"
99
  }
 
105
  route_to_agent,
106
  {
107
  "retrieval_agent": "retrieval_agent",
 
108
  "advisory_agent": "advisory_agent",
109
  "faq_agent": "faq_agent"
110
  }
 
112
 
113
  # All agents end at guardrail
114
  workflow.add_edge("retrieval_agent", "guardrail")
 
115
  workflow.add_edge("advisory_agent", "guardrail")
116
  workflow.add_edge("faq_agent", "guardrail")
117
 
agents/nodes.py CHANGED
@@ -1,4 +1,6 @@
1
  import re
 
 
2
  from typing import Dict, List, Any, Optional
3
  from collections import defaultdict
4
  from agents.states import AgentState, ExtractedEntities
@@ -15,13 +17,12 @@ COMPLIANCE_DISCLAIMER = (
15
  # Prompting rules for all agents
16
  COMPLIANCE_RULES = """
17
  CRITICAL RULES:
18
- - ❌ NO invented plan names - only use plans from the provided context
19
- - ❌ NO assumptions beyond documents - if info is missing, say so explicitly
20
- - ❌ NO meta-commentary. DO NOT mention "the provided context", "the documents", "the text", or "internal state".
21
- - CIS overrides brochure for: exclusions, charges, conditions
22
- - ✅ Use structured output (markdown tables) for comparisons
23
- - ✅ Simple, clear language for end users
24
- - ✅ Provide "OUTPUT ONLY" - start answering the user's question directly.
25
  """
26
 
27
 
@@ -48,15 +49,19 @@ class AgentNodes:
48
  if retriever:
49
  retriever.reload()
50
 
 
 
 
 
51
  # =========================================================================
52
  # NODE 1: Query Rewriter
53
  # =========================================================================
54
  def query_rewriter_node(self, state: AgentState) -> Dict[str, Any]:
55
  """
56
- Rewrites query to be self-contained based on chat history.
57
- Resolves pronouns and references.
58
  """
59
- llm = LLMFactory.get_llm("small")
60
  query = state["input"]
61
  history = state.get("chat_history", [])
62
 
@@ -64,16 +69,15 @@ class AgentNodes:
64
  return {"input": query}
65
 
66
  system_prompt = (
67
- "You are a query rewriter for an insurance RAG system. "
68
- "Your task is to rewrite the latest question to be self-contained.\n\n"
69
  "RULES:\n"
70
- "1. ALWAYS resolve pronouns (it, they, these) or vague terms (the plan, previous one) using the previous context.\n"
71
- "2. If the user asks a follow-up about 'it' or 'the plan', replace it with the specific plan name mentioned last.\n"
72
- "3. If the user asks 'is it good for me' or similar, rewrite it to '[Plan Name] recommendation for [user details if any]'.\n"
73
- "4. If the query is already very specific and names a plan, keep it mostly as-is but ensure insurer names are present.\n"
74
- "5. Do NOT cross-pollinate unrelated queries. If the user switches topics completely, ignore the history.\n"
75
- "6. NEVER return a conversational response, suggestion, or question. If you cannot resolve a reference, return the original 'Latest' query as is.\n"
76
- "7. Return ONLY the rewritten query text."
77
  )
78
 
79
  history_str = "\n".join([f"- {h}" for h in history[-5:]]) # Last 5 turns
@@ -99,107 +103,232 @@ class AgentNodes:
99
  llm = LLMFactory.get_llm("small")
100
  query = state["input"].lower()
101
 
102
- # Fast keyword-based classification first
103
- if any(kw in query for kw in ["list", "which plans", "what plans", "all plans", "available plans", "show me plans"]):
104
- return {"intent": "list_plans"}
105
-
106
- if any(kw in query for kw in ["compare", "vs", "versus", "difference between", "which is better"]):
107
- return {"intent": "compare_plans"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- if any(kw in query for kw in ["suggest", "recommend", "best for", "should i", "suitable for"]):
110
- return {"intent": "recommendation"}
 
 
 
 
 
111
 
112
  # LLM-based classification for ambiguous cases
113
- system_prompt = (
114
- "Classify the user's insurance query into ONE of:\n"
115
- "- 'plan_details': Asking about features, benefits, eligibility of a SPECIFIC plan\n"
116
- "- 'list_plans': Wants to know WHICH plans are available\n"
117
- "- 'compare_plans': Wants to COMPARE 2+ plans side-by-side\n"
118
- "- 'recommendation': Seeks personalized advice based on their profile\n"
119
- "- 'general_query': General insurance terminology or concepts\n\n"
120
- "Return ONLY the category name."
121
- )
122
-
123
- response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=query)])
124
- intent = getattr(response, 'content', str(response)).lower().strip()
125
-
126
- valid_intents = ['list_plans', 'plan_details', 'compare_plans', 'recommendation', 'general_query']
127
- if intent not in valid_intents:
128
- intent = "plan_details" # Default fallback
 
129
 
130
- return {"intent": intent}
131
 
132
- # =========================================================================
133
- # NODE 3: Entity Extractor
134
- # =========================================================================
135
  def entity_extractor_node(self, state: AgentState) -> Dict[str, Any]:
136
  """
137
- Extracts structured entities from the query:
138
- - provider (insurer names)
139
- - insurance_type (term, ulip, savings, etc.)
140
- - plan_names (specific plan names mentioned)
141
- - user_profile (age, income, smoker, dependents, goal)
142
  """
143
- query = state["input"].lower()
144
-
145
- # Extract providers
146
- provider_map = {
147
- "edelweiss": "Edelweiss Life",
148
- "tata": "TATA AIA",
149
- "tata aia": "TATA AIA",
150
- "generali": "Generali Central",
151
- "central": "Generali Central",
152
- "pramerica": "PRAMERICA"
153
- }
154
- providers = []
155
- for keyword, name in provider_map.items():
156
- if keyword in query and name not in providers:
157
- providers.append(name)
158
-
159
- # Extract insurance types
160
- type_map = {
161
- "term": ["Term Insurance", "Term Plan"],
162
- "ulip": ["Unit Linked Insurance Plan", "ULIP Plan"],
163
- "wealth": ["Unit Linked Insurance Plan"],
164
- "savings": ["Savings Plan", "Guaranteed Return"],
165
- "retirement": ["Retirement and Pension"],
166
- "pension": ["Retirement and Pension"],
167
- "health": ["Health Insurance"],
168
- "group": ["Group Plan"]
169
- }
170
- insurance_types = []
171
- for keyword, types in type_map.items():
172
- if keyword in query:
173
- for t in types:
174
- if t not in insurance_types:
175
- insurance_types.append(t)
176
-
177
- # Extract specific plan names using LLM
178
- plan_names = self._extract_plan_names_from_query(state["input"])
179
-
180
- # Extract user profile for recommendation intent
181
- user_profile = {}
182
- if state.get("intent") == "recommendation":
183
- user_profile = self._extract_user_profile(state["input"])
184
-
185
- entities: ExtractedEntities = {
186
- "provider": list(set(providers)) if providers else [],
187
- "insurance_type": list(set(insurance_types)) if insurance_types else [],
188
- "plan_names": list(set(plan_names)) if plan_names else [],
189
- "user_profile": user_profile or {}
190
- }
191
-
192
- # Build metadata filters from entities
193
- filters = {}
194
- if providers:
195
- filters["insurer"] = providers
196
- if insurance_types:
197
- filters["insurance_type"] = insurance_types
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
- return {
200
- "extracted_entities": entities,
201
- "metadata_filters": filters
202
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  def _extract_plan_names_from_query(self, query: str) -> List[str]:
205
  """Use LLM to extract specific plan names mentioned in query."""
@@ -234,46 +363,203 @@ class AgentNodes:
234
 
235
  return plan_names
236
 
237
- def _extract_user_profile(self, query: str) -> Dict[str, Any]:
238
- """Extract user profile information for recommendations."""
239
- llm = LLMFactory.get_llm("small")
240
-
241
- system_prompt = (
242
- "Extract user profile from the insurance query.\n"
243
- "Return in format:\n"
244
- "age: <number or null>\n"
245
- "smoker: <yes/no or null>\n"
246
- "cover_amount: <amount or null>\n"
247
- "goal: <protection/savings/retirement/wealth or null>\n"
248
- "dependents: <number or null>"
249
- )
250
-
251
- response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=query)])
252
- result = getattr(response, 'content', str(response))
253
-
254
  profile = {}
255
- for line in result.split('\n'):
256
- if ':' in line:
257
- key, value = line.split(':', 1)
258
- key = key.strip().lower()
259
- value = value.strip().lower()
260
- if value not in ['null', 'none', 'n/a', '']:
261
- if key == 'age':
262
- try:
263
- profile['age'] = int(re.search(r'\d+', value).group())
264
- except:
265
- pass
266
- elif key == 'smoker':
267
- profile['smoker'] = 'yes' in value
268
- elif key == 'cover_amount':
269
- profile['cover_amount'] = value
270
- elif key == 'goal':
271
- profile['goal'] = value
272
- elif key == 'dependents':
273
- try:
274
- profile['dependents'] = int(re.search(r'\d+', value).group())
275
- except:
276
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  return profile
279
 
@@ -532,7 +818,7 @@ class AgentNodes:
532
  aggregated[plan_id] = final_chunks
533
 
534
  # Refresh context strings based on aggregated chunks
535
- intent = state.get("intent", "plan_details")
536
  limit = 5 if intent == "compare_plans" else 3
537
  context = self._format_context(aggregated, limit=limit)
538
 
@@ -595,19 +881,43 @@ class AgentNodes:
595
  # =========================================================================
596
  def retrieval_agent(self, state: AgentState) -> Dict[str, Any]:
597
  """
598
- Provides detailed information about a specific plan.
599
- Grounds all responses in retrieved documents.
600
  """
601
- llm = LLMFactory.get_llm("medium")
 
 
602
  query = state["input"]
603
  context = state.get("context", [])
 
604
 
605
  if not context:
606
- # Fallback retrieval
607
  retriever = self._get_retriever()
608
  if retriever:
609
- docs = retriever.search(query, k=5)
610
- context = [f"[{d.metadata.get('product_name')}] {d.page_content}" for d in docs]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
 
612
  context_str = "\n\n".join(context)
613
 
@@ -615,10 +925,12 @@ class AgentNodes:
615
 
616
  {COMPLIANCE_RULES}
617
 
618
- Answer the user's question using ONLY the Policy Context provided to you.
619
- If information is not in the context, say "I don't have that specific information in our documents."
620
- DO NOT mention that you are looking at documents or context. Just provide the answer.
621
- Be warm and helpful while maintaining accuracy."""
 
 
622
 
623
  prompt = f"Policy Context:\n{context_str}\n\nUser Question: {query}"
624
 
@@ -628,53 +940,13 @@ Be warm and helpful while maintaining accuracy."""
628
  return {"answer": answer}
629
 
630
  # =========================================================================
631
- # NODE 9: Comparison Agent
632
- # =========================================================================
633
- def comparison_agent(self, state: AgentState) -> Dict[str, Any]:
634
- """
635
- Generates structured side-by-side comparisons.
636
- Normalizes attributes across plans.
637
- """
638
- llm = LLMFactory.get_llm("medium")
639
- query = state["input"]
640
- context = state.get("context", [])
641
- chunks_by_plan = state.get("retrieved_chunks", {})
642
-
643
- # Get plan names being compared
644
- plan_names = list(chunks_by_plan.keys()) if chunks_by_plan else []
645
-
646
- if not context and not plan_names:
647
- return {"answer": "I couldn't find the plans you want to compare. Please specify the plan names."}
648
-
649
- context_str = "\n\n".join(context)
650
- plans_info = f"\n\nPlans to compare: {', '.join(plan_names)}" if plan_names else ""
651
-
652
- system_prompt = f"""You are an Insurance Comparison Expert.
653
-
654
- {COMPLIANCE_RULES}
655
-
656
- COMPARISON FORMAT:
657
- - Return comparison as a Markdown TABLE
658
- - Columns: Features | Plan 1 | Plan 2 | ...
659
- - Rows: Plan Type, Eligibility, Sum Assured, Premium Terms, Key Benefits, Exclusions
660
- - If a detail is missing, put "Not specified"
661
- - Include ALL plans mentioned in the context
662
- - Be objective and factual"""
663
-
664
- prompt = f"Policy Context:\n{context_str}{plans_info}\n\nUser Question: {query}"
665
-
666
- response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
667
- answer = getattr(response, 'content', str(response))
668
-
669
- return {"answer": answer, "reasoning_output": f"Compared {len(plan_names)} plans"}
670
-
671
- # =========================================================================
672
- # NODE 10: Recommendation Agent (Advisory)
673
  # =========================================================================
674
  def advisory_agent(self, state: AgentState) -> Dict[str, Any]:
675
  """
676
  Provides personalized recommendations based on user profile.
677
  Grounds all advice in retrieved documents.
 
678
  """
679
  llm = LLMFactory.get_llm("large")
680
  query = state["input"]
@@ -682,35 +954,75 @@ COMPARISON FORMAT:
682
  entities = state.get("extracted_entities", {})
683
  user_profile = entities.get("user_profile", {})
684
 
685
- context_str = "\n\n".join(context) if context else "No specific plans found matching your criteria."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
 
687
  profile_info = ""
688
  if user_profile:
689
- profile_parts = []
690
- if user_profile.get("age"):
691
- profile_parts.append(f"Age: {user_profile['age']}")
692
- if user_profile.get("smoker") is not None:
693
- profile_parts.append(f"Smoker: {'Yes' if user_profile['smoker'] else 'No'}")
694
- if user_profile.get("cover_amount"):
695
- profile_parts.append(f"Cover needed: {user_profile['cover_amount']}")
696
- if user_profile.get("goal"):
697
- profile_parts.append(f"Goal: {user_profile['goal']}")
698
  if profile_parts:
699
  profile_info = f"\n\nUser Profile: {', '.join(profile_parts)}"
700
 
701
  system_prompt = f"""You are an Expert Insurance Advisor.
702
-
703
  {COMPLIANCE_RULES}
704
-
705
  RECOMMENDATION RULES:
706
- - Base recommendations ONLY on plans in the context
707
- - Consider user's age, smoking status, cover requirement if provided
708
- - Explain WHY a plan suits them based on document features
709
- - List 2-3 suitable options if available
710
- - Be clear about eligibility criteria
711
- - DO NOT reference the "context" or "documents" in your answer. Provide the advice directly."""
 
712
 
713
- prompt = f"Policy Context:\n{context_str}{profile_info}\n\nUser Question: {query}"
714
 
715
  response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
716
  answer = getattr(response, 'content', str(response))
@@ -722,23 +1034,39 @@ RECOMMENDATION RULES:
722
  # =========================================================================
723
  def faq_agent(self, state: AgentState) -> Dict[str, Any]:
724
  """
725
- Handles general insurance questions.
726
- Still attempts to ground in documents when possible.
727
  """
728
- llm = LLMFactory.get_llm("small")
729
  query = state["input"]
730
  context = state.get("context", [])
731
 
 
 
 
 
 
 
 
 
 
732
  context_str = "\n\n".join(context) if context else ""
733
 
734
  system_prompt = f"""You are an Insurance Helpdesk Assistant.
735
 
736
  {COMPLIANCE_RULES}
 
 
 
 
 
 
737
 
738
- For general insurance terminology questions:
739
- - Provide accurate, helpful explanations
740
- - If context is available, use it to give specific examples
741
- - Keep explanations simple and jargon-free"""
 
 
742
 
743
  prompt = f"Context (if relevant):\n{context_str}\n\nUser Question: {query}" if context_str else f"User Question: {query}"
744
 
@@ -768,6 +1096,85 @@ For general insurance terminology questions:
768
 
769
  return {"answer": answer}
770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  # =========================================================================
772
  # HELPER METHODS
773
  # =========================================================================
 
1
  import re
2
+ import time
3
+ import json
4
  from typing import Dict, List, Any, Optional
5
  from collections import defaultdict
6
  from agents.states import AgentState, ExtractedEntities
 
17
  # Prompting rules for all agents
18
  COMPLIANCE_RULES = """
19
  CRITICAL RULES:
20
+ - ❌ OUT-OF-BOUNDS REFUSAL: If the user asks about topics NOT related to insurance (e.g., booking flights, recipes, general news), you MUST politely refuse and state that you can only assist with insurance-related queries.
21
+ - ❌ NO hallucinations - if a plan name is not in the provided context, state clearly that you do not have information about that specific plan.
22
+ - ❌ NO assumptions - if numerical data or policy details are missing from the context, do NOT invent them. Say "Information not available."
23
+ - NO meta-commentary - start answering the question directly.
24
+ - ✅ PROPER REDIRECTION: After refusing an out-of-bounds query, invite the user to ask about insurance products, available plans, or policy definitions.
25
+ - ✅ GROUNDING: Only use facts from the provided context. CIS overrides brochure for exclusions/charges.
 
26
  """
27
 
28
 
 
49
  if retriever:
50
  retriever.reload()
51
 
52
+ def _log_debug(self, msg: str):
53
+ """Internal debug logger."""
54
+ print(f"[DEBUG] {msg}")
55
+
56
  # =========================================================================
57
  # NODE 1: Query Rewriter
58
  # =========================================================================
59
  def query_rewriter_node(self, state: AgentState) -> Dict[str, Any]:
60
  """
61
+ Rewrites conversational queries into self-contained, RAG-friendly queries.
62
+ Uses conversation history to resolve pronouns and implicit context.
63
  """
64
+ llm = LLMFactory.get_llm("low")
65
  query = state["input"]
66
  history = state.get("chat_history", [])
67
 
 
69
  return {"input": query}
70
 
71
  system_prompt = (
72
+ "You are a professional query rewriter for an insurance consultation system. "
73
+ "Rewrite the latest user input to be a standalone search/extraction query.\n\n"
74
  "RULES:\n"
75
+ "1. If the user provides a missing profile detail (e.g., 'pt 20'), combine it with previous profile data into a recommendation request: "
76
+ "'I want an insurance calculation for [age/gender] with Policy Term 20 years'.\n"
77
+ "2. Resolve all pronouns (it, they) and vague terms (the plan, previous one).\n"
78
+ "3. IMPORTANT: For general questions (e.g., 'What is PPT?') or broad listings (e.g., 'Show all plans'), do NOT inject the user's age/gender if it wasn't requested. Keep the search query clean.\n"
79
+ "4. Only preserve profile details (age, budget) if the user's latest query is a follow-up about a specific calculation or plan recommendation.\n"
80
+ "5. Return ONLY the rewritten query text."
 
81
  )
82
 
83
  history_str = "\n".join([f"- {h}" for h in history[-5:]]) # Last 5 turns
 
103
  llm = LLMFactory.get_llm("small")
104
  query = state["input"].lower()
105
 
106
+ # 1. Plan Details (specific plan mentioned)
107
+ # Check specific plan indicators
108
+ specific_plan_indicators = ["star", "guaranteed income", "bharat savings", "premier", "smart value",
109
+ "raksha", "saral jeevan", "edelweiss", "tata", "generali", "pramerica",
110
+ "canara", "indusind", "max life", "hdfc", "icici"]
111
+
112
+ has_plan_name = any(plan in query for plan in specific_plan_indicators)
113
+
114
+ if has_plan_name and ("benefit" in query or "feature" in query or "detail" in query or "eligibility" in query):
115
+ return {"intent": "plan_details", "query_complexity": "low"}
116
+
117
+ # 2. Comparison (compare, difference, vs)
118
+ compare_keywords = ["compare", "difference", "better", "vs", "versus", "or"]
119
+ if any(kw in query for kw in compare_keywords) and has_plan_name:
120
+ return {"intent": "compare_plans", "query_complexity": "high"}
121
+
122
+ # 3. Listing queries - CHECK BEFORE RECOMMENDATION (to avoid "term" matching)
123
+ listing_keywords = ["list", "show me", "available", "which plans", "what plans",
124
+ "types of", "providers", "insurers", "all plans"]
125
+ if any(kw in query for kw in listing_keywords):
126
+ return {"intent": "list_plans", "query_complexity": "low"}
127
+
128
+ # 4. General FAQ queries - CHECK BEFORE RECOMMENDATION
129
+ # These include "what is", "what does", "explain", "define"
130
+ faq_keywords = ["what is", "what does", "explain", "define", "meaning of", "tell me about insurance",
131
+ "what are the types", "difference between", "how does insurance"]
132
+ if any(kw in query for kw in faq_keywords):
133
+ return {"intent": "general_query", "query_complexity": "low"}
134
+
135
+ # 5. Recommendation/Calculation queries
136
+ # IMPORTANT: Only specific recommendation indicators, avoiding generic words like "term", "mode"
137
+ recommendation_keywords = ["suggest", "recommend", "best for", "should i", "suitable for",
138
+ "calculate", "how much will i get", "what will i get",
139
+ "i am", "i'm", "my age", "my budget", "my premium",
140
+ "years old", "year old"]
141
+
142
+ # Also check for profile indicators (age, gender) combined with numbers/plan mention
143
+ has_profile = any(kw in query for kw in ["male", "female", "age =", "age=", "premium =", "premium=",
144
+ "pt =", "pt=", "ppt =", "ppt="])
145
+ has_numbers_with_context = any(kw in query for kw in recommendation_keywords) or has_profile
146
+
147
+ if has_numbers_with_context:
148
+ return {"intent": "recommendation", "query_complexity": "high"}
149
+
150
+ # 6. Fallback for explicit plan names if not caught by others
151
+ if has_plan_name:
152
+ return {"intent": "plan_details", "query_complexity": "low"}
153
 
154
+ # 7. Follow-up detection
155
+ if len(state.get("chat_history", [])) > 0 and ("details" in query or "more" in query):
156
+ return {"intent": "plan_details", "query_complexity": "low"}
157
+
158
+
159
+ # Default fallback
160
+ return {"intent": "general_query", "query_complexity": "low"}
161
 
162
  # LLM-based classification for ambiguous cases
163
+ # This section is removed as per the instructions.
164
+ # system_prompt = (
165
+ # "Classify the user's insurance query into ONE of:\n"
166
+ # "- 'plan_details': Asking about features, benefits, eligibility of a SPECIFIC plan (should retrieve from documents)\n"
167
+ # "- 'list_plans': Wants to know WHICH plans are available from an insurer or category\n"
168
+ # "- 'recommendation': Seeks personalized benefit calculations or plan suggestions based on their profile (age, gender, premium)\n"
169
+ # "- 'general_query': General insurance terminology, concepts, or FAQs (not specific plans)\n\n"
170
+ # "IMPORTANT: 'What are the benefits of [Plan Name]' is 'plan_details', NOT 'recommendation'\n"
171
+ # "Return ONLY the category name."
172
+ # )
173
+
174
+ # response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=query)])
175
+ # intent = getattr(response, 'content', str(response)).lower().strip()
176
+
177
+ # valid_intents = ['list_plans', 'plan_details', 'recommendation', 'general_query']
178
+ # if intent not in valid_intents:
179
+ # intent = "plan_details" # Default fallback
180
 
181
+ # return {"intent": intent}
182
 
 
 
 
183
  def entity_extractor_node(self, state: AgentState) -> Dict[str, Any]:
184
  """
185
+ Extracts structured entities from the query.
 
 
 
 
186
  """
187
+
188
+
189
+ # DEBUG: Write to file to ensure we see it
190
+ # try:
191
+ # with open("extraction_debug.log", "a") as f:
192
+ # f.write(f"\n\n[TIME] Execution at {time.time()}\n")
193
+ # f.write(f"[INPUT] {state.get('input', 'NO INPUT')}\n")
194
+ # f.write(f"[INTENT] {state.get('intent', 'NOT SET')}\n")
195
+ # except: pass
196
+
197
+ # DEBUG: Write to file to ensure we see it
198
+ try:
199
+ with open("extraction_debug.log", "a") as f:
200
+ f.write(f"\n\n[TIME] Execution at {time.time()}\n")
201
+ f.write(f"[INPUT] {state.get('input', 'NO INPUT')}\n")
202
+ f.write(f"[INTENT] {state.get('intent', 'NOT SET')}\n")
203
+ except: pass
204
+
205
+ try:
206
+ print(f"[ENTITY DEBUG] ===== STARTING ENTITY EXTRACTION =====")
207
+ # FORCE extraction for debugging if needed, but rely on logic
208
+
209
+ try:
210
+ with open("extraction_debug.log", "a") as f:
211
+ f.write(f"[STATUS] Starting extraction logic...\n")
212
+ except: pass
213
+
214
+ query = state["input"].lower()
215
+
216
+ # Extract providers
217
+ provider_map = {
218
+ "edelweiss": "Edelweiss Life",
219
+ "tata": "TATA AIA",
220
+ "tata aia": "TATA AIA",
221
+ "generali": "Generali Central",
222
+ "central": "Generali Central",
223
+ "pramerica": "PRAMERICA"
224
+ }
225
+ providers = []
226
+ for keyword, name in provider_map.items():
227
+ if keyword in query and name not in providers:
228
+ providers.append(name)
229
+
230
+ # Extract insurance types
231
+ type_map = {
232
+ "term": ["Term Insurance", "Term Plan"],
233
+ "ulip": ["Unit Linked Insurance Plan", "ULIP Plan"],
234
+ "wealth": ["Unit Linked Insurance Plan"],
235
+ "savings": ["Savings Plan", "Guaranteed Return"],
236
+ "retirement": ["Retirement and Pension"],
237
+ "pension": ["Retirement and Pension"],
238
+ "health": ["Health Insurance"],
239
+ "group": ["Group Plan"]
240
+ }
241
+ insurance_types = []
242
+ for keyword, types in type_map.items():
243
+ if keyword in query:
244
+ for t in types:
245
+ if t not in insurance_types:
246
+ insurance_types.append(t)
247
+
248
+ # Extract specific plan names using LLM
249
+ plan_names = self._extract_plan_names_from_query(state["input"])
250
+
251
+ # Extract user profile (Merge with existing data in state AND chat history)
252
+ existing_profile = state.get("extracted_entities", {}).get("user_profile", {})
253
+ history = state.get("chat_history", [])
254
+ new_profile = {}
255
+
256
+ # Always attempt extraction if it's a recommendation or if profile indicators exist
257
+ profile_indicators = ["old", "male", "female", "year", "lakh", "rs", "budget", "premium", "invest", "benefit", "pt ", "ppt ", "mode", "age"]
258
+ should_extract = any(ind in query for ind in profile_indicators) or state.get("intent") == "recommendation"
259
+
260
+ print(f"[EXTRACTION DEBUG] Should extract: {should_extract}, Intent: {state.get('intent')}")
261
+
262
+ try:
263
+ with open("extraction_debug.log", "a") as f:
264
+ f.write(f"[STATUS] Should extract: {should_extract}\n")
265
+ except: pass
266
+
267
+ if should_extract:
268
+ new_profile = self._extract_user_profile(state["input"], history=history)
269
+ print(f"[EXTRACTION DEBUG] Extracted profile: {new_profile}")
270
+ try:
271
+ with open("extraction_debug.log", "a") as f:
272
+ f.write(f"[STATUS] Extracted profile: {new_profile}\n")
273
+ except: pass
274
+
275
+ # Merge: new data overwrites old, but old data is kept if not in new
276
+ # IMPORTANT: Ensure keys with 'null' or empty values in new_profile do not overwrite valid existing data
277
+ user_profile = existing_profile.copy()
278
+ for k, v in new_profile.items():
279
+ if v is not None and v != "" and v != "null":
280
+ user_profile[k] = v
281
+
282
+ # Explicitly handle keys that often get dropped or overwritten incorrectly
283
+ if "policy_term" in new_profile and str(new_profile["policy_term"]).strip():
284
+ user_profile["policy_term"] = new_profile["policy_term"]
285
+
286
+ entities: ExtractedEntities = {
287
+ "provider": list(set(providers)) if providers else [],
288
+ "insurance_type": list(set(insurance_types)) if insurance_types else [],
289
+ "plan_names": list(set(plan_names)) if plan_names else [],
290
+ "user_profile": user_profile
291
+ }
292
+
293
+ # Build metadata filters from entities
294
+ filters = {}
295
+ if providers:
296
+ filters["insurer"] = providers
297
+ if insurance_types:
298
+ filters["insurance_type"] = insurance_types
299
 
300
+ try:
301
+ with open("extraction_debug.log", "a") as f:
302
+ f.write(f"[RESULT] Entities: {entities}\n")
303
+ f.write(f"[RESULT] Profile: {user_profile}\n")
304
+ except: pass
305
+
306
+ print(f"[ENTITY DEBUG] Final entities: {entities}")
307
+ result = {
308
+ "extracted_entities": entities,
309
+ "metadata_filters": filters
310
+ }
311
+ return result
312
+ except Exception as e:
313
+ try:
314
+ with open("extraction_debug.log", "a") as f:
315
+ f.write(f"[ERROR] {str(e)}\n")
316
+ import traceback
317
+ f.write(traceback.format_exc())
318
+ except: pass
319
+
320
+ print(f"[ENTITY DEBUG] Error: {e}")
321
+ import traceback
322
+ traceback.print_exc()
323
+ return {
324
+ "extracted_entities": {
325
+ "provider": [],
326
+ "insurance_type": [],
327
+ "plan_names": [],
328
+ "user_profile": {}
329
+ },
330
+ "metadata_filters": {}
331
+ }
332
 
333
  def _extract_plan_names_from_query(self, query: str) -> List[str]:
334
  """Use LLM to extract specific plan names mentioned in query."""
 
363
 
364
  return plan_names
365
 
366
+ def _extract_user_profile(self, query: str, history: List[str] = None) -> Dict[str, Any]:
367
+ """Extract user profile information for recommendations, using history if available."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  profile = {}
369
+
370
+ # ========================================================================
371
+ # PRIORITY 1: REGEX EXTRACTION (Most Reliable)
372
+ # ========================================================================
373
+ # These patterns work with formats like:
374
+ # "age=30", "age = 30", "age is 30", "I am 30 years old"
375
+
376
+ query_lower = query.lower()
377
+
378
+ # Age extraction
379
+ age_patterns = [
380
+ r'\bage\s*[=:]\s*(\d+)', # age=30, age = 30, age: 30
381
+ r'\bage\s+is\s+(\d+)', # age is 30
382
+ r'i\s+am\s+(\d+)\s+years?\s+old', # I am 30 years old
383
+ r'(\d+)\s+years?\s+old', # 30 years old
384
+ r'\bage\s+(\d+)\b', # age 30
385
+ ]
386
+ for pattern in age_patterns:
387
+ match = re.search(pattern, query_lower)
388
+ if match and not profile.get('age'):
389
+ try:
390
+ age = int(match.group(1))
391
+ if 18 <= age <= 100: # Expanded age range
392
+ profile['age'] = age
393
+ break
394
+ except: pass
395
+
396
+ # Gender extraction
397
+ if 'gender' not in profile:
398
+ if re.search(r'gender\s*[=:]\s*(male|m\b)', query_lower) or \
399
+ re.search(r'gender\s+is\s+(male|m\b)', query_lower) or \
400
+ re.search(r'\bmale\b', query_lower):
401
+ profile['gender'] = 'male'
402
+ elif re.search(r'gender\s*[=:]\s*(female|f\b)', query_lower) or \
403
+ re.search(r'gender\s+is\s+(female|f\b)', query_lower) or \
404
+ re.search(r'\bfemale\b', query_lower):
405
+ profile['gender'] = 'female'
406
+
407
+ # Premium extraction
408
+ premium_patterns = [
409
+ r'premium\s*[=:]\s*([\d,\.]+)', # premium=100000.50
410
+ r'premium\s+(?:amount\s+)?(?:is\s+)?(?:of\s+)?([\d,\.]+)',
411
+ r'invest(?:ing)?\s+([\d,\.]+)\s*(?:lakh|lac|cr|crore|k|thousand)?',
412
+ r'([\d,\.]+)\s*(?:lakh|lac|cr|crore|k|thousand)\s+(?:per year|annual|premium)',
413
+ r'budget\s*[=:]\s*([\d,\.]+)',
414
+ ]
415
+
416
+ def parse_indian_amount(text):
417
+ """Parse amounts like '1 lakh', '5.5 cr', '100,000'"""
418
+ if not text: return None
419
+ text = text.lower().replace(',', '').strip()
420
+
421
+ multiplier = 1
422
+ if 'lakh' in text or 'lac' in text: multiplier = 100000
423
+ elif 'cr' in text or 'crore' in text: multiplier = 10000000
424
+ elif 'k' in text: multiplier = 1000
425
+
426
+ # Find the number in the segment
427
+ nums = re.findall(r'(\d+(?:\.\d+)?)', text)
428
+ if nums:
429
+ try:
430
+ return int(float(nums[0]) * multiplier)
431
+ except: return None
432
+ return None
433
+
434
+ for pattern in premium_patterns:
435
+ match = re.search(pattern, query_lower)
436
+ if match and not profile.get('premium_amount'):
437
+ # Pass the matched segment to parser
438
+ amount = parse_indian_amount(match.group(0))
439
+ if amount and 500 <= amount <= 50000000:
440
+ profile['premium_amount'] = str(amount)
441
+ break
442
+
443
+ # Policy Term (PT)
444
+ pt_patterns = [
445
+ r'\bpt\s*[=:]\s*(\d+)',
446
+ r'\bpt\s+(\d+)\b',
447
+ r'policy\s+term\s*[=:]\s*(\d+)',
448
+ r'policy\s+term\s+(?:of\s+)?(\d+)',
449
+ r'term\s*[=:]\s*(\d+)\b',
450
+ ]
451
+ for pattern in pt_patterns:
452
+ match = re.search(pattern, query_lower)
453
+ if match and not profile.get('policy_term'):
454
+ pt = match.group(1)
455
+ profile['policy_term'] = pt + " years"
456
+ break
457
+
458
+ # Payment Term (PPT)
459
+ ppt_patterns = [
460
+ r'\bppt\s*[=:]\s*(\d+)',
461
+ r'\bppt\s+(\d+)\b',
462
+ r'(?:premium\s+)?payment\s+term\s*[=:]\s*(\d+)',
463
+ r'paying\s+term\s*[=:]\s*(\d+)',
464
+ r'pay\s+term\s*[=:]\s*(\d+)',
465
+ ]
466
+ for pattern in ppt_patterns:
467
+ match = re.search(pattern, query_lower)
468
+ if match and not profile.get('payment_term'):
469
+ ppt = match.group(1)
470
+ profile['payment_term'] = ppt + " years"
471
+ break
472
+
473
+ # Payment Mode
474
+ mode_patterns = [
475
+ r'mode\s*[=:]\s*(monthly|annual|yearly|quarterly|half\s*yearly)',
476
+ r'(?:premium\s+)?(?:payment\s+)?mode\s+(?:is\s+)?(monthly|annual|yearly|quarterly)',
477
+ r'\b(monthly|annual|yearly|quarterly)\b',
478
+ ]
479
+ for pattern in mode_patterns:
480
+ match = re.search(pattern, query_lower)
481
+ if match and not profile.get('payment_mode'):
482
+ mode = match.group(1).strip()
483
+ if mode == 'yearly': mode = 'annual'
484
+ profile['payment_mode'] = mode
485
+ break
486
+
487
+ # ========================================================================
488
+ # PRIORITY 2: LLM EXTRACTION (Fallback for complex cases)
489
+ # ========================================================================
490
+ # Use LLM if critical fields are missing OR if it's a recommendation intent
491
+ critical_fields = ['age', 'gender', 'premium_amount']
492
+ missing_critical = any(field not in profile for field in critical_fields)
493
+
494
+ if missing_critical:
495
+ llm = LLMFactory.get_llm("medium")
496
+
497
+ history_context = ""
498
+ if history:
499
+ history_str = "\n".join([f"- {h}" for h in history[-5:]])
500
+ history_context = f"\n\nCONVERSATION HISTORY:\n{history_str}"
501
+
502
+ system_prompt = (
503
+ "Extract user profile details for insurance recommendations.\n"
504
+ "JSON Output fields (use null if unknown):\n"
505
+ "- age (number)\n"
506
+ "- gender (male/female)\n"
507
+ "- premium_amount (number)\n"
508
+ "- policy_term (number of years)\n"
509
+ "- payment_term (number of years)\n"
510
+ "- payment_mode (Monthly/Annual/Quarterly/Half-Yearly)\n\n"
511
+ "MAPPING RULES:\n"
512
+ "- PT = policy_term\n"
513
+ "- PPT = payment_term\n"
514
+ "- mode = payment_mode\n"
515
+ "- Extract from latest query AND history. Latest query wins conflicts.\n"
516
+ "Return ONLY a raw JSON object."
517
+ )
518
+
519
+ prompt = f"LATEST QUERY: {query}{history_context}"
520
+
521
+ try:
522
+ response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
523
+ result_text = getattr(response, 'content', str(response))
524
+
525
+ # Try to parse JSON
526
+ try:
527
+ # Clean the response in case LLM added markdown blocks
528
+ clean_json = re.search(r'\{.*\}', result_text, re.DOTALL)
529
+ if clean_json:
530
+ llm_profile = json.loads(clean_json.group(0))
531
+
532
+ # Merge LLM results into profile if regex didn't find them
533
+ if 'age' not in profile and llm_profile.get('age'):
534
+ profile['age'] = int(llm_profile['age'])
535
+ if 'gender' not in profile and llm_profile.get('gender'):
536
+ profile['gender'] = llm_profile['gender'].lower()
537
+ if 'premium_amount' not in profile and llm_profile.get('premium_amount'):
538
+ profile['premium_amount'] = str(llm_profile['premium_amount'])
539
+ if 'policy_term' not in profile and llm_profile.get('policy_term'):
540
+ profile['policy_term'] = str(llm_profile['policy_term']) + " years"
541
+ if 'payment_term' not in profile and llm_profile.get('payment_term'):
542
+ profile['payment_term'] = str(llm_profile['payment_term']) + " years"
543
+ if 'payment_mode' not in profile and llm_profile.get('payment_mode'):
544
+ profile['payment_mode'] = llm_profile['payment_mode'].title().replace('Annual', 'annual').lower()
545
+ except:
546
+ # Fallback to line-based parsing if JSON fails
547
+ for line in result_text.split('\n'):
548
+ if ':' in line:
549
+ parts = line.split(':', 1)
550
+ k = parts[0].strip().lower()
551
+ v = parts[1].strip().lower().replace('"', '').replace("'", "")
552
+ if v and v != 'null':
553
+ if 'age' in k and 'age' not in profile:
554
+ nums = re.findall(r'\d+', v)
555
+ if nums: profile['age'] = int(nums[0])
556
+ elif 'gender' in k and 'gender' not in profile: profile['gender'] = v
557
+ elif 'premium' in k and 'premium_amount' not in profile: profile['premium_amount'] = v
558
+ elif 'policy_term' in k or 'pt' == k and 'policy_term' not in profile: profile['policy_term'] = v + " years"
559
+ elif 'payment_term' in k or 'ppt' == k and 'payment_term' not in profile: profile['payment_term'] = v + " years"
560
+
561
+ except Exception as e:
562
+ print(f"[WARNING] LLM extraction failed: {e}")
563
 
564
  return profile
565
 
 
818
  aggregated[plan_id] = final_chunks
819
 
820
  # Refresh context strings based on aggregated chunks
821
+ intent = state.get("intent", "compare_plans")
822
  limit = 5 if intent == "compare_plans" else 3
823
  context = self._format_context(aggregated, limit=limit)
824
 
 
881
  # =========================================================================
882
  def retrieval_agent(self, state: AgentState) -> Dict[str, Any]:
883
  """
884
+ Agent for answering plan-specific or comparison questions using retrieved context.
 
885
  """
886
+ complexity = state.get("query_complexity", "low")
887
+ llm = LLMFactory.get_llm(complexity)
888
+
889
  query = state["input"]
890
  context = state.get("context", [])
891
+ entities = state.get("extracted_entities", {})
892
 
893
  if not context:
894
+ # Fallback retrieval with better filtering
895
  retriever = self._get_retriever()
896
  if retriever:
897
+ # Try to extract plan names from query for better filtering
898
+ plan_names = entities.get("plan_names", [])
899
+ filters = state.get("metadata_filters", {})
900
+
901
+ # If we have plan names, use them for filtering
902
+ if plan_names:
903
+ filters["product_name"] = plan_names
904
+
905
+ # Retrieve with filters
906
+ if filters:
907
+ docs = retriever.search(query, filters=filters, k=10)
908
+ else:
909
+ docs = retriever.search(query, k=10)
910
+
911
+ # Format context with plan names
912
+ context = [f"[{d.metadata.get('product_name', 'Unknown')}] {d.page_content}" for d in docs]
913
+
914
+ # If still no context, provide a helpful message
915
+ if not context:
916
+ return {
917
+ "answer": "I couldn't find specific information about that plan in my knowledge base. "
918
+ "Could you please provide more details or try asking about a different plan? "
919
+ "You can also ask me to list available plans."
920
+ }
921
 
922
  context_str = "\n\n".join(context)
923
 
 
925
 
926
  {COMPLIANCE_RULES}
927
 
928
+ STRICT GROUNDING RULES:
929
+ - Answer the user's question using the Policy Context provided to you.
930
+ - If the requested plan is NOT mentioned in the Policy Context, say: "I'm sorry, but I couldn't find information regarding [Plan Name] in our current policy database. Please verify the name or ask me to list available plans."
931
+ - If the question is about non-insurance topics, refuse using the OUT-OF-BOUNDS REFUSAL rule.
932
+ - Structure your response with clear headings and bullet points.
933
+ """
934
 
935
  prompt = f"Policy Context:\n{context_str}\n\nUser Question: {query}"
936
 
 
940
  return {"answer": answer}
941
 
942
  # =========================================================================
943
+ # NODE 9: Recommendation Agent (Advisory)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944
  # =========================================================================
945
  def advisory_agent(self, state: AgentState) -> Dict[str, Any]:
946
  """
947
  Provides personalized recommendations based on user profile.
948
  Grounds all advice in retrieved documents.
949
+ If critical info (age/gender/premium) is missing for specific plans, asks for it.
950
  """
951
  llm = LLMFactory.get_llm("large")
952
  query = state["input"]
 
954
  entities = state.get("extracted_entities", {})
955
  user_profile = entities.get("user_profile", {})
956
 
957
+ # Check for Insurer and Guaranteed/Savings context
958
+ providers = entities.get("provider", [])
959
+ is_guaranteed = any(t in ["Savings Plan", "Guaranteed Return"] for t in entities.get("insurance_type", []))
960
+ is_rec = state.get("intent") == "recommendation"
961
+
962
+ # Only block and ask for info IF the intent is explicitly a recommendation/calculation
963
+ if is_rec:
964
+ print(f"[ADVISORY DEBUG] Full entities: {entities}")
965
+ print(f"[ADVISORY DEBUG] User profile: {user_profile}")
966
+ missing = []
967
+ if not user_profile.get("age"): missing.append("age")
968
+ if not user_profile.get("gender"): missing.append("gender")
969
+ if not user_profile.get("premium_amount"): missing.append("annual premium amount")
970
+ if not user_profile.get("policy_term"): missing.append("policy term (PT)")
971
+ if not user_profile.get("payment_term"): missing.append("premium payment term (PPT)")
972
+ if not user_profile.get("payment_mode"): missing.append("premium payment mode")
973
+
974
+ print(f"[ADVISORY DEBUG] Missing fields check:")
975
+ for field in ["age", "gender", "premium_amount", "policy_term", "payment_term", "payment_mode"]:
976
+ value = user_profile.get(field)
977
+ print(f" - {field}: {value} (truthy: {bool(value)})")
978
+ print(f"[ADVISORY DEBUG] Final missing list: {missing}")
979
+
980
+ # Block and ask for info for professional consultation
981
+ if missing:
982
+ missing_str = " and ".join([", ".join(missing[:-1]), missing[-1]] if len(missing) > 1 else missing)
983
+ return {"answer": f"To provide you with specific benefit figures and a professional recommendation, I need a few more details: **{missing_str}**. Could you please provide these?"}
984
+
985
+ # If we have everything, get the numbers
986
+ calc_result = self.plan_calculator_tool(state)
987
+ state["reasoning_output"] = calc_result.get("reasoning_output", "")
988
+ else:
989
+ # If not a recommendation intent, check if we have enough profile data to show numbers anyway
990
+ # (e.g., if user asks about a specific plan but we already know their profile)
991
+ if user_profile.get("age") and user_profile.get("premium_amount") and user_profile.get("policy_term"):
992
+ calc_result = self.plan_calculator_tool(state)
993
+ state["reasoning_output"] = calc_result.get("reasoning_output", "")
994
+ calculation_info = ""
995
+ raw_calc = state.get('reasoning_output', '')
996
+ if raw_calc:
997
+ try:
998
+ calc_json = json.loads(raw_calc)
999
+ table = calc_json.get("summary_table", "")
1000
+ if table:
1001
+ calculation_info = f"\n\n### MANDATORY GROUNDING: NUMERICAL DATA TABLE\n{table}\n(PRIORITIZE THESE PLANS AND NUMBERS OVER ANY TEXT BELOW)\n"
1002
+ except: pass
1003
+
1004
+ context_str = "\n\n".join(context) if context else "No plans found."
1005
 
1006
  profile_info = ""
1007
  if user_profile:
1008
+ profile_parts = [f"{k}: {v}" for k, v in user_profile.items() if v]
 
 
 
 
 
 
 
 
1009
  if profile_parts:
1010
  profile_info = f"\n\nUser Profile: {', '.join(profile_parts)}"
1011
 
1012
  system_prompt = f"""You are an Expert Insurance Advisor.
1013
+
1014
  {COMPLIANCE_RULES}
1015
+
1016
  RECOMMENDATION RULES:
1017
+ - 🚨 PRIORITY 1: Recommending plans from the 'MANDATORY GROUNDING' table above. Use those EXACT numbers.
1018
+ - 🚨 PRIORITY 2: Only provide benefit calculations for the plans in the GROUNDING table.
1019
+ - If the user asks about plans not in the table for calculation, say you don't have calculation data for them yet.
1020
+ - If the query is out-of-bounds, use the OUT-OF-BOUNDS REFUSAL rule.
1021
+ - NEVER say "Not Available" if numbers exist in the grounding table.
1022
+ - Be consultative and grounded.
1023
+ """
1024
 
1025
+ prompt = f"{calculation_info}\n\nPolicy Context:\n{context_str}{profile_info}\n\nUser Question: {query}"
1026
 
1027
  response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
1028
  answer = getattr(response, 'content', str(response))
 
1034
  # =========================================================================
1035
  def faq_agent(self, state: AgentState) -> Dict[str, Any]:
1036
  """
1037
+ Agent for general insurance questions (glossary, concepts).
 
1038
  """
1039
+ llm = LLMFactory.get_llm("low")
1040
  query = state["input"]
1041
  context = state.get("context", [])
1042
 
1043
+ # Try to retrieve context for general insurance terms if not already provided
1044
+ if not context:
1045
+ retriever = self._get_retriever()
1046
+ if retriever:
1047
+ # Use broader search for general queries
1048
+ docs = retriever.search(query, k=3) # Reduced from 5 to 3 for more focused context
1049
+ if docs:
1050
+ context = [d.page_content for d in docs]
1051
+
1052
  context_str = "\n\n".join(context) if context else ""
1053
 
1054
  system_prompt = f"""You are an Insurance Helpdesk Assistant.
1055
 
1056
  {COMPLIANCE_RULES}
1057
+
1058
+ INSTRUCTIONS:
1059
+ - For insurance terminology: Provide a clear, concise definition.
1060
+ - 🚨 STRICT RULE: If the user asks about ANYTHING non-insurance related (e.g., travel tickets, cooking, etc.), you MUST refuse and redirect to insurance topics.
1061
+ - 🚨 NO HALLUCINATION: If the term is not common insurance knowledge and not in context, say you don't know rather than guessing.
1062
+ - Keep the total response under 150 words.
1063
 
1064
+ Common Insurance Terms to use as reference:
1065
+ - **Policy Term (PT)**: The total duration for which the policy remains active.
1066
+ - **Premium Payment Term (PPT)**: The duration during which premiums must be paid.
1067
+ - **Maturity Benefit**: The lump sum amount paid when the policy matures.
1068
+ - **Sum Assured**: The guaranteed amount payable on death or maturity.
1069
+ """
1070
 
1071
  prompt = f"Context (if relevant):\n{context_str}\n\nUser Question: {query}" if context_str else f"User Question: {query}"
1072
 
 
1096
 
1097
  return {"answer": answer}
1098
 
1099
+ # =========================================================================
1100
+ # TOOL: Plan Calculator Tool
1101
+ # =========================================================================
1102
+ def plan_calculator_tool(self, state: AgentState) -> Dict[str, Any]:
1103
+ """
1104
+ Tool logic to calculate benefits using the API's dummy logic.
1105
+ Extremely robust extraction fallback for age, gender, and premium.
1106
+ """
1107
+ from api.plans import get_plan_benefits_tool, resolve_plan_id
1108
+ user_profile = state.get("extracted_entities", {}).get("user_profile", {})
1109
+ plan_names = state.get("extracted_entities", {}).get("plan_names", [])
1110
+ query = state["input"].lower()
1111
+
1112
+ # --- ROBUST FALLBACKS ---
1113
+ # 1. Age Fallback
1114
+ age = user_profile.get("age")
1115
+ if not age:
1116
+ age_match = re.search(r'\b(\d{2})\b\s*(?:year|yr|old|male|female)?', query)
1117
+ if age_match:
1118
+ age = int(age_match.group(1))
1119
+
1120
+ # 2. Gender Fallback
1121
+ gender = user_profile.get("gender")
1122
+ if not gender:
1123
+ if "male" in query and "female" not in query: gender = "male"
1124
+ elif "female" in query: gender = "female"
1125
+
1126
+ # 3. Premium Fallback
1127
+ premium = user_profile.get("premium_amount")
1128
+ clean_premium = 0.0
1129
+
1130
+ if not premium:
1131
+ # Look for any number followed by a potential unit
1132
+ prem_match = re.search(r'(\d+(?:\.\d+)?)\s*(?:rs\.?|inr|lakh|cr|k|thousand)?', query)
1133
+ if prem_match:
1134
+ val = float(prem_match.group(1))
1135
+ unit_search = query[prem_match.start():prem_match.end()+20] # look ahead
1136
+ if 'lakh' in unit_search: val *= 100000
1137
+ elif 'cr' in unit_search: val *= 10000000
1138
+ elif any(k in unit_search for k in ['k', 'thousand']): val *= 1000
1139
+ clean_premium = val
1140
+ else:
1141
+ try:
1142
+ if isinstance(premium, (int, float)):
1143
+ clean_premium = float(premium)
1144
+ else:
1145
+ nums = re.findall(r'\d+\.?\d*', str(premium))
1146
+ if nums:
1147
+ clean_premium = float(nums[0])
1148
+ if 'lakh' in str(premium).lower(): clean_premium *= 100000
1149
+ elif 'cr' in str(premium).lower(): clean_premium *= 10000000
1150
+ except:
1151
+ pass
1152
+
1153
+ if not (age and gender and clean_premium > 0):
1154
+ return {"reasoning_output": "Insufficient data (age, gender, or premium) to calculate benefits."}
1155
+
1156
+ # 4. Resolve Plan IDs
1157
+ pids = []
1158
+ for name in plan_names:
1159
+ pid = resolve_plan_id(name)
1160
+ if pid: pids.append(pid)
1161
+
1162
+ # If no specific plan found, calculate for ALL default plans
1163
+ target_plan_id = pids[0] if len(pids) == 1 else None
1164
+
1165
+ # 5. Execute Tool
1166
+ calculation_json = get_plan_benefits_tool(
1167
+ age=int(age),
1168
+ gender=str(gender),
1169
+ premium_amount=clean_premium,
1170
+ plan_id=target_plan_id,
1171
+ policy_term=user_profile.get("policy_term"),
1172
+ payment_term=user_profile.get("payment_term"),
1173
+ payment_mode=user_profile.get("payment_mode")
1174
+ )
1175
+
1176
+ return {"reasoning_output": calculation_json}
1177
+
1178
  # =========================================================================
1179
  # HELPER METHODS
1180
  # =========================================================================
agents/states.py CHANGED
@@ -5,11 +5,16 @@ import operator
5
  class UserProfile(TypedDict, total=False):
6
  """User profile for recommendation intent."""
7
  age: Optional[int]
 
8
  income: Optional[str]
9
  smoker: Optional[bool]
10
  dependents: Optional[int]
11
  goal: Optional[str] # "protection", "savings", "retirement", "wealth"
12
  cover_amount: Optional[str] # e.g., "1 Cr", "50 Lakh"
 
 
 
 
13
 
14
 
15
  class ExtractedEntities(TypedDict, total=False):
@@ -31,6 +36,7 @@ class AgentState(TypedDict):
31
 
32
  # Query Classification
33
  intent: str # 'list_plans', 'plan_details', 'compare_plans', 'recommendation', 'general_query'
 
34
 
35
  # Entity Extraction
36
  extracted_entities: ExtractedEntities
 
5
class UserProfile(TypedDict, total=False):
    """User profile for recommendation intent.

    All keys are optional (total=False); agents fill in whatever has been
    extracted from the conversation so far.
    """
    age: Optional[int]              # applicant age in years
    gender: Optional[str]  # "male", "female"
    income: Optional[str]           # free-form income description
    smoker: Optional[bool]          # smoker status, if stated
    dependents: Optional[int]       # number of dependents
    goal: Optional[str]  # "protection", "savings", "retirement", "wealth"
    cover_amount: Optional[str]  # e.g., "1 Cr", "50 Lakh"
    premium_amount: Optional[str]  # e.g., "1 Lakh", "50000"
    policy_term: Optional[str]  # PT
    payment_term: Optional[str]  # PPT
    payment_mode: Optional[str]  # Mode (Monthly, Annual, etc.)
 
19
 
20
  class ExtractedEntities(TypedDict, total=False):
 
36
 
37
  # Query Classification
38
  intent: str # 'list_plans', 'plan_details', 'compare_plans', 'recommendation', 'general_query'
39
+ query_complexity: str # 'low' | 'high'
40
 
41
  # Entity Extraction
42
  extracted_entities: ExtractedEntities
api/main.py CHANGED
@@ -4,8 +4,12 @@ from typing import List, Dict, Optional, Any
4
  from agents.graph import app as agent_app
5
  # from ingestion.pipeline import IngestionPipeline # Optional: Trigger via API
6
 
 
 
7
  app = FastAPI(title="Insurance Advisory AI Agent", version="1.0.0")
8
 
 
 
9
  class ChatRequest(BaseModel):
10
  message: str
11
  chat_history: Optional[List[str]] = []
 
4
  from agents.graph import app as agent_app
5
  # from ingestion.pipeline import IngestionPipeline # Optional: Trigger via API
6
 
7
+ from api.plans import router as plans_router
8
+
9
  app = FastAPI(title="Insurance Advisory AI Agent", version="1.0.0")
10
 
11
+ app.include_router(plans_router)
12
+
13
  class ChatRequest(BaseModel):
14
  message: str
15
  chat_history: Optional[List[str]] = []
api/monitoring.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Monitoring and health check endpoints.
3
+ """
4
+ from flask import Blueprint, jsonify
5
+ import os
6
+ from datetime import datetime
7
+ from utils.metrics import metrics
8
+ from utils.request_logger import request_logger
9
+ from utils.cache import cache_manager
10
+ from utils.circuit_breaker import circuit_breaker_manager
11
+ from config import config
12
+
13
+ monitoring_bp = Blueprint('monitoring', __name__, url_prefix='/api')
14
+
15
+
16
@monitoring_bp.route('/health', methods=['GET'])
def health_check():
    """
    Basic health check endpoint.

    Always returns HTTP 200 while the process is alive; reports the
    service version and environment alongside a timestamp.
    """
    payload = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": config.VERSION,
        "environment": config.ENVIRONMENT.value,
    }
    return jsonify(payload), 200
28
+
29
+
30
@monitoring_bp.route('/ready', methods=['GET'])
def readiness_check():
    """
    Readiness check - validates critical dependencies.
    Returns 200 if all dependencies are available, 503 otherwise.

    Checks performed:
      - vector store directory exists on disk
      - LLM API key is configured
      - no circuit breaker is currently open
    """
    checks = {}          # per-dependency status strings, returned to caller
    overall_ready = True  # flipped to False by any failing check

    # Check vector store
    try:
        vector_store_path = config.VECTOR_STORE_PATH
        if os.path.exists(vector_store_path):
            checks["vector_store"] = "ready"
        else:
            checks["vector_store"] = "not_found"
            overall_ready = False
    except Exception as e:
        # Config access itself may fail; report instead of crashing readiness.
        checks["vector_store"] = f"error: {str(e)}"
        overall_ready = False

    # Check LLM API key (presence only - does not validate the key)
    if config.GROQ_API_KEY:
        checks["llm_api"] = "configured"
    else:
        checks["llm_api"] = "missing_api_key"
        overall_ready = False

    # Check circuit breakers: any breaker in the "open" state means a
    # downstream dependency is currently failing.
    breaker_states = circuit_breaker_manager.get_all_states()
    open_breakers = [name for name, state in breaker_states.items() if state["state"] == "open"]

    if open_breakers:
        checks["circuit_breakers"] = f"open: {', '.join(open_breakers)}"
        overall_ready = False
    else:
        checks["circuit_breakers"] = "all_closed"

    # 503 tells orchestrators (e.g. k8s) to keep traffic away until ready.
    status_code = 200 if overall_ready else 503

    return jsonify({
        "ready": overall_ready,
        "checks": checks,
        "timestamp": datetime.now().isoformat()
    }), status_code
75
+
76
+
77
@monitoring_bp.route('/metrics', methods=['GET'])
def get_metrics():
    """
    Get application metrics in JSON format.

    Returns 403 when metrics collection is disabled via configuration.
    """
    if not config.ENABLE_METRICS:
        return jsonify({"error": "Metrics disabled"}), 403

    # Aggregate the three metric sources into a single payload.
    payload = {
        "application": metrics.get_metrics(),
        "cache": cache_manager.get_all_stats(),
        "circuit_breakers": circuit_breaker_manager.get_all_states(),
    }
    return jsonify(payload), 200
94
+
95
+
96
@monitoring_bp.route('/stats', methods=['GET'])
def get_stats():
    """
    Get human-readable statistics.

    Combines live application metrics with request-log aggregates
    (recent requests, 24h intent distribution and 24h error rate).
    """
    snapshot = metrics.get_metrics()
    req_stats = snapshot["requests"]

    # Condensed headline numbers.
    summary = {
        "total_requests": req_stats["total"],
        "active_requests": req_stats["active"],
        "error_rate": req_stats["error_rate"],
        "avg_latency_ms": req_stats["latency_ms"]["p50"],
        "uptime_hours": snapshot["uptime_seconds"] / 3600,
    }

    cache_performance = {
        "llm_cache_hit_rate": snapshot["llm"]["cache_hit_rate"],
        "app_cache_hit_rate": snapshot["cache"]["hit_rate"],
    }

    return jsonify({
        "summary": summary,
        "intent_distribution": request_logger.get_intent_distribution(hours=24),
        "recent_requests": request_logger.get_recent_requests(limit=10),
        "error_rate_24h": request_logger.get_error_rate(hours=24),
        "cache_performance": cache_performance,
    }), 200
124
+
125
+
126
@monitoring_bp.route('/logs/recent', methods=['GET'])
def get_recent_logs():
    """
    Get recent request logs.

    Query params:
        limit: max number of entries to return (default 50, capped at 500).
    """
    # BUG FIX: `request` was referenced but never imported (the module only
    # imports Blueprint and jsonify from flask), causing a NameError on every
    # call. Imported locally so this fix is self-contained.
    from flask import request

    # Guard the int() parse: a non-numeric ?limit= must not produce a 500.
    try:
        limit = int(request.args.get('limit', 50))
    except (TypeError, ValueError):
        limit = 50
    limit = max(1, min(limit, 500))  # cap at 500, floor at 1

    recent = request_logger.get_recent_requests(limit=limit)

    return jsonify({
        "count": len(recent),
        "requests": recent
    }), 200
api/plans.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Query
2
+ from pydantic import BaseModel
3
+ from typing import List, Dict, Optional, Any
4
+ import json
5
+ import re
6
+ from random import random
7
+
8
+ router = APIRouter(prefix="/plans", tags=["Plans"])
9
+
10
class PlanInfoRequest(BaseModel):
    """Request payload for calculating the benefits of one (or all) plans."""
    age: int                            # applicant age in years
    gender: str                         # "male" / "female"
    premium_amount: float               # premium amount in rupees
    goal: Optional[str] = "savings"     # investment goal hint
    plan_id: Optional[int] = None       # None => evaluate default plans
    policy_term: Optional[str] = None   # PT, e.g. "20 years"
    payment_term: Optional[str] = None  # PPT, e.g. "10 years"
    payment_mode: Optional[str] = None  # Monthly / Annual / Quarterly / ...
19
+
20
class BenefitItem(BaseModel):
    """A single named benefit line within a plan calculation response."""
    name: str         # benefit label, e.g. "Maturity Benefit"
    value: str        # formatted value, e.g. "₹1,200,000.00" or "Exempt"
    description: str  # short human-readable explanation
24
+
25
class PlanBenefitResponse(BaseModel):
    """Calculated benefit summary for one plan and one applicant."""
    plan_id: int
    plan_name: str
    eligibility_status: bool    # True when age falls within the plan's band
    reason: str                 # eligibility explanation for the user
    maturity_benefit: str       # formatted currency string (₹)
    annual_income: str          # formatted currency string (₹)
    sum_assured: str            # formatted currency string (₹)
    income_start_point: str     # e.g. "5 years", "2nd year"
    income_duration: str        # e.g. "20 years"
    sad_multiple: str           # sum-assured multiple of the premium
    payout_freq: str            # "Yearly" / "Monthly"
    recommendation_score: float
    benefits: List[BenefitItem]  # itemized benefit lines
39
+
40
# Dummy Plan Data Store: per-plan eligibility band and payout parameters
# used by the benefit calculator.
PLANS_DATA = {
    1: {
        "name": "Edelweiss Life Guaranteed Income STAR",
        "min_age": 3,
        "max_age": 50,
        "benefits_multiplier": 1.2,
        "income_start": "5 years",
        "income_duration": "20 years",
        "payout_freq": "Yearly",
        "sad_multiple": "10"
    },
    2: {
        "name": "Edelweiss Life Bharat Savings STAR",
        "min_age": 0,
        "max_age": 60,
        "benefits_multiplier": 1.1,
        "income_start": "2nd year",
        "income_duration": "15 years",
        "payout_freq": "Monthly",
        "sad_multiple": "7"
    },
    3: {
        "name": "Edelweiss Life Premier Guaranteed STAR Pro",
        "min_age": 5,
        "max_age": 55,
        "benefits_multiplier": 1.3,
        "income_start": "15 years",
        "income_duration": "20 years",
        "payout_freq": "Yearly",
        "sad_multiple": "11"
    },
    4: {
        "name": "EdelweissLife Flexi Dream Plan",
        "min_age": 18,
        "max_age": 60,
        "benefits_multiplier": 0.9,
        "income_start": "2 years",
        "income_duration": "10 years",
        "payout_freq": "Yearly",
        "sad_multiple": "8"
    },
    5: {
        "name": "EdelweissLife Guaranteed Savings STAR",
        "min_age": 0,
        "max_age": 60,
        "benefits_multiplier": 1.17,
        "income_start": "3rd year",
        "income_duration": "15 years",
        "payout_freq": "Monthly",
        "sad_multiple": "7"
    },
    6: {
        "name": "EdelweissLife Flexi Savings STAR",
        "min_age": 18,
        "max_age": 65,
        "benefits_multiplier": 1.42,
        "income_start": "10 years",
        "income_duration": "25 years",
        "payout_freq": "Yearly",
        "sad_multiple": "5"
    }
}

# Plan name (or distinctive substring) -> plan ID.
# Matching is performed case-insensitively by resolve_plan_id().
PLAN_NAME_TO_ID = {
    "guaranteed income star": 1,
    "bharat savings star": 2,
    "premier guaranteed star pro": 3,
    "Flexi Dream Plan": 4,
    "Flexi Savings STAR": 6,
    "Guaranteed Savings STAR": 5
}

def resolve_plan_id(name: str) -> Optional[int]:
    """Resolve a plan name or substring to a plan ID (first match wins).

    BUG FIX: the mapping contains mixed-case keys but the input was
    lowercased before comparison, so the mixed-case entries
    ("Flexi Dream Plan", "Flexi Savings STAR", "Guaranteed Savings STAR")
    could never match. Keys are now lowercased before comparing.

    Args:
        name: Plan name as mentioned by the user (any casing).

    Returns:
        The matching plan ID, or None if no mapping entry matches.
    """
    name_lower = name.lower().strip()
    for key, pid in PLAN_NAME_TO_ID.items():
        key_lower = key.lower()
        # Bidirectional substring match: handles both partial mentions and
        # fully-qualified plan names.
        if key_lower in name_lower or name_lower in key_lower:
            return pid
    return None
121
+
122
+ def calculate_dummy_benefits(plan_id: int, request: PlanInfoRequest) -> PlanBenefitResponse:
123
+ plan = PLANS_DATA.get(plan_id)
124
+ if not plan:
125
+ return None
126
+
127
+ is_eligible = plan["min_age"] <= request.age <= plan["max_age"]
128
+ reason = "Eligible based on age criteria." if is_eligible else f"Ineligible: Age must be between {plan['min_age']} and {plan['max_age']}"
129
+
130
+ # Dummy calculation logic influenced by PT/PPT
131
+ pt_val = 15
132
+ if request.policy_term:
133
+ try: pt_val = int(re.search(r'\d+', request.policy_term).group())
134
+ except: pass
135
+
136
+ mult_adj = (pt_val / 15.0) # PT adjustment
137
+ randval = random()
138
+ if randval < 0.5:
139
+ randval = 0.5
140
+ maturity_val = request.premium_amount * 10 * (1+randval)* plan["benefits_multiplier"] * mult_adj
141
+ income_val = request.premium_amount * (plan["benefits_multiplier"]/0.467)
142
+
143
+ # Calculate Sum Assured
144
+ sad_val = request.premium_amount * float(plan["sad_multiple"])
145
+ sum_assured = f"₹{sad_val:,.2f}"
146
+
147
+ maturity_benefit = f"₹{maturity_val:,.2f}"
148
+ annual_income = f"₹{income_val:,.2f}"
149
+
150
+ benefits = [
151
+ BenefitItem(name="Maturity Benefit", value=maturity_benefit, description="Guaranteed lump sum"),
152
+ BenefitItem(name="Annual Income Benefit", value=annual_income, description="Regular payouts"),
153
+ BenefitItem(name="Sum Assured", value=sum_assured, description="Life Cover"),
154
+ BenefitItem(name="Tax Benefit", value="Exempt", description="Sec 80C")
155
+ ]
156
+
157
+ return PlanBenefitResponse(
158
+ plan_id=plan_id,
159
+ plan_name=plan["name"],
160
+ eligibility_status=is_eligible,
161
+ reason=reason,
162
+ maturity_benefit=maturity_benefit,
163
+ annual_income=annual_income,
164
+ sum_assured=sum_assured,
165
+ income_start_point=plan["income_start"],
166
+ income_duration=plan["income_duration"],
167
+ payout_freq=plan["payout_freq"],
168
+ sad_multiple=plan["sad_multiple"],
169
+ recommendation_score=0.9 if is_eligible else 0.1,
170
+ benefits=benefits
171
+ )
172
+
173
+ @router.get("/calculate", response_model=PlanBenefitResponse)
174
+ async def calculate_by_id(
175
+ plan_id: int = Query(..., alias="Planid"),
176
+ age: int = Query(...),
177
+ gender: str = Query(...),
178
+ premium_amount: float = Query(...),
179
+ goal: str = Query("savings")
180
+ ):
181
+ """
182
+ Calculates benefits for a specific Edelweiss plan using Plan ID.
183
+ Uses dummy logic for demonstration.
184
+ """
185
+ request = PlanInfoRequest(age=age, gender=gender, premium_amount=premium_amount, goal=goal)
186
+ result = calculate_dummy_benefits(plan_id, request)
187
+ if not result:
188
+ raise HTTPException(status_code=404, detail="Plan not found")
189
+ return result
190
+
191
+ @router.post("/calculate", response_model=List[PlanBenefitResponse])
192
+ async def calculate_all_benefits(request: PlanInfoRequest):
193
+ """
194
+ Calculates benefits for all Edelweiss Guaranteed Income plans.
195
+ """
196
+ if request.plan_id:
197
+ result = calculate_dummy_benefits(request.plan_id, request)
198
+ if not result:
199
+ raise HTTPException(status_code=404, detail="Plan not found")
200
+ return [result]
201
+
202
+ results = []
203
+ for pid in PLANS_DATA:
204
+ results.append(calculate_dummy_benefits(pid, request))
205
+ return results
206
+
207
+ def get_plan_benefits_tool(age: int, gender: str, premium_amount: float, plan_id: Optional[int] = None,
208
+ policy_term: Optional[str] = None, payment_term: Optional[str] = None,
209
+ payment_mode: Optional[str] = None) -> str:
210
+ """
211
+ Python function to be used as a tool by LangGraph.
212
+ Returns a combined string with a Markdown table and JSON.
213
+ """
214
+ request = PlanInfoRequest(
215
+ age=age,
216
+ gender=gender,
217
+ premium_amount=premium_amount,
218
+ plan_id=plan_id,
219
+ policy_term=policy_term,
220
+ payment_term=payment_term,
221
+ payment_mode=payment_mode
222
+ )
223
+ data = []
224
+ if plan_id:
225
+ result = calculate_dummy_benefits(plan_id, request)
226
+ if result: data = [result.dict()]
227
+ else:
228
+ for pid in PLANS_DATA:
229
+ data.append(calculate_dummy_benefits(pid, request).dict())
230
+
231
+ if not data:
232
+ return "No plans found or ineligible."
233
+
234
+ # Create a nice Markdown Table for the LLM
235
+ table = "| Plan Name | Income Start | Duration | SAD Multi | Sum Assured | Maturity Benefit | Annual Income |\n"
236
+ table += "| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
237
+ for d in data:
238
+ table += f"| {d['plan_name']} | {d['income_start_point']} | {d['income_duration']} | {d['sad_multiple']} | {d['sum_assured']} | {d['maturity_benefit']} | {d['annual_income']} |\n"
239
+
240
+ output = {
241
+ "summary_table": table,
242
+ "raw_data": data
243
+ }
244
+ return json.dumps(output, indent=2)
app.py CHANGED
@@ -1,17 +1,40 @@
1
  import os
2
  import json
3
  import threading
 
 
4
  import speech_recognition as sr
5
  from flask import Flask, render_template, request, jsonify
 
6
  from agents.graph import app as agent_app
7
  from dotenv import load_dotenv
8
  from ingestion.pipeline import IngestionPipeline
9
  from rag.vector_store import VectorStoreManager
10
 
 
 
 
 
 
 
 
 
 
11
  load_dotenv()
12
 
 
 
 
13
  app = Flask(__name__)
14
 
 
 
 
 
 
 
 
 
15
  # Global state for ingestion tracking
16
  ingestion_status = {
17
  "status": "Idle",
@@ -19,18 +42,84 @@ ingestion_status = {
19
  "last_error": None
20
  }
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def ingest_worker(file_path, delete_source=None):
23
  """Worker thread for background ingestion using enhanced pipeline."""
24
  global ingestion_status
25
  try:
 
26
  ingestion_status["status"] = "Starting..."
27
  ingestion_status["progress"] = 0
28
 
29
- base_docs_dir = "docs"
30
  pipeline = IngestionPipeline(base_docs_dir)
31
  vector_manager = VectorStoreManager()
32
 
33
  if delete_source:
 
34
  ingestion_status["status"] = "Removing old version..."
35
  vector_manager.delete_documents_by_source(delete_source)
36
  ingestion_status["progress"] = 10
@@ -38,94 +127,197 @@ def ingest_worker(file_path, delete_source=None):
38
  ingestion_status["status"] = "Processing document..."
39
  ingestion_status["progress"] = 30
40
 
41
- # Use the new unified process_single_file method
42
- # Handles metadata extraction, section detection, and proper chunking
43
  chunks = pipeline.process_single_file(file_path)
44
 
45
  if chunks:
 
46
  ingestion_status["status"] = "Updating Vector Store..."
47
  ingestion_status["progress"] = 70
48
  vector_manager.update_vector_store(chunks)
49
 
50
- # Reload the retriever in the agent nodes to see new documents
 
 
 
 
51
  from agents.nodes import nodes
52
  nodes.reload_retriever()
53
 
54
  ingestion_status["status"] = "Completed Successfully!"
55
  ingestion_status["progress"] = 100
 
56
  else:
 
57
  ingestion_status["status"] = "Failed: No content extracted."
58
  ingestion_status["progress"] = 0
59
 
60
  except Exception as e:
 
61
  ingestion_status["status"] = "Failed"
62
  ingestion_status["last_error"] = str(e)
63
  ingestion_status["progress"] = 0
64
 
 
65
  @app.route("/")
66
  def index():
 
67
  return render_template("index.html")
68
 
 
69
  @app.route("/api/chat", methods=["POST"])
70
  def chat():
71
- data = request.json
72
- prompt = data.get("prompt")
73
- history = data.get("history", [])
74
 
75
- if not prompt:
76
- return jsonify({"error": "Prompt is required"}), 400
77
-
78
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  initial_state = {
80
  "input": prompt,
81
  "chat_history": history,
82
  "intent": "",
83
  "context": [],
84
  "answer": "",
85
- "metadata_filters": {}
 
86
  }
87
 
88
  result = agent_app.invoke(initial_state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return jsonify({
90
- "answer": result.get("answer", ""),
91
- "context": result.get("context", [])
 
 
 
92
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  except Exception as e:
94
- return jsonify({"error": str(e), "status": "error"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  @app.route("/api/audio-chat", methods=["POST"])
97
  def audio_chat():
98
- if 'audio' not in request.files:
99
- return jsonify({"error": "No audio file part"}), 400
 
 
100
 
101
- file = request.files['audio']
102
- history = json.loads(request.form.get("history", "[]"))
103
-
104
- if file.filename == '':
105
- return jsonify({"error": "No selected file"}), 400
106
-
107
- temp_path = "temp_voice_query.wav"
108
- file.save(temp_path)
109
-
110
- r = sr.Recognizer()
111
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  with sr.AudioFile(temp_path) as source:
113
  audio_data = r.record(source)
114
 
115
  raw_text = r.recognize_google(audio_data)
 
116
 
117
- # Summarize/Refine the transcribed audio text
118
  from models.llm import LLMFactory
119
  from langchain_core.messages import SystemMessage, HumanMessage
120
 
121
  refiner_llm = LLMFactory.get_llm("small")
122
  refine_system = (
123
  "You are an assistant that cleans up and summarizes noisy speech-to-text transcriptions. "
124
- "Your goal is to extract the actual insurance-related question or request from the text.\n\n"
125
- "RULES:\n"
126
- "1. Remove filler words (um, ah, like, you know).\n"
127
- "2. Fix grammatical errors caused by transcription.\n"
128
- "3. If multiple things are mentioned, focus on the core request.\n"
129
  "4. Return ONLY the cleaned, professional question text."
130
  )
131
 
@@ -135,37 +327,64 @@ def audio_chat():
135
  ])
136
  summarized_text = getattr(refine_response, 'content', str(refine_response)).strip()
137
 
138
- # Process with existing Agent using the summarized text
 
 
139
  initial_state = {
140
  "input": summarized_text,
141
  "chat_history": history,
142
  "intent": "",
143
  "context": [],
144
  "answer": "",
145
- "metadata_filters": {}
 
146
  }
147
 
148
  result = agent_app.invoke(initial_state)
149
 
150
- if os.path.exists(temp_path):
 
151
  os.remove(temp_path)
152
-
 
 
 
153
  return jsonify({
154
  "transcription": raw_text,
155
  "summarized_question": summarized_text,
156
  "answer": result.get("answer", ""),
157
- "context": result.get("context", [])
 
 
158
  })
159
 
160
  except sr.UnknownValueError:
161
- if os.path.exists(temp_path): os.remove(temp_path)
162
- return jsonify({"error": "Could not understand audio"}), 400
 
 
 
 
 
 
163
  except sr.RequestError as e:
164
- if os.path.exists(temp_path): os.remove(temp_path)
165
- return jsonify({"error": f"Speech service error: {e}"}), 500
 
 
 
 
 
 
166
  except Exception as e:
167
- if os.path.exists(temp_path): os.remove(temp_path)
168
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
169
 
170
  def update_doc_structure(provider_name, category_name):
171
  """Helper to persist new providers/categories to the config file."""
@@ -175,92 +394,150 @@ def update_doc_structure(provider_name, category_name):
175
  return
176
 
177
  with open(config_path, "r") as f:
178
- config = json.load(f)
179
 
180
  # Find or create provider
181
- provider = next((p for p in config["providers"] if p["name"] == provider_name), None)
182
  if not provider:
183
- # Insert at the beginning (before 'Other')
184
  provider = {"name": provider_name, "categories": []}
185
- config["providers"].insert(0, provider)
186
 
187
  # Add category if new
188
  if category_name not in provider["categories"]:
189
  provider["categories"].append(category_name)
190
- # Sort categories for cleanliness (except if it was General)
191
  if len(provider["categories"]) > 1:
192
  provider["categories"].sort()
193
 
194
  with open(config_path, "w") as f:
195
- json.dump(config, f, indent=4)
 
196
  except Exception as e:
197
- pass
 
198
 
199
  @app.route("/api/upload", methods=["POST"])
200
  def upload():
201
- if 'file' not in request.files:
202
- return jsonify({"error": "No file part"}), 400
203
-
204
- file = request.files['file']
205
- provider = request.form.get("provider")
206
- category = request.form.get("category")
207
- mode = request.form.get("mode", "New Upload") # "New Upload" or "Modify Existing"
208
-
209
- if file.filename == '' or not provider or not category:
210
- return jsonify({"error": "Missing metadata or file"}), 400
211
-
212
- # Persist new structure to JSON
213
- update_doc_structure(provider, category)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
- base_dir = "docs"
216
- target_dir = os.path.join(base_dir, provider, category)
217
- os.makedirs(target_dir, exist_ok=True)
218
-
219
- file_path = os.path.join(target_dir, file.filename)
220
- file.save(file_path)
221
-
222
- delete_source = None
223
- if mode == "Modify Existing":
224
- file_to_modify = request.form.get("file_to_modify")
225
- if file_to_modify:
226
- delete_source = os.path.join(base_dir, provider, category, file_to_modify)
227
- if os.path.abspath(delete_source) != os.path.abspath(file_path):
228
- if os.path.exists(delete_source):
229
- os.remove(delete_source)
230
-
231
- # Start background ingestion
232
- thread = threading.Thread(target=ingest_worker, args=(file_path, delete_source))
233
- thread.start()
234
-
235
- return jsonify({"message": "File uploaded, ingestion started.", "path": file_path})
236
 
237
  @app.route("/api/status", methods=["GET"])
238
  def get_status():
 
239
  return jsonify(ingestion_status)
240
 
 
241
  @app.route("/api/config", methods=["GET"])
242
  def get_config():
 
243
  config_path = os.path.join("configs", "doc_structure.json")
244
  if os.path.exists(config_path):
245
  with open(config_path, "r") as f:
246
  return jsonify(json.load(f))
247
  return jsonify({"providers": []})
248
 
 
249
  @app.route("/api/files", methods=["GET"])
250
  def list_files():
 
251
  provider = request.args.get("provider")
252
  category = request.args.get("category")
253
 
254
  if not provider or not category:
255
  return jsonify({"files": []})
256
-
257
- base_dir = "docs"
258
  target_dir = os.path.join(base_dir, provider, category)
 
259
  if os.path.exists(target_dir):
260
  files = [f for f in os.listdir(target_dir) if f.lower().endswith(('.pdf', '.docx'))]
261
  return jsonify({"files": files})
 
262
  return jsonify({"files": []})
263
 
 
264
  if __name__ == "__main__":
265
- port = int(os.environ.get("PORT", 7860))
266
- app.run(host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
  import threading
4
+ import uuid
5
+ import time
6
  import speech_recognition as sr
7
  from flask import Flask, render_template, request, jsonify
8
+ from flask_cors import CORS
9
  from agents.graph import app as agent_app
10
  from dotenv import load_dotenv
11
  from ingestion.pipeline import IngestionPipeline
12
  from rag.vector_store import VectorStoreManager
13
 
14
+ # Production imports
15
+ from config import config
16
+ from utils.logger import setup_logger, set_request_context, clear_request_context
17
+ from utils.validators import InputValidator, ValidationError
18
+ from utils.metrics import metrics
19
+ from utils.request_logger import request_logger
20
+ from utils.cache import cache_manager
21
+ from api.monitoring import monitoring_bp
22
+
23
  load_dotenv()
24
 
25
+ # Setup logging
26
+ logger = setup_logger(__name__)
27
+
28
  app = Flask(__name__)
29
 
30
+ # Register monitoring blueprint
31
+ app.register_blueprint(monitoring_bp)
32
+
33
+ # Configure CORS
34
+ if config.ENABLE_CORS:
35
+ CORS(app, origins=config.CORS_ORIGINS)
36
+ logger.info(f"CORS enabled for origins: {config.CORS_ORIGINS}")
37
+
38
  # Global state for ingestion tracking
39
  ingestion_status = {
40
  "status": "Idle",
 
42
  "last_error": None
43
  }
44
 
45
+
46
+ @app.before_request
47
+ def before_request():
48
+ """Set up request context and tracking."""
49
+ # Generate request ID
50
+ request_id = str(uuid.uuid4())[:8]
51
+ request.request_id = request_id
52
+ request.start_time = time.time()
53
+
54
+ # Set request context for logging
55
+ user_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
56
+ set_request_context(request_id, user_ip)
57
+
58
+ # Track active requests
59
+ metrics.increment_active_requests()
60
+
61
+ # Log request
62
+ logger.info(f"Request started: {request.method} {request.path}")
63
+
64
+
65
+ @app.after_request
66
+ def after_request(response):
67
+ """Clean up request context and record metrics."""
68
+ if hasattr(request, 'start_time'):
69
+ latency_ms = (time.time() - request.start_time) * 1000
70
+
71
+ # Log response
72
+ logger.info(
73
+ f"Request completed: {request.method} {request.path} "
74
+ f"[{response.status_code}] {latency_ms:.2f}ms"
75
+ )
76
+
77
+ # Record metrics
78
+ metrics.record_request(
79
+ latency_ms=latency_ms,
80
+ error=(response.status_code >= 400)
81
+ )
82
+
83
+ # Decrement active requests
84
+ metrics.decrement_active_requests()
85
+
86
+ # Clear request context
87
+ clear_request_context()
88
+
89
+ return response
90
+
91
+
92
+ @app.errorhandler(Exception)
93
+ def handle_error(error):
94
+ """Global error handler."""
95
+ logger.error(f"Unhandled error: {str(error)}", exc_info=True)
96
+
97
+ # Don't expose internal errors in production
98
+ if config.DEBUG:
99
+ error_msg = str(error)
100
+ else:
101
+ error_msg = "An internal error occurred. Please try again later."
102
+
103
+ return jsonify({
104
+ "error": error_msg,
105
+ "request_id": getattr(request, 'request_id', 'unknown')
106
+ }), 500
107
+
108
+
109
  def ingest_worker(file_path, delete_source=None):
110
  """Worker thread for background ingestion using enhanced pipeline."""
111
  global ingestion_status
112
  try:
113
+ logger.info(f"Starting ingestion for: {file_path}")
114
  ingestion_status["status"] = "Starting..."
115
  ingestion_status["progress"] = 0
116
 
117
+ base_docs_dir = config.DOCS_DIR
118
  pipeline = IngestionPipeline(base_docs_dir)
119
  vector_manager = VectorStoreManager()
120
 
121
  if delete_source:
122
+ logger.info(f"Removing old version: {delete_source}")
123
  ingestion_status["status"] = "Removing old version..."
124
  vector_manager.delete_documents_by_source(delete_source)
125
  ingestion_status["progress"] = 10
 
127
  ingestion_status["status"] = "Processing document..."
128
  ingestion_status["progress"] = 30
129
 
 
 
130
  chunks = pipeline.process_single_file(file_path)
131
 
132
  if chunks:
133
+ logger.info(f"Extracted {len(chunks)} chunks from {file_path}")
134
  ingestion_status["status"] = "Updating Vector Store..."
135
  ingestion_status["progress"] = 70
136
  vector_manager.update_vector_store(chunks)
137
 
138
+ # Invalidate caches
139
+ cache_manager.invalidate_all()
140
+ logger.info("Caches invalidated after ingestion")
141
+
142
+ # Reload the retriever in the agent nodes
143
  from agents.nodes import nodes
144
  nodes.reload_retriever()
145
 
146
  ingestion_status["status"] = "Completed Successfully!"
147
  ingestion_status["progress"] = 100
148
+ logger.info(f"Ingestion completed successfully for: {file_path}")
149
  else:
150
+ logger.warning(f"No content extracted from: {file_path}")
151
  ingestion_status["status"] = "Failed: No content extracted."
152
  ingestion_status["progress"] = 0
153
 
154
  except Exception as e:
155
+ logger.error(f"Ingestion failed: {str(e)}", exc_info=True)
156
  ingestion_status["status"] = "Failed"
157
  ingestion_status["last_error"] = str(e)
158
  ingestion_status["progress"] = 0
159
 
160
+
161
  @app.route("/")
162
  def index():
163
+ """Serve main page."""
164
  return render_template("index.html")
165
 
166
+
167
  @app.route("/api/chat", methods=["POST"])
168
  def chat():
169
+ """Main chat endpoint with full error handling and logging."""
170
+ start_time = time.time()
171
+ request_id = getattr(request, 'request_id', 'unknown')
172
 
 
 
 
173
  try:
174
+ data = request.json
175
+ if not data:
176
+ raise ValidationError("Request body must be JSON")
177
+
178
+ prompt = data.get("prompt")
179
+ history = data.get("history", [])
180
+ extracted_entities = data.get("extracted_entities", {})
181
+
182
+ # Validate input
183
+ if not prompt:
184
+ raise ValidationError("Prompt is required")
185
+
186
+ InputValidator.validate_query_input(prompt)
187
+
188
+ logger.info(f"Chat request: {prompt[:100]}...")
189
+
190
+ # Process with agent
191
  initial_state = {
192
  "input": prompt,
193
  "chat_history": history,
194
  "intent": "",
195
  "context": [],
196
  "answer": "",
197
+ "metadata_filters": {},
198
+ "extracted_entities": extracted_entities
199
  }
200
 
201
  result = agent_app.invoke(initial_state)
202
+
203
+ # Extract results
204
+ answer = result.get("answer", "")
205
+ context = result.get("context", [])
206
+ intent = result.get("intent", "unknown")
207
+ entities = result.get("extracted_entities", {})
208
+
209
+ # Calculate latency
210
+ latency_ms = (time.time() - start_time) * 1000
211
+
212
+ # Log request to database
213
+ request_logger.log_request(
214
+ request_id=request_id,
215
+ query=prompt,
216
+ intent=intent,
217
+ extracted_entities=entities,
218
+ retrieval_count=len(context),
219
+ latency_ms=latency_ms,
220
+ status="success",
221
+ context_sources=[c[:100] for c in context[:5]], # First 5 sources
222
+ user_ip=request.headers.get('X-Forwarded-For', request.remote_addr)
223
+ )
224
+
225
+ # Record intent in metrics
226
+ metrics.record_request(latency_ms=latency_ms, intent=intent, error=False)
227
+
228
+ logger.info(f"Chat completed successfully. Intent: {intent}, Latency: {latency_ms:.2f}ms")
229
+
230
  return jsonify({
231
+ "answer": answer,
232
+ "context": context,
233
+ "extracted_entities": entities,
234
+ "intent": intent,
235
+ "request_id": request_id
236
  })
237
+
238
+ except ValidationError as e:
239
+ logger.warning(f"Validation error: {str(e)}")
240
+ latency_ms = (time.time() - start_time) * 1000
241
+
242
+ request_logger.log_request(
243
+ request_id=request_id,
244
+ query=data.get("prompt", "")[:500] if data else "",
245
+ latency_ms=latency_ms,
246
+ status="validation_error",
247
+ error_message=str(e),
248
+ user_ip=request.headers.get('X-Forwarded-For', request.remote_addr)
249
+ )
250
+
251
+ return jsonify({
252
+ "error": str(e),
253
+ "request_id": request_id
254
+ }), 400
255
+
256
  except Exception as e:
257
+ logger.error(f"Chat error: {str(e)}", exc_info=True)
258
+ latency_ms = (time.time() - start_time) * 1000
259
+
260
+ request_logger.log_request(
261
+ request_id=request_id,
262
+ query=data.get("prompt", "")[:500] if data else "",
263
+ latency_ms=latency_ms,
264
+ status="error",
265
+ error_message=str(e)[:500],
266
+ user_ip=request.headers.get('X-Forwarded-For', request.remote_addr)
267
+ )
268
+
269
+ error_msg = str(e) if config.DEBUG else "An error occurred processing your request"
270
+ return jsonify({
271
+ "error": error_msg,
272
+ "request_id": request_id,
273
+ "status": "error"
274
+ }), 500
275
+
276
 
277
  @app.route("/api/audio-chat", methods=["POST"])
278
  def audio_chat():
279
+ """Audio chat endpoint with validation."""
280
+ start_time = time.time()
281
+ request_id = getattr(request, 'request_id', 'unknown')
282
+ temp_path = None
283
 
 
 
 
 
 
 
 
 
 
 
284
  try:
285
+ if 'audio' not in request.files:
286
+ raise ValidationError("No audio file provided")
287
+
288
+ file = request.files['audio']
289
+ history = json.loads(request.form.get("history", "[]"))
290
+ extracted_entities = json.loads(request.form.get("extracted_entities", "{}"))
291
+
292
+ if file.filename == '':
293
+ raise ValidationError("No file selected")
294
+
295
+ # Save temporarily
296
+ temp_path = f"temp_voice_{request_id}.wav"
297
+ file.save(temp_path)
298
+
299
+ logger.info(f"Processing audio file: {file.filename}")
300
+
301
+ # Transcribe
302
+ r = sr.Recognizer()
303
  with sr.AudioFile(temp_path) as source:
304
  audio_data = r.record(source)
305
 
306
  raw_text = r.recognize_google(audio_data)
307
+ logger.info(f"Transcribed: {raw_text}")
308
 
309
+ # Summarize/refine transcription
310
  from models.llm import LLMFactory
311
  from langchain_core.messages import SystemMessage, HumanMessage
312
 
313
  refiner_llm = LLMFactory.get_llm("small")
314
  refine_system = (
315
  "You are an assistant that cleans up and summarizes noisy speech-to-text transcriptions. "
316
+ "Your goal is to extract the actual insurance-related question or request from the text.\\n\\n"
317
+ "RULES:\\n"
318
+ "1. Remove filler words (um, ah, like, you know).\\n"
319
+ "2. Fix grammatical errors caused by transcription.\\n"
320
+ "3. If multiple things are mentioned, focus on the core request.\\n"
321
  "4. Return ONLY the cleaned, professional question text."
322
  )
323
 
 
327
  ])
328
  summarized_text = getattr(refine_response, 'content', str(refine_response)).strip()
329
 
330
+ logger.info(f"Refined: {summarized_text}")
331
+
332
+ # Process with agent (similar to chat endpoint)
333
  initial_state = {
334
  "input": summarized_text,
335
  "chat_history": history,
336
  "intent": "",
337
  "context": [],
338
  "answer": "",
339
+ "metadata_filters": {},
340
+ "extracted_entities": extracted_entities
341
  }
342
 
343
  result = agent_app.invoke(initial_state)
344
 
345
+ # Clean up temp file
346
+ if temp_path and os.path.exists(temp_path):
347
  os.remove(temp_path)
348
+
349
+ latency_ms = (time.time() - start_time) * 1000
350
+ logger.info(f"Audio chat completed. Latency: {latency_ms:.2f}ms")
351
+
352
  return jsonify({
353
  "transcription": raw_text,
354
  "summarized_question": summarized_text,
355
  "answer": result.get("answer", ""),
356
+ "context": result.get("context", []),
357
+ "extracted_entities": result.get("extracted_entities", {}),
358
+ "request_id": request_id
359
  })
360
 
361
  except sr.UnknownValueError:
362
+ if temp_path and os.path.exists(temp_path):
363
+ os.remove(temp_path)
364
+ logger.warning("Could not understand audio")
365
+ return jsonify({
366
+ "error": "Could not understand audio",
367
+ "request_id": request_id
368
+ }), 400
369
+
370
  except sr.RequestError as e:
371
+ if temp_path and os.path.exists(temp_path):
372
+ os.remove(temp_path)
373
+ logger.error(f"Speech service error: {e}")
374
+ return jsonify({
375
+ "error": f"Speech service error: {e}",
376
+ "request_id": request_id
377
+ }), 500
378
+
379
  except Exception as e:
380
+ if temp_path and os.path.exists(temp_path):
381
+ os.remove(temp_path)
382
+ logger.error(f"Audio chat error: {str(e)}", exc_info=True)
383
+ return jsonify({
384
+ "error": str(e) if config.DEBUG else "Error processing audio",
385
+ "request_id": request_id
386
+ }), 500
387
+
388
 
389
  def update_doc_structure(provider_name, category_name):
390
  """Helper to persist new providers/categories to the config file."""
 
394
  return
395
 
396
  with open(config_path, "r") as f:
397
+ doc_config = json.load(f)
398
 
399
  # Find or create provider
400
+ provider = next((p for p in doc_config["providers"] if p["name"] == provider_name), None)
401
  if not provider:
 
402
  provider = {"name": provider_name, "categories": []}
403
+ doc_config["providers"].insert(0, provider)
404
 
405
  # Add category if new
406
  if category_name not in provider["categories"]:
407
  provider["categories"].append(category_name)
 
408
  if len(provider["categories"]) > 1:
409
  provider["categories"].sort()
410
 
411
  with open(config_path, "w") as f:
412
+ json.dump(doc_config, f, indent=4)
413
+
414
  except Exception as e:
415
+ logger.warning(f"Failed to update doc structure: {e}")
416
+
417
 
418
  @app.route("/api/upload", methods=["POST"])
419
  def upload():
420
+ """File upload endpoint with validation."""
421
+ try:
422
+ if 'file' not in request.files:
423
+ raise ValidationError("No file provided")
424
+
425
+ file = request.files['file']
426
+ provider = request.form.get("provider")
427
+ category = request.form.get("category")
428
+ mode = request.form.get("mode", "New Upload")
429
+
430
+ if file.filename == '' or not provider or not category:
431
+ raise ValidationError("Missing required fields: file, provider, or category")
432
+
433
+ # Validate file
434
+ file.seek(0, os.SEEK_END)
435
+ file_size = file.tell()
436
+ file.seek(0)
437
+
438
+ InputValidator.validate_file_upload(file.filename, file_size)
439
+
440
+ # Sanitize filename
441
+ safe_filename = InputValidator.sanitize_filename(file.filename)
442
+ logger.info(f"Uploading file: {safe_filename} ({file_size} bytes)")
443
+
444
+ # Update doc structure
445
+ update_doc_structure(provider, category)
446
+
447
+ # Save file
448
+ base_dir = config.DOCS_DIR
449
+ target_dir = os.path.join(base_dir, provider, category)
450
+ os.makedirs(target_dir, exist_ok=True)
451
+
452
+ file_path = os.path.join(target_dir, safe_filename)
453
+ file.save(file_path)
454
+
455
+ logger.info(f"File saved to: {file_path}")
456
+
457
+ # Handle file modification
458
+ delete_source = None
459
+ if mode == "Modify Existing":
460
+ file_to_modify = request.form.get("file_to_modify")
461
+ if file_to_modify:
462
+ delete_source = os.path.join(base_dir, provider, category, file_to_modify)
463
+ if os.path.abspath(delete_source) != os.path.abspath(file_path):
464
+ if os.path.exists(delete_source):
465
+ os.remove(delete_source)
466
+ logger.info(f"Removed old file: {delete_source}")
467
+
468
+ # Start background ingestion
469
+ thread = threading.Thread(target=ingest_worker, args=(file_path, delete_source))
470
+ thread.start()
471
+
472
+ return jsonify({
473
+ "message": "File uploaded successfully, ingestion started.",
474
+ "filename": safe_filename,
475
+ "path": file_path
476
+ })
477
+
478
+ except ValidationError as e:
479
+ logger.warning(f"Upload validation error: {str(e)}")
480
+ return jsonify({"error": str(e)}), 400
481
+
482
+ except Exception as e:
483
+ logger.error(f"Upload error: {str(e)}", exc_info=True)
484
+ return jsonify({
485
+ "error": str(e) if config.DEBUG else "Upload failed"
486
+ }), 500
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
 
489
  @app.route("/api/status", methods=["GET"])
490
  def get_status():
491
+ """Get ingestion status."""
492
  return jsonify(ingestion_status)
493
 
494
+
495
  @app.route("/api/config", methods=["GET"])
496
  def get_config():
497
+ """Get document structure configuration."""
498
  config_path = os.path.join("configs", "doc_structure.json")
499
  if os.path.exists(config_path):
500
  with open(config_path, "r") as f:
501
  return jsonify(json.load(f))
502
  return jsonify({"providers": []})
503
 
504
+
505
  @app.route("/api/files", methods=["GET"])
506
  def list_files():
507
+ """List files in a provider/category directory."""
508
  provider = request.args.get("provider")
509
  category = request.args.get("category")
510
 
511
  if not provider or not category:
512
  return jsonify({"files": []})
513
+
514
+ base_dir = config.DOCS_DIR
515
  target_dir = os.path.join(base_dir, provider, category)
516
+
517
  if os.path.exists(target_dir):
518
  files = [f for f in os.listdir(target_dir) if f.lower().endswith(('.pdf', '.docx'))]
519
  return jsonify({"files": files})
520
+
521
  return jsonify({"files": []})
522
 
523
+
524
  if __name__ == "__main__":
525
+ # Log configuration on startup
526
+ logger.info(f"Starting {config.APP_NAME} v{config.VERSION}")
527
+ logger.info(f"Environment: {config.ENVIRONMENT.value}")
528
+ logger.info(f"Configuration: {json.dumps(config.get_summary(), indent=2)}")
529
+
530
+ # Validate configuration
531
+ try:
532
+ config.validate()
533
+ logger.info("Configuration validated successfully")
534
+ except ValueError as e:
535
+ logger.error(f"Configuration validation failed: {e}")
536
+
537
+ # Start application
538
+ port = config.PORT
539
+ host = config.HOST
540
+ debug = config.DEBUG
541
+
542
+ logger.info(f"Starting server on {host}:{port} (debug={debug})")
543
+ app.run(host=host, port=port, debug=debug)
config.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Production-grade configuration management with environment-based settings.
3
+ """
4
+ import os
5
+ from typing import Optional
6
+ from dotenv import load_dotenv
7
+ from enum import Enum
8
+
9
+ load_dotenv()
10
+
11
+
12
class Environment(str, Enum):
    """Deployment environment names.

    Subclasses ``str`` so members compare equal to their plain-string values
    (e.g. ``Environment.STAGING == "staging"``).
    """
    DEVELOPMENT = "development"
    STAGING = "staging"
    PRODUCTION = "production"
16
+
17
+
18
class Config:
    """Base configuration class with defaults.

    All settings are class attributes resolved once at import time from
    environment variables (with sensible defaults); the class itself serves
    as a read-only settings namespace.
    """

    # Environment. Fall back to DEVELOPMENT instead of raising ValueError at
    # import time when ENVIRONMENT holds an unrecognized value.
    try:
        ENVIRONMENT: Environment = Environment(os.getenv("ENVIRONMENT", "development"))
    except ValueError:
        ENVIRONMENT = Environment.DEVELOPMENT
    DEBUG: bool = ENVIRONMENT == Environment.DEVELOPMENT

    # Application
    APP_NAME: str = "Insurance RAG System"
    VERSION: str = "1.0.0"
    PORT: int = int(os.getenv("PORT", 7860))
    HOST: str = os.getenv("HOST", "0.0.0.0")

    # LLM Configuration
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    GROQ_MODEL_SMALL: str = os.getenv("GROQ_MODEL_SMALL", "llama-3.1-8b-instant")
    GROQ_MODEL_MEDIUM: str = os.getenv("GROQ_MODEL_MEDIUM", "llama-3.1-8b-instant")
    GROQ_MODEL_LARGE: str = os.getenv("GROQ_MODEL_LARGE", "llama-3.1-8b-instant")
    LLM_TIMEOUT: int = int(os.getenv("LLM_TIMEOUT", 30))
    LLM_MAX_RETRIES: int = int(os.getenv("LLM_MAX_RETRIES", 3))

    # LangChain / LangSmith Tracing
    LANGCHAIN_TRACING_V2: bool = os.getenv("LANGCHAIN_TRACING_V2", "false").lower() == "true"
    LANGCHAIN_ENDPOINT: str = os.getenv("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")
    LANGCHAIN_API_KEY: Optional[str] = os.getenv("LANGCHAIN_API_KEY")
    LANGCHAIN_PROJECT: str = os.getenv("LANGCHAIN_PROJECT", "Insurance-RAG")

    # Logging Configuration
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE_PATH: str = os.getenv("LOG_FILE_PATH", "logs/app.log")
    LOG_MAX_BYTES: int = int(os.getenv("LOG_MAX_BYTES", 10485760))  # 10MB
    LOG_BACKUP_COUNT: int = int(os.getenv("LOG_BACKUP_COUNT", 5))
    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # Performance Configuration
    MAX_WORKERS: int = int(os.getenv("MAX_WORKERS", 4))
    REQUEST_TIMEOUT: int = int(os.getenv("REQUEST_TIMEOUT", 30))
    CACHE_TTL: int = int(os.getenv("CACHE_TTL", 300))  # 5 minutes
    CACHE_MAX_SIZE: int = int(os.getenv("CACHE_MAX_SIZE", 1000))

    # Vector Store Configuration
    VECTOR_STORE_PATH: str = os.getenv("VECTOR_STORE_PATH", "rag/faiss_index")
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
    EMBEDDING_CACHE_PATH: str = os.getenv("EMBEDDING_CACHE_PATH", "rag/embeddings_cache.json")
    LLM_CACHE_DB_PATH: str = os.getenv("LLM_CACHE_DB_PATH", "rag/llm_cache.db")

    # Retrieval Configuration
    DEFAULT_RETRIEVAL_K: int = int(os.getenv("DEFAULT_RETRIEVAL_K", 5))
    MAX_RETRIEVAL_K: int = int(os.getenv("MAX_RETRIEVAL_K", 20))
    RETRIEVAL_SCORE_THRESHOLD: float = float(os.getenv("RETRIEVAL_SCORE_THRESHOLD", 0.5))

    # Security Configuration. Comma-split lists strip whitespace so values
    # like "pdf, docx" parse into clean tokens.
    MAX_FILE_SIZE_MB: int = int(os.getenv("MAX_FILE_SIZE_MB", 50))
    ALLOWED_FILE_TYPES: list = [t.strip() for t in os.getenv("ALLOWED_FILE_TYPES", "pdf,docx").split(",")]
    RATE_LIMIT_PER_MINUTE: int = int(os.getenv("RATE_LIMIT_PER_MINUTE", 60))
    ENABLE_API_KEY_AUTH: bool = os.getenv("ENABLE_API_KEY_AUTH", "false").lower() == "true"
    API_KEY: Optional[str] = os.getenv("API_KEY", None)
    ENABLE_CORS: bool = os.getenv("ENABLE_CORS", "true").lower() == "true"
    CORS_ORIGINS: list = [o.strip() for o in os.getenv("CORS_ORIGINS", "*").split(",")]

    # Monitoring Configuration
    ENABLE_METRICS: bool = os.getenv("ENABLE_METRICS", "true").lower() == "true"
    METRICS_PORT: int = int(os.getenv("METRICS_PORT", 9090))
    REQUEST_LOG_DB_PATH: str = os.getenv("REQUEST_LOG_DB_PATH", "utils/request_logs.db")

    # Circuit Breaker Configuration
    CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = int(os.getenv("CIRCUIT_BREAKER_FAILURE_THRESHOLD", 5))
    CIRCUIT_BREAKER_TIMEOUT: int = int(os.getenv("CIRCUIT_BREAKER_TIMEOUT", 60))
    CIRCUIT_BREAKER_EXPECTED_EXCEPTION: type = Exception

    # Document Processing
    DOCS_DIR: str = os.getenv("DOCS_DIR", "docs")
    BROCHURE_CHUNK_SIZE: int = int(os.getenv("BROCHURE_CHUNK_SIZE", 2600))
    BROCHURE_CHUNK_OVERLAP: int = int(os.getenv("BROCHURE_CHUNK_OVERLAP", 400))
    CIS_CHUNK_SIZE: int = int(os.getenv("CIS_CHUNK_SIZE", 1300))
    CIS_CHUNK_OVERLAP: int = int(os.getenv("CIS_CHUNK_OVERLAP", 160))
    TABLE_CHUNK_SIZE: int = int(os.getenv("TABLE_CHUNK_SIZE", 800))
    TABLE_CHUNK_OVERLAP: int = int(os.getenv("TABLE_CHUNK_OVERLAP", 100))

    @classmethod
    def validate(cls) -> bool:
        """Validate critical configuration values.

        Returns True when valid. Raises ValueError listing *every* problem
        found (not just the first) so operators can fix them in one pass.
        """
        errors = []

        # Check required API keys
        if not cls.GROQ_API_KEY:
            errors.append("GROQ_API_KEY is not set")

        # Validate file size limits
        if cls.MAX_FILE_SIZE_MB <= 0 or cls.MAX_FILE_SIZE_MB > 500:
            errors.append(f"MAX_FILE_SIZE_MB must be between 1 and 500, got {cls.MAX_FILE_SIZE_MB}")

        # Validate rate limits
        if cls.RATE_LIMIT_PER_MINUTE <= 0:
            errors.append(f"RATE_LIMIT_PER_MINUTE must be positive, got {cls.RATE_LIMIT_PER_MINUTE}")

        # Validate timeouts
        if cls.REQUEST_TIMEOUT <= 0:
            errors.append(f"REQUEST_TIMEOUT must be positive, got {cls.REQUEST_TIMEOUT}")

        # Validate cache settings
        if cls.CACHE_TTL < 0:
            errors.append(f"CACHE_TTL cannot be negative, got {cls.CACHE_TTL}")

        # API-key auth needs an actual key to check against
        if cls.ENABLE_API_KEY_AUTH and not cls.API_KEY:
            errors.append("ENABLE_API_KEY_AUTH is true but API_KEY is not set")

        if errors:
            error_msg = "\n".join(f" - {err}" for err in errors)
            raise ValueError(f"Configuration validation failed:\n{error_msg}")

        return True

    @classmethod
    def get_summary(cls) -> dict:
        """Get configuration summary for logging (deliberately excludes secrets)."""
        return {
            "environment": cls.ENVIRONMENT.value,
            "debug": cls.DEBUG,
            "app_name": cls.APP_NAME,
            "version": cls.VERSION,
            "port": cls.PORT,
            "log_level": cls.LOG_LEVEL,
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "rate_limit_per_minute": cls.RATE_LIMIT_PER_MINUTE,
            "cache_ttl": cls.CACHE_TTL,
            "enable_metrics": cls.ENABLE_METRICS,
            "enable_api_key_auth": cls.ENABLE_API_KEY_AUTH,
        }
148
+
149
+
150
class DevelopmentConfig(Config):
    """Development-specific configuration: debug on, verbose logging."""
    DEBUG = True
    LOG_LEVEL = "DEBUG"
154
+
155
+
156
class ProductionConfig(Config):
    """Production-specific configuration: debug off, warnings-and-up logging."""
    DEBUG = False
    LOG_LEVEL = "WARNING"
160
+
161
+
162
# Configuration factory
def get_config() -> type[Config]:
    """Select the configuration class for the current environment.

    Returns the class itself (all settings are class attributes), falling
    back to the base ``Config`` when ENVIRONMENT is unset or unrecognized
    instead of raising ValueError at import time.
    """
    env = os.getenv("ENVIRONMENT", "development")

    config_map = {
        Environment.DEVELOPMENT: DevelopmentConfig,
        Environment.STAGING: Config,
        Environment.PRODUCTION: ProductionConfig,
    }

    try:
        return config_map.get(Environment(env), Config)
    except ValueError:
        # Unknown ENVIRONMENT value: use base defaults rather than crash.
        return Config
174
+
175
+
176
# Global config instance.
# NOTE: get_config() returns a *class* (settings are class attributes), so
# `config` is used as a read-only settings namespace throughout the app.
config = get_config()

# Validate on import: warn loudly but do not abort, so the app can still
# boot in environments missing optional secrets (e.g. local development).
try:
    config.validate()
except ValueError as e:
    print(f"[CONFIG ERROR] {e}")
    print("[CONFIG] Continuing with invalid configuration - some features may not work correctly")
models/llm.py CHANGED
@@ -1,39 +1,136 @@
1
  import os
 
 
 
 
 
2
  from langchain_groq import ChatGroq
3
  from dotenv import load_dotenv
 
4
 
5
  load_dotenv()
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  class LLMFactory:
8
 
9
  @staticmethod
10
- def get_llm(model_type="small"):
11
  """
12
- Returns a Groq LLM instance based on type.
 
13
  """
14
  api_key = os.getenv("GROQ_API_KEY")
15
 
16
- # Groq specific models from environment
17
- if model_type == "small":
18
- model_name = os.getenv("GROQ_MODEL_SMALL", "llama-3.1-8b-instant")
19
- elif model_type == "medium":
20
- model_name = os.getenv("GROQ_MODEL_MEDIUM", "llama-3.1-8b-instant")
 
 
21
  else:
22
- model_name = os.getenv("GROQ_MODEL_LARGE", "llama-3.1-8b-instant")
23
 
24
  if api_key:
25
- return ChatGroq(
26
  model=model_name,
27
  temperature=0,
28
  groq_api_key=api_key,
29
- max_retries=3, # Automatically retry on rate limits or transient errors
30
- timeout=30 # Prevent hanging on slow responses
 
31
  )
 
32
 
33
  # Fallback to mock for testing without key
34
  class MockLLM:
35
  def invoke(self, msg):
36
- return f"[Groq Mock Response for {model_type}]: Model {model_name} processing..."
37
 
38
  return MockLLM()
39
-
 
1
import hashlib
import json
import os
import sqlite3
import threading
import time
from typing import Any, Optional

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
10
 
11
  load_dotenv()
12
 
13
class LLMCache:
    """
    Simple SQLite-based cache for LLM responses.

    Singleton: all callers share one connection to rag/llm_cache.db. The
    connection is opened with check_same_thread=False so it may be used from
    worker threads; since sqlite3 connections are not safe for concurrent
    use, a lock serializes every database operation.
    """
    _instance = None
    _instance_lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking so concurrent first calls cannot each
        # build (and leak) a connection.
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    instance = super(LLMCache, cls).__new__(cls)
                    instance._init_db()
                    cls._instance = instance
        return cls._instance

    def _init_db(self):
        """Open (or create) the cache database and ensure the schema exists."""
        self.db_path = "rag/llm_cache.db"
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self._db_lock = threading.Lock()
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS responses (
                id TEXT PRIMARY KEY,
                prompt_hash TEXT,
                model TEXT,
                response TEXT,
                timestamp REAL
            )
        """)
        self.conn.commit()

    @staticmethod
    def _key(prompt: str) -> str:
        # md5 is acceptable here: keys only need to be cheap and
        # well-distributed, not collision-resistant against attackers.
        return hashlib.md5(prompt.encode()).hexdigest()

    def get(self, prompt: str, model: str) -> Optional[str]:
        """Return the cached response for (prompt, model), or None on a miss."""
        prompt_hash = self._key(prompt)
        with self._db_lock:
            cursor = self.conn.execute(
                "SELECT response FROM responses WHERE prompt_hash = ? AND model = ?",
                (prompt_hash, model)
            )
            row = cursor.fetchone()
        return row[0] if row else None

    def set(self, prompt: str, model: str, response: str):
        """Store (or overwrite) the response for (prompt, model)."""
        prompt_hash = self._key(prompt)
        with self._db_lock:
            self.conn.execute(
                "INSERT OR REPLACE INTO responses (id, prompt_hash, model, response, timestamp) VALUES (?, ?, ?, ?, ?)",
                (f"{prompt_hash}_{model}", prompt_hash, model, response, time.time())
            )
            self.conn.commit()
56
+
57
class CachedChatGroq:
    """
    Wrapper around ChatGroq that adds response caching and retry with
    exponential backoff.
    """

    class _CachedResponse:
        """Minimal stand-in for a ChatGroq response: exposes only .content.

        Defined once at class level instead of being rebuilt on every
        cache hit inside invoke().
        """
        def __init__(self, content):
            self.content = content

    def __init__(self, llm_instance, model_name):
        self.llm = llm_instance
        self.model_name = model_name
        self.cache = LLMCache()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception)
    )
    def invoke(self, messages: Any) -> Any:
        # Build a stable cache key from the message contents.
        if isinstance(messages, list):
            prompt_str = json.dumps([m.content for m in messages], sort_keys=True)
        else:
            prompt_str = str(messages)

        # Cache lookup. `is not None` (not truthiness) so a legitimately
        # empty cached response still counts as a hit.
        cached_resp = self.cache.get(prompt_str, self.model_name)
        if cached_resp is not None:
            return self._CachedResponse(cached_resp)

        # Call API; on failure tenacity re-runs this whole method.
        try:
            response = self.llm.invoke(messages)
            content = getattr(response, 'content', str(response))

            # Cache success
            self.cache.set(prompt_str, self.model_name, content)
            return response
        except Exception as e:
            print(f"[LLM Error] Rate limit or network issue: {e}. Retrying...")
            raise
98
+
99
+
100
class LLMFactory:
    """Factory that routes requests to an appropriately sized Groq model."""

    @staticmethod
    def get_llm(complexity="low"):
        """
        Returns a routed and cached LLM instance.
        complexity: "low" (default, instant logic) or "high" (versatile logic)

        Without a GROQ_API_KEY the factory degrades to a mock object so the
        rest of the pipeline can run in tests.
        """
        api_key = os.getenv("GROQ_API_KEY")

        # Cost-effective default; "high" stays ready to swap to a larger
        # model (e.g. llama-3.1-70b-versatile) via GROQ_MODEL_LARGE.
        default_model = "llama-3.1-8b-instant"
        env_var = "GROQ_MODEL_LARGE" if complexity == "high" else "GROQ_MODEL_SMALL"
        model_name = os.getenv(env_var, default_model)

        if not api_key:
            # Fallback to mock for testing without key
            class MockLLM:
                def invoke(self, msg):
                    return f"[Groq Mock Response for {complexity}]: Model {model_name} processing..."

            return MockLLM()

        real_llm = ChatGroq(
            model=model_name,
            temperature=0,
            groq_api_key=api_key,
            # Retries are handled by the wrapper, so keep internal retries low.
            max_retries=1,
            timeout=30
        )
        return CachedChatGroq(real_llm, model_name)
 
rag/embeddings_cache.json ADDED
The diff for this file is too large to render. See raw diff
 
rag/retriever.py CHANGED
@@ -40,8 +40,8 @@ class RAGRetriever:
40
  match_found = False
41
  for v_item in norm_values:
42
  if key in ["insurer", "insurance_type"]:
43
- # Exact match for categories
44
- if v_item == met_val_str:
45
  match_found = True
46
  break
47
  else:
@@ -54,7 +54,7 @@ class RAGRetriever:
54
  else:
55
  norm_value = str(value).lower().strip()
56
  if key in ["insurer", "insurance_type"]:
57
- if norm_value != met_val_str:
58
  return False
59
  else:
60
  if norm_value not in met_val_str and met_val_str not in norm_value:
 
40
  match_found = False
41
  for v_item in norm_values:
42
  if key in ["insurer", "insurance_type"]:
43
+ # Flexible match for categories (containment)
44
+ if v_item in met_val_str or met_val_str in v_item:
45
  match_found = True
46
  break
47
  else:
 
54
  else:
55
  norm_value = str(value).lower().strip()
56
  if key in ["insurer", "insurance_type"]:
57
+ if norm_value not in met_val_str and met_val_str not in norm_value:
58
  return False
59
  else:
60
  if norm_value not in met_val_str and met_val_str not in norm_value:
rag/vector_store.py CHANGED
@@ -1,10 +1,83 @@
1
  import os
2
  import threading
 
 
3
  from langchain_community.vectorstores import FAISS
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain_core.documents import Document
6
  from typing import List
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  class VectorStoreManager:
9
  _embeddings = None
10
  _lock = threading.Lock()
@@ -13,9 +86,12 @@ class VectorStoreManager:
13
  self.index_path = index_path
14
  if VectorStoreManager._embeddings is None:
15
  # Load embeddings model once
16
- VectorStoreManager._embeddings = HuggingFaceEmbeddings(
17
  model_name="sentence-transformers/all-MiniLM-L6-v2"
18
  )
 
 
 
19
  self.embeddings = VectorStoreManager._embeddings
20
 
21
  def create_vector_store(self, documents: List[Document], batch_size: int = 100):
 
1
  import os
2
  import threading
3
+ import json
4
+ import hashlib
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_huggingface import HuggingFaceEmbeddings
7
  from langchain_core.documents import Document
8
  from typing import List
9
 
10
+ from langchain_core.embeddings import Embeddings
11
+ from typing import List
12
+
13
class CachedEmbeddings(Embeddings):
    """
    Wrapper for embeddings to cache results locally.
    Avoids re-computing embeddings for identical text.

    The cache is a JSON file mapping md5(text) -> embedding vector. A lock
    guards cache mutation and the save path so a save never iterates the
    dict while another thread is inserting into it.
    """
    def __init__(self, wrapped_embeddings, cache_path="rag/embeddings_cache.json"):
        self.wrapped = wrapped_embeddings
        self.cache_path = cache_path
        self.cache = {}
        self._lock = threading.Lock()
        self._load_cache()

    @staticmethod
    def _hash(text: str) -> str:
        # md5 is fine for a cache key: cheap and well-distributed; no
        # adversarial collision resistance is needed here.
        return hashlib.md5(text.encode()).hexdigest()

    def _load_cache(self):
        """Load the on-disk cache; a corrupt/unreadable file resets it."""
        if os.path.exists(self.cache_path):
            try:
                with open(self.cache_path, "r", encoding="utf-8") as f:
                    self.cache = json.load(f)
            except (OSError, json.JSONDecodeError):
                # Narrow except (not bare) so KeyboardInterrupt/SystemExit
                # still propagate; a broken cache file just starts fresh.
                self.cache = {}

    def _save_cache(self):
        """Persist the cache to disk; failures are logged, never fatal."""
        with self._lock:
            try:
                os.makedirs(os.path.dirname(self.cache_path), exist_ok=True)
                with open(self.cache_path, "w", encoding="utf-8") as f:
                    json.dump(self.cache, f)
            except Exception as e:
                print(f"Failed to save embedding cache: {e}")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed texts, serving repeats from the cache and computing only misses."""
        results = []
        texts_to_embed = []
        indices_to_embed = []

        # Serve cache hits; remember the positions of the misses.
        for i, text in enumerate(texts):
            h = self._hash(text)
            if h in self.cache:
                results.append(self.cache[h])
            else:
                results.append(None)  # Placeholder
                texts_to_embed.append(text)
                indices_to_embed.append(i)

        # Compute missing
        if texts_to_embed:
            print(f"Computing embeddings for {len(texts_to_embed)} new items...")
            new_embeddings = self.wrapped.embed_documents(texts_to_embed)

            # Mutate the cache under the lock, then release before saving
            # (_save_cache re-acquires the same non-reentrant lock).
            with self._lock:
                for idx, emb, text in zip(indices_to_embed, new_embeddings, texts_to_embed):
                    results[idx] = emb
                    self.cache[self._hash(text)] = emb

            # Save incrementally
            self._save_cache()

        return results

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string, using and updating the cache."""
        h = self._hash(text)
        if h in self.cache:
            return self.cache[h]

        emb = self.wrapped.embed_query(text)
        with self._lock:
            self.cache[h] = emb
        self._save_cache()
        return emb
+ return emb
80
+
81
  class VectorStoreManager:
82
  _embeddings = None
83
  _lock = threading.Lock()
 
86
  self.index_path = index_path
87
  if VectorStoreManager._embeddings is None:
88
  # Load embeddings model once
89
+ base_embeddings = HuggingFaceEmbeddings(
90
  model_name="sentence-transformers/all-MiniLM-L6-v2"
91
  )
92
+ # Wrap with caching
93
+ VectorStoreManager._embeddings = CachedEmbeddings(base_embeddings)
94
+
95
  self.embeddings = VectorStoreManager._embeddings
96
 
97
  def create_vector_store(self, documents: List[Document], batch_size: int = 100):
requirements.txt CHANGED
@@ -18,4 +18,6 @@ SpeechRecognition
18
  langchain-groq
19
  requests
20
  pdfplumber
21
- python-docx
 
 
 
18
  langchain-groq
19
  requests
20
  pdfplumber
21
+ python-docx
22
+ tenacity
23
+ langsmith
static/css/style.css CHANGED
@@ -1,14 +1,20 @@
1
  @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap');
2
 
3
  :root {
4
- --primary: #6366f1;
5
- --primary-hover: #4f46e5;
6
- --bg-dark: #0f172a;
7
- --card-bg: rgba(30, 41, 59, 0.7);
 
 
 
8
  --text-main: #f8fafc;
9
  --text-muted: #94a3b8;
10
- --glass-border: rgba(255, 255, 255, 0.1);
11
- --animation-speed: 0.3s;
 
 
 
12
  }
13
 
14
  * {
@@ -19,51 +25,195 @@
19
 
20
  body {
21
  font-family: 'Outfit', sans-serif;
22
- background: radial-gradient(circle at top right, #1e1b4b, #0f172a);
23
  color: var(--text-main);
24
  height: 100vh;
25
  overflow: hidden;
26
  display: flex;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
 
29
  /* Sidebar Styling */
30
  .sidebar {
31
- width: 350px;
32
- background: rgba(15, 23, 42, 0.8);
33
- backdrop-filter: blur(20px);
34
  border-right: 1px solid var(--glass-border);
35
- padding: 2rem;
36
  display: flex;
37
  flex-direction: column;
38
- z-index: 10;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
 
41
  .logo {
42
  display: flex;
43
  align-items: center;
44
  gap: 12px;
45
- margin-bottom: 3rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
  .logo span {
49
  font-size: 1.5rem;
50
- font-weight: 700;
51
- background: linear-gradient(to right, #818cf8, #c084fc);
 
 
52
  -webkit-background-clip: text;
53
  background-clip: text;
54
  -webkit-text-fill-color: transparent;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
56
 
57
- .sidebar-section {
58
- margin-bottom: 2rem;
 
 
 
59
  }
60
 
61
- .sidebar-section h3 {
62
- font-size: 0.85rem;
63
- text-transform: uppercase;
64
- letter-spacing: 0.1em;
65
- color: var(--text-muted);
66
- margin-bottom: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
 
69
  /* Main Content Area */
@@ -72,16 +222,40 @@ body {
72
  display: flex;
73
  flex-direction: column;
74
  position: relative;
75
- padding: 2rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
 
78
  /* Glass Card Component */
79
  .glass-card {
80
  background: var(--card-bg);
81
- backdrop-filter: blur(12px);
82
  border: 1px solid var(--glass-border);
83
- border-radius: 24px;
84
- box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.3);
 
 
 
 
 
 
85
  }
86
 
87
  /* Chat Container */
@@ -91,6 +265,7 @@ body {
91
  flex-direction: column;
92
  overflow: hidden;
93
  margin-bottom: 1.5rem;
 
94
  }
95
 
96
  .chat-messages {
@@ -100,6 +275,7 @@ body {
100
  display: flex;
101
  flex-direction: column;
102
  gap: 1.5rem;
 
103
  }
104
 
105
  /* Scrollbar styling */
@@ -113,12 +289,15 @@ body {
113
  }
114
 
115
  .message {
116
- max-width: 80%;
117
- padding: 1rem 1.5rem;
118
- border-radius: 20px;
119
  font-size: 0.95rem;
120
- line-height: 1.6;
121
- animation: fadeIn 0.4s ease-out forwards;
 
 
 
122
  }
123
 
124
  .message p {
@@ -185,15 +364,18 @@ body {
185
 
186
  .user-message {
187
  align-self: flex-end;
188
- background: var(--primary);
189
  color: white;
190
  border-bottom-right-radius: 4px;
 
 
191
  }
192
 
193
  .bot-message {
194
  align-self: flex-start;
195
- background: rgba(255, 255, 255, 0.05);
196
- border: 1px solid var(--glass-border);
 
197
  border-top-left-radius: 4px;
198
  }
199
 
@@ -276,19 +458,20 @@ body {
276
  }
277
 
278
  .send-btn {
279
- background: var(--primary);
280
  border: none;
281
- width: 45px;
282
- height: 45px;
283
- border-radius: 12px;
284
  color: white;
285
  cursor: pointer;
286
- transition: var(--animation-speed);
 
287
  }
288
 
289
  .send-btn:hover {
290
- background: var(--primary-hover);
291
- transform: scale(1.05);
292
  }
293
 
294
  /* Form Styling (Sidebar) */
@@ -322,20 +505,24 @@ select option {
322
 
323
  .primary-btn {
324
  width: 100%;
325
- background: var(--primary);
326
  border: none;
327
  padding: 1rem;
328
- border-radius: 12px;
329
  color: white;
330
- font-weight: 600;
 
 
331
  cursor: pointer;
332
- transition: var(--animation-speed);
333
  margin-top: 1rem;
 
334
  }
335
 
336
  .primary-btn:hover {
337
- background: var(--primary-hover);
338
- box-shadow: 0 0 20px rgba(99, 102, 241, 0.4);
 
339
  }
340
 
341
  /* Status Bar */
 
1
  @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap');
2
 
3
  :root {
4
+ --primary: #9333ea;
5
+ --primary-glow: rgba(147, 51, 234, 0.4);
6
+ --secondary: #c026d3;
7
+ --accent: #6366f1;
8
+ --bg-dark: #05010d;
9
+ --sidebar-bg: rgba(10, 2, 25, 0.85);
10
+ --card-bg: rgba(20, 10, 40, 0.45);
11
  --text-main: #f8fafc;
12
  --text-muted: #94a3b8;
13
+ --glass-border: rgba(147, 51, 234, 0.2);
14
+ --glass-border-light: rgba(255, 255, 255, 0.08);
15
+ --animation-speed: 0.4s;
16
+ --sidebar-width: 320px;
17
+ --sidebar-collapsed-width: 80px;
18
  }
19
 
20
  * {
 
25
 
26
  body {
27
  font-family: 'Outfit', sans-serif;
28
+ background-color: var(--bg-dark);
29
  color: var(--text-main);
30
  height: 100vh;
31
  overflow: hidden;
32
  display: flex;
33
+ position: relative;
34
+ }
35
+
36
+ /* Dynamic Background Glows (Spotlights) */
37
+ .bg-glow-container {
38
+ position: fixed;
39
+ top: 0;
40
+ left: 0;
41
+ width: 100%;
42
+ height: 100%;
43
+ z-index: -1;
44
+ overflow: hidden;
45
+ background: #05010d;
46
+ }
47
+
48
+ .glow-blob {
49
+ position: absolute;
50
+ width: 600px;
51
+ height: 600px;
52
+ border-radius: 50%;
53
+ filter: blur(120px);
54
+ opacity: 0.4;
55
+ animation: moveGlow 25s infinite alternate;
56
+ }
57
+
58
+ .glow-1 {
59
+ top: -10%;
60
+ right: -10%;
61
+ background: radial-gradient(circle, #9333ea, transparent);
62
+ }
63
+
64
+ .glow-2 {
65
+ bottom: -15%;
66
+ left: 10%;
67
+ background: radial-gradient(circle, #4f46e5, transparent);
68
+ animation-delay: -5s;
69
+ }
70
+
71
+ .glow-3 {
72
+ top: 50%;
73
+ right: 30%;
74
+ width: 400px;
75
+ height: 400px;
76
+ background: radial-gradient(circle, #c026d3, transparent);
77
+ opacity: 0.25;
78
+ animation-delay: -12s;
79
+ }
80
+
81
+ @keyframes moveGlow {
82
+ 0% {
83
+ transform: translate(0, 0) scale(1);
84
+ }
85
+
86
+ 33% {
87
+ transform: translate(100px, 150px) scale(1.1);
88
+ }
89
+
90
+ 66% {
91
+ transform: translate(-120px, 80px) scale(0.9);
92
+ }
93
+
94
+ 100% {
95
+ transform: translate(0, 0) scale(1);
96
+ }
97
  }
98
 
99
  /* Sidebar Styling */
100
  .sidebar {
101
+ width: var(--sidebar-width);
102
+ background: var(--sidebar-bg);
103
+ backdrop-filter: blur(40px);
104
  border-right: 1px solid var(--glass-border);
105
+ padding: 1.5rem;
106
  display: flex;
107
  flex-direction: column;
108
+ z-index: 100;
109
+ transition: width var(--animation-speed) cubic-bezier(0.4, 0, 0.2, 1);
110
+ position: relative;
111
+ box-shadow: 20px 0 50px rgba(0, 0, 0, 0.3);
112
+ }
113
+
114
+ .sidebar.collapsed {
115
+ width: var(--sidebar-collapsed-width);
116
+ padding: 1.5rem 0.5rem;
117
+ }
118
+
119
+ .sidebar-header {
120
+ display: flex;
121
+ align-items: center;
122
+ justify-content: space-between;
123
+ margin-bottom: 3rem;
124
+ padding: 0 0.5rem;
125
  }
126
 
127
  .logo {
128
  display: flex;
129
  align-items: center;
130
  gap: 12px;
131
+ transition: opacity 0.2s;
132
+ white-space: nowrap;
133
+ }
134
+
135
+ .sidebar.collapsed .logo span,
136
+ .sidebar.collapsed .sidebar-section h3,
137
+ .sidebar.collapsed .sidebar-section .form-group label,
138
+ .sidebar.collapsed .sidebar-section .form-group span,
139
+ .sidebar.collapsed .status-row,
140
+ .sidebar.collapsed .primary-btn span,
141
+ .sidebar.collapsed .sidebar-section select,
142
+ .sidebar.collapsed .sidebar-section input,
143
+ .sidebar.collapsed .sidebar-section .radio-group {
144
+ display: none;
145
  }
146
 
147
  .logo span {
148
  font-size: 1.5rem;
149
+ font-weight: 800;
150
+ text-transform: uppercase;
151
+ letter-spacing: 0.05em;
152
+ background: linear-gradient(135deg, #f8fafc, #9333ea, #c026d3);
153
  -webkit-background-clip: text;
154
  background-clip: text;
155
  -webkit-text-fill-color: transparent;
156
+ filter: drop-shadow(0 0 10px rgba(147, 51, 234, 0.3));
157
+ }
158
+
159
+ .sidebar-toggle {
160
+ background: rgba(147, 51, 234, 0.1);
161
+ border: 1px solid var(--glass-border);
162
+ color: var(--text-main);
163
+ cursor: pointer;
164
+ font-size: 1.2rem;
165
+ padding: 0.6rem;
166
+ border-radius: 12px;
167
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
168
+ display: flex;
169
+ align-items: center;
170
+ justify-content: center;
171
  }
172
 
173
+ .sidebar-toggle:hover {
174
+ background: var(--primary);
175
+ color: white;
176
+ transform: scale(1.05);
177
+ box-shadow: 0 0 20px var(--primary-glow);
178
  }
179
 
180
+ .sidebar.collapsed .sidebar-toggle {
181
+ margin: 0 auto;
182
+ }
183
+
184
+ .sidebar.collapsed .sidebar-section {
185
+ display: flex;
186
+ flex-direction: column;
187
+ align-items: center;
188
+ }
189
+
190
+ .sidebar-section i {
191
+ width: 20px;
192
+ text-align: center;
193
+ }
194
+
195
+ .sidebar.collapsed .sidebar-section .form-group button {
196
+ width: 40px;
197
+ height: 40px;
198
+ padding: 0;
199
+ margin: 0 auto;
200
+ border-radius: 50%;
201
+ }
202
+
203
+ .sidebar.collapsed .sidebar-section .form-group button i {
204
+ margin: 0;
205
+ }
206
+
207
+ .sidebar.collapsed #clear-chat {
208
+ width: 40px;
209
+ height: 40px;
210
+ padding: 0;
211
+ margin: 1rem auto;
212
+ border-radius: 50%;
213
+ }
214
+
215
+ .sidebar.collapsed #clear-chat i {
216
+ margin: 0;
217
  }
218
 
219
  /* Main Content Area */
 
222
  display: flex;
223
  flex-direction: column;
224
  position: relative;
225
+ padding: 2rem 3rem;
226
+ background: transparent;
227
+ transition: margin var(--animation-speed);
228
+ }
229
+
230
+ .content-header {
231
+ margin-bottom: 2.5rem;
232
+ border-bottom: 1px solid var(--glass-border);
233
+ padding-bottom: 1.5rem;
234
+ }
235
+
236
+ .content-header h1 {
237
+ font-size: 2.2rem;
238
+ font-weight: 700;
239
+ margin-bottom: 0.5rem;
240
+ background: linear-gradient(to right, #f8fafc, #94a3b8);
241
+ -webkit-background-clip: text;
242
+ background-clip: text;
243
+ -webkit-text-fill-color: transparent;
244
  }
245
 
246
  /* Glass Card Component */
247
  .glass-card {
248
  background: var(--card-bg);
249
+ backdrop-filter: blur(25px);
250
  border: 1px solid var(--glass-border);
251
+ border-radius: 32px;
252
+ box-shadow: 0 20px 60px 0 rgba(0, 0, 0, 0.5);
253
+ transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
254
+ }
255
+
256
+ .glass-card:hover {
257
+ border-color: rgba(147, 51, 234, 0.4);
258
+ box-shadow: 0 25px 80px 0 rgba(147, 51, 234, 0.15);
259
  }
260
 
261
  /* Chat Container */
 
265
  flex-direction: column;
266
  overflow: hidden;
267
  margin-bottom: 1.5rem;
268
+ padding: 0.5rem;
269
  }
270
 
271
  .chat-messages {
 
275
  display: flex;
276
  flex-direction: column;
277
  gap: 1.5rem;
278
+ scroll-behavior: smooth;
279
  }
280
 
281
  /* Scrollbar styling */
 
289
  }
290
 
291
  .message {
292
+ max-width: 85%;
293
+ padding: 1.25rem 1.75rem;
294
+ border-radius: 24px;
295
  font-size: 0.95rem;
296
+ line-height: 1.7;
297
+ animation: fadeIn 0.5s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
298
+ position: relative;
299
+ border: 1px solid var(--glass-border-light);
300
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
301
  }
302
 
303
  .message p {
 
364
 
365
  .user-message {
366
  align-self: flex-end;
367
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
368
  color: white;
369
  border-bottom-right-radius: 4px;
370
+ box-shadow: 0 10px 25px rgba(147, 51, 234, 0.2);
371
+ border: none;
372
  }
373
 
374
  .bot-message {
375
  align-self: flex-start;
376
+ background: rgba(255, 255, 255, 0.04);
377
+ backdrop-filter: blur(10px);
378
+ border: 1px solid var(--glass-border-light);
379
  border-top-left-radius: 4px;
380
  }
381
 
 
458
  }
459
 
460
  .send-btn {
461
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
462
  border: none;
463
+ width: 50px;
464
+ height: 50px;
465
+ border-radius: 16px;
466
  color: white;
467
  cursor: pointer;
468
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
469
+ box-shadow: 0 5px 15px rgba(147, 51, 234, 0.3);
470
  }
471
 
472
  .send-btn:hover {
473
+ transform: scale(1.1) rotate(5deg);
474
+ box-shadow: 0 8px 20px rgba(147, 51, 234, 0.5);
475
  }
476
 
477
  /* Form Styling (Sidebar) */
 
505
 
506
  .primary-btn {
507
  width: 100%;
508
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
509
  border: none;
510
  padding: 1rem;
511
+ border-radius: 16px;
512
  color: white;
513
+ font-weight: 700;
514
+ text-transform: uppercase;
515
+ letter-spacing: 0.05em;
516
  cursor: pointer;
517
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
518
  margin-top: 1rem;
519
+ box-shadow: 0 8px 15px rgba(147, 51, 234, 0.2);
520
  }
521
 
522
  .primary-btn:hover {
523
+ transform: translateY(-2px);
524
+ box-shadow: 0 12px 25px rgba(147, 51, 234, 0.4);
525
+ filter: brightness(1.1);
526
  }
527
 
528
  /* Status Bar */
static/js/app.js CHANGED
@@ -9,13 +9,27 @@ document.addEventListener('DOMContentLoaded', () => {
9
  const otherProviderGroup = document.getElementById('other-provider-group');
10
  const modifyGroup = document.getElementById('modify-group');
11
  const fileToModify = document.getElementById('file-to-modify');
 
 
12
  const statusBar = document.getElementById('status-bar');
13
  const statusText = document.getElementById('status-text');
14
  const clearChat = document.getElementById('clear-chat');
15
  const audioUpload = document.getElementById('audio-upload');
16
  const audioTrigger = document.getElementById('audio-trigger');
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  let chatHistory = [];
 
19
  let isProcessing = false;
20
  let configData = null;
21
 
@@ -32,7 +46,7 @@ document.addEventListener('DOMContentLoaded', () => {
32
  providerSelect.innerHTML = configData.providers.map(p =>
33
  `<option value="${p.name}">${p.name}</option>`
34
  ).join('');
35
- // Trigger initial category load
36
  populateCategories();
37
  }
38
 
@@ -41,7 +55,6 @@ document.addEventListener('DOMContentLoaded', () => {
41
  const provider = configData.providers.find(p => p.name === selectedProviderName);
42
  let categories = provider ? [...provider.categories] : [];
43
 
44
- // Ensure "Other..." is available
45
  if (!categories.includes("Other...")) {
46
  categories.push("Other...");
47
  }
@@ -50,7 +63,6 @@ document.addEventListener('DOMContentLoaded', () => {
50
  `<option value="${c}">${c}</option>`
51
  ).join('');
52
 
53
- // Reset category input visibility
54
  const otherCategoryGroup = document.getElementById('other-category-group');
55
  if (otherCategoryGroup) {
56
  otherCategoryGroup.style.display = categorySelect.value === 'Other...' ? 'block' : 'none';
@@ -85,12 +97,12 @@ document.addEventListener('DOMContentLoaded', () => {
85
 
86
  for (let i = 0; i < words.length; i++) {
87
  currentText += words[i] + ' ';
88
- msgDiv.innerHTML = marked.parse(currentText + '▌'); // Add cursor effect
89
  chatBox.scrollTop = chatBox.scrollHeight;
90
- await new Promise(resolve => setTimeout(resolve, 30)); // Snappy speed
91
  }
92
 
93
- msgDiv.innerHTML = marked.parse(fullText); // Final render without cursor
94
  chatBox.scrollTop = chatBox.scrollHeight;
95
  }
96
 
@@ -102,7 +114,6 @@ document.addEventListener('DOMContentLoaded', () => {
102
  userInput.value = '';
103
  isProcessing = true;
104
 
105
- // Add typing indicator
106
  const typingDiv = document.createElement('div');
107
  typingDiv.className = 'message bot-message';
108
  typingDiv.textContent = 'Typing...';
@@ -113,11 +124,21 @@ document.addEventListener('DOMContentLoaded', () => {
113
  const response = await fetch('/api/chat', {
114
  method: 'POST',
115
  headers: { 'Content-Type': 'application/json' },
116
- body: JSON.stringify({ prompt, history: chatHistory })
 
 
 
 
117
  });
118
  const data = await response.json();
119
 
120
  chatBox.removeChild(typingDiv);
 
 
 
 
 
 
121
  if (data.answer) {
122
  addMessage(data.answer, 'bot');
123
  chatHistory.push(prompt);
@@ -142,6 +163,7 @@ document.addEventListener('DOMContentLoaded', () => {
142
  clearChat.addEventListener('click', () => {
143
  chatBox.innerHTML = '<div class="message bot-message">Chat history cleared. How can I help?</div>';
144
  chatHistory = [];
 
145
  });
146
 
147
  // --- Audio Chat Logic ---
@@ -155,10 +177,10 @@ document.addEventListener('DOMContentLoaded', () => {
155
  const formData = new FormData();
156
  formData.append('audio', file);
157
  formData.append('history', JSON.stringify(chatHistory));
 
158
 
159
  isProcessing = true;
160
 
161
- // Add "Audio Uploaded" user message with Play button
162
  const userMsgDiv = document.createElement('div');
163
  userMsgDiv.className = 'message user-message audio-message-bubble';
164
  userMsgDiv.innerHTML = `
@@ -172,7 +194,6 @@ document.addEventListener('DOMContentLoaded', () => {
172
  chatBox.appendChild(userMsgDiv);
173
  chatBox.scrollTop = chatBox.scrollHeight;
174
 
175
- // Play functionality
176
  const playBtn = userMsgDiv.querySelector('.play-btn');
177
  const audio = new Audio(audioUrl);
178
 
@@ -201,7 +222,6 @@ document.addEventListener('DOMContentLoaded', () => {
201
  alert("Error loading the audio file for playback.");
202
  };
203
 
204
- // Add typing indicator
205
  const typingDiv = document.createElement('div');
206
  typingDiv.className = 'message bot-message';
207
  typingDiv.textContent = 'Transcribing audio...';
@@ -216,13 +236,19 @@ document.addEventListener('DOMContentLoaded', () => {
216
  const data = await response.json();
217
 
218
  chatBox.removeChild(typingDiv);
 
 
 
 
 
 
219
  if (data.transcription) {
220
  const transDiv = document.createElement('div');
221
  transDiv.innerHTML = `
222
- <div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 4px;">Raw Transcript:</div>
223
- <div style="font-size: 0.85rem; font-style: italic; opacity: 0.7; margin-bottom: 12px;">"${data.transcription}"</div>
224
- <div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 4px;">Refined Question:</div>
225
- <div style="font-size: 1rem; font-weight: 600; color: #818cf8;">"${data.summarized_question || data.transcription}"</div>
226
  `;
227
  transDiv.style.marginTop = '12px';
228
  transDiv.style.borderTop = '1px solid rgba(255,255,255,0.1)';
 
9
  const otherProviderGroup = document.getElementById('other-provider-group');
10
  const modifyGroup = document.getElementById('modify-group');
11
  const fileToModify = document.getElementById('file-to-modify');
12
+ const sidebar = document.getElementById('sidebar');
13
+ const sidebarToggle = document.getElementById('sidebar-toggle');
14
  const statusBar = document.getElementById('status-bar');
15
  const statusText = document.getElementById('status-text');
16
  const clearChat = document.getElementById('clear-chat');
17
  const audioUpload = document.getElementById('audio-upload');
18
  const audioTrigger = document.getElementById('audio-trigger');
19
 
20
+ // --- Sidebar Toggle Logic ---
21
+ const isSidebarCollapsed = localStorage.getItem('sidebarCollapsed') === 'true';
22
+ if (isSidebarCollapsed) {
23
+ sidebar.classList.add('collapsed');
24
+ }
25
+
26
+ sidebarToggle.addEventListener('click', () => {
27
+ sidebar.classList.toggle('collapsed');
28
+ localStorage.setItem('sidebarCollapsed', sidebar.classList.contains('collapsed'));
29
+ });
30
+
31
  let chatHistory = [];
32
+ let extractedEntities = {}; // State persistence
33
  let isProcessing = false;
34
  let configData = null;
35
 
 
46
  providerSelect.innerHTML = configData.providers.map(p =>
47
  `<option value="${p.name}">${p.name}</option>`
48
  ).join('');
49
+ // Trigger initial load
50
  populateCategories();
51
  }
52
 
 
55
  const provider = configData.providers.find(p => p.name === selectedProviderName);
56
  let categories = provider ? [...provider.categories] : [];
57
 
 
58
  if (!categories.includes("Other...")) {
59
  categories.push("Other...");
60
  }
 
63
  `<option value="${c}">${c}</option>`
64
  ).join('');
65
 
 
66
  const otherCategoryGroup = document.getElementById('other-category-group');
67
  if (otherCategoryGroup) {
68
  otherCategoryGroup.style.display = categorySelect.value === 'Other...' ? 'block' : 'none';
 
97
 
98
  for (let i = 0; i < words.length; i++) {
99
  currentText += words[i] + ' ';
100
+ msgDiv.innerHTML = marked.parse(currentText + '▌');
101
  chatBox.scrollTop = chatBox.scrollHeight;
102
+ await new Promise(resolve => setTimeout(resolve, 30));
103
  }
104
 
105
+ msgDiv.innerHTML = marked.parse(fullText);
106
  chatBox.scrollTop = chatBox.scrollHeight;
107
  }
108
 
 
114
  userInput.value = '';
115
  isProcessing = true;
116
 
 
117
  const typingDiv = document.createElement('div');
118
  typingDiv.className = 'message bot-message';
119
  typingDiv.textContent = 'Typing...';
 
124
  const response = await fetch('/api/chat', {
125
  method: 'POST',
126
  headers: { 'Content-Type': 'application/json' },
127
+ body: JSON.stringify({
128
+ prompt,
129
+ history: chatHistory,
130
+ extracted_entities: extractedEntities
131
+ })
132
  });
133
  const data = await response.json();
134
 
135
  chatBox.removeChild(typingDiv);
136
+
137
+ // Update state from backend response
138
+ if (data.extracted_entities) {
139
+ extractedEntities = data.extracted_entities;
140
+ }
141
+
142
  if (data.answer) {
143
  addMessage(data.answer, 'bot');
144
  chatHistory.push(prompt);
 
163
  clearChat.addEventListener('click', () => {
164
  chatBox.innerHTML = '<div class="message bot-message">Chat history cleared. How can I help?</div>';
165
  chatHistory = [];
166
+ extractedEntities = {}; // Reset state
167
  });
168
 
169
  // --- Audio Chat Logic ---
 
177
  const formData = new FormData();
178
  formData.append('audio', file);
179
  formData.append('history', JSON.stringify(chatHistory));
180
+ formData.append('extracted_entities', JSON.stringify(extractedEntities));
181
 
182
  isProcessing = true;
183
 
 
184
  const userMsgDiv = document.createElement('div');
185
  userMsgDiv.className = 'message user-message audio-message-bubble';
186
  userMsgDiv.innerHTML = `
 
194
  chatBox.appendChild(userMsgDiv);
195
  chatBox.scrollTop = chatBox.scrollHeight;
196
 
 
197
  const playBtn = userMsgDiv.querySelector('.play-btn');
198
  const audio = new Audio(audioUrl);
199
 
 
222
  alert("Error loading the audio file for playback.");
223
  };
224
 
 
225
  const typingDiv = document.createElement('div');
226
  typingDiv.className = 'message bot-message';
227
  typingDiv.textContent = 'Transcribing audio...';
 
236
  const data = await response.json();
237
 
238
  chatBox.removeChild(typingDiv);
239
+
240
+ // Update state from backend response
241
+ if (data.extracted_entities) {
242
+ extractedEntities = data.extracted_entities;
243
+ }
244
+
245
  if (data.transcription) {
246
  const transDiv = document.createElement('div');
247
  transDiv.innerHTML = `
248
+ <div style="font-size: 0.75rem; color: rgba(255,255,255,0.7); margin-bottom: 4px;">Raw Transcript:</div>
249
+ <div style="font-size: 0.85rem; font-style: italic; color: rgba(255,255,255,0.9); margin-bottom: 12px;">"${data.transcription}"</div>
250
+ <div style="font-size: 0.75rem; color: rgba(255,255,255,0.7); margin-bottom: 4px;">Refined Question:</div>
251
+ <div style="font-size: 1rem; font-weight: 600; color: #ffffff; text-shadow: 0 2px 4px rgba(0,0,0,0.2);">"${data.summarized_question || data.transcription}"</div>
252
  `;
253
  transDiv.style.marginTop = '12px';
254
  transDiv.style.borderTop = '1px solid rgba(255,255,255,0.1)';
templates/index.html CHANGED
@@ -10,14 +10,24 @@
10
  </head>
11
 
12
  <body>
13
- <aside class="sidebar">
14
- <div class="logo">
15
- <i class="fas fa-shield-halved fa-2x" style="color: #818cf8;"></i>
16
- <span>AI ADVISOR</span>
 
 
 
 
 
 
 
 
 
 
17
  </div>
18
 
19
  <div class="sidebar-section">
20
- <h3>Document Manager</h3>
21
  <div class="form-group">
22
  <select id="provider-select">
23
  <!-- Loaded dynamically -->
@@ -36,13 +46,12 @@
36
  </div>
37
 
38
  <div class="form-group">
39
- <label
40
- style="font-size: 0.8rem; color: var(--text-muted); display: block; margin-bottom: 0.5rem;">Mode</label>
41
- <div style="display: flex; gap: 10px;">
42
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="New Upload"
43
- checked> New</label>
44
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="Modify Existing">
45
- Modify</label>
46
  </div>
47
  </div>
48
 
@@ -54,8 +63,8 @@
54
 
55
  <div class="form-group">
56
  <input type="file" id="doc-upload" hidden accept=".pdf,.docx">
57
- <button class="primary-btn" id="upload-trigger">
58
- <i class="fas fa-file-upload"></i> &nbsp; Choose & Process
59
  </button>
60
  </div>
61
  </div>
@@ -70,17 +79,18 @@
70
  </div>
71
  </div>
72
 
73
- <button class="primary-btn" style="background: rgba(255,255,255,0.05); margin-top: 1rem;" id="clear-chat">
74
- <i class="fas fa-trash-alt"></i> &nbsp; Clear History
 
75
  </button>
76
  </aside>
77
 
78
  <main class="main-content">
79
- <header style="margin-bottom: 2rem;">
80
- <h1 style="font-size: 1.8rem;">Direct-to-Agent Policy Advisory</h1>
81
  <p style="color: var(--text-muted);">Get grounded answers from your insurance documents.</p>
82
- <p style="color: var(--text-muted);">Please Note :- This response is based solely on insurer-provided
83
- documents and is not financial advice.</p>
84
  </header>
85
 
86
  <div class="chat-container glass-card">
 
10
  </head>
11
 
12
  <body>
13
+ <div class="bg-glow-container">
14
+ <div class="glow-blob glow-1"></div>
15
+ <div class="glow-blob glow-2"></div>
16
+ <div class="glow-blob glow-3"></div>
17
+ </div>
18
+ <aside class="sidebar" id="sidebar">
19
+ <div class="sidebar-header">
20
+ <div class="logo">
21
+ <i class="fas fa-shield-halved fa-2x" style="color: var(--primary);"></i>
22
+ <span>AI ADVISOR</span>
23
+ </div>
24
+ <button class="sidebar-toggle" id="sidebar-toggle">
25
+ <i class="fas fa-bars"></i>
26
+ </button>
27
  </div>
28
 
29
  <div class="sidebar-section">
30
+ <h3><span>Document Manager</span></h3>
31
  <div class="form-group">
32
  <select id="provider-select">
33
  <!-- Loaded dynamically -->
 
46
  </div>
47
 
48
  <div class="form-group">
49
+ <label><span>Mode</span></label>
50
+ <div class="radio-group" style="display: flex; gap: 10px;">
 
51
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="New Upload"
52
+ checked> <span>New</span></label>
53
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="Modify Existing">
54
+ <span>Modify</span></label>
55
  </div>
56
  </div>
57
 
 
63
 
64
  <div class="form-group">
65
  <input type="file" id="doc-upload" hidden accept=".pdf,.docx">
66
+ <button class="primary-btn" id="upload-trigger" title="Upload Document">
67
+ <i class="fas fa-file-upload"></i> <span>&nbsp; Choose & Process</span>
68
  </button>
69
  </div>
70
  </div>
 
79
  </div>
80
  </div>
81
 
82
+ <button class="primary-btn" style="background: rgba(255,255,255,0.05); margin-top: 1rem;" id="clear-chat"
83
+ title="Clear History">
84
+ <i class="fas fa-trash-alt"></i> <span>&nbsp; Clear History</span>
85
  </button>
86
  </aside>
87
 
88
  <main class="main-content">
89
+ <header class="content-header">
90
+ <h1>Direct-to-Agent Policy Advisory</h1>
91
  <p style="color: var(--text-muted);">Get grounded answers from your insurance documents.</p>
92
+ <p style="font-size: 0.85rem; opacity: 0.8; margin-top: 5px;">Note: This response is based solely on
93
+ insurer-provided documents and is not financial advice.</p>
94
  </header>
95
 
96
  <div class="chat-container glass-card">
utils/cache.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LRU cache with TTL support for application data.
3
+ """
4
+ import time
5
+ import threading
6
+ from typing import Any, Optional, Dict
7
+ from collections import OrderedDict
8
+ from config import config
9
+
10
+
11
class LRUCacheWithTTL:
    """Thread-safe LRU cache whose entries also expire after a fixed TTL.

    Entries are dropped either when the cache grows past ``max_size``
    (least-recently-used first) or lazily on ``get`` once they are older
    than ``ttl`` seconds.
    """

    def __init__(self, max_size: int = None, ttl_seconds: int = None):
        """
        Initialize cache.

        Args:
            max_size: Maximum number of items (defaults to config.CACHE_MAX_SIZE)
            ttl_seconds: Time to live in seconds (defaults to config.CACHE_TTL)
        """
        self.max_size = max_size or config.CACHE_MAX_SIZE
        self.ttl = ttl_seconds or config.CACHE_TTL
        self._cache = OrderedDict()    # key -> value, ordered oldest-first
        self._timestamps = {}          # key -> last write time (epoch seconds)
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or None if absent or expired."""
        with self._lock:
            stored_at = self._timestamps.get(key)
            if stored_at is None:
                self._misses += 1
                return None
            if time.time() - stored_at > self.ttl:
                # Lazy expiry: the stale entry is dropped on access.
                self._evict(key)
                self._misses += 1
                return None
            # Touch the entry so it becomes the most recently used.
            self._cache.move_to_end(key)
            self._hits += 1
            return self._cache[key]

    def set(self, key: str, value: Any):
        """Insert or refresh *key*, evicting the LRU entry when over capacity."""
        with self._lock:
            was_present = key in self._cache
            self._cache[key] = value
            self._timestamps[key] = time.time()
            if was_present:
                # Refreshing an existing key also bumps its recency.
                self._cache.move_to_end(key)
                return
            if len(self._cache) > self.max_size:
                # OrderedDict keeps insertion/recency order: the first item
                # is the least recently used one.
                oldest_key, _ = self._cache.popitem(last=False)
                self._timestamps.pop(oldest_key, None)

    def _evict(self, key: str):
        """Drop *key* from both maps (no-op if absent). Caller holds the lock."""
        if key in self._cache:
            del self._cache[key]
            del self._timestamps[key]

    def clear(self):
        """Remove every cached entry (hit/miss counters are preserved)."""
        with self._lock:
            self._cache.clear()
            self._timestamps.clear()

    def invalidate(self, key: str):
        """Explicitly drop a single cache entry."""
        with self._lock:
            self._evict(key)

    def get_stats(self) -> Dict:
        """Return a snapshot of size, hit/miss counters and hit rate."""
        with self._lock:
            total_requests = self._hits + self._misses
            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": self._hits,
                "misses": self._misses,
                # max(1, ...) avoids division by zero before any access.
                "hit_rate": self._hits / max(1, total_requests),
                "ttl_seconds": self.ttl,
            }
98
+
99
+
100
class CacheManager:
    """Owns the application's named caches and coordinates invalidation."""

    def __init__(self):
        # Plan listings: small, refreshed every 5 minutes.
        self.plan_cache = LRUCacheWithTTL(max_size=100, ttl_seconds=300)
        # Per-plan metadata: larger and more stable, kept for 10 minutes.
        self.metadata_cache = LRUCacheWithTTL(max_size=500, ttl_seconds=600)
        # Query rewrites: many distinct keys, 5-minute lifetime.
        self.query_cache = LRUCacheWithTTL(max_size=1000, ttl_seconds=300)

    def invalidate_all(self):
        """Invalidate plan and metadata caches (e.g. after ingestion).

        The query cache is intentionally left alone: rewritten queries stay
        valid regardless of newly ingested documents.
        """
        for cache in (self.plan_cache, self.metadata_cache):
            cache.clear()

    def get_all_stats(self) -> Dict:
        """Return per-cache statistics keyed by cache name."""
        named_caches = {
            "plan_cache": self.plan_cache,
            "metadata_cache": self.metadata_cache,
            "query_cache": self.query_cache,
        }
        return {name: cache.get_stats() for name, cache in named_caches.items()}
126
+
127
+
128
# Global cache manager — a single shared instance so every module sees the
# same caches and invalidate_all() takes effect process-wide.
cache_manager = CacheManager()
utils/circuit_breaker.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Circuit breaker pattern for external dependencies.
3
+ """
4
+ import time
5
+ import threading
6
+ from enum import Enum
7
+ from typing import Callable, Any, Optional
8
+ from config import config
9
+
10
+
11
class CircuitState(Enum):
    """Circuit breaker states."""
    CLOSED = "closed"        # Normal operation: requests pass through
    OPEN = "open"            # Failing: requests are rejected
    HALF_OPEN = "half_open"  # Testing recovery with probe requests


class CircuitBreakerError(Exception):
    """Raised when a call is rejected because its circuit is open."""
    pass


class CircuitBreaker:
    """
    Circuit breaker for external dependencies.

    States:
        - CLOSED: normal operation, all requests pass through
        - OPEN: failure threshold reached, all requests rejected
        - HALF_OPEN: after the timeout, test requests are allowed

    Concurrency note: state transitions are guarded by ``self._lock``, a
    non-reentrant ``threading.Lock``. The protected call itself must run
    *outside* the lock — ``_on_success``/``_on_failure`` re-acquire the
    same lock, and holding it across the call would both deadlock and
    serialize every protected call.
    """

    def __init__(
        self,
        name: str,
        failure_threshold: Optional[int] = None,
        timeout_seconds: Optional[int] = None
    ):
        """
        Initialize circuit breaker.

        Args:
            name: Name of the circuit (for logging/monitoring).
            failure_threshold: Consecutive failures before opening
                (defaults to config.CIRCUIT_BREAKER_FAILURE_THRESHOLD).
            timeout_seconds: Seconds to wait before attempting recovery
                (defaults to config.CIRCUIT_BREAKER_TIMEOUT).
        """
        self.name = name
        self.failure_threshold = failure_threshold or config.CIRCUIT_BREAKER_FAILURE_THRESHOLD
        self.timeout = timeout_seconds or config.CIRCUIT_BREAKER_TIMEOUT

        self.state = CircuitState.CLOSED
        self.failure_count = 0         # consecutive failures while CLOSED
        self.last_failure_time = None  # epoch seconds of most recent failure
        self.success_count = 0         # consecutive successes while HALF_OPEN

        self._lock = threading.Lock()

    def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute *func* with circuit breaker protection.

        Args:
            func: Function to call.
            *args, **kwargs: Arguments forwarded to *func*.

        Returns:
            Whatever *func* returns.

        Raises:
            CircuitBreakerError: If the circuit is open and the recovery
                timeout has not elapsed yet.
        """
        with self._lock:
            # OPEN -> HALF_OPEN once the timeout has elapsed; otherwise reject.
            if self.state == CircuitState.OPEN:
                if self._should_attempt_reset():
                    self.state = CircuitState.HALF_OPEN
                    self.success_count = 0
                else:
                    raise CircuitBreakerError(
                        f"Circuit breaker '{self.name}' is OPEN. "
                        f"Will retry after {self.timeout}s"
                    )

        # BUGFIX: run the protected call OUTSIDE self._lock. The previous
        # version held the lock here and then called _on_success/_on_failure,
        # which re-acquire the same non-reentrant lock — deadlocking on the
        # very first call.
        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise
        self._on_success()
        return result

    def _should_attempt_reset(self) -> bool:
        """Return True once the recovery timeout has elapsed (caller holds lock)."""
        if self.last_failure_time is None:
            return True
        return time.time() - self.last_failure_time >= self.timeout

    def _on_success(self):
        """Record a successful call; close the circuit after 3 HALF_OPEN successes."""
        with self._lock:
            self.failure_count = 0

            if self.state == CircuitState.HALF_OPEN:
                self.success_count += 1
                # After 3 successful probes, the dependency is considered healthy.
                if self.success_count >= 3:
                    self.state = CircuitState.CLOSED
                    self.success_count = 0

    def _on_failure(self):
        """Record a failed call; open the circuit when warranted."""
        with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            # Any failure while probing re-opens the circuit immediately.
            if self.state == CircuitState.HALF_OPEN:
                self.state = CircuitState.OPEN
                self.failure_count = 0
                return

            # In CLOSED, open once the consecutive-failure threshold is hit.
            if self.failure_count >= self.failure_threshold:
                self.state = CircuitState.OPEN

    def reset(self):
        """Manually force the breaker back to CLOSED and clear all counters."""
        with self._lock:
            self.state = CircuitState.CLOSED
            self.failure_count = 0
            self.success_count = 0
            self.last_failure_time = None

    def get_state(self) -> dict:
        """Return a snapshot of the breaker's state for monitoring."""
        with self._lock:
            return {
                "name": self.name,
                "state": self.state.value,
                "failure_count": self.failure_count,
                "success_count": self.success_count,
                "last_failure_time": self.last_failure_time
            }
145
+
146
+
147
class CircuitBreakerManager:
    """Registry of named circuit breakers for the app's external dependencies."""

    def __init__(self):
        # Pre-register breakers for the known dependencies with tuned limits.
        self.breakers = {
            "llm": CircuitBreaker("llm", failure_threshold=5, timeout_seconds=60),
            "retriever": CircuitBreaker("retriever", failure_threshold=3, timeout_seconds=30),
            "vector_store": CircuitBreaker("vector_store", failure_threshold=3, timeout_seconds=30),
        }

    def get_breaker(self, name: str) -> CircuitBreaker:
        """Return the breaker registered under *name*, creating one on first use."""
        breaker = self.breakers.get(name)
        if breaker is None:
            # Unknown dependency: register a breaker with default settings.
            breaker = CircuitBreaker(name)
            self.breakers[name] = breaker
        return breaker

    def get_all_states(self) -> dict:
        """Snapshot the state of every registered breaker, keyed by name."""
        return {name: breaker.get_state() for name, breaker in self.breakers.items()}

    def reset_all(self):
        """Force every registered breaker back to CLOSED."""
        for breaker in self.breakers.values():
            breaker.reset()
174
+
175
+
176
# Global circuit breaker manager — a single shared registry so every caller
# consults the same breaker state for each external dependency.
circuit_breaker_manager = CircuitBreakerManager()
utils/logger.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Centralized structured logging with rotation and request ID tracking.
3
+ """
4
+ import os
5
+ import json
6
+ import logging
7
+ import threading
8
+ from logging.handlers import RotatingFileHandler
9
+ from typing import Optional
10
+ from datetime import datetime
11
+ from config import config
12
+
13
# Thread-local storage for request context (request_id, user_ip): populated by
# set_request_context() per worker thread and read back by RequestContextFilter
# when log records are emitted.
_request_context = threading.local()
15
+
16
+
17
class RequestContextFilter(logging.Filter):
    """Logging filter that stamps each record with the current request context."""

    def filter(self, record):
        # Copy the thread-local request context onto the record; threads that
        # never set a context get the 'N/A' placeholder instead.
        for attr in ('request_id', 'user_ip'):
            setattr(record, attr, getattr(_request_context, attr, 'N/A'))
        # Always keep the record — this filter only annotates, it never drops.
        return True
24
+
25
+
26
class JSONFormatter(logging.Formatter):
    """Render each log record as a single-line JSON document."""

    def format(self, record):
        payload = {
            'timestamp': datetime.utcnow().isoformat(),
            'level': record.levelname,
            'logger': record.name,
            'message': record.getMessage(),
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno,
            # Request context is injected by RequestContextFilter; default to
            # 'N/A' for records emitted outside a request.
            'request_id': getattr(record, 'request_id', 'N/A'),
            'user_ip': getattr(record, 'user_ip', 'N/A'),
        }

        # Include the formatted traceback when the record carries one.
        if record.exc_info:
            payload['exception'] = self.formatException(record.exc_info)

        # Structured extras (see log_with_extra) go under a dedicated key.
        if hasattr(record, 'extra_data'):
            payload['extra'] = record.extra_data

        return json.dumps(payload)
51
+
52
+
53
def setup_logger(name: str, log_level: Optional[str] = None) -> logging.Logger:
    """
    Create a logger with both file and console handlers.

    Args:
        name: Logger name (typically ``__name__``).
        log_level: Optional override for the configured log level.

    Returns:
        Configured logger instance.
    """
    logger = logging.getLogger(name)

    # Already configured by a previous call — don't stack duplicate handlers.
    if logger.handlers:
        return logger

    effective_level = log_level or config.LOG_LEVEL
    logger.setLevel(getattr(logging, effective_level.upper()))

    # Stamp every record with request_id / user_ip.
    logger.addFilter(RequestContextFilter())

    # --- console handler (human-readable in development) ---
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if config.DEBUG else logging.INFO)
    if config.ENVIRONMENT.value == "production":
        # Machine-readable JSON for log aggregators in production.
        console.setFormatter(JSONFormatter())
    else:
        console.setFormatter(logging.Formatter(
            '%(asctime)s - [%(request_id)s] - %(name)s - %(levelname)s - %(message)s'
        ))
    logger.addHandler(console)

    # --- rotating file handler (always JSON) ---
    try:
        log_dir = os.path.dirname(config.LOG_FILE_PATH)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        file_handler = RotatingFileHandler(
            config.LOG_FILE_PATH,
            maxBytes=config.LOG_MAX_BYTES,
            backupCount=config.LOG_BACKUP_COUNT,
        )
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(JSONFormatter())
        logger.addHandler(file_handler)
    except Exception as e:
        # File logging is best-effort: keep running with console output only.
        logger.warning(f"Failed to setup file logging: {e}")

    return logger
113
+
114
+
115
def set_request_context(request_id: str, user_ip: Optional[str] = None):
    """Bind *request_id* (and optionally *user_ip*) to the current thread."""
    _request_context.request_id = request_id
    # Missing/empty client address is normalized to the 'unknown' placeholder.
    _request_context.user_ip = user_ip if user_ip else 'unknown'
119
+
120
+
121
def clear_request_context():
    """Remove any request context previously bound to the current thread."""
    # delattr on an unset thread-local attribute raises, so guard each one.
    for attr in ('request_id', 'user_ip'):
        if hasattr(_request_context, attr):
            delattr(_request_context, attr)
127
+
128
+
129
def log_with_extra(logger: logging.Logger, level: str, message: str, **extra_data):
    """Emit *message* at *level* on *logger*, attaching structured **extra_data.

    The payload lands on the LogRecord as ``extra_data``, which JSONFormatter
    serializes under the ``extra`` key.
    """
    emit = getattr(logger, level.lower())

    if extra_data:
        # logging's `extra` mapping becomes attributes on the LogRecord.
        emit(message, extra={'extra_data': extra_data})
    else:
        emit(message)
139
+
140
+
141
# Pre-built loggers for the application's main subsystems, so modules can
# import the one they need instead of each calling setup_logger() themselves.
app_logger = setup_logger('app')
agent_logger = setup_logger('agents')
retrieval_logger = setup_logger('retrieval')
ingestion_logger = setup_logger('ingestion')
llm_logger = setup_logger('llm')
api_logger = setup_logger('api')
utils/metrics.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application metrics collection and tracking.
3
+ """
4
+ import time
5
+ import threading
6
+ from typing import Dict, List, Optional
7
+ from collections import defaultdict, deque
8
+ from datetime import datetime, timedelta
9
+ from config import config
10
+
11
+
12
class MetricsCollector:
    """Thread-safe, in-memory collector of runtime metrics.

    Every mutator acquires a single internal lock, so one instance can be
    shared across request-handler threads. Latency samples live in bounded
    deques (most recent 1000 per series) to cap memory use.
    """

    def __init__(self):
        self._lock = threading.Lock()

        # Request metrics
        self.request_count = 0
        self.request_latencies = deque(maxlen=1000)  # keep last 1000 samples
        self.request_errors = 0

        # Intent distribution
        self.intent_counts = defaultdict(int)

        # LLM metrics
        self.llm_call_count = 0
        self.llm_cache_hits = 0
        self.llm_cache_misses = 0
        self.llm_latencies = deque(maxlen=1000)
        self.llm_errors = 0

        # Retrieval metrics
        self.retrieval_count = 0
        self.retrieval_latencies = deque(maxlen=1000)
        self.retrieval_empty_results = 0

        # Cache metrics
        self.cache_hits = 0
        self.cache_misses = 0

        # Circuit breaker metrics
        self.circuit_breaker_opens = 0
        self.circuit_breaker_failures = 0

        # Requests currently in flight
        self.active_requests = 0

        # Collection start time (used for uptime reporting)
        self.start_time = datetime.now()

    def record_request(self, latency_ms: float, intent: Optional[str] = None, error: bool = False):
        """Record one completed request: latency, optional intent, error flag."""
        with self._lock:
            self.request_count += 1
            self.request_latencies.append(latency_ms)
            # bool subclasses int, so truthiness folds straight into the counter.
            self.request_errors += bool(error)
            if intent:
                self.intent_counts[intent] += 1

    def record_llm_call(self, latency_ms: float, cache_hit: bool = False, error: bool = False):
        """Record one LLM call and whether it was served from cache."""
        with self._lock:
            self.llm_call_count += 1
            self.llm_latencies.append(latency_ms)
            bucket = "llm_cache_hits" if cache_hit else "llm_cache_misses"
            setattr(self, bucket, getattr(self, bucket) + 1)
            self.llm_errors += bool(error)

    def record_retrieval(self, latency_ms: float, result_count: int):
        """Record one retrieval, counting searches that returned nothing."""
        with self._lock:
            self.retrieval_count += 1
            self.retrieval_latencies.append(latency_ms)
            self.retrieval_empty_results += (result_count == 0)

    def record_cache_access(self, hit: bool):
        """Record one cache lookup as either a hit or a miss."""
        with self._lock:
            bucket = "cache_hits" if hit else "cache_misses"
            setattr(self, bucket, getattr(self, bucket) + 1)

    def record_circuit_breaker_event(self, opened: bool = False, failure: bool = False):
        """Record a circuit-breaker open and/or tracked failure."""
        with self._lock:
            self.circuit_breaker_opens += bool(opened)
            self.circuit_breaker_failures += bool(failure)

    def increment_active_requests(self):
        """Mark one more request as in flight."""
        with self._lock:
            self.active_requests += 1

    def decrement_active_requests(self):
        """Mark one request as finished; the count never drops below zero."""
        with self._lock:
            self.active_requests = max(0, self.active_requests - 1)

    @staticmethod
    def _percentile(sorted_samples, p):
        """Linear-interpolation percentile over an already-sorted sequence."""
        if not sorted_samples:
            return 0
        rank = (len(sorted_samples) - 1) * p
        lo = int(rank)
        frac = rank - lo
        if lo + 1 < len(sorted_samples):
            return sorted_samples[lo] * (1 - frac) + sorted_samples[lo + 1] * frac
        return sorted_samples[lo]

    def get_metrics(self) -> Dict:
        """Return a point-in-time snapshot of every metric as a nested dict."""
        with self._lock:
            uptime = datetime.now() - self.start_time

            # Sort each series once; an empty series degrades to [0] so the
            # min/max/percentile fields are always present in the output.
            req_sorted = sorted(self.request_latencies) or [0]
            llm_sorted = sorted(self.llm_latencies) or [0]
            ret_sorted = sorted(self.retrieval_latencies) or [0]

            def summary(samples, include_p99=False):
                # Latency roll-up for one sorted sample series.
                stats = {
                    "min": min(samples),
                    "max": max(samples),
                    "p50": self._percentile(samples, 0.50),
                    "p95": self._percentile(samples, 0.95),
                }
                if include_p99:
                    stats["p99"] = self._percentile(samples, 0.99)
                return stats

            return {
                "uptime_seconds": uptime.total_seconds(),
                "timestamp": datetime.now().isoformat(),

                # Request metrics
                "requests": {
                    "total": self.request_count,
                    "active": self.active_requests,
                    "errors": self.request_errors,
                    "error_rate": self.request_errors / max(1, self.request_count),
                    "latency_ms": summary(req_sorted, include_p99=True),
                },

                # Intent distribution
                "intents": dict(self.intent_counts),

                # LLM metrics
                "llm": {
                    "total_calls": self.llm_call_count,
                    "cache_hits": self.llm_cache_hits,
                    "cache_misses": self.llm_cache_misses,
                    "cache_hit_rate": self.llm_cache_hits / max(1, self.llm_call_count),
                    "errors": self.llm_errors,
                    "latency_ms": summary(llm_sorted),
                },

                # Retrieval metrics
                "retrieval": {
                    "total_searches": self.retrieval_count,
                    "empty_results": self.retrieval_empty_results,
                    "empty_result_rate": self.retrieval_empty_results / max(1, self.retrieval_count),
                    "latency_ms": summary(ret_sorted),
                },

                # Cache metrics
                "cache": {
                    "hits": self.cache_hits,
                    "misses": self.cache_misses,
                    "hit_rate": self.cache_hits / max(1, self.cache_hits + self.cache_misses),
                },

                # Circuit breaker metrics
                "circuit_breaker": {
                    "opens": self.circuit_breaker_opens,
                    "failures": self.circuit_breaker_failures,
                },
            }

    def reset_metrics(self):
        """Zero all counters, drop all samples, and restart the uptime clock.

        Note: active_requests is not reset, mirroring the original behavior —
        it tracks work currently in flight rather than accumulated history.
        """
        with self._lock:
            for counter in (
                "request_count", "request_errors",
                "llm_call_count", "llm_cache_hits", "llm_cache_misses", "llm_errors",
                "retrieval_count", "retrieval_empty_results",
                "cache_hits", "cache_misses",
                "circuit_breaker_opens", "circuit_breaker_failures",
            ):
                setattr(self, counter, 0)
            self.request_latencies.clear()
            self.llm_latencies.clear()
            self.retrieval_latencies.clear()
            self.intent_counts.clear()
            self.start_time = datetime.now()
217
+
218
+
219
# Global metrics instance: module-level singleton shared by all importers.
metrics = MetricsCollector()
utils/request_logger.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLite-based request logging for analytics and debugging.
3
+ """
4
+ import sqlite3
5
+ import threading
6
+ import json
7
+ from typing import Optional, Dict, Any
8
+ from datetime import datetime
9
+ from config import config
10
+ import os
11
+
12
+
13
class RequestLogger:
    """Thread-safe request logger with SQLite backend.

    Implemented as a process-wide singleton: the first instantiation opens
    (and, if needed, creates) the SQLite database at
    ``config.REQUEST_LOG_DB_PATH``; every later ``RequestLogger()`` call
    returns that same instance. The class-level ``_lock`` guards both the
    singleton construction and every database operation, which is what makes
    the single shared connection (opened with ``check_same_thread=False``)
    safe to use from multiple threads. All query helpers swallow exceptions
    and return empty/zero values so logging can never break request handling.
    """

    # Singleton instance; populated on first construction.
    _instance = None
    # Guards singleton creation AND serializes all connection access
    # (methods use it as ``self._lock``, which resolves to this same object).
    _lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking: the unlocked fast path avoids contention
        # once the singleton exists; the locked re-check prevents two threads
        # from both initializing the database.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(RequestLogger, cls).__new__(cls)
                    cls._instance._initialize_db()
        return cls._instance

    def _initialize_db(self):
        """Initialize the SQLite database."""
        # Ensure the parent directory exists before connecting; dirname is
        # empty when the configured path is a bare filename in the CWD.
        db_dir = os.path.dirname(config.REQUEST_LOG_DB_PATH)
        if db_dir:
            os.makedirs(db_dir, exist_ok=True)

        self.db_path = config.REQUEST_LOG_DB_PATH
        # One shared connection for the whole process; check_same_thread=False
        # permits cross-thread use, and _lock serializes every access to it.
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS requests (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT NOT NULL,
                request_id TEXT,
                user_ip TEXT,
                query TEXT,
                intent TEXT,
                extracted_entities TEXT,
                retrieval_count INTEGER,
                latency_ms REAL,
                status TEXT,
                error_message TEXT,
                context_sources TEXT
            )
        """)

        # Create indices for common queries
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON requests(timestamp)")
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_intent ON requests(intent)")
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_status ON requests(status)")

        self.conn.commit()

    def log_request(
        self,
        request_id: str,
        query: str,
        intent: Optional[str] = None,
        extracted_entities: Optional[Dict] = None,
        retrieval_count: int = 0,
        latency_ms: float = 0,
        status: str = "success",
        error_message: Optional[str] = None,
        context_sources: Optional[list] = None,
        user_ip: Optional[str] = None
    ):
        """Log a request to the database.

        Args:
            request_id: Correlation id for the request.
            query: Raw user query; stored truncated to 500 characters.
            intent: Classified intent label, if known.
            extracted_entities: Entity dict; stored as a JSON string.
            retrieval_count: Number of retrieved context chunks.
            latency_ms: End-to-end request latency in milliseconds.
            status: Outcome label ('success' by default; 'error' is what
                get_error_rate() counts).
            error_message: Error description when the request failed.
            context_sources: Source identifiers; only the first 10 are stored.
            user_ip: Client IP; stored as 'unknown' when absent.
        """
        try:
            with self._lock:
                self.conn.execute("""
                    INSERT INTO requests (
                        timestamp, request_id, user_ip, query, intent,
                        extracted_entities, retrieval_count, latency_ms,
                        status, error_message, context_sources
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    datetime.now().isoformat(),
                    request_id,
                    user_ip or 'unknown',
                    query[:500],  # Truncate long queries
                    intent,
                    json.dumps(extracted_entities) if extracted_entities else None,
                    retrieval_count,
                    latency_ms,
                    status,
                    error_message,
                    json.dumps(context_sources[:10]) if context_sources else None  # Limit to 10 sources
                ))
                self.conn.commit()
        except Exception as e:
            # Don't let logging errors break the application
            print(f"[RequestLogger] Failed to log request: {e}")

    def get_recent_requests(self, limit: int = 100) -> list:
        """Get recent requests.

        Returns a list of dicts (newest first); empty list on any failure.
        Ordering by the ISO-8601 timestamp string works because ISO-8601
        sorts lexicographically.
        """
        try:
            with self._lock:
                cursor = self.conn.execute("""
                    SELECT timestamp, request_id, query, intent, latency_ms, status
                    FROM requests
                    ORDER BY timestamp DESC
                    LIMIT ?
                """, (limit,))

                return [
                    {
                        "timestamp": row[0],
                        "request_id": row[1],
                        "query": row[2],
                        "intent": row[3],
                        "latency_ms": row[4],
                        "status": row[5]
                    }
                    for row in cursor.fetchall()
                ]
        except Exception as e:
            print(f"[RequestLogger] Failed to fetch requests: {e}")
            return []

    def get_intent_distribution(self, hours: int = 24) -> Dict[str, int]:
        """Get intent distribution for the last N hours.

        Returns a mapping of intent -> request count; {} on failure.
        """
        try:
            with self._lock:
                # timedelta is imported locally; the module top imports only
                # datetime from the datetime module.
                from datetime import timedelta
                cutoff = (datetime.now() - timedelta(hours=hours)).isoformat()

                cursor = self.conn.execute("""
                    SELECT intent, COUNT(*) as count
                    FROM requests
                    WHERE timestamp > ?
                    GROUP BY intent
                """, (cutoff,))

                return {row[0]: row[1] for row in cursor.fetchall()}
        except Exception as e:
            print(f"[RequestLogger] Failed to get intent distribution: {e}")
            return {}

    def get_error_rate(self, hours: int = 24) -> float:
        """Get error rate for the last N hours.

        Counts rows with status = 'error' against all rows in the window;
        returns 0.0 when there are no rows or on any failure.
        """
        try:
            with self._lock:
                from datetime import timedelta
                cutoff = (datetime.now() - timedelta(hours=hours)).isoformat()

                cursor = self.conn.execute("""
                    SELECT
                        COUNT(*) as total,
                        SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as errors
                    FROM requests
                    WHERE timestamp > ?
                """, (cutoff,))

                row = cursor.fetchone()
                total, errors = row[0], row[1]

                # The total > 0 guard also protects against errors being NULL
                # (SUM over zero rows), so the division is always int / int.
                return errors / max(1, total) if total > 0 else 0.0
        except Exception as e:
            print(f"[RequestLogger] Failed to get error rate: {e}")
            return 0.0

    def get_average_latency(self, intent: Optional[str] = None, hours: int = 24) -> float:
        """Get average latency, optionally filtered by intent.

        Returns the mean latency_ms over the window (0.0 when there are no
        matching rows or on failure).
        """
        try:
            with self._lock:
                from datetime import timedelta
                cutoff = (datetime.now() - timedelta(hours=hours)).isoformat()

                if intent:
                    cursor = self.conn.execute("""
                        SELECT AVG(latency_ms)
                        FROM requests
                        WHERE timestamp > ? AND intent = ?
                    """, (cutoff, intent))
                else:
                    cursor = self.conn.execute("""
                        SELECT AVG(latency_ms)
                        FROM requests
                        WHERE timestamp > ?
                    """, (cutoff,))

                # AVG over zero rows yields NULL -> None in Python.
                result = cursor.fetchone()[0]
                return result if result is not None else 0.0
        except Exception as e:
            print(f"[RequestLogger] Failed to get average latency: {e}")
            return 0.0
192
+
193
+
194
# Global request logger instance: instantiating the singleton here opens the
# SQLite database once at import time; all importers share this object.
request_logger = RequestLogger()
utils/validators.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Input validation and sanitization utilities.
3
+ """
4
+ import re
5
+ import os
6
+ from typing import Optional, Dict, Any
7
+ from pathlib import Path
8
+ from config import config
9
+
10
+
11
class ValidationError(Exception):
    """Raised when user-supplied input fails validation or sanitization."""
    pass


class InputValidator:
    """Centralized input validation.

    All validators either return True or raise ValidationError with a
    human-readable message; they never return False.
    """

    @staticmethod
    def validate_file_upload(filename: str, file_size_bytes: int) -> bool:
        """
        Validate uploaded file.

        Args:
            filename: Name of the uploaded file
            file_size_bytes: Size of the file in bytes

        Raises:
            ValidationError: If the extension is not allowed, the file is
                empty, or it exceeds config.MAX_FILE_SIZE_MB

        Returns:
            True if valid
        """
        # Check file extension against the configured whitelist
        ext = Path(filename).suffix.lower().lstrip('.')
        if ext not in config.ALLOWED_FILE_TYPES:
            raise ValidationError(
                f"Invalid file type '.{ext}'. Allowed types: {', '.join(config.ALLOWED_FILE_TYPES)}"
            )

        # Check file size
        max_size_bytes = config.MAX_FILE_SIZE_MB * 1024 * 1024
        if file_size_bytes > max_size_bytes:
            raise ValidationError(
                f"File size ({file_size_bytes / 1024 / 1024:.2f} MB) exceeds maximum allowed size ({config.MAX_FILE_SIZE_MB} MB)"
            )

        if file_size_bytes == 0:
            raise ValidationError("File is empty")

        return True

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename to prevent directory traversal.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename (never empty; falls back to "unnamed_file")
        """
        # Remove any path components
        filename = os.path.basename(filename)

        # Remove or replace dangerous characters
        filename = re.sub(r'[^\w\s\-\.]', '_', filename)

        # Remove leading/trailing dots and spaces
        filename = filename.strip('. ')

        # Ensure filename is not empty after sanitization
        if not filename:
            filename = "unnamed_file"

        return filename

    @staticmethod
    def sanitize_path(path: str, base_dir: str) -> str:
        """
        Sanitize and validate file path to prevent directory traversal.

        Args:
            path: User-provided path
            base_dir: Base directory that path must be within

        Raises:
            ValidationError: If path resolves outside base directory

        Returns:
            Sanitized absolute path
        """
        # Resolve to absolute path
        abs_base = os.path.abspath(base_dir)
        abs_path = os.path.abspath(os.path.join(base_dir, path))

        # A bare prefix check would wrongly accept sibling directories such
        # as "/base_evil" for base "/base". Accept only the base directory
        # itself or paths strictly under base + path separator.
        if abs_path != abs_base and not abs_path.startswith(abs_base + os.sep):
            raise ValidationError("Invalid path: directory traversal detected")

        return abs_path

    @staticmethod
    def validate_query_input(query: str, max_length: int = 10000) -> bool:
        """
        Validate user query input.

        Args:
            query: User query string
            max_length: Maximum allowed length

        Raises:
            ValidationError: If the query is empty, too long, or matches a
                suspicious script-injection pattern

        Returns:
            True if valid
        """
        if not query or not query.strip():
            raise ValidationError("Query cannot be empty")

        if len(query) > max_length:
            raise ValidationError(f"Query too long (max {max_length} characters)")

        # Check for suspicious patterns (basic XSS prevention)
        suspicious_patterns = [
            r'<script',
            r'javascript:',
            r'onerror=',
            r'onclick=',
        ]

        query_lower = query.lower()
        for pattern in suspicious_patterns:
            if re.search(pattern, query_lower):
                raise ValidationError("Query contains potentially malicious content")

        return True

    @staticmethod
    def validate_metadata_filters(filters: Dict[str, Any]) -> bool:
        """
        Validate metadata filters.

        Args:
            filters: Filter dictionary; keys must be in the whitelist and
                values must be short strings or small string lists

        Raises:
            ValidationError: If validation fails

        Returns:
            True if valid
        """
        if not isinstance(filters, dict):
            raise ValidationError("Filters must be a dictionary")

        # Whitelist of allowed filter keys
        allowed_keys = {
            'insurer', 'insurance_type', 'product_name',
            'document_type', 'section', 'plan_id'
        }

        for key in filters.keys():
            if key not in allowed_keys:
                raise ValidationError(f"Invalid filter key: {key}")

        # Validate filter values (cap lengths to bound query cost)
        for key, value in filters.items():
            if isinstance(value, str):
                if len(value) > 500:
                    raise ValidationError(f"Filter value too long for key: {key}")
            elif isinstance(value, list):
                if len(value) > 50:
                    raise ValidationError(f"Too many values in filter list for key: {key}")
                for item in value:
                    if isinstance(item, str) and len(item) > 500:
                        raise ValidationError(f"Filter value too long in list for key: {key}")

        return True

    @staticmethod
    def validate_calculation_inputs(
        age: Optional[int] = None,
        premium_amount: Optional[float] = None,
        policy_term: Optional[str] = None,
        payment_term: Optional[str] = None
    ) -> bool:
        """
        Validate inputs for benefit calculations.

        Args:
            age: Age in whole years (0-120) when provided
            premium_amount: Premium in rupees (1,000 - 1,00,00,000) when provided
            policy_term: Term string containing a year count (1-100) when provided
            payment_term: Payment term string containing a year count (1-100) when provided

        Raises:
            ValidationError: If any provided value is out of range

        Returns:
            True if valid
        """
        if age is not None:
            # NOTE: bool is a subclass of int and would pass isinstance here;
            # callers are expected to pass real integers.
            if not isinstance(age, int) or age < 0 or age > 120:
                raise ValidationError(f"Invalid age: {age}. Age must be between 0 and 120")

        if premium_amount is not None:
            if not isinstance(premium_amount, (int, float)) or premium_amount <= 0:
                raise ValidationError(f"Invalid premium amount: {premium_amount}. Must be positive")

            # Reasonable bounds (1000 to 1 crore)
            if premium_amount < 1000 or premium_amount > 10000000:
                raise ValidationError(
                    f"Premium amount {premium_amount} outside reasonable range (₹1,000 - ₹1,00,00,000)"
                )

        if policy_term is not None:
            # Extract the first number from the policy term string
            pt_match = re.search(r'\d+', str(policy_term))
            if pt_match:
                pt_years = int(pt_match.group())
                if pt_years < 1 or pt_years > 100:
                    raise ValidationError(f"Invalid policy term: {pt_years} years. Must be between 1 and 100")

        if payment_term is not None:
            # Extract the first number from the payment term string
            ppt_match = re.search(r'\d+', str(payment_term))
            if ppt_match:
                ppt_years = int(ppt_match.group())
                if ppt_years < 1 or ppt_years > 100:
                    raise ValidationError(f"Invalid payment term: {ppt_years} years. Must be between 1 and 100")

        return True

    @staticmethod
    def validate_api_key(provided_key: Optional[str]) -> bool:
        """
        Validate API key if authentication is enabled.

        Args:
            provided_key: API key provided by client

        Raises:
            ValidationError: If the key is missing or wrong while
                config.ENABLE_API_KEY_AUTH is on

        Returns:
            True if valid or auth disabled
        """
        if not config.ENABLE_API_KEY_AUTH:
            return True

        if not provided_key:
            raise ValidationError("API key required but not provided")

        if provided_key != config.API_KEY:
            raise ValidationError("Invalid API key")

        return True
+ return True