DeltaVenom committed on
Commit
72bff80
·
1 Parent(s): 48d1e8f

Update app code and initialize runtime databases

Browse files
.gitignore CHANGED
@@ -1,10 +1,35 @@
1
- .venv/
2
- .env
3
  __pycache__/
4
- *.pyc
5
- .streamlit/
6
- temp_docs/
7
- temp_faiss_index/
8
- debug_*.txt
9
- test_*.txt
 
 
 
10
  *.log
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python Caches
 
2
  __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # SQLite Databases (Recreated at runtime)
7
+ *.db
8
+ *.sqlite3
9
+
10
+ # Logs
11
+ logs/
12
  *.log
13
+ extraction_debug.log
14
+
15
+ # Environment
16
+ .env
17
+ env/
18
+ venv/
19
+ .venv/
20
+ .python-version
21
+
22
+ # Temporary Test Scripts
23
+ verify_fix.py
24
+ test_llm.py
25
+ test_classifier_fix.py
26
+ test_api_logic.py
27
+ verify_extraction.py
28
+
29
+ # RAG specific
30
+ rag/faiss_index/
31
+ rag/embeddings_cache.json
32
+
33
+ # OS files
34
+ .DS_Store
35
+ Thumbs.db
agents/graph.py CHANGED
@@ -44,7 +44,6 @@ def build_rag_workflow() -> StateGraph:
44
  # Agent nodes
45
  workflow.add_node("listing_agent", nodes.listing_agent)
46
  workflow.add_node("retrieval_agent", nodes.retrieval_agent)
47
- workflow.add_node("comparison_agent", nodes.comparison_agent)
48
  workflow.add_node("advisory_agent", nodes.advisory_agent)
49
  workflow.add_node("faq_agent", nodes.faq_agent)
50
 
@@ -69,10 +68,8 @@ def build_rag_workflow() -> StateGraph:
69
  intent = state.get("intent", "plan_details")
70
 
71
  if intent == "list_plans":
72
- # Listing doesn't need retrieval, goes direct to listing agent
73
  return "listing_agent"
74
  else:
75
- # All other intents go through retrieval first
76
  return "retriever"
77
 
78
  workflow.add_conditional_edges(
@@ -97,7 +94,6 @@ def build_rag_workflow() -> StateGraph:
97
 
98
  route_map = {
99
  "plan_details": "retrieval_agent",
100
- "compare_plans": "comparison_agent",
101
  "recommendation": "advisory_agent",
102
  "general_query": "faq_agent"
103
  }
@@ -109,7 +105,6 @@ def build_rag_workflow() -> StateGraph:
109
  route_to_agent,
110
  {
111
  "retrieval_agent": "retrieval_agent",
112
- "comparison_agent": "comparison_agent",
113
  "advisory_agent": "advisory_agent",
114
  "faq_agent": "faq_agent"
115
  }
@@ -117,7 +112,6 @@ def build_rag_workflow() -> StateGraph:
117
 
118
  # All agents end at guardrail
119
  workflow.add_edge("retrieval_agent", "guardrail")
120
- workflow.add_edge("comparison_agent", "guardrail")
121
  workflow.add_edge("advisory_agent", "guardrail")
122
  workflow.add_edge("faq_agent", "guardrail")
123
 
 
44
  # Agent nodes
45
  workflow.add_node("listing_agent", nodes.listing_agent)
46
  workflow.add_node("retrieval_agent", nodes.retrieval_agent)
 
47
  workflow.add_node("advisory_agent", nodes.advisory_agent)
48
  workflow.add_node("faq_agent", nodes.faq_agent)
49
 
 
68
  intent = state.get("intent", "plan_details")
69
 
70
  if intent == "list_plans":
 
71
  return "listing_agent"
72
  else:
 
73
  return "retriever"
74
 
75
  workflow.add_conditional_edges(
 
94
 
95
  route_map = {
96
  "plan_details": "retrieval_agent",
 
97
  "recommendation": "advisory_agent",
98
  "general_query": "faq_agent"
99
  }
 
105
  route_to_agent,
106
  {
107
  "retrieval_agent": "retrieval_agent",
 
108
  "advisory_agent": "advisory_agent",
109
  "faq_agent": "faq_agent"
110
  }
 
112
 
113
  # All agents end at guardrail
114
  workflow.add_edge("retrieval_agent", "guardrail")
 
115
  workflow.add_edge("advisory_agent", "guardrail")
116
  workflow.add_edge("faq_agent", "guardrail")
117
 
agents/nodes.py CHANGED
@@ -1,4 +1,6 @@
1
  import re
 
 
2
  from typing import Dict, List, Any, Optional
3
  from collections import defaultdict
4
  from agents.states import AgentState, ExtractedEntities
@@ -15,13 +17,12 @@ COMPLIANCE_DISCLAIMER = (
15
  # Prompting rules for all agents
16
  COMPLIANCE_RULES = """
17
  CRITICAL RULES:
18
- - ❌ NO invented plan names - only use plans from the provided context
19
- - ❌ NO assumptions beyond documents - if info is missing, say so explicitly
20
- - ❌ NO meta-commentary. DO NOT mention "the provided context", "the documents", "the text", or "internal state".
21
- - CIS overrides brochure for: exclusions, charges, conditions
22
- - ✅ Use structured output (markdown tables) for comparisons
23
- - ✅ Simple, clear language for end users
24
- - ✅ Provide "OUTPUT ONLY" - start answering the user's question directly.
25
  """
26
 
27
 
@@ -48,15 +49,19 @@ class AgentNodes:
48
  if retriever:
49
  retriever.reload()
50
 
 
 
 
 
51
  # =========================================================================
52
  # NODE 1: Query Rewriter
53
  # =========================================================================
54
  def query_rewriter_node(self, state: AgentState) -> Dict[str, Any]:
55
  """
56
- Rewrites query to be self-contained based on chat history.
57
- Resolves pronouns and references.
58
  """
59
- llm = LLMFactory.get_llm("small")
60
  query = state["input"]
61
  history = state.get("chat_history", [])
62
 
@@ -64,16 +69,15 @@ class AgentNodes:
64
  return {"input": query}
65
 
66
  system_prompt = (
67
- "You are a query rewriter for an insurance RAG system. "
68
- "Your task is to rewrite the latest question to be self-contained.\n\n"
69
  "RULES:\n"
70
- "1. ALWAYS resolve pronouns (it, they, these) or vague terms (the plan, previous one) using the previous context.\n"
71
- "2. If the user asks a follow-up about 'it' or 'the plan', replace it with the specific plan name mentioned last.\n"
72
- "3. If the user asks 'is it good for me' or similar, rewrite it to '[Plan Name] recommendation for [user details if any]'.\n"
73
- "4. If the query is already very specific and names a plan, keep it mostly as-is but ensure insurer names are present.\n"
74
- "5. Do NOT cross-pollinate unrelated queries. If the user switches topics completely, ignore the history.\n"
75
- "6. NEVER return a conversational response, suggestion, or question. If you cannot resolve a reference, return the original 'Latest' query as is.\n"
76
- "7. Return ONLY the rewritten query text."
77
  )
78
 
79
  history_str = "\n".join([f"- {h}" for h in history[-5:]]) # Last 5 turns
@@ -99,107 +103,232 @@ class AgentNodes:
99
  llm = LLMFactory.get_llm("small")
100
  query = state["input"].lower()
101
 
102
- # Fast keyword-based classification first
103
- if any(kw in query for kw in ["list", "which plans", "what plans", "all plans", "available plans", "show me plans"]):
104
- return {"intent": "list_plans"}
105
-
106
- if any(kw in query for kw in ["compare", "vs", "versus", "difference between", "which is better"]):
107
- return {"intent": "compare_plans"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- if any(kw in query for kw in ["suggest", "recommend", "best for", "should i", "suitable for"]):
110
- return {"intent": "recommendation"}
 
 
 
 
 
111
 
112
  # LLM-based classification for ambiguous cases
113
- system_prompt = (
114
- "Classify the user's insurance query into ONE of:\n"
115
- "- 'plan_details': Asking about features, benefits, eligibility of a SPECIFIC plan\n"
116
- "- 'list_plans': Wants to know WHICH plans are available\n"
117
- "- 'compare_plans': Wants to COMPARE 2+ plans side-by-side\n"
118
- "- 'recommendation': Seeks personalized advice based on their profile\n"
119
- "- 'general_query': General insurance terminology or concepts\n\n"
120
- "Return ONLY the category name."
121
- )
122
-
123
- response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=query)])
124
- intent = getattr(response, 'content', str(response)).lower().strip()
125
-
126
- valid_intents = ['list_plans', 'plan_details', 'compare_plans', 'recommendation', 'general_query']
127
- if intent not in valid_intents:
128
- intent = "plan_details" # Default fallback
 
129
 
130
- return {"intent": intent}
131
 
132
- # =========================================================================
133
- # NODE 3: Entity Extractor
134
- # =========================================================================
135
  def entity_extractor_node(self, state: AgentState) -> Dict[str, Any]:
136
  """
137
- Extracts structured entities from the query:
138
- - provider (insurer names)
139
- - insurance_type (term, ulip, savings, etc.)
140
- - plan_names (specific plan names mentioned)
141
- - user_profile (age, income, smoker, dependents, goal)
142
  """
143
- query = state["input"].lower()
144
-
145
- # Extract providers
146
- provider_map = {
147
- "edelweiss": "Edelweiss Life",
148
- "tata": "TATA AIA",
149
- "tata aia": "TATA AIA",
150
- "generali": "Generali Central",
151
- "central": "Generali Central",
152
- "pramerica": "PRAMERICA"
153
- }
154
- providers = []
155
- for keyword, name in provider_map.items():
156
- if keyword in query and name not in providers:
157
- providers.append(name)
158
-
159
- # Extract insurance types
160
- type_map = {
161
- "term": ["Term Insurance", "Term Plan"],
162
- "ulip": ["Unit Linked Insurance Plan", "ULIP Plan"],
163
- "wealth": ["Unit Linked Insurance Plan"],
164
- "savings": ["Savings Plan", "Guaranteed Return"],
165
- "retirement": ["Retirement and Pension"],
166
- "pension": ["Retirement and Pension"],
167
- "health": ["Health Insurance"],
168
- "group": ["Group Plan"]
169
- }
170
- insurance_types = []
171
- for keyword, types in type_map.items():
172
- if keyword in query:
173
- for t in types:
174
- if t not in insurance_types:
175
- insurance_types.append(t)
176
-
177
- # Extract specific plan names using LLM
178
- plan_names = self._extract_plan_names_from_query(state["input"])
179
-
180
- # Extract user profile for recommendation intent
181
- user_profile = {}
182
- if state.get("intent") == "recommendation":
183
- user_profile = self._extract_user_profile(state["input"])
184
-
185
- entities: ExtractedEntities = {
186
- "provider": list(set(providers)) if providers else [],
187
- "insurance_type": list(set(insurance_types)) if insurance_types else [],
188
- "plan_names": list(set(plan_names)) if plan_names else [],
189
- "user_profile": user_profile or {}
190
- }
191
-
192
- # Build metadata filters from entities
193
- filters = {}
194
- if providers:
195
- filters["insurer"] = providers
196
- if insurance_types:
197
- filters["insurance_type"] = insurance_types
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
- return {
200
- "extracted_entities": entities,
201
- "metadata_filters": filters
202
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  def _extract_plan_names_from_query(self, query: str) -> List[str]:
205
  """Use LLM to extract specific plan names mentioned in query."""
@@ -234,46 +363,203 @@ class AgentNodes:
234
 
235
  return plan_names
236
 
237
- def _extract_user_profile(self, query: str) -> Dict[str, Any]:
238
- """Extract user profile information for recommendations."""
239
- llm = LLMFactory.get_llm("small")
240
-
241
- system_prompt = (
242
- "Extract user profile from the insurance query.\n"
243
- "Return in format:\n"
244
- "age: <number or null>\n"
245
- "smoker: <yes/no or null>\n"
246
- "cover_amount: <amount or null>\n"
247
- "goal: <protection/savings/retirement/wealth or null>\n"
248
- "dependents: <number or null>"
249
- )
250
-
251
- response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=query)])
252
- result = getattr(response, 'content', str(response))
253
-
254
  profile = {}
255
- for line in result.split('\n'):
256
- if ':' in line:
257
- key, value = line.split(':', 1)
258
- key = key.strip().lower()
259
- value = value.strip().lower()
260
- if value not in ['null', 'none', 'n/a', '']:
261
- if key == 'age':
262
- try:
263
- profile['age'] = int(re.search(r'\d+', value).group())
264
- except:
265
- pass
266
- elif key == 'smoker':
267
- profile['smoker'] = 'yes' in value
268
- elif key == 'cover_amount':
269
- profile['cover_amount'] = value
270
- elif key == 'goal':
271
- profile['goal'] = value
272
- elif key == 'dependents':
273
- try:
274
- profile['dependents'] = int(re.search(r'\d+', value).group())
275
- except:
276
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
  return profile
279
 
@@ -532,7 +818,7 @@ class AgentNodes:
532
  aggregated[plan_id] = final_chunks
533
 
534
  # Refresh context strings based on aggregated chunks
535
- intent = state.get("intent", "plan_details")
536
  limit = 5 if intent == "compare_plans" else 3
537
  context = self._format_context(aggregated, limit=limit)
538
 
@@ -595,19 +881,43 @@ class AgentNodes:
595
  # =========================================================================
596
  def retrieval_agent(self, state: AgentState) -> Dict[str, Any]:
597
  """
598
- Provides detailed information about a specific plan.
599
- Grounds all responses in retrieved documents.
600
  """
601
- llm = LLMFactory.get_llm("medium")
 
 
602
  query = state["input"]
603
  context = state.get("context", [])
 
604
 
605
  if not context:
606
- # Fallback retrieval
607
  retriever = self._get_retriever()
608
  if retriever:
609
- docs = retriever.search(query, k=5)
610
- context = [f"[{d.metadata.get('product_name')}] {d.page_content}" for d in docs]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
 
612
  context_str = "\n\n".join(context)
613
 
@@ -615,10 +925,12 @@ class AgentNodes:
615
 
616
  {COMPLIANCE_RULES}
617
 
618
- Answer the user's question using ONLY the Policy Context provided to you.
619
- If information is not in the context, say "I don't have that specific information in our documents."
620
- DO NOT mention that you are looking at documents or context. Just provide the answer.
621
- Be warm and helpful while maintaining accuracy."""
 
 
622
 
623
  prompt = f"Policy Context:\n{context_str}\n\nUser Question: {query}"
624
 
@@ -628,53 +940,13 @@ Be warm and helpful while maintaining accuracy."""
628
  return {"answer": answer}
629
 
630
  # =========================================================================
631
- # NODE 9: Comparison Agent
632
- # =========================================================================
633
- def comparison_agent(self, state: AgentState) -> Dict[str, Any]:
634
- """
635
- Generates structured side-by-side comparisons.
636
- Normalizes attributes across plans.
637
- """
638
- llm = LLMFactory.get_llm("medium")
639
- query = state["input"]
640
- context = state.get("context", [])
641
- chunks_by_plan = state.get("retrieved_chunks", {})
642
-
643
- # Get plan names being compared
644
- plan_names = list(chunks_by_plan.keys()) if chunks_by_plan else []
645
-
646
- if not context and not plan_names:
647
- return {"answer": "I couldn't find the plans you want to compare. Please specify the plan names."}
648
-
649
- context_str = "\n\n".join(context)
650
- plans_info = f"\n\nPlans to compare: {', '.join(plan_names)}" if plan_names else ""
651
-
652
- system_prompt = f"""You are an Insurance Comparison Expert.
653
-
654
- {COMPLIANCE_RULES}
655
-
656
- COMPARISON FORMAT:
657
- - Return comparison as a Markdown TABLE
658
- - Columns: Features | Plan 1 | Plan 2 | ...
659
- - Rows: Plan Type, Eligibility, Sum Assured, Premium Terms, Key Benefits, Exclusions
660
- - If a detail is missing, put "Not specified"
661
- - Include ALL plans mentioned in the context
662
- - Be objective and factual"""
663
-
664
- prompt = f"Policy Context:\n{context_str}{plans_info}\n\nUser Question: {query}"
665
-
666
- response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
667
- answer = getattr(response, 'content', str(response))
668
-
669
- return {"answer": answer, "reasoning_output": f"Compared {len(plan_names)} plans"}
670
-
671
- # =========================================================================
672
- # NODE 10: Recommendation Agent (Advisory)
673
  # =========================================================================
674
  def advisory_agent(self, state: AgentState) -> Dict[str, Any]:
675
  """
676
  Provides personalized recommendations based on user profile.
677
  Grounds all advice in retrieved documents.
 
678
  """
679
  llm = LLMFactory.get_llm("large")
680
  query = state["input"]
@@ -682,35 +954,75 @@ COMPARISON FORMAT:
682
  entities = state.get("extracted_entities", {})
683
  user_profile = entities.get("user_profile", {})
684
 
685
- context_str = "\n\n".join(context) if context else "No specific plans found matching your criteria."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
 
687
  profile_info = ""
688
  if user_profile:
689
- profile_parts = []
690
- if user_profile.get("age"):
691
- profile_parts.append(f"Age: {user_profile['age']}")
692
- if user_profile.get("smoker") is not None:
693
- profile_parts.append(f"Smoker: {'Yes' if user_profile['smoker'] else 'No'}")
694
- if user_profile.get("cover_amount"):
695
- profile_parts.append(f"Cover needed: {user_profile['cover_amount']}")
696
- if user_profile.get("goal"):
697
- profile_parts.append(f"Goal: {user_profile['goal']}")
698
  if profile_parts:
699
  profile_info = f"\n\nUser Profile: {', '.join(profile_parts)}"
700
 
701
  system_prompt = f"""You are an Expert Insurance Advisor.
702
-
703
  {COMPLIANCE_RULES}
704
-
705
  RECOMMENDATION RULES:
706
- - Base recommendations ONLY on plans in the context
707
- - Consider user's age, smoking status, cover requirement if provided
708
- - Explain WHY a plan suits them based on document features
709
- - List 2-3 suitable options if available
710
- - Be clear about eligibility criteria
711
- - DO NOT reference the "context" or "documents" in your answer. Provide the advice directly."""
 
712
 
713
- prompt = f"Policy Context:\n{context_str}{profile_info}\n\nUser Question: {query}"
714
 
715
  response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
716
  answer = getattr(response, 'content', str(response))
@@ -722,23 +1034,39 @@ RECOMMENDATION RULES:
722
  # =========================================================================
723
  def faq_agent(self, state: AgentState) -> Dict[str, Any]:
724
  """
725
- Handles general insurance questions.
726
- Still attempts to ground in documents when possible.
727
  """
728
- llm = LLMFactory.get_llm("small")
729
  query = state["input"]
730
  context = state.get("context", [])
731
 
 
 
 
 
 
 
 
 
 
732
  context_str = "\n\n".join(context) if context else ""
733
 
734
  system_prompt = f"""You are an Insurance Helpdesk Assistant.
735
 
736
  {COMPLIANCE_RULES}
 
 
 
 
 
 
737
 
738
- For general insurance terminology questions:
739
- - Provide accurate, helpful explanations
740
- - If context is available, use it to give specific examples
741
- - Keep explanations simple and jargon-free"""
 
 
742
 
743
  prompt = f"Context (if relevant):\n{context_str}\n\nUser Question: {query}" if context_str else f"User Question: {query}"
744
 
@@ -768,6 +1096,85 @@ For general insurance terminology questions:
768
 
769
  return {"answer": answer}
770
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
  # =========================================================================
772
  # HELPER METHODS
773
  # =========================================================================
 
1
  import re
2
+ import time
3
+ import json
4
  from typing import Dict, List, Any, Optional
5
  from collections import defaultdict
6
  from agents.states import AgentState, ExtractedEntities
 
17
  # Prompting rules for all agents
18
  COMPLIANCE_RULES = """
19
  CRITICAL RULES:
20
+ - ❌ OUT-OF-BOUNDS REFUSAL: If the user asks about topics NOT related to insurance (e.g., booking flights, recipes, general news), you MUST politely refuse and state that you can only assist with insurance-related queries.
21
+ - ❌ NO hallucinations - if a plan name is not in the provided context, state clearly that you do not have information about that specific plan.
22
+ - ❌ NO assumptions - if numerical data or policy details are missing from the context, do NOT invent them. Say "Information not available."
23
+ - NO meta-commentary - start answering the question directly.
24
+ - ✅ PROPER REDIRECTION: After refusing an out-of-bounds query, invite the user to ask about insurance products, available plans, or policy definitions.
25
+ - ✅ GROUNDING: Only use facts from the provided context. CIS overrides brochure for exclusions/charges.
 
26
  """
27
 
28
 
 
49
  if retriever:
50
  retriever.reload()
51
 
52
+ def _log_debug(self, msg: str):
53
+ """Internal debug logger."""
54
+ print(f"[DEBUG] {msg}")
55
+
56
  # =========================================================================
57
  # NODE 1: Query Rewriter
58
  # =========================================================================
59
  def query_rewriter_node(self, state: AgentState) -> Dict[str, Any]:
60
  """
61
+ Rewrites conversational queries into self-contained, RAG-friendly queries.
62
+ Uses conversation history to resolve pronouns and implicit context.
63
  """
64
+ llm = LLMFactory.get_llm("low")
65
  query = state["input"]
66
  history = state.get("chat_history", [])
67
 
 
69
  return {"input": query}
70
 
71
  system_prompt = (
72
+ "You are a professional query rewriter for an insurance consultation system. "
73
+ "Rewrite the latest user input to be a standalone search/extraction query.\n\n"
74
  "RULES:\n"
75
+ "1. If the user provides a missing profile detail (e.g., 'pt 20'), combine it with previous profile data into a recommendation request: "
76
+ "'I want an insurance calculation for [age/gender] with Policy Term 20 years'.\n"
77
+ "2. Resolve all pronouns (it, they) and vague terms (the plan, previous one).\n"
78
+ "3. IMPORTANT: For general questions (e.g., 'What is PPT?') or broad listings (e.g., 'Show all plans'), do NOT inject the user's age/gender if it wasn't requested. Keep the search query clean.\n"
79
+ "4. Only preserve profile details (age, budget) if the user's latest query is a follow-up about a specific calculation or plan recommendation.\n"
80
+ "5. Return ONLY the rewritten query text."
 
81
  )
82
 
83
  history_str = "\n".join([f"- {h}" for h in history[-5:]]) # Last 5 turns
 
103
  llm = LLMFactory.get_llm("small")
104
  query = state["input"].lower()
105
 
106
+ # 1. Plan Details (specific plan mentioned)
107
+ # Check specific plan indicators
108
+ specific_plan_indicators = ["star", "guaranteed income", "bharat savings", "premier", "smart value",
109
+ "raksha", "saral jeevan", "edelweiss", "tata", "generali", "pramerica",
110
+ "canara", "indusind", "max life", "hdfc", "icici"]
111
+
112
+ has_plan_name = any(plan in query for plan in specific_plan_indicators)
113
+
114
+ if has_plan_name and ("benefit" in query or "feature" in query or "detail" in query or "eligibility" in query):
115
+ return {"intent": "plan_details", "query_complexity": "low"}
116
+
117
+ # 2. Comparison (compare, difference, vs)
118
+ compare_keywords = ["compare", "difference", "better", "vs", "versus", "or"]
119
+ if any(kw in query for kw in compare_keywords) and has_plan_name:
120
+ return {"intent": "compare_plans", "query_complexity": "high"}
121
+
122
+ # 3. Listing queries - CHECK BEFORE RECOMMENDATION (to avoid "term" matching)
123
+ listing_keywords = ["list", "show me", "available", "which plans", "what plans",
124
+ "types of", "providers", "insurers", "all plans"]
125
+ if any(kw in query for kw in listing_keywords):
126
+ return {"intent": "list_plans", "query_complexity": "low"}
127
+
128
+ # 4. General FAQ queries - CHECK BEFORE RECOMMENDATION
129
+ # These include "what is", "what does", "explain", "define"
130
+ faq_keywords = ["what is", "what does", "explain", "define", "meaning of", "tell me about insurance",
131
+ "what are the types", "difference between", "how does insurance"]
132
+ if any(kw in query for kw in faq_keywords):
133
+ return {"intent": "general_query", "query_complexity": "low"}
134
+
135
+ # 5. Recommendation/Calculation queries
136
+ # IMPORTANT: Only specific recommendation indicators, avoiding generic words like "term", "mode"
137
+ recommendation_keywords = ["suggest", "recommend", "best for", "should i", "suitable for",
138
+ "calculate", "how much will i get", "what will i get",
139
+ "i am", "i'm", "my age", "my budget", "my premium",
140
+ "years old", "year old"]
141
+
142
+ # Also check for profile indicators (age, gender) combined with numbers/plan mention
143
+ has_profile = any(kw in query for kw in ["male", "female", "age =", "age=", "premium =", "premium=",
144
+ "pt =", "pt=", "ppt =", "ppt="])
145
+ has_numbers_with_context = any(kw in query for kw in recommendation_keywords) or has_profile
146
+
147
+ if has_numbers_with_context:
148
+ return {"intent": "recommendation", "query_complexity": "high"}
149
+
150
+ # 6. Fallback for explicit plan names if not caught by others
151
+ if has_plan_name:
152
+ return {"intent": "plan_details", "query_complexity": "low"}
153
 
154
+ # 7. Follow-up detection
155
+ if len(state.get("chat_history", [])) > 0 and ("details" in query or "more" in query):
156
+ return {"intent": "plan_details", "query_complexity": "low"}
157
+
158
+
159
+ # Default fallback
160
+ return {"intent": "general_query", "query_complexity": "low"}
161
 
162
  # LLM-based classification for ambiguous cases
163
+ # This section is removed as per the instructions.
164
+ # system_prompt = (
165
+ # "Classify the user's insurance query into ONE of:\n"
166
+ # "- 'plan_details': Asking about features, benefits, eligibility of a SPECIFIC plan (should retrieve from documents)\n"
167
+ # "- 'list_plans': Wants to know WHICH plans are available from an insurer or category\n"
168
+ # "- 'recommendation': Seeks personalized benefit calculations or plan suggestions based on their profile (age, gender, premium)\n"
169
+ # "- 'general_query': General insurance terminology, concepts, or FAQs (not specific plans)\n\n"
170
+ # "IMPORTANT: 'What are the benefits of [Plan Name]' is 'plan_details', NOT 'recommendation'\n"
171
+ # "Return ONLY the category name."
172
+ # )
173
+
174
+ # response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=query)])
175
+ # intent = getattr(response, 'content', str(response)).lower().strip()
176
+
177
+ # valid_intents = ['list_plans', 'plan_details', 'recommendation', 'general_query']
178
+ # if intent not in valid_intents:
179
+ # intent = "plan_details" # Default fallback
180
 
181
+ # return {"intent": intent}
182
 
 
 
 
183
  def entity_extractor_node(self, state: AgentState) -> Dict[str, Any]:
184
  """
185
+ Extracts structured entities from the query.
 
 
 
 
186
  """
187
+
188
+
189
+ # DEBUG: Write to file to ensure we see it
190
+ # try:
191
+ # with open("extraction_debug.log", "a") as f:
192
+ # f.write(f"\n\n[TIME] Execution at {time.time()}\n")
193
+ # f.write(f"[INPUT] {state.get('input', 'NO INPUT')}\n")
194
+ # f.write(f"[INTENT] {state.get('intent', 'NOT SET')}\n")
195
+ # except: pass
196
+
197
+ # DEBUG: Write to file to ensure we see it
198
+ try:
199
+ with open("extraction_debug.log", "a") as f:
200
+ f.write(f"\n\n[TIME] Execution at {time.time()}\n")
201
+ f.write(f"[INPUT] {state.get('input', 'NO INPUT')}\n")
202
+ f.write(f"[INTENT] {state.get('intent', 'NOT SET')}\n")
203
+ except: pass
204
+
205
+ try:
206
+ print(f"[ENTITY DEBUG] ===== STARTING ENTITY EXTRACTION =====")
207
+ # FORCE extraction for debugging if needed, but rely on logic
208
+
209
+ try:
210
+ with open("extraction_debug.log", "a") as f:
211
+ f.write(f"[STATUS] Starting extraction logic...\n")
212
+ except: pass
213
+
214
+ query = state["input"].lower()
215
+
216
+ # Extract providers
217
+ provider_map = {
218
+ "edelweiss": "Edelweiss Life",
219
+ "tata": "TATA AIA",
220
+ "tata aia": "TATA AIA",
221
+ "generali": "Generali Central",
222
+ "central": "Generali Central",
223
+ "pramerica": "PRAMERICA"
224
+ }
225
+ providers = []
226
+ for keyword, name in provider_map.items():
227
+ if keyword in query and name not in providers:
228
+ providers.append(name)
229
+
230
+ # Extract insurance types
231
+ type_map = {
232
+ "term": ["Term Insurance", "Term Plan"],
233
+ "ulip": ["Unit Linked Insurance Plan", "ULIP Plan"],
234
+ "wealth": ["Unit Linked Insurance Plan"],
235
+ "savings": ["Savings Plan", "Guaranteed Return"],
236
+ "retirement": ["Retirement and Pension"],
237
+ "pension": ["Retirement and Pension"],
238
+ "health": ["Health Insurance"],
239
+ "group": ["Group Plan"]
240
+ }
241
+ insurance_types = []
242
+ for keyword, types in type_map.items():
243
+ if keyword in query:
244
+ for t in types:
245
+ if t not in insurance_types:
246
+ insurance_types.append(t)
247
+
248
+ # Extract specific plan names using LLM
249
+ plan_names = self._extract_plan_names_from_query(state["input"])
250
+
251
+ # Extract user profile (Merge with existing data in state AND chat history)
252
+ existing_profile = state.get("extracted_entities", {}).get("user_profile", {})
253
+ history = state.get("chat_history", [])
254
+ new_profile = {}
255
+
256
+ # Always attempt extraction if it's a recommendation or if profile indicators exist
257
+ profile_indicators = ["old", "male", "female", "year", "lakh", "rs", "budget", "premium", "invest", "benefit", "pt ", "ppt ", "mode", "age"]
258
+ should_extract = any(ind in query for ind in profile_indicators) or state.get("intent") == "recommendation"
259
+
260
+ print(f"[EXTRACTION DEBUG] Should extract: {should_extract}, Intent: {state.get('intent')}")
261
+
262
+ try:
263
+ with open("extraction_debug.log", "a") as f:
264
+ f.write(f"[STATUS] Should extract: {should_extract}\n")
265
+ except: pass
266
+
267
+ if should_extract:
268
+ new_profile = self._extract_user_profile(state["input"], history=history)
269
+ print(f"[EXTRACTION DEBUG] Extracted profile: {new_profile}")
270
+ try:
271
+ with open("extraction_debug.log", "a") as f:
272
+ f.write(f"[STATUS] Extracted profile: {new_profile}\n")
273
+ except: pass
274
+
275
+ # Merge: new data overwrites old, but old data is kept if not in new
276
+ # IMPORTANT: Ensure keys with 'null' or empty values in new_profile do not overwrite valid existing data
277
+ user_profile = existing_profile.copy()
278
+ for k, v in new_profile.items():
279
+ if v is not None and v != "" and v != "null":
280
+ user_profile[k] = v
281
+
282
+ # Explicitly handle keys that often get dropped or overwritten incorrectly
283
+ if "policy_term" in new_profile and str(new_profile["policy_term"]).strip():
284
+ user_profile["policy_term"] = new_profile["policy_term"]
285
+
286
+ entities: ExtractedEntities = {
287
+ "provider": list(set(providers)) if providers else [],
288
+ "insurance_type": list(set(insurance_types)) if insurance_types else [],
289
+ "plan_names": list(set(plan_names)) if plan_names else [],
290
+ "user_profile": user_profile
291
+ }
292
+
293
+ # Build metadata filters from entities
294
+ filters = {}
295
+ if providers:
296
+ filters["insurer"] = providers
297
+ if insurance_types:
298
+ filters["insurance_type"] = insurance_types
299
 
300
+ try:
301
+ with open("extraction_debug.log", "a") as f:
302
+ f.write(f"[RESULT] Entities: {entities}\n")
303
+ f.write(f"[RESULT] Profile: {user_profile}\n")
304
+ except: pass
305
+
306
+ print(f"[ENTITY DEBUG] Final entities: {entities}")
307
+ result = {
308
+ "extracted_entities": entities,
309
+ "metadata_filters": filters
310
+ }
311
+ return result
312
+ except Exception as e:
313
+ try:
314
+ with open("extraction_debug.log", "a") as f:
315
+ f.write(f"[ERROR] {str(e)}\n")
316
+ import traceback
317
+ f.write(traceback.format_exc())
318
+ except: pass
319
+
320
+ print(f"[ENTITY DEBUG] Error: {e}")
321
+ import traceback
322
+ traceback.print_exc()
323
+ return {
324
+ "extracted_entities": {
325
+ "provider": [],
326
+ "insurance_type": [],
327
+ "plan_names": [],
328
+ "user_profile": {}
329
+ },
330
+ "metadata_filters": {}
331
+ }
332
 
333
  def _extract_plan_names_from_query(self, query: str) -> List[str]:
334
  """Use LLM to extract specific plan names mentioned in query."""
 
363
 
364
  return plan_names
365
 
366
+ def _extract_user_profile(self, query: str, history: List[str] = None) -> Dict[str, Any]:
367
+ """Extract user profile information for recommendations, using history if available."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  profile = {}
369
+
370
+ # ========================================================================
371
+ # PRIORITY 1: REGEX EXTRACTION (Most Reliable)
372
+ # ========================================================================
373
+ # These patterns work with formats like:
374
+ # "age=30", "age = 30", "age is 30", "I am 30 years old"
375
+
376
+ query_lower = query.lower()
377
+
378
+ # Age extraction
379
+ age_patterns = [
380
+ r'\bage\s*[=:]\s*(\d+)', # age=30, age = 30, age: 30
381
+ r'\bage\s+is\s+(\d+)', # age is 30
382
+ r'i\s+am\s+(\d+)\s+years?\s+old', # I am 30 years old
383
+ r'(\d+)\s+years?\s+old', # 30 years old
384
+ r'\bage\s+(\d+)\b', # age 30
385
+ ]
386
+ for pattern in age_patterns:
387
+ match = re.search(pattern, query_lower)
388
+ if match and not profile.get('age'):
389
+ try:
390
+ age = int(match.group(1))
391
+ if 18 <= age <= 100: # Expanded age range
392
+ profile['age'] = age
393
+ break
394
+ except: pass
395
+
396
+ # Gender extraction
397
+ if 'gender' not in profile:
398
+ if re.search(r'gender\s*[=:]\s*(male|m\b)', query_lower) or \
399
+ re.search(r'gender\s+is\s+(male|m\b)', query_lower) or \
400
+ re.search(r'\bmale\b', query_lower):
401
+ profile['gender'] = 'male'
402
+ elif re.search(r'gender\s*[=:]\s*(female|f\b)', query_lower) or \
403
+ re.search(r'gender\s+is\s+(female|f\b)', query_lower) or \
404
+ re.search(r'\bfemale\b', query_lower):
405
+ profile['gender'] = 'female'
406
+
407
+ # Premium extraction
408
+ premium_patterns = [
409
+ r'premium\s*[=:]\s*([\d,\.]+)', # premium=100000.50
410
+ r'premium\s+(?:amount\s+)?(?:is\s+)?(?:of\s+)?([\d,\.]+)',
411
+ r'invest(?:ing)?\s+([\d,\.]+)\s*(?:lakh|lac|cr|crore|k|thousand)?',
412
+ r'([\d,\.]+)\s*(?:lakh|lac|cr|crore|k|thousand)\s+(?:per year|annual|premium)',
413
+ r'budget\s*[=:]\s*([\d,\.]+)',
414
+ ]
415
+
416
+ def parse_indian_amount(text):
417
+ """Parse amounts like '1 lakh', '5.5 cr', '100,000'"""
418
+ if not text: return None
419
+ text = text.lower().replace(',', '').strip()
420
+
421
+ multiplier = 1
422
+ if 'lakh' in text or 'lac' in text: multiplier = 100000
423
+ elif 'cr' in text or 'crore' in text: multiplier = 10000000
424
+ elif 'k' in text: multiplier = 1000
425
+
426
+ # Find the number in the segment
427
+ nums = re.findall(r'(\d+(?:\.\d+)?)', text)
428
+ if nums:
429
+ try:
430
+ return int(float(nums[0]) * multiplier)
431
+ except: return None
432
+ return None
433
+
434
+ for pattern in premium_patterns:
435
+ match = re.search(pattern, query_lower)
436
+ if match and not profile.get('premium_amount'):
437
+ # Pass the matched segment to parser
438
+ amount = parse_indian_amount(match.group(0))
439
+ if amount and 500 <= amount <= 50000000:
440
+ profile['premium_amount'] = str(amount)
441
+ break
442
+
443
+ # Policy Term (PT)
444
+ pt_patterns = [
445
+ r'\bpt\s*[=:]\s*(\d+)',
446
+ r'\bpt\s+(\d+)\b',
447
+ r'policy\s+term\s*[=:]\s*(\d+)',
448
+ r'policy\s+term\s+(?:of\s+)?(\d+)',
449
+ r'term\s*[=:]\s*(\d+)\b',
450
+ ]
451
+ for pattern in pt_patterns:
452
+ match = re.search(pattern, query_lower)
453
+ if match and not profile.get('policy_term'):
454
+ pt = match.group(1)
455
+ profile['policy_term'] = pt + " years"
456
+ break
457
+
458
+ # Payment Term (PPT)
459
+ ppt_patterns = [
460
+ r'\bppt\s*[=:]\s*(\d+)',
461
+ r'\bppt\s+(\d+)\b',
462
+ r'(?:premium\s+)?payment\s+term\s*[=:]\s*(\d+)',
463
+ r'paying\s+term\s*[=:]\s*(\d+)',
464
+ r'pay\s+term\s*[=:]\s*(\d+)',
465
+ ]
466
+ for pattern in ppt_patterns:
467
+ match = re.search(pattern, query_lower)
468
+ if match and not profile.get('payment_term'):
469
+ ppt = match.group(1)
470
+ profile['payment_term'] = ppt + " years"
471
+ break
472
+
473
+ # Payment Mode
474
+ mode_patterns = [
475
+ r'mode\s*[=:]\s*(monthly|annual|yearly|quarterly|half\s*yearly)',
476
+ r'(?:premium\s+)?(?:payment\s+)?mode\s+(?:is\s+)?(monthly|annual|yearly|quarterly)',
477
+ r'\b(monthly|annual|yearly|quarterly)\b',
478
+ ]
479
+ for pattern in mode_patterns:
480
+ match = re.search(pattern, query_lower)
481
+ if match and not profile.get('payment_mode'):
482
+ mode = match.group(1).strip()
483
+ if mode == 'yearly': mode = 'annual'
484
+ profile['payment_mode'] = mode
485
+ break
486
+
487
+ # ========================================================================
488
+ # PRIORITY 2: LLM EXTRACTION (Fallback for complex cases)
489
+ # ========================================================================
490
+ # Use LLM if critical fields are missing OR if it's a recommendation intent
491
+ critical_fields = ['age', 'gender', 'premium_amount']
492
+ missing_critical = any(field not in profile for field in critical_fields)
493
+
494
+ if missing_critical:
495
+ llm = LLMFactory.get_llm("medium")
496
+
497
+ history_context = ""
498
+ if history:
499
+ history_str = "\n".join([f"- {h}" for h in history[-5:]])
500
+ history_context = f"\n\nCONVERSATION HISTORY:\n{history_str}"
501
+
502
+ system_prompt = (
503
+ "Extract user profile details for insurance recommendations.\n"
504
+ "JSON Output fields (use null if unknown):\n"
505
+ "- age (number)\n"
506
+ "- gender (male/female)\n"
507
+ "- premium_amount (number)\n"
508
+ "- policy_term (number of years)\n"
509
+ "- payment_term (number of years)\n"
510
+ "- payment_mode (Monthly/Annual/Quarterly/Half-Yearly)\n\n"
511
+ "MAPPING RULES:\n"
512
+ "- PT = policy_term\n"
513
+ "- PPT = payment_term\n"
514
+ "- mode = payment_mode\n"
515
+ "- Extract from latest query AND history. Latest query wins conflicts.\n"
516
+ "Return ONLY a raw JSON object."
517
+ )
518
+
519
+ prompt = f"LATEST QUERY: {query}{history_context}"
520
+
521
+ try:
522
+ response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
523
+ result_text = getattr(response, 'content', str(response))
524
+
525
+ # Try to parse JSON
526
+ try:
527
+ # Clean the response in case LLM added markdown blocks
528
+ clean_json = re.search(r'\{.*\}', result_text, re.DOTALL)
529
+ if clean_json:
530
+ llm_profile = json.loads(clean_json.group(0))
531
+
532
+ # Merge LLM results into profile if regex didn't find them
533
+ if 'age' not in profile and llm_profile.get('age'):
534
+ profile['age'] = int(llm_profile['age'])
535
+ if 'gender' not in profile and llm_profile.get('gender'):
536
+ profile['gender'] = llm_profile['gender'].lower()
537
+ if 'premium_amount' not in profile and llm_profile.get('premium_amount'):
538
+ profile['premium_amount'] = str(llm_profile['premium_amount'])
539
+ if 'policy_term' not in profile and llm_profile.get('policy_term'):
540
+ profile['policy_term'] = str(llm_profile['policy_term']) + " years"
541
+ if 'payment_term' not in profile and llm_profile.get('payment_term'):
542
+ profile['payment_term'] = str(llm_profile['payment_term']) + " years"
543
+ if 'payment_mode' not in profile and llm_profile.get('payment_mode'):
544
+ profile['payment_mode'] = llm_profile['payment_mode'].title().replace('Annual', 'annual').lower()
545
+ except:
546
+ # Fallback to line-based parsing if JSON fails
547
+ for line in result_text.split('\n'):
548
+ if ':' in line:
549
+ parts = line.split(':', 1)
550
+ k = parts[0].strip().lower()
551
+ v = parts[1].strip().lower().replace('"', '').replace("'", "")
552
+ if v and v != 'null':
553
+ if 'age' in k and 'age' not in profile:
554
+ nums = re.findall(r'\d+', v)
555
+ if nums: profile['age'] = int(nums[0])
556
+ elif 'gender' in k and 'gender' not in profile: profile['gender'] = v
557
+ elif 'premium' in k and 'premium_amount' not in profile: profile['premium_amount'] = v
558
+ elif 'policy_term' in k or 'pt' == k and 'policy_term' not in profile: profile['policy_term'] = v + " years"
559
+ elif 'payment_term' in k or 'ppt' == k and 'payment_term' not in profile: profile['payment_term'] = v + " years"
560
+
561
+ except Exception as e:
562
+ print(f"[WARNING] LLM extraction failed: {e}")
563
 
564
  return profile
565
 
 
818
  aggregated[plan_id] = final_chunks
819
 
820
  # Refresh context strings based on aggregated chunks
821
+ intent = state.get("intent", "compare_plans")
822
  limit = 5 if intent == "compare_plans" else 3
823
  context = self._format_context(aggregated, limit=limit)
824
 
 
881
  # =========================================================================
882
  def retrieval_agent(self, state: AgentState) -> Dict[str, Any]:
883
  """
884
+ Agent for answering plan-specific or comparison questions using retrieved context.
 
885
  """
886
+ complexity = state.get("query_complexity", "low")
887
+ llm = LLMFactory.get_llm(complexity)
888
+
889
  query = state["input"]
890
  context = state.get("context", [])
891
+ entities = state.get("extracted_entities", {})
892
 
893
  if not context:
894
+ # Fallback retrieval with better filtering
895
  retriever = self._get_retriever()
896
  if retriever:
897
+ # Try to extract plan names from query for better filtering
898
+ plan_names = entities.get("plan_names", [])
899
+ filters = state.get("metadata_filters", {})
900
+
901
+ # If we have plan names, use them for filtering
902
+ if plan_names:
903
+ filters["product_name"] = plan_names
904
+
905
+ # Retrieve with filters
906
+ if filters:
907
+ docs = retriever.search(query, filters=filters, k=10)
908
+ else:
909
+ docs = retriever.search(query, k=10)
910
+
911
+ # Format context with plan names
912
+ context = [f"[{d.metadata.get('product_name', 'Unknown')}] {d.page_content}" for d in docs]
913
+
914
+ # If still no context, provide a helpful message
915
+ if not context:
916
+ return {
917
+ "answer": "I couldn't find specific information about that plan in my knowledge base. "
918
+ "Could you please provide more details or try asking about a different plan? "
919
+ "You can also ask me to list available plans."
920
+ }
921
 
922
  context_str = "\n\n".join(context)
923
 
 
925
 
926
  {COMPLIANCE_RULES}
927
 
928
+ STRICT GROUNDING RULES:
929
+ - Answer the user's question using the Policy Context provided to you.
930
+ - If the requested plan is NOT mentioned in the Policy Context, say: "I'm sorry, but I couldn't find information regarding [Plan Name] in our current policy database. Please verify the name or ask me to list available plans."
931
+ - If the question is about non-insurance topics, refuse using the OUT-OF-BOUNDS REFUSAL rule.
932
+ - Structure your response with clear headings and bullet points.
933
+ """
934
 
935
  prompt = f"Policy Context:\n{context_str}\n\nUser Question: {query}"
936
 
 
940
  return {"answer": answer}
941
 
942
  # =========================================================================
943
+ # NODE 9: Recommendation Agent (Advisory)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944
  # =========================================================================
945
  def advisory_agent(self, state: AgentState) -> Dict[str, Any]:
946
  """
947
  Provides personalized recommendations based on user profile.
948
  Grounds all advice in retrieved documents.
949
+ If critical info (age/gender/premium) is missing for specific plans, asks for it.
950
  """
951
  llm = LLMFactory.get_llm("large")
952
  query = state["input"]
 
954
  entities = state.get("extracted_entities", {})
955
  user_profile = entities.get("user_profile", {})
956
 
957
+ # Check for Insurer and Guaranteed/Savings context
958
+ providers = entities.get("provider", [])
959
+ is_guaranteed = any(t in ["Savings Plan", "Guaranteed Return"] for t in entities.get("insurance_type", []))
960
+ is_rec = state.get("intent") == "recommendation"
961
+
962
+ # Only block and ask for info IF the intent is explicitly a recommendation/calculation
963
+ if is_rec:
964
+ print(f"[ADVISORY DEBUG] Full entities: {entities}")
965
+ print(f"[ADVISORY DEBUG] User profile: {user_profile}")
966
+ missing = []
967
+ if not user_profile.get("age"): missing.append("age")
968
+ if not user_profile.get("gender"): missing.append("gender")
969
+ if not user_profile.get("premium_amount"): missing.append("annual premium amount")
970
+ if not user_profile.get("policy_term"): missing.append("policy term (PT)")
971
+ if not user_profile.get("payment_term"): missing.append("premium payment term (PPT)")
972
+ if not user_profile.get("payment_mode"): missing.append("premium payment mode")
973
+
974
+ print(f"[ADVISORY DEBUG] Missing fields check:")
975
+ for field in ["age", "gender", "premium_amount", "policy_term", "payment_term", "payment_mode"]:
976
+ value = user_profile.get(field)
977
+ print(f" - {field}: {value} (truthy: {bool(value)})")
978
+ print(f"[ADVISORY DEBUG] Final missing list: {missing}")
979
+
980
+ # Block and ask for info for professional consultation
981
+ if missing:
982
+ missing_str = " and ".join([", ".join(missing[:-1]), missing[-1]] if len(missing) > 1 else missing)
983
+ return {"answer": f"To provide you with specific benefit figures and a professional recommendation, I need a few more details: **{missing_str}**. Could you please provide these?"}
984
+
985
+ # If we have everything, get the numbers
986
+ calc_result = self.plan_calculator_tool(state)
987
+ state["reasoning_output"] = calc_result.get("reasoning_output", "")
988
+ else:
989
+ # If not a recommendation intent, check if we have enough profile data to show numbers anyway
990
+ # (e.g., if user asks about a specific plan but we already know their profile)
991
+ if user_profile.get("age") and user_profile.get("premium_amount") and user_profile.get("policy_term"):
992
+ calc_result = self.plan_calculator_tool(state)
993
+ state["reasoning_output"] = calc_result.get("reasoning_output", "")
994
+ calculation_info = ""
995
+ raw_calc = state.get('reasoning_output', '')
996
+ if raw_calc:
997
+ try:
998
+ calc_json = json.loads(raw_calc)
999
+ table = calc_json.get("summary_table", "")
1000
+ if table:
1001
+ calculation_info = f"\n\n### MANDATORY GROUNDING: NUMERICAL DATA TABLE\n{table}\n(PRIORITIZE THESE PLANS AND NUMBERS OVER ANY TEXT BELOW)\n"
1002
+ except: pass
1003
+
1004
+ context_str = "\n\n".join(context) if context else "No plans found."
1005
 
1006
  profile_info = ""
1007
  if user_profile:
1008
+ profile_parts = [f"{k}: {v}" for k, v in user_profile.items() if v]
 
 
 
 
 
 
 
 
1009
  if profile_parts:
1010
  profile_info = f"\n\nUser Profile: {', '.join(profile_parts)}"
1011
 
1012
  system_prompt = f"""You are an Expert Insurance Advisor.
1013
+
1014
  {COMPLIANCE_RULES}
1015
+
1016
  RECOMMENDATION RULES:
1017
+ - 🚨 PRIORITY 1: Recommending plans from the 'MANDATORY GROUNDING' table above. Use those EXACT numbers.
1018
+ - 🚨 PRIORITY 2: Only provide benefit calculations for the plans in the GROUNDING table.
1019
+ - If the user asks about plans not in the table for calculation, say you don't have calculation data for them yet.
1020
+ - If the query is out-of-bounds, use the OUT-OF-BOUNDS REFUSAL rule.
1021
+ - NEVER say "Not Available" if numbers exist in the grounding table.
1022
+ - Be consultative and grounded.
1023
+ """
1024
 
1025
+ prompt = f"{calculation_info}\n\nPolicy Context:\n{context_str}{profile_info}\n\nUser Question: {query}"
1026
 
1027
  response = llm.invoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
1028
  answer = getattr(response, 'content', str(response))
 
1034
  # =========================================================================
1035
  def faq_agent(self, state: AgentState) -> Dict[str, Any]:
1036
  """
1037
+ Agent for general insurance questions (glossary, concepts).
 
1038
  """
1039
+ llm = LLMFactory.get_llm("low")
1040
  query = state["input"]
1041
  context = state.get("context", [])
1042
 
1043
+ # Try to retrieve context for general insurance terms if not already provided
1044
+ if not context:
1045
+ retriever = self._get_retriever()
1046
+ if retriever:
1047
+ # Use broader search for general queries
1048
+ docs = retriever.search(query, k=3) # Reduced from 5 to 3 for more focused context
1049
+ if docs:
1050
+ context = [d.page_content for d in docs]
1051
+
1052
  context_str = "\n\n".join(context) if context else ""
1053
 
1054
  system_prompt = f"""You are an Insurance Helpdesk Assistant.
1055
 
1056
  {COMPLIANCE_RULES}
1057
+
1058
+ INSTRUCTIONS:
1059
+ - For insurance terminology: Provide a clear, concise definition.
1060
+ - 🚨 STRICT RULE: If the user asks about ANYTHING non-insurance related (e.g., travel tickets, cooking, etc.), you MUST refuse and redirect to insurance topics.
1061
+ - 🚨 NO HALLUCINATION: If the term is not common insurance knowledge and not in context, say you don't know rather than guessing.
1062
+ - Keep the total response under 150 words.
1063
 
1064
+ Common Insurance Terms to use as reference:
1065
+ - **Policy Term (PT)**: The total duration for which the policy remains active.
1066
+ - **Premium Payment Term (PPT)**: The duration during which premiums must be paid.
1067
+ - **Maturity Benefit**: The lump sum amount paid when the policy matures.
1068
+ - **Sum Assured**: The guaranteed amount payable on death or maturity.
1069
+ """
1070
 
1071
  prompt = f"Context (if relevant):\n{context_str}\n\nUser Question: {query}" if context_str else f"User Question: {query}"
1072
 
 
1096
 
1097
  return {"answer": answer}
1098
 
1099
+ # =========================================================================
1100
+ # TOOL: Plan Calculator Tool
1101
+ # =========================================================================
1102
+ def plan_calculator_tool(self, state: AgentState) -> Dict[str, Any]:
1103
+ """
1104
+ Tool logic to calculate benefits using the API's dummy logic.
1105
+ Extremely robust extraction fallback for age, gender, and premium.
1106
+ """
1107
+ from api.plans import get_plan_benefits_tool, resolve_plan_id
1108
+ user_profile = state.get("extracted_entities", {}).get("user_profile", {})
1109
+ plan_names = state.get("extracted_entities", {}).get("plan_names", [])
1110
+ query = state["input"].lower()
1111
+
1112
+ # --- ROBUST FALLBACKS ---
1113
+ # 1. Age Fallback
1114
+ age = user_profile.get("age")
1115
+ if not age:
1116
+ age_match = re.search(r'\b(\d{2})\b\s*(?:year|yr|old|male|female)?', query)
1117
+ if age_match:
1118
+ age = int(age_match.group(1))
1119
+
1120
+ # 2. Gender Fallback
1121
+ gender = user_profile.get("gender")
1122
+ if not gender:
1123
+ if "male" in query and "female" not in query: gender = "male"
1124
+ elif "female" in query: gender = "female"
1125
+
1126
+ # 3. Premium Fallback
1127
+ premium = user_profile.get("premium_amount")
1128
+ clean_premium = 0.0
1129
+
1130
+ if not premium:
1131
+ # Look for any number followed by a potential unit
1132
+ prem_match = re.search(r'(\d+(?:\.\d+)?)\s*(?:rs\.?|inr|lakh|cr|k|thousand)?', query)
1133
+ if prem_match:
1134
+ val = float(prem_match.group(1))
1135
+ unit_search = query[prem_match.start():prem_match.end()+20] # look ahead
1136
+ if 'lakh' in unit_search: val *= 100000
1137
+ elif 'cr' in unit_search: val *= 10000000
1138
+ elif any(k in unit_search for k in ['k', 'thousand']): val *= 1000
1139
+ clean_premium = val
1140
+ else:
1141
+ try:
1142
+ if isinstance(premium, (int, float)):
1143
+ clean_premium = float(premium)
1144
+ else:
1145
+ nums = re.findall(r'\d+\.?\d*', str(premium))
1146
+ if nums:
1147
+ clean_premium = float(nums[0])
1148
+ if 'lakh' in str(premium).lower(): clean_premium *= 100000
1149
+ elif 'cr' in str(premium).lower(): clean_premium *= 10000000
1150
+ except:
1151
+ pass
1152
+
1153
+ if not (age and gender and clean_premium > 0):
1154
+ return {"reasoning_output": "Insufficient data (age, gender, or premium) to calculate benefits."}
1155
+
1156
+ # 4. Resolve Plan IDs
1157
+ pids = []
1158
+ for name in plan_names:
1159
+ pid = resolve_plan_id(name)
1160
+ if pid: pids.append(pid)
1161
+
1162
+ # If no specific plan found, calculate for ALL default plans
1163
+ target_plan_id = pids[0] if len(pids) == 1 else None
1164
+
1165
+ # 5. Execute Tool
1166
+ calculation_json = get_plan_benefits_tool(
1167
+ age=int(age),
1168
+ gender=str(gender),
1169
+ premium_amount=clean_premium,
1170
+ plan_id=target_plan_id,
1171
+ policy_term=user_profile.get("policy_term"),
1172
+ payment_term=user_profile.get("payment_term"),
1173
+ payment_mode=user_profile.get("payment_mode")
1174
+ )
1175
+
1176
+ return {"reasoning_output": calculation_json}
1177
+
1178
  # =========================================================================
1179
  # HELPER METHODS
1180
  # =========================================================================
agents/states.py CHANGED
@@ -5,11 +5,16 @@ import operator
5
  class UserProfile(TypedDict, total=False):
6
  """User profile for recommendation intent."""
7
  age: Optional[int]
 
8
  income: Optional[str]
9
  smoker: Optional[bool]
10
  dependents: Optional[int]
11
  goal: Optional[str] # "protection", "savings", "retirement", "wealth"
12
  cover_amount: Optional[str] # e.g., "1 Cr", "50 Lakh"
 
 
 
 
13
 
14
 
15
  class ExtractedEntities(TypedDict, total=False):
@@ -31,6 +36,7 @@ class AgentState(TypedDict):
31
 
32
  # Query Classification
33
  intent: str # 'list_plans', 'plan_details', 'compare_plans', 'recommendation', 'general_query'
 
34
 
35
  # Entity Extraction
36
  extracted_entities: ExtractedEntities
 
5
class UserProfile(TypedDict, total=False):
    """User profile for recommendation intent.

    All keys are optional (total=False); agents fill in whatever has been
    extracted from the conversation so far.
    """
    age: Optional[int]              # applicant age in years
    gender: Optional[str]  # "male", "female"
    income: Optional[str]           # free-form income description
    smoker: Optional[bool]          # smoker status, if stated
    dependents: Optional[int]       # number of dependents
    goal: Optional[str]  # "protection", "savings", "retirement", "wealth"
    cover_amount: Optional[str]  # e.g., "1 Cr", "50 Lakh"
    premium_amount: Optional[str]  # e.g., "1 Lakh", "50000"
    policy_term: Optional[str]  # PT
    payment_term: Optional[str]  # PPT
    payment_mode: Optional[str]  # Mode (Monthly, Annual, etc.)
 
19
 
20
  class ExtractedEntities(TypedDict, total=False):
 
36
 
37
  # Query Classification
38
  intent: str # 'list_plans', 'plan_details', 'compare_plans', 'recommendation', 'general_query'
39
+ query_complexity: str # 'low' | 'high'
40
 
41
  # Entity Extraction
42
  extracted_entities: ExtractedEntities
api/main.py CHANGED
@@ -4,8 +4,12 @@ from typing import List, Dict, Optional, Any
4
  from agents.graph import app as agent_app
5
  # from ingestion.pipeline import IngestionPipeline # Optional: Trigger via API
6
 
 
 
7
  app = FastAPI(title="Insurance Advisory AI Agent", version="1.0.0")
8
 
 
 
9
  class ChatRequest(BaseModel):
10
  message: str
11
  chat_history: Optional[List[str]] = []
 
4
  from agents.graph import app as agent_app
5
  # from ingestion.pipeline import IngestionPipeline # Optional: Trigger via API
6
 
7
+ from api.plans import router as plans_router
8
+
9
  app = FastAPI(title="Insurance Advisory AI Agent", version="1.0.0")
10
 
11
+ app.include_router(plans_router)
12
+
13
  class ChatRequest(BaseModel):
14
  message: str
15
  chat_history: Optional[List[str]] = []
api/monitoring.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Monitoring and health check endpoints.
3
+ """
4
+ from flask import Blueprint, jsonify
5
+ import os
6
+ from datetime import datetime
7
+ from utils.metrics import metrics
8
+ from utils.request_logger import request_logger
9
+ from utils.cache import cache_manager
10
+ from utils.circuit_breaker import circuit_breaker_manager
11
+ from config import config
12
+
13
+ monitoring_bp = Blueprint('monitoring', __name__, url_prefix='/api')
14
+
15
+
16
@monitoring_bp.route('/health', methods=['GET'])
def health_check():
    """
    Basic health check endpoint.

    Always returns HTTP 200 while the process is alive; reports the
    service version and environment alongside a timestamp.
    """
    payload = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": config.VERSION,
        "environment": config.ENVIRONMENT.value,
    }
    return jsonify(payload), 200
28
+
29
+
30
@monitoring_bp.route('/ready', methods=['GET'])
def readiness_check():
    """
    Readiness check - validates critical dependencies.
    Returns 200 if all dependencies are available, 503 otherwise.

    Checks performed:
      - vector store directory exists on disk
      - LLM API key is configured
      - no circuit breaker is currently open
    """
    checks = {}          # per-dependency status strings, returned to caller
    overall_ready = True  # flipped to False by any failing check

    # Check vector store
    try:
        vector_store_path = config.VECTOR_STORE_PATH
        if os.path.exists(vector_store_path):
            checks["vector_store"] = "ready"
        else:
            checks["vector_store"] = "not_found"
            overall_ready = False
    except Exception as e:
        # Config access itself may fail; report instead of crashing readiness.
        checks["vector_store"] = f"error: {str(e)}"
        overall_ready = False

    # Check LLM API key (presence only - does not validate the key)
    if config.GROQ_API_KEY:
        checks["llm_api"] = "configured"
    else:
        checks["llm_api"] = "missing_api_key"
        overall_ready = False

    # Check circuit breakers: any breaker in the "open" state means a
    # downstream dependency is currently failing.
    breaker_states = circuit_breaker_manager.get_all_states()
    open_breakers = [name for name, state in breaker_states.items() if state["state"] == "open"]

    if open_breakers:
        checks["circuit_breakers"] = f"open: {', '.join(open_breakers)}"
        overall_ready = False
    else:
        checks["circuit_breakers"] = "all_closed"

    # 503 tells orchestrators (e.g. k8s) to keep traffic away until ready.
    status_code = 200 if overall_ready else 503

    return jsonify({
        "ready": overall_ready,
        "checks": checks,
        "timestamp": datetime.now().isoformat()
    }), status_code
75
+
76
+
77
@monitoring_bp.route('/metrics', methods=['GET'])
def get_metrics():
    """
    Get application metrics in JSON format.

    Returns 403 when metrics collection is disabled via configuration.
    """
    if not config.ENABLE_METRICS:
        return jsonify({"error": "Metrics disabled"}), 403

    # Aggregate the three metric sources into a single payload.
    payload = {
        "application": metrics.get_metrics(),
        "cache": cache_manager.get_all_stats(),
        "circuit_breakers": circuit_breaker_manager.get_all_states(),
    }
    return jsonify(payload), 200
94
+
95
+
96
@monitoring_bp.route('/stats', methods=['GET'])
def get_stats():
    """
    Get human-readable statistics.

    Combines live application metrics with request-log aggregates
    (recent requests, 24h intent distribution and 24h error rate).
    """
    snapshot = metrics.get_metrics()
    req_stats = snapshot["requests"]

    # Condensed headline numbers.
    summary = {
        "total_requests": req_stats["total"],
        "active_requests": req_stats["active"],
        "error_rate": req_stats["error_rate"],
        "avg_latency_ms": req_stats["latency_ms"]["p50"],
        "uptime_hours": snapshot["uptime_seconds"] / 3600,
    }

    cache_performance = {
        "llm_cache_hit_rate": snapshot["llm"]["cache_hit_rate"],
        "app_cache_hit_rate": snapshot["cache"]["hit_rate"],
    }

    return jsonify({
        "summary": summary,
        "intent_distribution": request_logger.get_intent_distribution(hours=24),
        "recent_requests": request_logger.get_recent_requests(limit=10),
        "error_rate_24h": request_logger.get_error_rate(hours=24),
        "cache_performance": cache_performance,
    }), 200
124
+
125
+
126
@monitoring_bp.route('/logs/recent', methods=['GET'])
def get_recent_logs():
    """
    Get recent request logs.

    Query params:
        limit: max number of entries to return (default 50, capped at 500).
    """
    # BUG FIX: `request` was referenced but never imported (the module only
    # imports Blueprint and jsonify from flask), causing a NameError on every
    # call. Imported locally so this fix is self-contained.
    from flask import request

    # Guard the int() parse: a non-numeric ?limit= must not produce a 500.
    try:
        limit = int(request.args.get('limit', 50))
    except (TypeError, ValueError):
        limit = 50
    limit = max(1, min(limit, 500))  # cap at 500, floor at 1

    recent = request_logger.get_recent_requests(limit=limit)

    return jsonify({
        "count": len(recent),
        "requests": recent
    }), 200
api/plans.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Query
2
+ from pydantic import BaseModel
3
+ from typing import List, Dict, Optional, Any
4
+ import json
5
+ import re
6
+ from random import random
7
+
8
+ router = APIRouter(prefix="/plans", tags=["Plans"])
9
+
10
class PlanInfoRequest(BaseModel):
    """Request payload for calculating the benefits of one (or all) plans."""
    age: int                            # applicant age in years
    gender: str                         # "male" / "female"
    premium_amount: float               # premium amount in rupees
    goal: Optional[str] = "savings"     # investment goal hint
    plan_id: Optional[int] = None       # None => evaluate default plans
    policy_term: Optional[str] = None   # PT, e.g. "20 years"
    payment_term: Optional[str] = None  # PPT, e.g. "10 years"
    payment_mode: Optional[str] = None  # Monthly / Annual / Quarterly / ...
19
+
20
class BenefitItem(BaseModel):
    """A single named benefit line within a plan calculation response."""
    name: str         # benefit label, e.g. "Maturity Benefit"
    value: str        # formatted value, e.g. "₹1,200,000.00" or "Exempt"
    description: str  # short human-readable explanation
24
+
25
class PlanBenefitResponse(BaseModel):
    """Calculated benefit summary for one plan and one applicant."""
    plan_id: int
    plan_name: str
    eligibility_status: bool    # True when age falls within the plan's band
    reason: str                 # eligibility explanation for the user
    maturity_benefit: str       # formatted currency string (₹)
    annual_income: str          # formatted currency string (₹)
    sum_assured: str            # formatted currency string (₹)
    income_start_point: str     # e.g. "5 years", "2nd year"
    income_duration: str        # e.g. "20 years"
    sad_multiple: str           # sum-assured multiple of the premium
    payout_freq: str            # "Yearly" / "Monthly"
    recommendation_score: float
    benefits: List[BenefitItem]  # itemized benefit lines
39
+
40
# Dummy Plan Data Store: per-plan eligibility band and payout parameters
# used by the benefit calculator.
PLANS_DATA = {
    1: {
        "name": "Edelweiss Life Guaranteed Income STAR",
        "min_age": 3,
        "max_age": 50,
        "benefits_multiplier": 1.2,
        "income_start": "5 years",
        "income_duration": "20 years",
        "payout_freq": "Yearly",
        "sad_multiple": "10"
    },
    2: {
        "name": "Edelweiss Life Bharat Savings STAR",
        "min_age": 0,
        "max_age": 60,
        "benefits_multiplier": 1.1,
        "income_start": "2nd year",
        "income_duration": "15 years",
        "payout_freq": "Monthly",
        "sad_multiple": "7"
    },
    3: {
        "name": "Edelweiss Life Premier Guaranteed STAR Pro",
        "min_age": 5,
        "max_age": 55,
        "benefits_multiplier": 1.3,
        "income_start": "15 years",
        "income_duration": "20 years",
        "payout_freq": "Yearly",
        "sad_multiple": "11"
    },
    4: {
        "name": "EdelweissLife Flexi Dream Plan",
        "min_age": 18,
        "max_age": 60,
        "benefits_multiplier": 0.9,
        "income_start": "2 years",
        "income_duration": "10 years",
        "payout_freq": "Yearly",
        "sad_multiple": "8"
    },
    5: {
        "name": "EdelweissLife Guaranteed Savings STAR",
        "min_age": 0,
        "max_age": 60,
        "benefits_multiplier": 1.17,
        "income_start": "3rd year",
        "income_duration": "15 years",
        "payout_freq": "Monthly",
        "sad_multiple": "7"
    },
    6: {
        "name": "EdelweissLife Flexi Savings STAR",
        "min_age": 18,
        "max_age": 65,
        "benefits_multiplier": 1.42,
        "income_start": "10 years",
        "income_duration": "25 years",
        "payout_freq": "Yearly",
        "sad_multiple": "5"
    }
}

# Plan name (or distinctive substring) -> plan ID.
# Matching is performed case-insensitively by resolve_plan_id().
PLAN_NAME_TO_ID = {
    "guaranteed income star": 1,
    "bharat savings star": 2,
    "premier guaranteed star pro": 3,
    "Flexi Dream Plan": 4,
    "Flexi Savings STAR": 6,
    "Guaranteed Savings STAR": 5
}

def resolve_plan_id(name: str) -> Optional[int]:
    """Resolve a plan name or substring to a plan ID (first match wins).

    BUG FIX: the mapping contains mixed-case keys but the input was
    lowercased before comparison, so the mixed-case entries
    ("Flexi Dream Plan", "Flexi Savings STAR", "Guaranteed Savings STAR")
    could never match. Keys are now lowercased before comparing.

    Args:
        name: Plan name as mentioned by the user (any casing).

    Returns:
        The matching plan ID, or None if no mapping entry matches.
    """
    name_lower = name.lower().strip()
    for key, pid in PLAN_NAME_TO_ID.items():
        key_lower = key.lower()
        # Bidirectional substring match: handles both partial mentions and
        # fully-qualified plan names.
        if key_lower in name_lower or name_lower in key_lower:
            return pid
    return None
121
+
122
+ def calculate_dummy_benefits(plan_id: int, request: PlanInfoRequest) -> PlanBenefitResponse:
123
+ plan = PLANS_DATA.get(plan_id)
124
+ if not plan:
125
+ return None
126
+
127
+ is_eligible = plan["min_age"] <= request.age <= plan["max_age"]
128
+ reason = "Eligible based on age criteria." if is_eligible else f"Ineligible: Age must be between {plan['min_age']} and {plan['max_age']}"
129
+
130
+ # Dummy calculation logic influenced by PT/PPT
131
+ pt_val = 15
132
+ if request.policy_term:
133
+ try: pt_val = int(re.search(r'\d+', request.policy_term).group())
134
+ except: pass
135
+
136
+ mult_adj = (pt_val / 15.0) # PT adjustment
137
+ randval = random()
138
+ if randval < 0.5:
139
+ randval = 0.5
140
+ maturity_val = request.premium_amount * 10 * (1+randval)* plan["benefits_multiplier"] * mult_adj
141
+ income_val = request.premium_amount * (plan["benefits_multiplier"]/0.467)
142
+
143
+ # Calculate Sum Assured
144
+ sad_val = request.premium_amount * float(plan["sad_multiple"])
145
+ sum_assured = f"₹{sad_val:,.2f}"
146
+
147
+ maturity_benefit = f"₹{maturity_val:,.2f}"
148
+ annual_income = f"₹{income_val:,.2f}"
149
+
150
+ benefits = [
151
+ BenefitItem(name="Maturity Benefit", value=maturity_benefit, description="Guaranteed lump sum"),
152
+ BenefitItem(name="Annual Income Benefit", value=annual_income, description="Regular payouts"),
153
+ BenefitItem(name="Sum Assured", value=sum_assured, description="Life Cover"),
154
+ BenefitItem(name="Tax Benefit", value="Exempt", description="Sec 80C")
155
+ ]
156
+
157
+ return PlanBenefitResponse(
158
+ plan_id=plan_id,
159
+ plan_name=plan["name"],
160
+ eligibility_status=is_eligible,
161
+ reason=reason,
162
+ maturity_benefit=maturity_benefit,
163
+ annual_income=annual_income,
164
+ sum_assured=sum_assured,
165
+ income_start_point=plan["income_start"],
166
+ income_duration=plan["income_duration"],
167
+ payout_freq=plan["payout_freq"],
168
+ sad_multiple=plan["sad_multiple"],
169
+ recommendation_score=0.9 if is_eligible else 0.1,
170
+ benefits=benefits
171
+ )
172
+
173
+ @router.get("/calculate", response_model=PlanBenefitResponse)
174
+ async def calculate_by_id(
175
+ plan_id: int = Query(..., alias="Planid"),
176
+ age: int = Query(...),
177
+ gender: str = Query(...),
178
+ premium_amount: float = Query(...),
179
+ goal: str = Query("savings")
180
+ ):
181
+ """
182
+ Calculates benefits for a specific Edelweiss plan using Plan ID.
183
+ Uses dummy logic for demonstration.
184
+ """
185
+ request = PlanInfoRequest(age=age, gender=gender, premium_amount=premium_amount, goal=goal)
186
+ result = calculate_dummy_benefits(plan_id, request)
187
+ if not result:
188
+ raise HTTPException(status_code=404, detail="Plan not found")
189
+ return result
190
+
191
+ @router.post("/calculate", response_model=List[PlanBenefitResponse])
192
+ async def calculate_all_benefits(request: PlanInfoRequest):
193
+ """
194
+ Calculates benefits for all Edelweiss Guaranteed Income plans.
195
+ """
196
+ if request.plan_id:
197
+ result = calculate_dummy_benefits(request.plan_id, request)
198
+ if not result:
199
+ raise HTTPException(status_code=404, detail="Plan not found")
200
+ return [result]
201
+
202
+ results = []
203
+ for pid in PLANS_DATA:
204
+ results.append(calculate_dummy_benefits(pid, request))
205
+ return results
206
+
207
+ def get_plan_benefits_tool(age: int, gender: str, premium_amount: float, plan_id: Optional[int] = None,
208
+ policy_term: Optional[str] = None, payment_term: Optional[str] = None,
209
+ payment_mode: Optional[str] = None) -> str:
210
+ """
211
+ Python function to be used as a tool by LangGraph.
212
+ Returns a combined string with a Markdown table and JSON.
213
+ """
214
+ request = PlanInfoRequest(
215
+ age=age,
216
+ gender=gender,
217
+ premium_amount=premium_amount,
218
+ plan_id=plan_id,
219
+ policy_term=policy_term,
220
+ payment_term=payment_term,
221
+ payment_mode=payment_mode
222
+ )
223
+ data = []
224
+ if plan_id:
225
+ result = calculate_dummy_benefits(plan_id, request)
226
+ if result: data = [result.dict()]
227
+ else:
228
+ for pid in PLANS_DATA:
229
+ data.append(calculate_dummy_benefits(pid, request).dict())
230
+
231
+ if not data:
232
+ return "No plans found or ineligible."
233
+
234
+ # Create a nice Markdown Table for the LLM
235
+ table = "| Plan Name | Income Start | Duration | SAD Multi | Sum Assured | Maturity Benefit | Annual Income |\n"
236
+ table += "| :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n"
237
+ for d in data:
238
+ table += f"| {d['plan_name']} | {d['income_start_point']} | {d['income_duration']} | {d['sad_multiple']} | {d['sum_assured']} | {d['maturity_benefit']} | {d['annual_income']} |\n"
239
+
240
+ output = {
241
+ "summary_table": table,
242
+ "raw_data": data
243
+ }
244
+ return json.dumps(output, indent=2)
app.py CHANGED
@@ -1,17 +1,40 @@
1
  import os
2
  import json
3
  import threading
 
 
4
  import speech_recognition as sr
5
  from flask import Flask, render_template, request, jsonify
 
6
  from agents.graph import app as agent_app
7
  from dotenv import load_dotenv
8
  from ingestion.pipeline import IngestionPipeline
9
  from rag.vector_store import VectorStoreManager
10
 
 
 
 
 
 
 
 
 
 
11
  load_dotenv()
12
 
 
 
 
13
  app = Flask(__name__)
14
 
 
 
 
 
 
 
 
 
15
  # Global state for ingestion tracking
16
  ingestion_status = {
17
  "status": "Idle",
@@ -19,18 +42,84 @@ ingestion_status = {
19
  "last_error": None
20
  }
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def ingest_worker(file_path, delete_source=None):
23
  """Worker thread for background ingestion using enhanced pipeline."""
24
  global ingestion_status
25
  try:
 
26
  ingestion_status["status"] = "Starting..."
27
  ingestion_status["progress"] = 0
28
 
29
- base_docs_dir = "docs"
30
  pipeline = IngestionPipeline(base_docs_dir)
31
  vector_manager = VectorStoreManager()
32
 
33
  if delete_source:
 
34
  ingestion_status["status"] = "Removing old version..."
35
  vector_manager.delete_documents_by_source(delete_source)
36
  ingestion_status["progress"] = 10
@@ -38,94 +127,197 @@ def ingest_worker(file_path, delete_source=None):
38
  ingestion_status["status"] = "Processing document..."
39
  ingestion_status["progress"] = 30
40
 
41
- # Use the new unified process_single_file method
42
- # Handles metadata extraction, section detection, and proper chunking
43
  chunks = pipeline.process_single_file(file_path)
44
 
45
  if chunks:
 
46
  ingestion_status["status"] = "Updating Vector Store..."
47
  ingestion_status["progress"] = 70
48
  vector_manager.update_vector_store(chunks)
49
 
50
- # Reload the retriever in the agent nodes to see new documents
 
 
 
 
51
  from agents.nodes import nodes
52
  nodes.reload_retriever()
53
 
54
  ingestion_status["status"] = "Completed Successfully!"
55
  ingestion_status["progress"] = 100
 
56
  else:
 
57
  ingestion_status["status"] = "Failed: No content extracted."
58
  ingestion_status["progress"] = 0
59
 
60
  except Exception as e:
 
61
  ingestion_status["status"] = "Failed"
62
  ingestion_status["last_error"] = str(e)
63
  ingestion_status["progress"] = 0
64
 
 
65
  @app.route("/")
66
  def index():
 
67
  return render_template("index.html")
68
 
 
69
  @app.route("/api/chat", methods=["POST"])
70
  def chat():
71
- data = request.json
72
- prompt = data.get("prompt")
73
- history = data.get("history", [])
74
 
75
- if not prompt:
76
- return jsonify({"error": "Prompt is required"}), 400
77
-
78
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  initial_state = {
80
  "input": prompt,
81
  "chat_history": history,
82
  "intent": "",
83
  "context": [],
84
  "answer": "",
85
- "metadata_filters": {}
 
86
  }
87
 
88
  result = agent_app.invoke(initial_state)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return jsonify({
90
- "answer": result.get("answer", ""),
91
- "context": result.get("context", [])
 
 
 
92
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  except Exception as e:
94
- return jsonify({"error": str(e), "status": "error"}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  @app.route("/api/audio-chat", methods=["POST"])
97
  def audio_chat():
98
- if 'audio' not in request.files:
99
- return jsonify({"error": "No audio file part"}), 400
 
 
100
 
101
- file = request.files['audio']
102
- history = json.loads(request.form.get("history", "[]"))
103
-
104
- if file.filename == '':
105
- return jsonify({"error": "No selected file"}), 400
106
-
107
- temp_path = "temp_voice_query.wav"
108
- file.save(temp_path)
109
-
110
- r = sr.Recognizer()
111
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  with sr.AudioFile(temp_path) as source:
113
  audio_data = r.record(source)
114
 
115
  raw_text = r.recognize_google(audio_data)
 
116
 
117
- # Summarize/Refine the transcribed audio text
118
  from models.llm import LLMFactory
119
  from langchain_core.messages import SystemMessage, HumanMessage
120
 
121
  refiner_llm = LLMFactory.get_llm("small")
122
  refine_system = (
123
  "You are an assistant that cleans up and summarizes noisy speech-to-text transcriptions. "
124
- "Your goal is to extract the actual insurance-related question or request from the text.\n\n"
125
- "RULES:\n"
126
- "1. Remove filler words (um, ah, like, you know).\n"
127
- "2. Fix grammatical errors caused by transcription.\n"
128
- "3. If multiple things are mentioned, focus on the core request.\n"
129
  "4. Return ONLY the cleaned, professional question text."
130
  )
131
 
@@ -135,37 +327,64 @@ def audio_chat():
135
  ])
136
  summarized_text = getattr(refine_response, 'content', str(refine_response)).strip()
137
 
138
- # Process with existing Agent using the summarized text
 
 
139
  initial_state = {
140
  "input": summarized_text,
141
  "chat_history": history,
142
  "intent": "",
143
  "context": [],
144
  "answer": "",
145
- "metadata_filters": {}
 
146
  }
147
 
148
  result = agent_app.invoke(initial_state)
149
 
150
- if os.path.exists(temp_path):
 
151
  os.remove(temp_path)
152
-
 
 
 
153
  return jsonify({
154
  "transcription": raw_text,
155
  "summarized_question": summarized_text,
156
  "answer": result.get("answer", ""),
157
- "context": result.get("context", [])
 
 
158
  })
159
 
160
  except sr.UnknownValueError:
161
- if os.path.exists(temp_path): os.remove(temp_path)
162
- return jsonify({"error": "Could not understand audio"}), 400
 
 
 
 
 
 
163
  except sr.RequestError as e:
164
- if os.path.exists(temp_path): os.remove(temp_path)
165
- return jsonify({"error": f"Speech service error: {e}"}), 500
 
 
 
 
 
 
166
  except Exception as e:
167
- if os.path.exists(temp_path): os.remove(temp_path)
168
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
169
 
170
  def update_doc_structure(provider_name, category_name):
171
  """Helper to persist new providers/categories to the config file."""
@@ -175,92 +394,150 @@ def update_doc_structure(provider_name, category_name):
175
  return
176
 
177
  with open(config_path, "r") as f:
178
- config = json.load(f)
179
 
180
  # Find or create provider
181
- provider = next((p for p in config["providers"] if p["name"] == provider_name), None)
182
  if not provider:
183
- # Insert at the beginning (before 'Other')
184
  provider = {"name": provider_name, "categories": []}
185
- config["providers"].insert(0, provider)
186
 
187
  # Add category if new
188
  if category_name not in provider["categories"]:
189
  provider["categories"].append(category_name)
190
- # Sort categories for cleanliness (except if it was General)
191
  if len(provider["categories"]) > 1:
192
  provider["categories"].sort()
193
 
194
  with open(config_path, "w") as f:
195
- json.dump(config, f, indent=4)
 
196
  except Exception as e:
197
- pass
 
198
 
199
  @app.route("/api/upload", methods=["POST"])
200
  def upload():
201
- if 'file' not in request.files:
202
- return jsonify({"error": "No file part"}), 400
203
-
204
- file = request.files['file']
205
- provider = request.form.get("provider")
206
- category = request.form.get("category")
207
- mode = request.form.get("mode", "New Upload") # "New Upload" or "Modify Existing"
208
-
209
- if file.filename == '' or not provider or not category:
210
- return jsonify({"error": "Missing metadata or file"}), 400
211
-
212
- # Persist new structure to JSON
213
- update_doc_structure(provider, category)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
- base_dir = "docs"
216
- target_dir = os.path.join(base_dir, provider, category)
217
- os.makedirs(target_dir, exist_ok=True)
218
-
219
- file_path = os.path.join(target_dir, file.filename)
220
- file.save(file_path)
221
-
222
- delete_source = None
223
- if mode == "Modify Existing":
224
- file_to_modify = request.form.get("file_to_modify")
225
- if file_to_modify:
226
- delete_source = os.path.join(base_dir, provider, category, file_to_modify)
227
- if os.path.abspath(delete_source) != os.path.abspath(file_path):
228
- if os.path.exists(delete_source):
229
- os.remove(delete_source)
230
-
231
- # Start background ingestion
232
- thread = threading.Thread(target=ingest_worker, args=(file_path, delete_source))
233
- thread.start()
234
-
235
- return jsonify({"message": "File uploaded, ingestion started.", "path": file_path})
236
 
237
  @app.route("/api/status", methods=["GET"])
238
  def get_status():
 
239
  return jsonify(ingestion_status)
240
 
 
241
  @app.route("/api/config", methods=["GET"])
242
  def get_config():
 
243
  config_path = os.path.join("configs", "doc_structure.json")
244
  if os.path.exists(config_path):
245
  with open(config_path, "r") as f:
246
  return jsonify(json.load(f))
247
  return jsonify({"providers": []})
248
 
 
249
  @app.route("/api/files", methods=["GET"])
250
  def list_files():
 
251
  provider = request.args.get("provider")
252
  category = request.args.get("category")
253
 
254
  if not provider or not category:
255
  return jsonify({"files": []})
256
-
257
- base_dir = "docs"
258
  target_dir = os.path.join(base_dir, provider, category)
 
259
  if os.path.exists(target_dir):
260
  files = [f for f in os.listdir(target_dir) if f.lower().endswith(('.pdf', '.docx'))]
261
  return jsonify({"files": files})
 
262
  return jsonify({"files": []})
263
 
 
264
  if __name__ == "__main__":
265
- port = int(os.environ.get("PORT", 7860))
266
- app.run(host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
  import threading
4
+ import uuid
5
+ import time
6
  import speech_recognition as sr
7
  from flask import Flask, render_template, request, jsonify
8
+ from flask_cors import CORS
9
  from agents.graph import app as agent_app
10
  from dotenv import load_dotenv
11
  from ingestion.pipeline import IngestionPipeline
12
  from rag.vector_store import VectorStoreManager
13
 
14
+ # Production imports
15
+ from config import config
16
+ from utils.logger import setup_logger, set_request_context, clear_request_context
17
+ from utils.validators import InputValidator, ValidationError
18
+ from utils.metrics import metrics
19
+ from utils.request_logger import request_logger
20
+ from utils.cache import cache_manager
21
+ from api.monitoring import monitoring_bp
22
+
23
  load_dotenv()
24
 
25
+ # Setup logging
26
+ logger = setup_logger(__name__)
27
+
28
  app = Flask(__name__)
29
 
30
+ # Register monitoring blueprint
31
+ app.register_blueprint(monitoring_bp)
32
+
33
+ # Configure CORS
34
+ if config.ENABLE_CORS:
35
+ CORS(app, origins=config.CORS_ORIGINS)
36
+ logger.info(f"CORS enabled for origins: {config.CORS_ORIGINS}")
37
+
38
  # Global state for ingestion tracking
39
  ingestion_status = {
40
  "status": "Idle",
 
42
  "last_error": None
43
  }
44
 
45
+
46
+ @app.before_request
47
+ def before_request():
48
+ """Set up request context and tracking."""
49
+ # Generate request ID
50
+ request_id = str(uuid.uuid4())[:8]
51
+ request.request_id = request_id
52
+ request.start_time = time.time()
53
+
54
+ # Set request context for logging
55
+ user_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
56
+ set_request_context(request_id, user_ip)
57
+
58
+ # Track active requests
59
+ metrics.increment_active_requests()
60
+
61
+ # Log request
62
+ logger.info(f"Request started: {request.method} {request.path}")
63
+
64
+
65
+ @app.after_request
66
+ def after_request(response):
67
+ """Clean up request context and record metrics."""
68
+ if hasattr(request, 'start_time'):
69
+ latency_ms = (time.time() - request.start_time) * 1000
70
+
71
+ # Log response
72
+ logger.info(
73
+ f"Request completed: {request.method} {request.path} "
74
+ f"[{response.status_code}] {latency_ms:.2f}ms"
75
+ )
76
+
77
+ # Record metrics
78
+ metrics.record_request(
79
+ latency_ms=latency_ms,
80
+ error=(response.status_code >= 400)
81
+ )
82
+
83
+ # Decrement active requests
84
+ metrics.decrement_active_requests()
85
+
86
+ # Clear request context
87
+ clear_request_context()
88
+
89
+ return response
90
+
91
+
92
+ @app.errorhandler(Exception)
93
+ def handle_error(error):
94
+ """Global error handler."""
95
+ logger.error(f"Unhandled error: {str(error)}", exc_info=True)
96
+
97
+ # Don't expose internal errors in production
98
+ if config.DEBUG:
99
+ error_msg = str(error)
100
+ else:
101
+ error_msg = "An internal error occurred. Please try again later."
102
+
103
+ return jsonify({
104
+ "error": error_msg,
105
+ "request_id": getattr(request, 'request_id', 'unknown')
106
+ }), 500
107
+
108
+
109
  def ingest_worker(file_path, delete_source=None):
110
  """Worker thread for background ingestion using enhanced pipeline."""
111
  global ingestion_status
112
  try:
113
+ logger.info(f"Starting ingestion for: {file_path}")
114
  ingestion_status["status"] = "Starting..."
115
  ingestion_status["progress"] = 0
116
 
117
+ base_docs_dir = config.DOCS_DIR
118
  pipeline = IngestionPipeline(base_docs_dir)
119
  vector_manager = VectorStoreManager()
120
 
121
  if delete_source:
122
+ logger.info(f"Removing old version: {delete_source}")
123
  ingestion_status["status"] = "Removing old version..."
124
  vector_manager.delete_documents_by_source(delete_source)
125
  ingestion_status["progress"] = 10
 
127
  ingestion_status["status"] = "Processing document..."
128
  ingestion_status["progress"] = 30
129
 
 
 
130
  chunks = pipeline.process_single_file(file_path)
131
 
132
  if chunks:
133
+ logger.info(f"Extracted {len(chunks)} chunks from {file_path}")
134
  ingestion_status["status"] = "Updating Vector Store..."
135
  ingestion_status["progress"] = 70
136
  vector_manager.update_vector_store(chunks)
137
 
138
+ # Invalidate caches
139
+ cache_manager.invalidate_all()
140
+ logger.info("Caches invalidated after ingestion")
141
+
142
+ # Reload the retriever in the agent nodes
143
  from agents.nodes import nodes
144
  nodes.reload_retriever()
145
 
146
  ingestion_status["status"] = "Completed Successfully!"
147
  ingestion_status["progress"] = 100
148
+ logger.info(f"Ingestion completed successfully for: {file_path}")
149
  else:
150
+ logger.warning(f"No content extracted from: {file_path}")
151
  ingestion_status["status"] = "Failed: No content extracted."
152
  ingestion_status["progress"] = 0
153
 
154
  except Exception as e:
155
+ logger.error(f"Ingestion failed: {str(e)}", exc_info=True)
156
  ingestion_status["status"] = "Failed"
157
  ingestion_status["last_error"] = str(e)
158
  ingestion_status["progress"] = 0
159
 
160
+
161
  @app.route("/")
162
  def index():
163
+ """Serve main page."""
164
  return render_template("index.html")
165
 
166
+
167
  @app.route("/api/chat", methods=["POST"])
168
  def chat():
169
+ """Main chat endpoint with full error handling and logging."""
170
+ start_time = time.time()
171
+ request_id = getattr(request, 'request_id', 'unknown')
172
 
 
 
 
173
  try:
174
+ data = request.json
175
+ if not data:
176
+ raise ValidationError("Request body must be JSON")
177
+
178
+ prompt = data.get("prompt")
179
+ history = data.get("history", [])
180
+ extracted_entities = data.get("extracted_entities", {})
181
+
182
+ # Validate input
183
+ if not prompt:
184
+ raise ValidationError("Prompt is required")
185
+
186
+ InputValidator.validate_query_input(prompt)
187
+
188
+ logger.info(f"Chat request: {prompt[:100]}...")
189
+
190
+ # Process with agent
191
  initial_state = {
192
  "input": prompt,
193
  "chat_history": history,
194
  "intent": "",
195
  "context": [],
196
  "answer": "",
197
+ "metadata_filters": {},
198
+ "extracted_entities": extracted_entities
199
  }
200
 
201
  result = agent_app.invoke(initial_state)
202
+
203
+ # Extract results
204
+ answer = result.get("answer", "")
205
+ context = result.get("context", [])
206
+ intent = result.get("intent", "unknown")
207
+ entities = result.get("extracted_entities", {})
208
+
209
+ # Calculate latency
210
+ latency_ms = (time.time() - start_time) * 1000
211
+
212
+ # Log request to database
213
+ request_logger.log_request(
214
+ request_id=request_id,
215
+ query=prompt,
216
+ intent=intent,
217
+ extracted_entities=entities,
218
+ retrieval_count=len(context),
219
+ latency_ms=latency_ms,
220
+ status="success",
221
+ context_sources=[c[:100] for c in context[:5]], # First 5 sources
222
+ user_ip=request.headers.get('X-Forwarded-For', request.remote_addr)
223
+ )
224
+
225
+ # Record intent in metrics
226
+ metrics.record_request(latency_ms=latency_ms, intent=intent, error=False)
227
+
228
+ logger.info(f"Chat completed successfully. Intent: {intent}, Latency: {latency_ms:.2f}ms")
229
+
230
  return jsonify({
231
+ "answer": answer,
232
+ "context": context,
233
+ "extracted_entities": entities,
234
+ "intent": intent,
235
+ "request_id": request_id
236
  })
237
+
238
+ except ValidationError as e:
239
+ logger.warning(f"Validation error: {str(e)}")
240
+ latency_ms = (time.time() - start_time) * 1000
241
+
242
+ request_logger.log_request(
243
+ request_id=request_id,
244
+ query=data.get("prompt", "")[:500] if data else "",
245
+ latency_ms=latency_ms,
246
+ status="validation_error",
247
+ error_message=str(e),
248
+ user_ip=request.headers.get('X-Forwarded-For', request.remote_addr)
249
+ )
250
+
251
+ return jsonify({
252
+ "error": str(e),
253
+ "request_id": request_id
254
+ }), 400
255
+
256
  except Exception as e:
257
+ logger.error(f"Chat error: {str(e)}", exc_info=True)
258
+ latency_ms = (time.time() - start_time) * 1000
259
+
260
+ request_logger.log_request(
261
+ request_id=request_id,
262
+ query=data.get("prompt", "")[:500] if data else "",
263
+ latency_ms=latency_ms,
264
+ status="error",
265
+ error_message=str(e)[:500],
266
+ user_ip=request.headers.get('X-Forwarded-For', request.remote_addr)
267
+ )
268
+
269
+ error_msg = str(e) if config.DEBUG else "An error occurred processing your request"
270
+ return jsonify({
271
+ "error": error_msg,
272
+ "request_id": request_id,
273
+ "status": "error"
274
+ }), 500
275
+
276
 
277
  @app.route("/api/audio-chat", methods=["POST"])
278
  def audio_chat():
279
+ """Audio chat endpoint with validation."""
280
+ start_time = time.time()
281
+ request_id = getattr(request, 'request_id', 'unknown')
282
+ temp_path = None
283
 
 
 
 
 
 
 
 
 
 
 
284
  try:
285
+ if 'audio' not in request.files:
286
+ raise ValidationError("No audio file provided")
287
+
288
+ file = request.files['audio']
289
+ history = json.loads(request.form.get("history", "[]"))
290
+ extracted_entities = json.loads(request.form.get("extracted_entities", "{}"))
291
+
292
+ if file.filename == '':
293
+ raise ValidationError("No file selected")
294
+
295
+ # Save temporarily
296
+ temp_path = f"temp_voice_{request_id}.wav"
297
+ file.save(temp_path)
298
+
299
+ logger.info(f"Processing audio file: {file.filename}")
300
+
301
+ # Transcribe
302
+ r = sr.Recognizer()
303
  with sr.AudioFile(temp_path) as source:
304
  audio_data = r.record(source)
305
 
306
  raw_text = r.recognize_google(audio_data)
307
+ logger.info(f"Transcribed: {raw_text}")
308
 
309
+ # Summarize/refine transcription
310
  from models.llm import LLMFactory
311
  from langchain_core.messages import SystemMessage, HumanMessage
312
 
313
  refiner_llm = LLMFactory.get_llm("small")
314
  refine_system = (
315
  "You are an assistant that cleans up and summarizes noisy speech-to-text transcriptions. "
316
+ "Your goal is to extract the actual insurance-related question or request from the text.\\n\\n"
317
+ "RULES:\\n"
318
+ "1. Remove filler words (um, ah, like, you know).\\n"
319
+ "2. Fix grammatical errors caused by transcription.\\n"
320
+ "3. If multiple things are mentioned, focus on the core request.\\n"
321
  "4. Return ONLY the cleaned, professional question text."
322
  )
323
 
 
327
  ])
328
  summarized_text = getattr(refine_response, 'content', str(refine_response)).strip()
329
 
330
+ logger.info(f"Refined: {summarized_text}")
331
+
332
+ # Process with agent (similar to chat endpoint)
333
  initial_state = {
334
  "input": summarized_text,
335
  "chat_history": history,
336
  "intent": "",
337
  "context": [],
338
  "answer": "",
339
+ "metadata_filters": {},
340
+ "extracted_entities": extracted_entities
341
  }
342
 
343
  result = agent_app.invoke(initial_state)
344
 
345
+ # Clean up temp file
346
+ if temp_path and os.path.exists(temp_path):
347
  os.remove(temp_path)
348
+
349
+ latency_ms = (time.time() - start_time) * 1000
350
+ logger.info(f"Audio chat completed. Latency: {latency_ms:.2f}ms")
351
+
352
  return jsonify({
353
  "transcription": raw_text,
354
  "summarized_question": summarized_text,
355
  "answer": result.get("answer", ""),
356
+ "context": result.get("context", []),
357
+ "extracted_entities": result.get("extracted_entities", {}),
358
+ "request_id": request_id
359
  })
360
 
361
  except sr.UnknownValueError:
362
+ if temp_path and os.path.exists(temp_path):
363
+ os.remove(temp_path)
364
+ logger.warning("Could not understand audio")
365
+ return jsonify({
366
+ "error": "Could not understand audio",
367
+ "request_id": request_id
368
+ }), 400
369
+
370
  except sr.RequestError as e:
371
+ if temp_path and os.path.exists(temp_path):
372
+ os.remove(temp_path)
373
+ logger.error(f"Speech service error: {e}")
374
+ return jsonify({
375
+ "error": f"Speech service error: {e}",
376
+ "request_id": request_id
377
+ }), 500
378
+
379
  except Exception as e:
380
+ if temp_path and os.path.exists(temp_path):
381
+ os.remove(temp_path)
382
+ logger.error(f"Audio chat error: {str(e)}", exc_info=True)
383
+ return jsonify({
384
+ "error": str(e) if config.DEBUG else "Error processing audio",
385
+ "request_id": request_id
386
+ }), 500
387
+
388
 
389
  def update_doc_structure(provider_name, category_name):
390
  """Helper to persist new providers/categories to the config file."""
 
394
  return
395
 
396
  with open(config_path, "r") as f:
397
+ doc_config = json.load(f)
398
 
399
  # Find or create provider
400
+ provider = next((p for p in doc_config["providers"] if p["name"] == provider_name), None)
401
  if not provider:
 
402
  provider = {"name": provider_name, "categories": []}
403
+ doc_config["providers"].insert(0, provider)
404
 
405
  # Add category if new
406
  if category_name not in provider["categories"]:
407
  provider["categories"].append(category_name)
 
408
  if len(provider["categories"]) > 1:
409
  provider["categories"].sort()
410
 
411
  with open(config_path, "w") as f:
412
+ json.dump(doc_config, f, indent=4)
413
+
414
  except Exception as e:
415
+ logger.warning(f"Failed to update doc structure: {e}")
416
+
417
 
418
  @app.route("/api/upload", methods=["POST"])
419
  def upload():
420
+ """File upload endpoint with validation."""
421
+ try:
422
+ if 'file' not in request.files:
423
+ raise ValidationError("No file provided")
424
+
425
+ file = request.files['file']
426
+ provider = request.form.get("provider")
427
+ category = request.form.get("category")
428
+ mode = request.form.get("mode", "New Upload")
429
+
430
+ if file.filename == '' or not provider or not category:
431
+ raise ValidationError("Missing required fields: file, provider, or category")
432
+
433
+ # Validate file
434
+ file.seek(0, os.SEEK_END)
435
+ file_size = file.tell()
436
+ file.seek(0)
437
+
438
+ InputValidator.validate_file_upload(file.filename, file_size)
439
+
440
+ # Sanitize filename
441
+ safe_filename = InputValidator.sanitize_filename(file.filename)
442
+ logger.info(f"Uploading file: {safe_filename} ({file_size} bytes)")
443
+
444
+ # Update doc structure
445
+ update_doc_structure(provider, category)
446
+
447
+ # Save file
448
+ base_dir = config.DOCS_DIR
449
+ target_dir = os.path.join(base_dir, provider, category)
450
+ os.makedirs(target_dir, exist_ok=True)
451
+
452
+ file_path = os.path.join(target_dir, safe_filename)
453
+ file.save(file_path)
454
+
455
+ logger.info(f"File saved to: {file_path}")
456
+
457
+ # Handle file modification
458
+ delete_source = None
459
+ if mode == "Modify Existing":
460
+ file_to_modify = request.form.get("file_to_modify")
461
+ if file_to_modify:
462
+ delete_source = os.path.join(base_dir, provider, category, file_to_modify)
463
+ if os.path.abspath(delete_source) != os.path.abspath(file_path):
464
+ if os.path.exists(delete_source):
465
+ os.remove(delete_source)
466
+ logger.info(f"Removed old file: {delete_source}")
467
+
468
+ # Start background ingestion
469
+ thread = threading.Thread(target=ingest_worker, args=(file_path, delete_source))
470
+ thread.start()
471
+
472
+ return jsonify({
473
+ "message": "File uploaded successfully, ingestion started.",
474
+ "filename": safe_filename,
475
+ "path": file_path
476
+ })
477
+
478
+ except ValidationError as e:
479
+ logger.warning(f"Upload validation error: {str(e)}")
480
+ return jsonify({"error": str(e)}), 400
481
+
482
+ except Exception as e:
483
+ logger.error(f"Upload error: {str(e)}", exc_info=True)
484
+ return jsonify({
485
+ "error": str(e) if config.DEBUG else "Upload failed"
486
+ }), 500
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
 
489
  @app.route("/api/status", methods=["GET"])
490
  def get_status():
491
+ """Get ingestion status."""
492
  return jsonify(ingestion_status)
493
 
494
+
495
  @app.route("/api/config", methods=["GET"])
496
  def get_config():
497
+ """Get document structure configuration."""
498
  config_path = os.path.join("configs", "doc_structure.json")
499
  if os.path.exists(config_path):
500
  with open(config_path, "r") as f:
501
  return jsonify(json.load(f))
502
  return jsonify({"providers": []})
503
 
504
+
505
  @app.route("/api/files", methods=["GET"])
506
  def list_files():
507
+ """List files in a provider/category directory."""
508
  provider = request.args.get("provider")
509
  category = request.args.get("category")
510
 
511
  if not provider or not category:
512
  return jsonify({"files": []})
513
+
514
+ base_dir = config.DOCS_DIR
515
  target_dir = os.path.join(base_dir, provider, category)
516
+
517
  if os.path.exists(target_dir):
518
  files = [f for f in os.listdir(target_dir) if f.lower().endswith(('.pdf', '.docx'))]
519
  return jsonify({"files": files})
520
+
521
  return jsonify({"files": []})
522
 
523
+
524
  if __name__ == "__main__":
525
+ # Log configuration on startup
526
+ logger.info(f"Starting {config.APP_NAME} v{config.VERSION}")
527
+ logger.info(f"Environment: {config.ENVIRONMENT.value}")
528
+ logger.info(f"Configuration: {json.dumps(config.get_summary(), indent=2)}")
529
+
530
+ # Validate configuration
531
+ try:
532
+ config.validate()
533
+ logger.info("Configuration validated successfully")
534
+ except ValueError as e:
535
+ logger.error(f"Configuration validation failed: {e}")
536
+
537
+ # Start application
538
+ port = config.PORT
539
+ host = config.HOST
540
+ debug = config.DEBUG
541
+
542
+ logger.info(f"Starting server on {host}:{port} (debug={debug})")
543
+ app.run(host=host, port=port, debug=debug)
config.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Production-grade configuration management with environment-based settings.
3
+ """
4
+ import os
5
+ from typing import Optional
6
+ from dotenv import load_dotenv
7
+ from enum import Enum
8
+
9
+ load_dotenv()
10
+
11
+
12
class Environment(str, Enum):
    """Deployment environment names.

    Subclasses ``str`` so members compare equal to their plain-string values
    (e.g. ``Environment.STAGING == "staging"``).
    """
    DEVELOPMENT = "development"
    STAGING = "staging"
    PRODUCTION = "production"
16
+
17
+
18
class Config:
    """Base configuration class with defaults.

    All settings are class attributes resolved once at import time from
    environment variables (with sensible defaults); the class itself serves
    as a read-only settings namespace.
    """

    # Environment. Fall back to DEVELOPMENT instead of raising ValueError at
    # import time when ENVIRONMENT holds an unrecognized value.
    try:
        ENVIRONMENT: Environment = Environment(os.getenv("ENVIRONMENT", "development"))
    except ValueError:
        ENVIRONMENT = Environment.DEVELOPMENT
    DEBUG: bool = ENVIRONMENT == Environment.DEVELOPMENT

    # Application
    APP_NAME: str = "Insurance RAG System"
    VERSION: str = "1.0.0"
    PORT: int = int(os.getenv("PORT", 7860))
    HOST: str = os.getenv("HOST", "0.0.0.0")

    # LLM Configuration
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    GROQ_MODEL_SMALL: str = os.getenv("GROQ_MODEL_SMALL", "llama-3.1-8b-instant")
    GROQ_MODEL_MEDIUM: str = os.getenv("GROQ_MODEL_MEDIUM", "llama-3.1-8b-instant")
    GROQ_MODEL_LARGE: str = os.getenv("GROQ_MODEL_LARGE", "llama-3.1-8b-instant")
    LLM_TIMEOUT: int = int(os.getenv("LLM_TIMEOUT", 30))
    LLM_MAX_RETRIES: int = int(os.getenv("LLM_MAX_RETRIES", 3))

    # LangChain / LangSmith Tracing
    LANGCHAIN_TRACING_V2: bool = os.getenv("LANGCHAIN_TRACING_V2", "false").lower() == "true"
    LANGCHAIN_ENDPOINT: str = os.getenv("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")
    LANGCHAIN_API_KEY: Optional[str] = os.getenv("LANGCHAIN_API_KEY")
    LANGCHAIN_PROJECT: str = os.getenv("LANGCHAIN_PROJECT", "Insurance-RAG")

    # Logging Configuration
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE_PATH: str = os.getenv("LOG_FILE_PATH", "logs/app.log")
    LOG_MAX_BYTES: int = int(os.getenv("LOG_MAX_BYTES", 10485760))  # 10MB
    LOG_BACKUP_COUNT: int = int(os.getenv("LOG_BACKUP_COUNT", 5))
    LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # Performance Configuration
    MAX_WORKERS: int = int(os.getenv("MAX_WORKERS", 4))
    REQUEST_TIMEOUT: int = int(os.getenv("REQUEST_TIMEOUT", 30))
    CACHE_TTL: int = int(os.getenv("CACHE_TTL", 300))  # 5 minutes
    CACHE_MAX_SIZE: int = int(os.getenv("CACHE_MAX_SIZE", 1000))

    # Vector Store Configuration
    VECTOR_STORE_PATH: str = os.getenv("VECTOR_STORE_PATH", "rag/faiss_index")
    EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
    EMBEDDING_CACHE_PATH: str = os.getenv("EMBEDDING_CACHE_PATH", "rag/embeddings_cache.json")
    LLM_CACHE_DB_PATH: str = os.getenv("LLM_CACHE_DB_PATH", "rag/llm_cache.db")

    # Retrieval Configuration
    DEFAULT_RETRIEVAL_K: int = int(os.getenv("DEFAULT_RETRIEVAL_K", 5))
    MAX_RETRIEVAL_K: int = int(os.getenv("MAX_RETRIEVAL_K", 20))
    RETRIEVAL_SCORE_THRESHOLD: float = float(os.getenv("RETRIEVAL_SCORE_THRESHOLD", 0.5))

    # Security Configuration. Comma-split lists strip whitespace so values
    # like "pdf, docx" parse into clean tokens.
    MAX_FILE_SIZE_MB: int = int(os.getenv("MAX_FILE_SIZE_MB", 50))
    ALLOWED_FILE_TYPES: list = [t.strip() for t in os.getenv("ALLOWED_FILE_TYPES", "pdf,docx").split(",")]
    RATE_LIMIT_PER_MINUTE: int = int(os.getenv("RATE_LIMIT_PER_MINUTE", 60))
    ENABLE_API_KEY_AUTH: bool = os.getenv("ENABLE_API_KEY_AUTH", "false").lower() == "true"
    API_KEY: Optional[str] = os.getenv("API_KEY", None)
    ENABLE_CORS: bool = os.getenv("ENABLE_CORS", "true").lower() == "true"
    CORS_ORIGINS: list = [o.strip() for o in os.getenv("CORS_ORIGINS", "*").split(",")]

    # Monitoring Configuration
    ENABLE_METRICS: bool = os.getenv("ENABLE_METRICS", "true").lower() == "true"
    METRICS_PORT: int = int(os.getenv("METRICS_PORT", 9090))
    REQUEST_LOG_DB_PATH: str = os.getenv("REQUEST_LOG_DB_PATH", "utils/request_logs.db")

    # Circuit Breaker Configuration
    CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = int(os.getenv("CIRCUIT_BREAKER_FAILURE_THRESHOLD", 5))
    CIRCUIT_BREAKER_TIMEOUT: int = int(os.getenv("CIRCUIT_BREAKER_TIMEOUT", 60))
    CIRCUIT_BREAKER_EXPECTED_EXCEPTION: type = Exception

    # Document Processing
    DOCS_DIR: str = os.getenv("DOCS_DIR", "docs")
    BROCHURE_CHUNK_SIZE: int = int(os.getenv("BROCHURE_CHUNK_SIZE", 2600))
    BROCHURE_CHUNK_OVERLAP: int = int(os.getenv("BROCHURE_CHUNK_OVERLAP", 400))
    CIS_CHUNK_SIZE: int = int(os.getenv("CIS_CHUNK_SIZE", 1300))
    CIS_CHUNK_OVERLAP: int = int(os.getenv("CIS_CHUNK_OVERLAP", 160))
    TABLE_CHUNK_SIZE: int = int(os.getenv("TABLE_CHUNK_SIZE", 800))
    TABLE_CHUNK_OVERLAP: int = int(os.getenv("TABLE_CHUNK_OVERLAP", 100))

    @classmethod
    def validate(cls) -> bool:
        """Validate critical configuration values.

        Returns True when valid. Raises ValueError listing *every* problem
        found (not just the first) so operators can fix them in one pass.
        """
        errors = []

        # Check required API keys
        if not cls.GROQ_API_KEY:
            errors.append("GROQ_API_KEY is not set")

        # Validate file size limits
        if cls.MAX_FILE_SIZE_MB <= 0 or cls.MAX_FILE_SIZE_MB > 500:
            errors.append(f"MAX_FILE_SIZE_MB must be between 1 and 500, got {cls.MAX_FILE_SIZE_MB}")

        # Validate rate limits
        if cls.RATE_LIMIT_PER_MINUTE <= 0:
            errors.append(f"RATE_LIMIT_PER_MINUTE must be positive, got {cls.RATE_LIMIT_PER_MINUTE}")

        # Validate timeouts
        if cls.REQUEST_TIMEOUT <= 0:
            errors.append(f"REQUEST_TIMEOUT must be positive, got {cls.REQUEST_TIMEOUT}")

        # Validate cache settings
        if cls.CACHE_TTL < 0:
            errors.append(f"CACHE_TTL cannot be negative, got {cls.CACHE_TTL}")

        # API-key auth needs an actual key to check against
        if cls.ENABLE_API_KEY_AUTH and not cls.API_KEY:
            errors.append("ENABLE_API_KEY_AUTH is true but API_KEY is not set")

        if errors:
            error_msg = "\n".join(f" - {err}" for err in errors)
            raise ValueError(f"Configuration validation failed:\n{error_msg}")

        return True

    @classmethod
    def get_summary(cls) -> dict:
        """Get configuration summary for logging (deliberately excludes secrets)."""
        return {
            "environment": cls.ENVIRONMENT.value,
            "debug": cls.DEBUG,
            "app_name": cls.APP_NAME,
            "version": cls.VERSION,
            "port": cls.PORT,
            "log_level": cls.LOG_LEVEL,
            "max_file_size_mb": cls.MAX_FILE_SIZE_MB,
            "rate_limit_per_minute": cls.RATE_LIMIT_PER_MINUTE,
            "cache_ttl": cls.CACHE_TTL,
            "enable_metrics": cls.ENABLE_METRICS,
            "enable_api_key_auth": cls.ENABLE_API_KEY_AUTH,
        }
148
+
149
+
150
class DevelopmentConfig(Config):
    """Development-specific configuration: debug on, verbose logging."""
    DEBUG = True
    LOG_LEVEL = "DEBUG"
154
+
155
+
156
class ProductionConfig(Config):
    """Production-specific configuration: debug off, warnings-and-up logging."""
    DEBUG = False
    LOG_LEVEL = "WARNING"
160
+
161
+
162
# Configuration factory
def get_config() -> type[Config]:
    """Select the configuration class for the current environment.

    Returns the class itself (all settings are class attributes), falling
    back to the base ``Config`` when ENVIRONMENT is unset or unrecognized
    instead of raising ValueError at import time.
    """
    env = os.getenv("ENVIRONMENT", "development")

    config_map = {
        Environment.DEVELOPMENT: DevelopmentConfig,
        Environment.STAGING: Config,
        Environment.PRODUCTION: ProductionConfig,
    }

    try:
        return config_map.get(Environment(env), Config)
    except ValueError:
        # Unknown ENVIRONMENT value: use base defaults rather than crash.
        return Config
174
+
175
+
176
# Global config instance.
# NOTE: get_config() returns a *class* (settings are class attributes), so
# `config` is used as a read-only settings namespace throughout the app.
config = get_config()

# Validate on import: warn loudly but do not abort, so the app can still
# boot in environments missing optional secrets (e.g. local development).
try:
    config.validate()
except ValueError as e:
    print(f"[CONFIG ERROR] {e}")
    print("[CONFIG] Continuing with invalid configuration - some features may not work correctly")
models/llm.py CHANGED
@@ -1,39 +1,136 @@
1
  import os
 
 
 
 
 
2
  from langchain_groq import ChatGroq
3
  from dotenv import load_dotenv
 
4
 
5
  load_dotenv()
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  class LLMFactory:
8
 
9
  @staticmethod
10
- def get_llm(model_type="small"):
11
  """
12
- Returns a Groq LLM instance based on type.
 
13
  """
14
  api_key = os.getenv("GROQ_API_KEY")
15
 
16
- # Groq specific models from environment
17
- if model_type == "small":
18
- model_name = os.getenv("GROQ_MODEL_SMALL", "llama-3.1-8b-instant")
19
- elif model_type == "medium":
20
- model_name = os.getenv("GROQ_MODEL_MEDIUM", "llama-3.1-8b-instant")
 
 
21
  else:
22
- model_name = os.getenv("GROQ_MODEL_LARGE", "llama-3.1-8b-instant")
23
 
24
  if api_key:
25
- return ChatGroq(
26
  model=model_name,
27
  temperature=0,
28
  groq_api_key=api_key,
29
- max_retries=3, # Automatically retry on rate limits or transient errors
30
- timeout=30 # Prevent hanging on slow responses
 
31
  )
 
32
 
33
  # Fallback to mock for testing without key
34
  class MockLLM:
35
  def invoke(self, msg):
36
- return f"[Groq Mock Response for {model_type}]: Model {model_name} processing..."
37
 
38
  return MockLLM()
39
-
 
1
import hashlib
import json
import os
import sqlite3
import threading
import time
from typing import Any, Optional

from dotenv import load_dotenv
from langchain_groq import ChatGroq
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
10
 
11
  load_dotenv()
12
 
13
class LLMCache:
    """
    Simple SQLite-based cache for LLM responses.

    Singleton: all callers share one connection to rag/llm_cache.db. The
    connection is opened with check_same_thread=False so it may be used from
    worker threads; since sqlite3 connections are not safe for concurrent
    use, a lock serializes every database operation.
    """
    _instance = None
    _instance_lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking so concurrent first calls cannot each
        # build (and leak) a connection.
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    instance = super(LLMCache, cls).__new__(cls)
                    instance._init_db()
                    cls._instance = instance
        return cls._instance

    def _init_db(self):
        """Open (or create) the cache database and ensure the schema exists."""
        self.db_path = "rag/llm_cache.db"
        os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self._db_lock = threading.Lock()
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS responses (
                id TEXT PRIMARY KEY,
                prompt_hash TEXT,
                model TEXT,
                response TEXT,
                timestamp REAL
            )
        """)
        self.conn.commit()

    @staticmethod
    def _key(prompt: str) -> str:
        # md5 is acceptable here: keys only need to be cheap and
        # well-distributed, not collision-resistant against attackers.
        return hashlib.md5(prompt.encode()).hexdigest()

    def get(self, prompt: str, model: str) -> Optional[str]:
        """Return the cached response for (prompt, model), or None on a miss."""
        prompt_hash = self._key(prompt)
        with self._db_lock:
            cursor = self.conn.execute(
                "SELECT response FROM responses WHERE prompt_hash = ? AND model = ?",
                (prompt_hash, model)
            )
            row = cursor.fetchone()
        return row[0] if row else None

    def set(self, prompt: str, model: str, response: str):
        """Store (or overwrite) the response for (prompt, model)."""
        prompt_hash = self._key(prompt)
        with self._db_lock:
            self.conn.execute(
                "INSERT OR REPLACE INTO responses (id, prompt_hash, model, response, timestamp) VALUES (?, ?, ?, ?, ?)",
                (f"{prompt_hash}_{model}", prompt_hash, model, response, time.time())
            )
            self.conn.commit()
56
+
57
class CachedChatGroq:
    """
    Wrapper around ChatGroq that adds response caching and retry with
    exponential backoff.
    """

    class _CachedResponse:
        """Minimal stand-in for a ChatGroq response: exposes only .content.

        Defined once at class level instead of being rebuilt on every
        cache hit inside invoke().
        """
        def __init__(self, content):
            self.content = content

    def __init__(self, llm_instance, model_name):
        self.llm = llm_instance
        self.model_name = model_name
        self.cache = LLMCache()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception)
    )
    def invoke(self, messages: Any) -> Any:
        # Build a stable cache key from the message contents.
        if isinstance(messages, list):
            prompt_str = json.dumps([m.content for m in messages], sort_keys=True)
        else:
            prompt_str = str(messages)

        # Cache lookup. `is not None` (not truthiness) so a legitimately
        # empty cached response still counts as a hit.
        cached_resp = self.cache.get(prompt_str, self.model_name)
        if cached_resp is not None:
            return self._CachedResponse(cached_resp)

        # Call API; on failure tenacity re-runs this whole method.
        try:
            response = self.llm.invoke(messages)
            content = getattr(response, 'content', str(response))

            # Cache success
            self.cache.set(prompt_str, self.model_name, content)
            return response
        except Exception as e:
            print(f"[LLM Error] Rate limit or network issue: {e}. Retrying...")
            raise
98
+
99
+
100
class LLMFactory:
    """Factory that routes requests to an appropriately sized Groq model."""

    @staticmethod
    def get_llm(complexity="low"):
        """
        Returns a routed and cached LLM instance.
        complexity: "low" (default, instant logic) or "high" (versatile logic)

        Without a GROQ_API_KEY the factory degrades to a mock object so the
        rest of the pipeline can run in tests.
        """
        api_key = os.getenv("GROQ_API_KEY")

        # Cost-effective default; "high" stays ready to swap to a larger
        # model (e.g. llama-3.1-70b-versatile) via GROQ_MODEL_LARGE.
        default_model = "llama-3.1-8b-instant"
        env_var = "GROQ_MODEL_LARGE" if complexity == "high" else "GROQ_MODEL_SMALL"
        model_name = os.getenv(env_var, default_model)

        if not api_key:
            # Fallback to mock for testing without key
            class MockLLM:
                def invoke(self, msg):
                    return f"[Groq Mock Response for {complexity}]: Model {model_name} processing..."

            return MockLLM()

        real_llm = ChatGroq(
            model=model_name,
            temperature=0,
            groq_api_key=api_key,
            # Retries are handled by the wrapper, so keep internal retries low.
            max_retries=1,
            timeout=30
        )
        return CachedChatGroq(real_llm, model_name)
 
rag/embeddings_cache.json ADDED
The diff for this file is too large to render. See raw diff
 
rag/retriever.py CHANGED
@@ -40,8 +40,8 @@ class RAGRetriever:
40
  match_found = False
41
  for v_item in norm_values:
42
  if key in ["insurer", "insurance_type"]:
43
- # Exact match for categories
44
- if v_item == met_val_str:
45
  match_found = True
46
  break
47
  else:
@@ -54,7 +54,7 @@ class RAGRetriever:
54
  else:
55
  norm_value = str(value).lower().strip()
56
  if key in ["insurer", "insurance_type"]:
57
- if norm_value != met_val_str:
58
  return False
59
  else:
60
  if norm_value not in met_val_str and met_val_str not in norm_value:
 
40
  match_found = False
41
  for v_item in norm_values:
42
  if key in ["insurer", "insurance_type"]:
43
+ # Flexible match for categories (containment)
44
+ if v_item in met_val_str or met_val_str in v_item:
45
  match_found = True
46
  break
47
  else:
 
54
  else:
55
  norm_value = str(value).lower().strip()
56
  if key in ["insurer", "insurance_type"]:
57
+ if norm_value not in met_val_str and met_val_str not in norm_value:
58
  return False
59
  else:
60
  if norm_value not in met_val_str and met_val_str not in norm_value:
rag/vector_store.py CHANGED
@@ -1,10 +1,83 @@
1
  import os
2
  import threading
 
 
3
  from langchain_community.vectorstores import FAISS
4
  from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain_core.documents import Document
6
  from typing import List
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  class VectorStoreManager:
9
  _embeddings = None
10
  _lock = threading.Lock()
@@ -13,9 +86,12 @@ class VectorStoreManager:
13
  self.index_path = index_path
14
  if VectorStoreManager._embeddings is None:
15
  # Load embeddings model once
16
- VectorStoreManager._embeddings = HuggingFaceEmbeddings(
17
  model_name="sentence-transformers/all-MiniLM-L6-v2"
18
  )
 
 
 
19
  self.embeddings = VectorStoreManager._embeddings
20
 
21
  def create_vector_store(self, documents: List[Document], batch_size: int = 100):
 
1
  import os
2
  import threading
3
+ import json
4
+ import hashlib
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_huggingface import HuggingFaceEmbeddings
7
  from langchain_core.documents import Document
8
  from typing import List
9
 
10
+ from langchain_core.embeddings import Embeddings
11
+ from typing import List
12
+
13
class CachedEmbeddings(Embeddings):
    """
    Wrapper for embeddings to cache results locally.
    Avoids re-computing embeddings for identical text.

    The cache is a JSON file mapping md5(text) -> embedding vector. A lock
    guards cache mutation and the save path so a save never iterates the
    dict while another thread is inserting into it.
    """
    def __init__(self, wrapped_embeddings, cache_path="rag/embeddings_cache.json"):
        self.wrapped = wrapped_embeddings
        self.cache_path = cache_path
        self.cache = {}
        self._lock = threading.Lock()
        self._load_cache()

    @staticmethod
    def _hash(text: str) -> str:
        # md5 is fine for a cache key: cheap and well-distributed; no
        # adversarial collision resistance is needed here.
        return hashlib.md5(text.encode()).hexdigest()

    def _load_cache(self):
        """Load the on-disk cache; a corrupt/unreadable file resets it."""
        if os.path.exists(self.cache_path):
            try:
                with open(self.cache_path, "r", encoding="utf-8") as f:
                    self.cache = json.load(f)
            except (OSError, json.JSONDecodeError):
                # Narrow except (not bare) so KeyboardInterrupt/SystemExit
                # still propagate; a broken cache file just starts fresh.
                self.cache = {}

    def _save_cache(self):
        """Persist the cache to disk; failures are logged, never fatal."""
        with self._lock:
            try:
                os.makedirs(os.path.dirname(self.cache_path), exist_ok=True)
                with open(self.cache_path, "w", encoding="utf-8") as f:
                    json.dump(self.cache, f)
            except Exception as e:
                print(f"Failed to save embedding cache: {e}")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed texts, serving repeats from the cache and computing only misses."""
        results = []
        texts_to_embed = []
        indices_to_embed = []

        # Serve cache hits; remember the positions of the misses.
        for i, text in enumerate(texts):
            h = self._hash(text)
            if h in self.cache:
                results.append(self.cache[h])
            else:
                results.append(None)  # Placeholder
                texts_to_embed.append(text)
                indices_to_embed.append(i)

        # Compute missing
        if texts_to_embed:
            print(f"Computing embeddings for {len(texts_to_embed)} new items...")
            new_embeddings = self.wrapped.embed_documents(texts_to_embed)

            # Mutate the cache under the lock, then release before saving
            # (_save_cache re-acquires the same non-reentrant lock).
            with self._lock:
                for idx, emb, text in zip(indices_to_embed, new_embeddings, texts_to_embed):
                    results[idx] = emb
                    self.cache[self._hash(text)] = emb

            # Save incrementally
            self._save_cache()

        return results

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string, using and updating the cache."""
        h = self._hash(text)
        if h in self.cache:
            return self.cache[h]

        emb = self.wrapped.embed_query(text)
        with self._lock:
            self.cache[h] = emb
        self._save_cache()
        return emb
+ return emb
80
+
81
  class VectorStoreManager:
82
  _embeddings = None
83
  _lock = threading.Lock()
 
86
  self.index_path = index_path
87
  if VectorStoreManager._embeddings is None:
88
  # Load embeddings model once
89
+ base_embeddings = HuggingFaceEmbeddings(
90
  model_name="sentence-transformers/all-MiniLM-L6-v2"
91
  )
92
+ # Wrap with caching
93
+ VectorStoreManager._embeddings = CachedEmbeddings(base_embeddings)
94
+
95
  self.embeddings = VectorStoreManager._embeddings
96
 
97
  def create_vector_store(self, documents: List[Document], batch_size: int = 100):
requirements.txt CHANGED
@@ -18,4 +18,6 @@ SpeechRecognition
18
  langchain-groq
19
  requests
20
  pdfplumber
21
- python-docx
 
 
 
18
  langchain-groq
19
  requests
20
  pdfplumber
21
+ python-docx
22
+ tenacity
23
+ langsmith
static/css/style.css CHANGED
@@ -1,14 +1,20 @@
1
  @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap');
2
 
3
  :root {
4
- --primary: #6366f1;
5
- --primary-hover: #4f46e5;
6
- --bg-dark: #0f172a;
7
- --card-bg: rgba(30, 41, 59, 0.7);
 
 
 
8
  --text-main: #f8fafc;
9
  --text-muted: #94a3b8;
10
- --glass-border: rgba(255, 255, 255, 0.1);
11
- --animation-speed: 0.3s;
 
 
 
12
  }
13
 
14
  * {
@@ -19,51 +25,195 @@
19
 
20
  body {
21
  font-family: 'Outfit', sans-serif;
22
- background: radial-gradient(circle at top right, #1e1b4b, #0f172a);
23
  color: var(--text-main);
24
  height: 100vh;
25
  overflow: hidden;
26
  display: flex;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
 
29
  /* Sidebar Styling */
30
  .sidebar {
31
- width: 350px;
32
- background: rgba(15, 23, 42, 0.8);
33
- backdrop-filter: blur(20px);
34
  border-right: 1px solid var(--glass-border);
35
- padding: 2rem;
36
  display: flex;
37
  flex-direction: column;
38
- z-index: 10;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
 
41
  .logo {
42
  display: flex;
43
  align-items: center;
44
  gap: 12px;
45
- margin-bottom: 3rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
  .logo span {
49
  font-size: 1.5rem;
50
- font-weight: 700;
51
- background: linear-gradient(to right, #818cf8, #c084fc);
 
 
52
  -webkit-background-clip: text;
53
  background-clip: text;
54
  -webkit-text-fill-color: transparent;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
56
 
57
- .sidebar-section {
58
- margin-bottom: 2rem;
 
 
 
59
  }
60
 
61
- .sidebar-section h3 {
62
- font-size: 0.85rem;
63
- text-transform: uppercase;
64
- letter-spacing: 0.1em;
65
- color: var(--text-muted);
66
- margin-bottom: 1rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
 
69
  /* Main Content Area */
@@ -72,16 +222,40 @@ body {
72
  display: flex;
73
  flex-direction: column;
74
  position: relative;
75
- padding: 2rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
 
78
  /* Glass Card Component */
79
  .glass-card {
80
  background: var(--card-bg);
81
- backdrop-filter: blur(12px);
82
  border: 1px solid var(--glass-border);
83
- border-radius: 24px;
84
- box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.3);
 
 
 
 
 
 
85
  }
86
 
87
  /* Chat Container */
@@ -91,6 +265,7 @@ body {
91
  flex-direction: column;
92
  overflow: hidden;
93
  margin-bottom: 1.5rem;
 
94
  }
95
 
96
  .chat-messages {
@@ -100,6 +275,7 @@ body {
100
  display: flex;
101
  flex-direction: column;
102
  gap: 1.5rem;
 
103
  }
104
 
105
  /* Scrollbar styling */
@@ -113,12 +289,15 @@ body {
113
  }
114
 
115
  .message {
116
- max-width: 80%;
117
- padding: 1rem 1.5rem;
118
- border-radius: 20px;
119
  font-size: 0.95rem;
120
- line-height: 1.6;
121
- animation: fadeIn 0.4s ease-out forwards;
 
 
 
122
  }
123
 
124
  .message p {
@@ -185,15 +364,18 @@ body {
185
 
186
  .user-message {
187
  align-self: flex-end;
188
- background: var(--primary);
189
  color: white;
190
  border-bottom-right-radius: 4px;
 
 
191
  }
192
 
193
  .bot-message {
194
  align-self: flex-start;
195
- background: rgba(255, 255, 255, 0.05);
196
- border: 1px solid var(--glass-border);
 
197
  border-top-left-radius: 4px;
198
  }
199
 
@@ -276,19 +458,20 @@ body {
276
  }
277
 
278
  .send-btn {
279
- background: var(--primary);
280
  border: none;
281
- width: 45px;
282
- height: 45px;
283
- border-radius: 12px;
284
  color: white;
285
  cursor: pointer;
286
- transition: var(--animation-speed);
 
287
  }
288
 
289
  .send-btn:hover {
290
- background: var(--primary-hover);
291
- transform: scale(1.05);
292
  }
293
 
294
  /* Form Styling (Sidebar) */
@@ -322,20 +505,24 @@ select option {
322
 
323
  .primary-btn {
324
  width: 100%;
325
- background: var(--primary);
326
  border: none;
327
  padding: 1rem;
328
- border-radius: 12px;
329
  color: white;
330
- font-weight: 600;
 
 
331
  cursor: pointer;
332
- transition: var(--animation-speed);
333
  margin-top: 1rem;
 
334
  }
335
 
336
  .primary-btn:hover {
337
- background: var(--primary-hover);
338
- box-shadow: 0 0 20px rgba(99, 102, 241, 0.4);
 
339
  }
340
 
341
  /* Status Bar */
 
1
  @import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap');
2
 
3
  :root {
4
+ --primary: #9333ea;
5
+ --primary-glow: rgba(147, 51, 234, 0.4);
6
+ --secondary: #c026d3;
7
+ --accent: #6366f1;
8
+ --bg-dark: #05010d;
9
+ --sidebar-bg: rgba(10, 2, 25, 0.85);
10
+ --card-bg: rgba(20, 10, 40, 0.45);
11
  --text-main: #f8fafc;
12
  --text-muted: #94a3b8;
13
+ --glass-border: rgba(147, 51, 234, 0.2);
14
+ --glass-border-light: rgba(255, 255, 255, 0.08);
15
+ --animation-speed: 0.4s;
16
+ --sidebar-width: 320px;
17
+ --sidebar-collapsed-width: 80px;
18
  }
19
 
20
  * {
 
25
 
26
  body {
27
  font-family: 'Outfit', sans-serif;
28
+ background-color: var(--bg-dark);
29
  color: var(--text-main);
30
  height: 100vh;
31
  overflow: hidden;
32
  display: flex;
33
+ position: relative;
34
+ }
35
+
36
+ /* Dynamic Background Glows (Spotlights) */
37
+ .bg-glow-container {
38
+ position: fixed;
39
+ top: 0;
40
+ left: 0;
41
+ width: 100%;
42
+ height: 100%;
43
+ z-index: -1;
44
+ overflow: hidden;
45
+ background: #05010d;
46
+ }
47
+
48
+ .glow-blob {
49
+ position: absolute;
50
+ width: 600px;
51
+ height: 600px;
52
+ border-radius: 50%;
53
+ filter: blur(120px);
54
+ opacity: 0.4;
55
+ animation: moveGlow 25s infinite alternate;
56
+ }
57
+
58
+ .glow-1 {
59
+ top: -10%;
60
+ right: -10%;
61
+ background: radial-gradient(circle, #9333ea, transparent);
62
+ }
63
+
64
+ .glow-2 {
65
+ bottom: -15%;
66
+ left: 10%;
67
+ background: radial-gradient(circle, #4f46e5, transparent);
68
+ animation-delay: -5s;
69
+ }
70
+
71
+ .glow-3 {
72
+ top: 50%;
73
+ right: 30%;
74
+ width: 400px;
75
+ height: 400px;
76
+ background: radial-gradient(circle, #c026d3, transparent);
77
+ opacity: 0.25;
78
+ animation-delay: -12s;
79
+ }
80
+
81
+ @keyframes moveGlow {
82
+ 0% {
83
+ transform: translate(0, 0) scale(1);
84
+ }
85
+
86
+ 33% {
87
+ transform: translate(100px, 150px) scale(1.1);
88
+ }
89
+
90
+ 66% {
91
+ transform: translate(-120px, 80px) scale(0.9);
92
+ }
93
+
94
+ 100% {
95
+ transform: translate(0, 0) scale(1);
96
+ }
97
  }
98
 
99
  /* Sidebar Styling */
100
  .sidebar {
101
+ width: var(--sidebar-width);
102
+ background: var(--sidebar-bg);
103
+ backdrop-filter: blur(40px);
104
  border-right: 1px solid var(--glass-border);
105
+ padding: 1.5rem;
106
  display: flex;
107
  flex-direction: column;
108
+ z-index: 100;
109
+ transition: width var(--animation-speed) cubic-bezier(0.4, 0, 0.2, 1);
110
+ position: relative;
111
+ box-shadow: 20px 0 50px rgba(0, 0, 0, 0.3);
112
+ }
113
+
114
+ .sidebar.collapsed {
115
+ width: var(--sidebar-collapsed-width);
116
+ padding: 1.5rem 0.5rem;
117
+ }
118
+
119
+ .sidebar-header {
120
+ display: flex;
121
+ align-items: center;
122
+ justify-content: space-between;
123
+ margin-bottom: 3rem;
124
+ padding: 0 0.5rem;
125
  }
126
 
127
  .logo {
128
  display: flex;
129
  align-items: center;
130
  gap: 12px;
131
+ transition: opacity 0.2s;
132
+ white-space: nowrap;
133
+ }
134
+
135
+ .sidebar.collapsed .logo span,
136
+ .sidebar.collapsed .sidebar-section h3,
137
+ .sidebar.collapsed .sidebar-section .form-group label,
138
+ .sidebar.collapsed .sidebar-section .form-group span,
139
+ .sidebar.collapsed .status-row,
140
+ .sidebar.collapsed .primary-btn span,
141
+ .sidebar.collapsed .sidebar-section select,
142
+ .sidebar.collapsed .sidebar-section input,
143
+ .sidebar.collapsed .sidebar-section .radio-group {
144
+ display: none;
145
  }
146
 
147
  .logo span {
148
  font-size: 1.5rem;
149
+ font-weight: 800;
150
+ text-transform: uppercase;
151
+ letter-spacing: 0.05em;
152
+ background: linear-gradient(135deg, #f8fafc, #9333ea, #c026d3);
153
  -webkit-background-clip: text;
154
  background-clip: text;
155
  -webkit-text-fill-color: transparent;
156
+ filter: drop-shadow(0 0 10px rgba(147, 51, 234, 0.3));
157
+ }
158
+
159
+ .sidebar-toggle {
160
+ background: rgba(147, 51, 234, 0.1);
161
+ border: 1px solid var(--glass-border);
162
+ color: var(--text-main);
163
+ cursor: pointer;
164
+ font-size: 1.2rem;
165
+ padding: 0.6rem;
166
+ border-radius: 12px;
167
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
168
+ display: flex;
169
+ align-items: center;
170
+ justify-content: center;
171
  }
172
 
173
+ .sidebar-toggle:hover {
174
+ background: var(--primary);
175
+ color: white;
176
+ transform: scale(1.05);
177
+ box-shadow: 0 0 20px var(--primary-glow);
178
  }
179
 
180
+ .sidebar.collapsed .sidebar-toggle {
181
+ margin: 0 auto;
182
+ }
183
+
184
+ .sidebar.collapsed .sidebar-section {
185
+ display: flex;
186
+ flex-direction: column;
187
+ align-items: center;
188
+ }
189
+
190
+ .sidebar-section i {
191
+ width: 20px;
192
+ text-align: center;
193
+ }
194
+
195
+ .sidebar.collapsed .sidebar-section .form-group button {
196
+ width: 40px;
197
+ height: 40px;
198
+ padding: 0;
199
+ margin: 0 auto;
200
+ border-radius: 50%;
201
+ }
202
+
203
+ .sidebar.collapsed .sidebar-section .form-group button i {
204
+ margin: 0;
205
+ }
206
+
207
+ .sidebar.collapsed #clear-chat {
208
+ width: 40px;
209
+ height: 40px;
210
+ padding: 0;
211
+ margin: 1rem auto;
212
+ border-radius: 50%;
213
+ }
214
+
215
+ .sidebar.collapsed #clear-chat i {
216
+ margin: 0;
217
  }
218
 
219
  /* Main Content Area */
 
222
  display: flex;
223
  flex-direction: column;
224
  position: relative;
225
+ padding: 2rem 3rem;
226
+ background: transparent;
227
+ transition: margin var(--animation-speed);
228
+ }
229
+
230
+ .content-header {
231
+ margin-bottom: 2.5rem;
232
+ border-bottom: 1px solid var(--glass-border);
233
+ padding-bottom: 1.5rem;
234
+ }
235
+
236
+ .content-header h1 {
237
+ font-size: 2.2rem;
238
+ font-weight: 700;
239
+ margin-bottom: 0.5rem;
240
+ background: linear-gradient(to right, #f8fafc, #94a3b8);
241
+ -webkit-background-clip: text;
242
+ background-clip: text;
243
+ -webkit-text-fill-color: transparent;
244
  }
245
 
246
  /* Glass Card Component */
247
  .glass-card {
248
  background: var(--card-bg);
249
+ backdrop-filter: blur(25px);
250
  border: 1px solid var(--glass-border);
251
+ border-radius: 32px;
252
+ box-shadow: 0 20px 60px 0 rgba(0, 0, 0, 0.5);
253
+ transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
254
+ }
255
+
256
+ .glass-card:hover {
257
+ border-color: rgba(147, 51, 234, 0.4);
258
+ box-shadow: 0 25px 80px 0 rgba(147, 51, 234, 0.15);
259
  }
260
 
261
  /* Chat Container */
 
265
  flex-direction: column;
266
  overflow: hidden;
267
  margin-bottom: 1.5rem;
268
+ padding: 0.5rem;
269
  }
270
 
271
  .chat-messages {
 
275
  display: flex;
276
  flex-direction: column;
277
  gap: 1.5rem;
278
+ scroll-behavior: smooth;
279
  }
280
 
281
  /* Scrollbar styling */
 
289
  }
290
 
291
  .message {
292
+ max-width: 85%;
293
+ padding: 1.25rem 1.75rem;
294
+ border-radius: 24px;
295
  font-size: 0.95rem;
296
+ line-height: 1.7;
297
+ animation: fadeIn 0.5s cubic-bezier(0.2, 0.8, 0.2, 1) forwards;
298
+ position: relative;
299
+ border: 1px solid var(--glass-border-light);
300
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
301
  }
302
 
303
  .message p {
 
364
 
365
  .user-message {
366
  align-self: flex-end;
367
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
368
  color: white;
369
  border-bottom-right-radius: 4px;
370
+ box-shadow: 0 10px 25px rgba(147, 51, 234, 0.2);
371
+ border: none;
372
  }
373
 
374
  .bot-message {
375
  align-self: flex-start;
376
+ background: rgba(255, 255, 255, 0.04);
377
+ backdrop-filter: blur(10px);
378
+ border: 1px solid var(--glass-border-light);
379
  border-top-left-radius: 4px;
380
  }
381
 
 
458
  }
459
 
460
  .send-btn {
461
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
462
  border: none;
463
+ width: 50px;
464
+ height: 50px;
465
+ border-radius: 16px;
466
  color: white;
467
  cursor: pointer;
468
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
469
+ box-shadow: 0 5px 15px rgba(147, 51, 234, 0.3);
470
  }
471
 
472
  .send-btn:hover {
473
+ transform: scale(1.1) rotate(5deg);
474
+ box-shadow: 0 8px 20px rgba(147, 51, 234, 0.5);
475
  }
476
 
477
  /* Form Styling (Sidebar) */
 
505
 
506
  .primary-btn {
507
  width: 100%;
508
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
509
  border: none;
510
  padding: 1rem;
511
+ border-radius: 16px;
512
  color: white;
513
+ font-weight: 700;
514
+ text-transform: uppercase;
515
+ letter-spacing: 0.05em;
516
  cursor: pointer;
517
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
518
  margin-top: 1rem;
519
+ box-shadow: 0 8px 15px rgba(147, 51, 234, 0.2);
520
  }
521
 
522
  .primary-btn:hover {
523
+ transform: translateY(-2px);
524
+ box-shadow: 0 12px 25px rgba(147, 51, 234, 0.4);
525
+ filter: brightness(1.1);
526
  }
527
 
528
  /* Status Bar */
static/js/app.js CHANGED
@@ -9,13 +9,27 @@ document.addEventListener('DOMContentLoaded', () => {
9
  const otherProviderGroup = document.getElementById('other-provider-group');
10
  const modifyGroup = document.getElementById('modify-group');
11
  const fileToModify = document.getElementById('file-to-modify');
 
 
12
  const statusBar = document.getElementById('status-bar');
13
  const statusText = document.getElementById('status-text');
14
  const clearChat = document.getElementById('clear-chat');
15
  const audioUpload = document.getElementById('audio-upload');
16
  const audioTrigger = document.getElementById('audio-trigger');
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  let chatHistory = [];
 
19
  let isProcessing = false;
20
  let configData = null;
21
 
@@ -32,7 +46,7 @@ document.addEventListener('DOMContentLoaded', () => {
32
  providerSelect.innerHTML = configData.providers.map(p =>
33
  `<option value="${p.name}">${p.name}</option>`
34
  ).join('');
35
- // Trigger initial category load
36
  populateCategories();
37
  }
38
 
@@ -41,7 +55,6 @@ document.addEventListener('DOMContentLoaded', () => {
41
  const provider = configData.providers.find(p => p.name === selectedProviderName);
42
  let categories = provider ? [...provider.categories] : [];
43
 
44
- // Ensure "Other..." is available
45
  if (!categories.includes("Other...")) {
46
  categories.push("Other...");
47
  }
@@ -50,7 +63,6 @@ document.addEventListener('DOMContentLoaded', () => {
50
  `<option value="${c}">${c}</option>`
51
  ).join('');
52
 
53
- // Reset category input visibility
54
  const otherCategoryGroup = document.getElementById('other-category-group');
55
  if (otherCategoryGroup) {
56
  otherCategoryGroup.style.display = categorySelect.value === 'Other...' ? 'block' : 'none';
@@ -85,12 +97,12 @@ document.addEventListener('DOMContentLoaded', () => {
85
 
86
  for (let i = 0; i < words.length; i++) {
87
  currentText += words[i] + ' ';
88
- msgDiv.innerHTML = marked.parse(currentText + '▌'); // Add cursor effect
89
  chatBox.scrollTop = chatBox.scrollHeight;
90
- await new Promise(resolve => setTimeout(resolve, 30)); // Snappy speed
91
  }
92
 
93
- msgDiv.innerHTML = marked.parse(fullText); // Final render without cursor
94
  chatBox.scrollTop = chatBox.scrollHeight;
95
  }
96
 
@@ -102,7 +114,6 @@ document.addEventListener('DOMContentLoaded', () => {
102
  userInput.value = '';
103
  isProcessing = true;
104
 
105
- // Add typing indicator
106
  const typingDiv = document.createElement('div');
107
  typingDiv.className = 'message bot-message';
108
  typingDiv.textContent = 'Typing...';
@@ -113,11 +124,21 @@ document.addEventListener('DOMContentLoaded', () => {
113
  const response = await fetch('/api/chat', {
114
  method: 'POST',
115
  headers: { 'Content-Type': 'application/json' },
116
- body: JSON.stringify({ prompt, history: chatHistory })
 
 
 
 
117
  });
118
  const data = await response.json();
119
 
120
  chatBox.removeChild(typingDiv);
 
 
 
 
 
 
121
  if (data.answer) {
122
  addMessage(data.answer, 'bot');
123
  chatHistory.push(prompt);
@@ -142,6 +163,7 @@ document.addEventListener('DOMContentLoaded', () => {
142
  clearChat.addEventListener('click', () => {
143
  chatBox.innerHTML = '<div class="message bot-message">Chat history cleared. How can I help?</div>';
144
  chatHistory = [];
 
145
  });
146
 
147
  // --- Audio Chat Logic ---
@@ -155,10 +177,10 @@ document.addEventListener('DOMContentLoaded', () => {
155
  const formData = new FormData();
156
  formData.append('audio', file);
157
  formData.append('history', JSON.stringify(chatHistory));
 
158
 
159
  isProcessing = true;
160
 
161
- // Add "Audio Uploaded" user message with Play button
162
  const userMsgDiv = document.createElement('div');
163
  userMsgDiv.className = 'message user-message audio-message-bubble';
164
  userMsgDiv.innerHTML = `
@@ -172,7 +194,6 @@ document.addEventListener('DOMContentLoaded', () => {
172
  chatBox.appendChild(userMsgDiv);
173
  chatBox.scrollTop = chatBox.scrollHeight;
174
 
175
- // Play functionality
176
  const playBtn = userMsgDiv.querySelector('.play-btn');
177
  const audio = new Audio(audioUrl);
178
 
@@ -201,7 +222,6 @@ document.addEventListener('DOMContentLoaded', () => {
201
  alert("Error loading the audio file for playback.");
202
  };
203
 
204
- // Add typing indicator
205
  const typingDiv = document.createElement('div');
206
  typingDiv.className = 'message bot-message';
207
  typingDiv.textContent = 'Transcribing audio...';
@@ -216,13 +236,19 @@ document.addEventListener('DOMContentLoaded', () => {
216
  const data = await response.json();
217
 
218
  chatBox.removeChild(typingDiv);
 
 
 
 
 
 
219
  if (data.transcription) {
220
  const transDiv = document.createElement('div');
221
  transDiv.innerHTML = `
222
- <div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 4px;">Raw Transcript:</div>
223
- <div style="font-size: 0.85rem; font-style: italic; opacity: 0.7; margin-bottom: 12px;">"${data.transcription}"</div>
224
- <div style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 4px;">Refined Question:</div>
225
- <div style="font-size: 1rem; font-weight: 600; color: #818cf8;">"${data.summarized_question || data.transcription}"</div>
226
  `;
227
  transDiv.style.marginTop = '12px';
228
  transDiv.style.borderTop = '1px solid rgba(255,255,255,0.1)';
 
9
  const otherProviderGroup = document.getElementById('other-provider-group');
10
  const modifyGroup = document.getElementById('modify-group');
11
  const fileToModify = document.getElementById('file-to-modify');
12
+ const sidebar = document.getElementById('sidebar');
13
+ const sidebarToggle = document.getElementById('sidebar-toggle');
14
  const statusBar = document.getElementById('status-bar');
15
  const statusText = document.getElementById('status-text');
16
  const clearChat = document.getElementById('clear-chat');
17
  const audioUpload = document.getElementById('audio-upload');
18
  const audioTrigger = document.getElementById('audio-trigger');
19
 
20
+ // --- Sidebar Toggle Logic ---
21
+ const isSidebarCollapsed = localStorage.getItem('sidebarCollapsed') === 'true';
22
+ if (isSidebarCollapsed) {
23
+ sidebar.classList.add('collapsed');
24
+ }
25
+
26
+ sidebarToggle.addEventListener('click', () => {
27
+ sidebar.classList.toggle('collapsed');
28
+ localStorage.setItem('sidebarCollapsed', sidebar.classList.contains('collapsed'));
29
+ });
30
+
31
  let chatHistory = [];
32
+ let extractedEntities = {}; // State persistence
33
  let isProcessing = false;
34
  let configData = null;
35
 
 
46
  providerSelect.innerHTML = configData.providers.map(p =>
47
  `<option value="${p.name}">${p.name}</option>`
48
  ).join('');
49
+ // Trigger initial load
50
  populateCategories();
51
  }
52
 
 
55
  const provider = configData.providers.find(p => p.name === selectedProviderName);
56
  let categories = provider ? [...provider.categories] : [];
57
 
 
58
  if (!categories.includes("Other...")) {
59
  categories.push("Other...");
60
  }
 
63
  `<option value="${c}">${c}</option>`
64
  ).join('');
65
 
 
66
  const otherCategoryGroup = document.getElementById('other-category-group');
67
  if (otherCategoryGroup) {
68
  otherCategoryGroup.style.display = categorySelect.value === 'Other...' ? 'block' : 'none';
 
97
 
98
  for (let i = 0; i < words.length; i++) {
99
  currentText += words[i] + ' ';
100
+ msgDiv.innerHTML = marked.parse(currentText + '▌');
101
  chatBox.scrollTop = chatBox.scrollHeight;
102
+ await new Promise(resolve => setTimeout(resolve, 30));
103
  }
104
 
105
+ msgDiv.innerHTML = marked.parse(fullText);
106
  chatBox.scrollTop = chatBox.scrollHeight;
107
  }
108
 
 
114
  userInput.value = '';
115
  isProcessing = true;
116
 
 
117
  const typingDiv = document.createElement('div');
118
  typingDiv.className = 'message bot-message';
119
  typingDiv.textContent = 'Typing...';
 
124
  const response = await fetch('/api/chat', {
125
  method: 'POST',
126
  headers: { 'Content-Type': 'application/json' },
127
+ body: JSON.stringify({
128
+ prompt,
129
+ history: chatHistory,
130
+ extracted_entities: extractedEntities
131
+ })
132
  });
133
  const data = await response.json();
134
 
135
  chatBox.removeChild(typingDiv);
136
+
137
+ // Update state from backend response
138
+ if (data.extracted_entities) {
139
+ extractedEntities = data.extracted_entities;
140
+ }
141
+
142
  if (data.answer) {
143
  addMessage(data.answer, 'bot');
144
  chatHistory.push(prompt);
 
163
  clearChat.addEventListener('click', () => {
164
  chatBox.innerHTML = '<div class="message bot-message">Chat history cleared. How can I help?</div>';
165
  chatHistory = [];
166
+ extractedEntities = {}; // Reset state
167
  });
168
 
169
  // --- Audio Chat Logic ---
 
177
  const formData = new FormData();
178
  formData.append('audio', file);
179
  formData.append('history', JSON.stringify(chatHistory));
180
+ formData.append('extracted_entities', JSON.stringify(extractedEntities));
181
 
182
  isProcessing = true;
183
 
 
184
  const userMsgDiv = document.createElement('div');
185
  userMsgDiv.className = 'message user-message audio-message-bubble';
186
  userMsgDiv.innerHTML = `
 
194
  chatBox.appendChild(userMsgDiv);
195
  chatBox.scrollTop = chatBox.scrollHeight;
196
 
 
197
  const playBtn = userMsgDiv.querySelector('.play-btn');
198
  const audio = new Audio(audioUrl);
199
 
 
222
  alert("Error loading the audio file for playback.");
223
  };
224
 
 
225
  const typingDiv = document.createElement('div');
226
  typingDiv.className = 'message bot-message';
227
  typingDiv.textContent = 'Transcribing audio...';
 
236
  const data = await response.json();
237
 
238
  chatBox.removeChild(typingDiv);
239
+
240
+ // Update state from backend response
241
+ if (data.extracted_entities) {
242
+ extractedEntities = data.extracted_entities;
243
+ }
244
+
245
  if (data.transcription) {
246
  const transDiv = document.createElement('div');
247
  transDiv.innerHTML = `
248
+ <div style="font-size: 0.75rem; color: rgba(255,255,255,0.7); margin-bottom: 4px;">Raw Transcript:</div>
249
+ <div style="font-size: 0.85rem; font-style: italic; color: rgba(255,255,255,0.9); margin-bottom: 12px;">"${data.transcription}"</div>
250
+ <div style="font-size: 0.75rem; color: rgba(255,255,255,0.7); margin-bottom: 4px;">Refined Question:</div>
251
+ <div style="font-size: 1rem; font-weight: 600; color: #ffffff; text-shadow: 0 2px 4px rgba(0,0,0,0.2);">"${data.summarized_question || data.transcription}"</div>
252
  `;
253
  transDiv.style.marginTop = '12px';
254
  transDiv.style.borderTop = '1px solid rgba(255,255,255,0.1)';
templates/index.html CHANGED
@@ -10,14 +10,24 @@
10
  </head>
11
 
12
  <body>
13
- <aside class="sidebar">
14
- <div class="logo">
15
- <i class="fas fa-shield-halved fa-2x" style="color: #818cf8;"></i>
16
- <span>AI ADVISOR</span>
 
 
 
 
 
 
 
 
 
 
17
  </div>
18
 
19
  <div class="sidebar-section">
20
- <h3>Document Manager</h3>
21
  <div class="form-group">
22
  <select id="provider-select">
23
  <!-- Loaded dynamically -->
@@ -36,13 +46,12 @@
36
  </div>
37
 
38
  <div class="form-group">
39
- <label
40
- style="font-size: 0.8rem; color: var(--text-muted); display: block; margin-bottom: 0.5rem;">Mode</label>
41
- <div style="display: flex; gap: 10px;">
42
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="New Upload"
43
- checked> New</label>
44
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="Modify Existing">
45
- Modify</label>
46
  </div>
47
  </div>
48
 
@@ -54,8 +63,8 @@
54
 
55
  <div class="form-group">
56
  <input type="file" id="doc-upload" hidden accept=".pdf,.docx">
57
- <button class="primary-btn" id="upload-trigger">
58
- <i class="fas fa-file-upload"></i> &nbsp; Choose & Process
59
  </button>
60
  </div>
61
  </div>
@@ -70,17 +79,18 @@
70
  </div>
71
  </div>
72
 
73
- <button class="primary-btn" style="background: rgba(255,255,255,0.05); margin-top: 1rem;" id="clear-chat">
74
- <i class="fas fa-trash-alt"></i> &nbsp; Clear History
 
75
  </button>
76
  </aside>
77
 
78
  <main class="main-content">
79
- <header style="margin-bottom: 2rem;">
80
- <h1 style="font-size: 1.8rem;">Direct-to-Agent Policy Advisory</h1>
81
  <p style="color: var(--text-muted);">Get grounded answers from your insurance documents.</p>
82
- <p style="color: var(--text-muted);">Please Note :- This response is based solely on insurer-provided
83
- documents and is not financial advice.</p>
84
  </header>
85
 
86
  <div class="chat-container glass-card">
 
10
  </head>
11
 
12
  <body>
13
+ <div class="bg-glow-container">
14
+ <div class="glow-blob glow-1"></div>
15
+ <div class="glow-blob glow-2"></div>
16
+ <div class="glow-blob glow-3"></div>
17
+ </div>
18
+ <aside class="sidebar" id="sidebar">
19
+ <div class="sidebar-header">
20
+ <div class="logo">
21
+ <i class="fas fa-shield-halved fa-2x" style="color: var(--primary);"></i>
22
+ <span>AI ADVISOR</span>
23
+ </div>
24
+ <button class="sidebar-toggle" id="sidebar-toggle">
25
+ <i class="fas fa-bars"></i>
26
+ </button>
27
  </div>
28
 
29
  <div class="sidebar-section">
30
+ <h3><span>Document Manager</span></h3>
31
  <div class="form-group">
32
  <select id="provider-select">
33
  <!-- Loaded dynamically -->
 
46
  </div>
47
 
48
  <div class="form-group">
49
+ <label><span>Mode</span></label>
50
+ <div class="radio-group" style="display: flex; gap: 10px;">
 
51
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="New Upload"
52
+ checked> <span>New</span></label>
53
  <label style="font-size: 0.85rem;"><input type="radio" name="upload-mode" value="Modify Existing">
54
+ <span>Modify</span></label>
55
  </div>
56
  </div>
57
 
 
63
 
64
  <div class="form-group">
65
  <input type="file" id="doc-upload" hidden accept=".pdf,.docx">
66
+ <button class="primary-btn" id="upload-trigger" title="Upload Document">
67
+ <i class="fas fa-file-upload"></i> <span>&nbsp; Choose & Process</span>
68
  </button>
69
  </div>
70
  </div>
 
79
  </div>
80
  </div>
81
 
82
+ <button class="primary-btn" style="background: rgba(255,255,255,0.05); margin-top: 1rem;" id="clear-chat"
83
+ title="Clear History">
84
+ <i class="fas fa-trash-alt"></i> <span>&nbsp; Clear History</span>
85
  </button>
86
  </aside>
87
 
88
  <main class="main-content">
89
+ <header class="content-header">
90
+ <h1>Direct-to-Agent Policy Advisory</h1>
91
  <p style="color: var(--text-muted);">Get grounded answers from your insurance documents.</p>
92
+ <p style="font-size: 0.85rem; opacity: 0.8; margin-top: 5px;">Note: This response is based solely on
93
+ insurer-provided documents and is not financial advice.</p>
94
  </header>
95
 
96
  <div class="chat-container glass-card">
utils/cache.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LRU cache with TTL support for application data.
3
+ """
4
+ import time
5
+ import threading
6
+ from typing import Any, Optional, Dict
7
+ from collections import OrderedDict
8
+ from config import config
9
+
10
+
11
class LRUCacheWithTTL:
    """Thread-safe LRU cache whose entries also expire after a fixed TTL.

    Entries are dropped either when the cache grows past ``max_size``
    (least-recently-used first) or lazily on ``get`` once they are older
    than ``ttl`` seconds.
    """

    def __init__(self, max_size: int = None, ttl_seconds: int = None):
        """
        Initialize cache.

        Args:
            max_size: Maximum number of items (defaults to config.CACHE_MAX_SIZE)
            ttl_seconds: Time to live in seconds (defaults to config.CACHE_TTL)
        """
        self.max_size = max_size or config.CACHE_MAX_SIZE
        self.ttl = ttl_seconds or config.CACHE_TTL
        self._cache = OrderedDict()    # key -> value, ordered oldest-first
        self._timestamps = {}          # key -> last write time (epoch seconds)
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or None if absent or expired."""
        with self._lock:
            stored_at = self._timestamps.get(key)
            if stored_at is None:
                self._misses += 1
                return None
            if time.time() - stored_at > self.ttl:
                # Lazy expiry: the stale entry is dropped on access.
                self._evict(key)
                self._misses += 1
                return None
            # Touch the entry so it becomes the most recently used.
            self._cache.move_to_end(key)
            self._hits += 1
            return self._cache[key]

    def set(self, key: str, value: Any):
        """Insert or refresh *key*, evicting the LRU entry when over capacity."""
        with self._lock:
            was_present = key in self._cache
            self._cache[key] = value
            self._timestamps[key] = time.time()
            if was_present:
                # Refreshing an existing key also bumps its recency.
                self._cache.move_to_end(key)
                return
            if len(self._cache) > self.max_size:
                # OrderedDict keeps insertion/recency order: the first item
                # is the least recently used one.
                oldest_key, _ = self._cache.popitem(last=False)
                self._timestamps.pop(oldest_key, None)

    def _evict(self, key: str):
        """Drop *key* from both maps (no-op if absent). Caller holds the lock."""
        if key in self._cache:
            del self._cache[key]
            del self._timestamps[key]

    def clear(self):
        """Remove every cached entry (hit/miss counters are preserved)."""
        with self._lock:
            self._cache.clear()
            self._timestamps.clear()

    def invalidate(self, key: str):
        """Explicitly drop a single cache entry."""
        with self._lock:
            self._evict(key)

    def get_stats(self) -> Dict:
        """Return a snapshot of size, hit/miss counters and hit rate."""
        with self._lock:
            total_requests = self._hits + self._misses
            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": self._hits,
                "misses": self._misses,
                # max(1, ...) avoids division by zero before any access.
                "hit_rate": self._hits / max(1, total_requests),
                "ttl_seconds": self.ttl,
            }
98
+
99
+
100
class CacheManager:
    """Owns the application's named caches and coordinates invalidation."""

    def __init__(self):
        # Plan listings: small, refreshed every 5 minutes.
        self.plan_cache = LRUCacheWithTTL(max_size=100, ttl_seconds=300)
        # Per-plan metadata: larger and more stable, kept for 10 minutes.
        self.metadata_cache = LRUCacheWithTTL(max_size=500, ttl_seconds=600)
        # Query rewrites: many distinct keys, 5-minute lifetime.
        self.query_cache = LRUCacheWithTTL(max_size=1000, ttl_seconds=300)

    def invalidate_all(self):
        """Invalidate plan and metadata caches (e.g. after ingestion).

        The query cache is intentionally left alone: rewritten queries stay
        valid regardless of newly ingested documents.
        """
        for cache in (self.plan_cache, self.metadata_cache):
            cache.clear()

    def get_all_stats(self) -> Dict:
        """Return per-cache statistics keyed by cache name."""
        named_caches = {
            "plan_cache": self.plan_cache,
            "metadata_cache": self.metadata_cache,
            "query_cache": self.query_cache,
        }
        return {name: cache.get_stats() for name, cache in named_caches.items()}
126
+
127
+
128
# Global cache manager — a single shared instance so every module sees the
# same caches and invalidate_all() takes effect process-wide.
cache_manager = CacheManager()
utils/circuit_breaker.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Circuit breaker pattern for external dependencies.
3
+ """
4
+ import time
5
+ import threading
6
+ from enum import Enum
7
+ from typing import Callable, Any, Optional
8
+ from config import config
9
+
10
+
11
class CircuitState(Enum):
    """Circuit breaker states."""
    CLOSED = "closed"        # Normal operation: requests pass through
    OPEN = "open"            # Failing: requests are rejected
    HALF_OPEN = "half_open"  # Testing recovery with probe requests


class CircuitBreakerError(Exception):
    """Raised when a call is rejected because its circuit is open."""
    pass


class CircuitBreaker:
    """
    Circuit breaker for external dependencies.

    States:
        - CLOSED: normal operation, all requests pass through
        - OPEN: failure threshold reached, all requests rejected
        - HALF_OPEN: after the timeout, test requests are allowed

    Concurrency note: state transitions are guarded by ``self._lock``, a
    non-reentrant ``threading.Lock``. The protected call itself must run
    *outside* the lock — ``_on_success``/``_on_failure`` re-acquire the
    same lock, and holding it across the call would both deadlock and
    serialize every protected call.
    """

    def __init__(
        self,
        name: str,
        failure_threshold: Optional[int] = None,
        timeout_seconds: Optional[int] = None
    ):
        """
        Initialize circuit breaker.

        Args:
            name: Name of the circuit (for logging/monitoring).
            failure_threshold: Consecutive failures before opening
                (defaults to config.CIRCUIT_BREAKER_FAILURE_THRESHOLD).
            timeout_seconds: Seconds to wait before attempting recovery
                (defaults to config.CIRCUIT_BREAKER_TIMEOUT).
        """
        self.name = name
        self.failure_threshold = failure_threshold or config.CIRCUIT_BREAKER_FAILURE_THRESHOLD
        self.timeout = timeout_seconds or config.CIRCUIT_BREAKER_TIMEOUT

        self.state = CircuitState.CLOSED
        self.failure_count = 0         # consecutive failures while CLOSED
        self.last_failure_time = None  # epoch seconds of most recent failure
        self.success_count = 0         # consecutive successes while HALF_OPEN

        self._lock = threading.Lock()

    def call(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute *func* with circuit breaker protection.

        Args:
            func: Function to call.
            *args, **kwargs: Arguments forwarded to *func*.

        Returns:
            Whatever *func* returns.

        Raises:
            CircuitBreakerError: If the circuit is open and the recovery
                timeout has not elapsed yet.
        """
        with self._lock:
            # OPEN -> HALF_OPEN once the timeout has elapsed; otherwise reject.
            if self.state == CircuitState.OPEN:
                if self._should_attempt_reset():
                    self.state = CircuitState.HALF_OPEN
                    self.success_count = 0
                else:
                    raise CircuitBreakerError(
                        f"Circuit breaker '{self.name}' is OPEN. "
                        f"Will retry after {self.timeout}s"
                    )

        # BUGFIX: run the protected call OUTSIDE self._lock. The previous
        # version held the lock here and then called _on_success/_on_failure,
        # which re-acquire the same non-reentrant lock — deadlocking on the
        # very first call.
        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise
        self._on_success()
        return result

    def _should_attempt_reset(self) -> bool:
        """Return True once the recovery timeout has elapsed (caller holds lock)."""
        if self.last_failure_time is None:
            return True
        return time.time() - self.last_failure_time >= self.timeout

    def _on_success(self):
        """Record a successful call; close the circuit after 3 HALF_OPEN successes."""
        with self._lock:
            self.failure_count = 0

            if self.state == CircuitState.HALF_OPEN:
                self.success_count += 1
                # After 3 successful probes, the dependency is considered healthy.
                if self.success_count >= 3:
                    self.state = CircuitState.CLOSED
                    self.success_count = 0

    def _on_failure(self):
        """Record a failed call; open the circuit when warranted."""
        with self._lock:
            self.failure_count += 1
            self.last_failure_time = time.time()

            # Any failure while probing re-opens the circuit immediately.
            if self.state == CircuitState.HALF_OPEN:
                self.state = CircuitState.OPEN
                self.failure_count = 0
                return

            # In CLOSED, open once the consecutive-failure threshold is hit.
            if self.failure_count >= self.failure_threshold:
                self.state = CircuitState.OPEN

    def reset(self):
        """Manually force the breaker back to CLOSED and clear all counters."""
        with self._lock:
            self.state = CircuitState.CLOSED
            self.failure_count = 0
            self.success_count = 0
            self.last_failure_time = None

    def get_state(self) -> dict:
        """Return a snapshot of the breaker's state for monitoring."""
        with self._lock:
            return {
                "name": self.name,
                "state": self.state.value,
                "failure_count": self.failure_count,
                "success_count": self.success_count,
                "last_failure_time": self.last_failure_time
            }
145
+
146
+
147
class CircuitBreakerManager:
    """Registry of named circuit breakers for the app's external dependencies."""

    def __init__(self):
        # Pre-register breakers for the known dependencies with tuned limits.
        self.breakers = {
            "llm": CircuitBreaker("llm", failure_threshold=5, timeout_seconds=60),
            "retriever": CircuitBreaker("retriever", failure_threshold=3, timeout_seconds=30),
            "vector_store": CircuitBreaker("vector_store", failure_threshold=3, timeout_seconds=30),
        }

    def get_breaker(self, name: str) -> CircuitBreaker:
        """Return the breaker registered under *name*, creating one on first use."""
        breaker = self.breakers.get(name)
        if breaker is None:
            # Unknown dependency: register a breaker with default settings.
            breaker = CircuitBreaker(name)
            self.breakers[name] = breaker
        return breaker

    def get_all_states(self) -> dict:
        """Snapshot the state of every registered breaker, keyed by name."""
        return {name: breaker.get_state() for name, breaker in self.breakers.items()}

    def reset_all(self):
        """Force every registered breaker back to CLOSED."""
        for breaker in self.breakers.values():
            breaker.reset()
174
+
175
+
176
# Global circuit breaker manager — a single shared registry so every caller
# consults the same breaker state for each external dependency.
circuit_breaker_manager = CircuitBreakerManager()
utils/logger.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Centralized structured logging with rotation and request ID tracking.
3
+ """
4
+ import os
5
+ import json
6
+ import logging
7
+ import threading
8
+ from logging.handlers import RotatingFileHandler
9
+ from typing import Optional
10
+ from datetime import datetime
11
+ from config import config
12
+
13
# Thread-local storage for request context (request_id, user_ip): populated by
# set_request_context() per worker thread and read back by RequestContextFilter
# when log records are emitted.
_request_context = threading.local()
15
+
16
+
17
class RequestContextFilter(logging.Filter):
    """Logging filter that stamps each record with the current request context."""

    def filter(self, record):
        # Copy the thread-local request context onto the record; threads that
        # never set a context get the 'N/A' placeholder instead.
        for attr in ('request_id', 'user_ip'):
            setattr(record, attr, getattr(_request_context, attr, 'N/A'))
        # Always keep the record — this filter only annotates, it never drops.
        return True
24
+
25
+
26
class JSONFormatter(logging.Formatter):
    """Render each log record as a single-line JSON document."""

    def format(self, record):
        payload = {
            'timestamp': datetime.utcnow().isoformat(),
            'level': record.levelname,
            'logger': record.name,
            'message': record.getMessage(),
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno,
            # Request context is injected by RequestContextFilter; default to
            # 'N/A' for records emitted outside a request.
            'request_id': getattr(record, 'request_id', 'N/A'),
            'user_ip': getattr(record, 'user_ip', 'N/A'),
        }

        # Include the formatted traceback when the record carries one.
        if record.exc_info:
            payload['exception'] = self.formatException(record.exc_info)

        # Structured extras (see log_with_extra) go under a dedicated key.
        if hasattr(record, 'extra_data'):
            payload['extra'] = record.extra_data

        return json.dumps(payload)
51
+
52
+
53
def setup_logger(name: str, log_level: Optional[str] = None) -> logging.Logger:
    """
    Create a logger with both file and console handlers.

    Args:
        name: Logger name (typically ``__name__``).
        log_level: Optional override for the configured log level.

    Returns:
        Configured logger instance.
    """
    logger = logging.getLogger(name)

    # Already configured by a previous call — don't stack duplicate handlers.
    if logger.handlers:
        return logger

    effective_level = log_level or config.LOG_LEVEL
    logger.setLevel(getattr(logging, effective_level.upper()))

    # Stamp every record with request_id / user_ip.
    logger.addFilter(RequestContextFilter())

    # --- console handler (human-readable in development) ---
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG if config.DEBUG else logging.INFO)
    if config.ENVIRONMENT.value == "production":
        # Machine-readable JSON for log aggregators in production.
        console.setFormatter(JSONFormatter())
    else:
        console.setFormatter(logging.Formatter(
            '%(asctime)s - [%(request_id)s] - %(name)s - %(levelname)s - %(message)s'
        ))
    logger.addHandler(console)

    # --- rotating file handler (always JSON) ---
    try:
        log_dir = os.path.dirname(config.LOG_FILE_PATH)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        file_handler = RotatingFileHandler(
            config.LOG_FILE_PATH,
            maxBytes=config.LOG_MAX_BYTES,
            backupCount=config.LOG_BACKUP_COUNT,
        )
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(JSONFormatter())
        logger.addHandler(file_handler)
    except Exception as e:
        # File logging is best-effort: keep running with console output only.
        logger.warning(f"Failed to setup file logging: {e}")

    return logger
113
+
114
+
115
def set_request_context(request_id: str, user_ip: Optional[str] = None):
    """Bind *request_id* (and optionally *user_ip*) to the current thread."""
    _request_context.request_id = request_id
    # Missing/empty client address is normalized to the 'unknown' placeholder.
    _request_context.user_ip = user_ip if user_ip else 'unknown'
119
+
120
+
121
def clear_request_context():
    """Remove any request context previously bound to the current thread."""
    # delattr on an unset thread-local attribute raises, so guard each one.
    for attr in ('request_id', 'user_ip'):
        if hasattr(_request_context, attr):
            delattr(_request_context, attr)
127
+
128
+
129
def log_with_extra(logger: logging.Logger, level: str, message: str, **extra_data):
    """Emit *message* at *level* on *logger*, attaching structured **extra_data.

    The payload lands on the LogRecord as ``extra_data``, which JSONFormatter
    serializes under the ``extra`` key.
    """
    emit = getattr(logger, level.lower())

    if extra_data:
        # logging's `extra` mapping becomes attributes on the LogRecord.
        emit(message, extra={'extra_data': extra_data})
    else:
        emit(message)
139
+
140
+
141
# Pre-built loggers for the application's main subsystems, so modules can
# import the one they need instead of each calling setup_logger() themselves.
app_logger = setup_logger('app')
agent_logger = setup_logger('agents')
retrieval_logger = setup_logger('retrieval')
ingestion_logger = setup_logger('ingestion')
llm_logger = setup_logger('llm')
api_logger = setup_logger('api')
utils/metrics.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application metrics collection and tracking.
3
+ """
4
+ import time
5
+ import threading
6
+ from typing import Dict, List, Optional
7
+ from collections import defaultdict, deque
8
+ from datetime import datetime, timedelta
9
+ from config import config
10
+
11
+
12
class MetricsCollector:
    """Thread-safe, in-memory collector of runtime metrics.

    Every mutator acquires a single internal lock, so one instance can be
    shared across request-handler threads. Latency samples live in bounded
    deques (most recent 1000 per series) to cap memory use.
    """

    def __init__(self):
        self._lock = threading.Lock()

        # Request metrics
        self.request_count = 0
        self.request_latencies = deque(maxlen=1000)  # keep last 1000 samples
        self.request_errors = 0

        # Intent distribution
        self.intent_counts = defaultdict(int)

        # LLM metrics
        self.llm_call_count = 0
        self.llm_cache_hits = 0
        self.llm_cache_misses = 0
        self.llm_latencies = deque(maxlen=1000)
        self.llm_errors = 0

        # Retrieval metrics
        self.retrieval_count = 0
        self.retrieval_latencies = deque(maxlen=1000)
        self.retrieval_empty_results = 0

        # Cache metrics
        self.cache_hits = 0
        self.cache_misses = 0

        # Circuit breaker metrics
        self.circuit_breaker_opens = 0
        self.circuit_breaker_failures = 0

        # Requests currently in flight
        self.active_requests = 0

        # Collection start time (used for uptime reporting)
        self.start_time = datetime.now()

    def record_request(self, latency_ms: float, intent: Optional[str] = None, error: bool = False):
        """Record one completed request: latency, optional intent, error flag."""
        with self._lock:
            self.request_count += 1
            self.request_latencies.append(latency_ms)
            # bool subclasses int, so truthiness folds straight into the counter.
            self.request_errors += bool(error)
            if intent:
                self.intent_counts[intent] += 1

    def record_llm_call(self, latency_ms: float, cache_hit: bool = False, error: bool = False):
        """Record one LLM call and whether it was served from cache."""
        with self._lock:
            self.llm_call_count += 1
            self.llm_latencies.append(latency_ms)
            bucket = "llm_cache_hits" if cache_hit else "llm_cache_misses"
            setattr(self, bucket, getattr(self, bucket) + 1)
            self.llm_errors += bool(error)

    def record_retrieval(self, latency_ms: float, result_count: int):
        """Record one retrieval, counting searches that returned nothing."""
        with self._lock:
            self.retrieval_count += 1
            self.retrieval_latencies.append(latency_ms)
            self.retrieval_empty_results += (result_count == 0)

    def record_cache_access(self, hit: bool):
        """Record one cache lookup as either a hit or a miss."""
        with self._lock:
            bucket = "cache_hits" if hit else "cache_misses"
            setattr(self, bucket, getattr(self, bucket) + 1)

    def record_circuit_breaker_event(self, opened: bool = False, failure: bool = False):
        """Record a circuit-breaker open and/or tracked failure."""
        with self._lock:
            self.circuit_breaker_opens += bool(opened)
            self.circuit_breaker_failures += bool(failure)

    def increment_active_requests(self):
        """Mark one more request as in flight."""
        with self._lock:
            self.active_requests += 1

    def decrement_active_requests(self):
        """Mark one request as finished; the count never drops below zero."""
        with self._lock:
            self.active_requests = max(0, self.active_requests - 1)

    @staticmethod
    def _percentile(sorted_samples, p):
        """Linear-interpolation percentile over an already-sorted sequence."""
        if not sorted_samples:
            return 0
        rank = (len(sorted_samples) - 1) * p
        lo = int(rank)
        frac = rank - lo
        if lo + 1 < len(sorted_samples):
            return sorted_samples[lo] * (1 - frac) + sorted_samples[lo + 1] * frac
        return sorted_samples[lo]

    def get_metrics(self) -> Dict:
        """Return a point-in-time snapshot of every metric as a nested dict."""
        with self._lock:
            uptime = datetime.now() - self.start_time

            # Sort each series once; an empty series degrades to [0] so the
            # min/max/percentile fields are always present in the output.
            req_sorted = sorted(self.request_latencies) or [0]
            llm_sorted = sorted(self.llm_latencies) or [0]
            ret_sorted = sorted(self.retrieval_latencies) or [0]

            def summary(samples, include_p99=False):
                # Latency roll-up for one sorted sample series.
                stats = {
                    "min": min(samples),
                    "max": max(samples),
                    "p50": self._percentile(samples, 0.50),
                    "p95": self._percentile(samples, 0.95),
                }
                if include_p99:
                    stats["p99"] = self._percentile(samples, 0.99)
                return stats

            return {
                "uptime_seconds": uptime.total_seconds(),
                "timestamp": datetime.now().isoformat(),

                # Request metrics
                "requests": {
                    "total": self.request_count,
                    "active": self.active_requests,
                    "errors": self.request_errors,
                    "error_rate": self.request_errors / max(1, self.request_count),
                    "latency_ms": summary(req_sorted, include_p99=True),
                },

                # Intent distribution
                "intents": dict(self.intent_counts),

                # LLM metrics
                "llm": {
                    "total_calls": self.llm_call_count,
                    "cache_hits": self.llm_cache_hits,
                    "cache_misses": self.llm_cache_misses,
                    "cache_hit_rate": self.llm_cache_hits / max(1, self.llm_call_count),
                    "errors": self.llm_errors,
                    "latency_ms": summary(llm_sorted),
                },

                # Retrieval metrics
                "retrieval": {
                    "total_searches": self.retrieval_count,
                    "empty_results": self.retrieval_empty_results,
                    "empty_result_rate": self.retrieval_empty_results / max(1, self.retrieval_count),
                    "latency_ms": summary(ret_sorted),
                },

                # Cache metrics
                "cache": {
                    "hits": self.cache_hits,
                    "misses": self.cache_misses,
                    "hit_rate": self.cache_hits / max(1, self.cache_hits + self.cache_misses),
                },

                # Circuit breaker metrics
                "circuit_breaker": {
                    "opens": self.circuit_breaker_opens,
                    "failures": self.circuit_breaker_failures,
                },
            }

    def reset_metrics(self):
        """Zero all counters, drop all samples, and restart the uptime clock.

        Note: active_requests is not reset, mirroring the original behavior —
        it tracks work currently in flight rather than accumulated history.
        """
        with self._lock:
            for counter in (
                "request_count", "request_errors",
                "llm_call_count", "llm_cache_hits", "llm_cache_misses", "llm_errors",
                "retrieval_count", "retrieval_empty_results",
                "cache_hits", "cache_misses",
                "circuit_breaker_opens", "circuit_breaker_failures",
            ):
                setattr(self, counter, 0)
            self.request_latencies.clear()
            self.llm_latencies.clear()
            self.retrieval_latencies.clear()
            self.intent_counts.clear()
            self.start_time = datetime.now()
217
+
218
+
219
# Global metrics instance: module-level singleton shared by all importers.
metrics = MetricsCollector()
utils/request_logger.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLite-based request logging for analytics and debugging.
3
+ """
4
+ import sqlite3
5
+ import threading
6
+ import json
7
+ from typing import Optional, Dict, Any
8
+ from datetime import datetime
9
+ from config import config
10
+ import os
11
+
12
+
13
class RequestLogger:
    """Thread-safe request logger with SQLite backend.

    Implemented as a process-wide singleton: the first instantiation opens
    (and, if needed, creates) the SQLite database at
    ``config.REQUEST_LOG_DB_PATH``; every later ``RequestLogger()`` call
    returns that same instance. The class-level ``_lock`` guards both the
    singleton construction and every database operation, which is what makes
    the single shared connection (opened with ``check_same_thread=False``)
    safe to use from multiple threads. All query helpers swallow exceptions
    and return empty/zero values so logging can never break request handling.
    """

    # Singleton instance; populated on first construction.
    _instance = None
    # Guards singleton creation AND serializes all connection access
    # (methods use it as ``self._lock``, which resolves to this same object).
    _lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking: the unlocked fast path avoids contention
        # once the singleton exists; the locked re-check prevents two threads
        # from both initializing the database.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super(RequestLogger, cls).__new__(cls)
                    cls._instance._initialize_db()
        return cls._instance

    def _initialize_db(self):
        """Initialize the SQLite database."""
        # Ensure the parent directory exists before connecting; dirname is
        # empty when the configured path is a bare filename in the CWD.
        db_dir = os.path.dirname(config.REQUEST_LOG_DB_PATH)
        if db_dir:
            os.makedirs(db_dir, exist_ok=True)

        self.db_path = config.REQUEST_LOG_DB_PATH
        # One shared connection for the whole process; check_same_thread=False
        # permits cross-thread use, and _lock serializes every access to it.
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS requests (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT NOT NULL,
                request_id TEXT,
                user_ip TEXT,
                query TEXT,
                intent TEXT,
                extracted_entities TEXT,
                retrieval_count INTEGER,
                latency_ms REAL,
                status TEXT,
                error_message TEXT,
                context_sources TEXT
            )
        """)

        # Create indices for common queries
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON requests(timestamp)")
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_intent ON requests(intent)")
        self.conn.execute("CREATE INDEX IF NOT EXISTS idx_status ON requests(status)")

        self.conn.commit()

    def log_request(
        self,
        request_id: str,
        query: str,
        intent: Optional[str] = None,
        extracted_entities: Optional[Dict] = None,
        retrieval_count: int = 0,
        latency_ms: float = 0,
        status: str = "success",
        error_message: Optional[str] = None,
        context_sources: Optional[list] = None,
        user_ip: Optional[str] = None
    ):
        """Log a request to the database.

        Args:
            request_id: Correlation id for the request.
            query: Raw user query; stored truncated to 500 characters.
            intent: Classified intent label, if known.
            extracted_entities: Entity dict; stored as a JSON string.
            retrieval_count: Number of retrieved context chunks.
            latency_ms: End-to-end request latency in milliseconds.
            status: Outcome label ('success' by default; 'error' is what
                get_error_rate() counts).
            error_message: Error description when the request failed.
            context_sources: Source identifiers; only the first 10 are stored.
            user_ip: Client IP; stored as 'unknown' when absent.
        """
        try:
            with self._lock:
                self.conn.execute("""
                    INSERT INTO requests (
                        timestamp, request_id, user_ip, query, intent,
                        extracted_entities, retrieval_count, latency_ms,
                        status, error_message, context_sources
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    datetime.now().isoformat(),
                    request_id,
                    user_ip or 'unknown',
                    query[:500],  # Truncate long queries
                    intent,
                    json.dumps(extracted_entities) if extracted_entities else None,
                    retrieval_count,
                    latency_ms,
                    status,
                    error_message,
                    json.dumps(context_sources[:10]) if context_sources else None  # Limit to 10 sources
                ))
                self.conn.commit()
        except Exception as e:
            # Don't let logging errors break the application
            print(f"[RequestLogger] Failed to log request: {e}")

    def get_recent_requests(self, limit: int = 100) -> list:
        """Get recent requests.

        Returns a list of dicts (newest first); empty list on any failure.
        Ordering by the ISO-8601 timestamp string works because ISO-8601
        sorts lexicographically.
        """
        try:
            with self._lock:
                cursor = self.conn.execute("""
                    SELECT timestamp, request_id, query, intent, latency_ms, status
                    FROM requests
                    ORDER BY timestamp DESC
                    LIMIT ?
                """, (limit,))

                return [
                    {
                        "timestamp": row[0],
                        "request_id": row[1],
                        "query": row[2],
                        "intent": row[3],
                        "latency_ms": row[4],
                        "status": row[5]
                    }
                    for row in cursor.fetchall()
                ]
        except Exception as e:
            print(f"[RequestLogger] Failed to fetch requests: {e}")
            return []

    def get_intent_distribution(self, hours: int = 24) -> Dict[str, int]:
        """Get intent distribution for the last N hours.

        Returns a mapping of intent -> request count; {} on failure.
        """
        try:
            with self._lock:
                # timedelta is imported locally; the module top imports only
                # datetime from the datetime module.
                from datetime import timedelta
                cutoff = (datetime.now() - timedelta(hours=hours)).isoformat()

                cursor = self.conn.execute("""
                    SELECT intent, COUNT(*) as count
                    FROM requests
                    WHERE timestamp > ?
                    GROUP BY intent
                """, (cutoff,))

                return {row[0]: row[1] for row in cursor.fetchall()}
        except Exception as e:
            print(f"[RequestLogger] Failed to get intent distribution: {e}")
            return {}

    def get_error_rate(self, hours: int = 24) -> float:
        """Get error rate for the last N hours.

        Counts rows with status = 'error' against all rows in the window;
        returns 0.0 when there are no rows or on any failure.
        """
        try:
            with self._lock:
                from datetime import timedelta
                cutoff = (datetime.now() - timedelta(hours=hours)).isoformat()

                cursor = self.conn.execute("""
                    SELECT
                        COUNT(*) as total,
                        SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as errors
                    FROM requests
                    WHERE timestamp > ?
                """, (cutoff,))

                row = cursor.fetchone()
                total, errors = row[0], row[1]

                # The total > 0 guard also protects against errors being NULL
                # (SUM over zero rows), so the division is always int / int.
                return errors / max(1, total) if total > 0 else 0.0
        except Exception as e:
            print(f"[RequestLogger] Failed to get error rate: {e}")
            return 0.0

    def get_average_latency(self, intent: Optional[str] = None, hours: int = 24) -> float:
        """Get average latency, optionally filtered by intent.

        Returns the mean latency_ms over the window (0.0 when there are no
        matching rows or on failure).
        """
        try:
            with self._lock:
                from datetime import timedelta
                cutoff = (datetime.now() - timedelta(hours=hours)).isoformat()

                if intent:
                    cursor = self.conn.execute("""
                        SELECT AVG(latency_ms)
                        FROM requests
                        WHERE timestamp > ? AND intent = ?
                    """, (cutoff, intent))
                else:
                    cursor = self.conn.execute("""
                        SELECT AVG(latency_ms)
                        FROM requests
                        WHERE timestamp > ?
                    """, (cutoff,))

                # AVG over zero rows yields NULL -> None in Python.
                result = cursor.fetchone()[0]
                return result if result is not None else 0.0
        except Exception as e:
            print(f"[RequestLogger] Failed to get average latency: {e}")
            return 0.0
192
+
193
+
194
# Global request logger instance: instantiating the singleton here opens the
# SQLite database once at import time; all importers share this object.
request_logger = RequestLogger()
utils/validators.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Input validation and sanitization utilities.
3
+ """
4
+ import re
5
+ import os
6
+ from typing import Optional, Dict, Any
7
+ from pathlib import Path
8
+ from config import config
9
+
10
+
11
class ValidationError(Exception):
    """Raised when user-supplied input fails validation or sanitization."""
    pass


class InputValidator:
    """Centralized input validation.

    All validators either return True or raise ValidationError with a
    human-readable message; they never return False.
    """

    @staticmethod
    def validate_file_upload(filename: str, file_size_bytes: int) -> bool:
        """
        Validate uploaded file.

        Args:
            filename: Name of the uploaded file
            file_size_bytes: Size of the file in bytes

        Raises:
            ValidationError: If the extension is not allowed, the file is
                empty, or it exceeds config.MAX_FILE_SIZE_MB

        Returns:
            True if valid
        """
        # Check file extension against the configured whitelist
        ext = Path(filename).suffix.lower().lstrip('.')
        if ext not in config.ALLOWED_FILE_TYPES:
            raise ValidationError(
                f"Invalid file type '.{ext}'. Allowed types: {', '.join(config.ALLOWED_FILE_TYPES)}"
            )

        # Check file size
        max_size_bytes = config.MAX_FILE_SIZE_MB * 1024 * 1024
        if file_size_bytes > max_size_bytes:
            raise ValidationError(
                f"File size ({file_size_bytes / 1024 / 1024:.2f} MB) exceeds maximum allowed size ({config.MAX_FILE_SIZE_MB} MB)"
            )

        if file_size_bytes == 0:
            raise ValidationError("File is empty")

        return True

    @staticmethod
    def sanitize_filename(filename: str) -> str:
        """
        Sanitize filename to prevent directory traversal.

        Args:
            filename: Original filename

        Returns:
            Sanitized filename (never empty; falls back to "unnamed_file")
        """
        # Remove any path components
        filename = os.path.basename(filename)

        # Remove or replace dangerous characters
        filename = re.sub(r'[^\w\s\-\.]', '_', filename)

        # Remove leading/trailing dots and spaces
        filename = filename.strip('. ')

        # Ensure filename is not empty after sanitization
        if not filename:
            filename = "unnamed_file"

        return filename

    @staticmethod
    def sanitize_path(path: str, base_dir: str) -> str:
        """
        Sanitize and validate file path to prevent directory traversal.

        Args:
            path: User-provided path
            base_dir: Base directory that path must be within

        Raises:
            ValidationError: If path resolves outside base directory

        Returns:
            Sanitized absolute path
        """
        # Resolve to absolute path
        abs_base = os.path.abspath(base_dir)
        abs_path = os.path.abspath(os.path.join(base_dir, path))

        # A bare prefix check would wrongly accept sibling directories such
        # as "/base_evil" for base "/base". Accept only the base directory
        # itself or paths strictly under base + path separator.
        if abs_path != abs_base and not abs_path.startswith(abs_base + os.sep):
            raise ValidationError("Invalid path: directory traversal detected")

        return abs_path

    @staticmethod
    def validate_query_input(query: str, max_length: int = 10000) -> bool:
        """
        Validate user query input.

        Args:
            query: User query string
            max_length: Maximum allowed length

        Raises:
            ValidationError: If the query is empty, too long, or matches a
                suspicious script-injection pattern

        Returns:
            True if valid
        """
        if not query or not query.strip():
            raise ValidationError("Query cannot be empty")

        if len(query) > max_length:
            raise ValidationError(f"Query too long (max {max_length} characters)")

        # Check for suspicious patterns (basic XSS prevention)
        suspicious_patterns = [
            r'<script',
            r'javascript:',
            r'onerror=',
            r'onclick=',
        ]

        query_lower = query.lower()
        for pattern in suspicious_patterns:
            if re.search(pattern, query_lower):
                raise ValidationError("Query contains potentially malicious content")

        return True

    @staticmethod
    def validate_metadata_filters(filters: Dict[str, Any]) -> bool:
        """
        Validate metadata filters.

        Args:
            filters: Filter dictionary; keys must be in the whitelist and
                values must be short strings or small string lists

        Raises:
            ValidationError: If validation fails

        Returns:
            True if valid
        """
        if not isinstance(filters, dict):
            raise ValidationError("Filters must be a dictionary")

        # Whitelist of allowed filter keys
        allowed_keys = {
            'insurer', 'insurance_type', 'product_name',
            'document_type', 'section', 'plan_id'
        }

        for key in filters.keys():
            if key not in allowed_keys:
                raise ValidationError(f"Invalid filter key: {key}")

        # Validate filter values (cap lengths to bound query cost)
        for key, value in filters.items():
            if isinstance(value, str):
                if len(value) > 500:
                    raise ValidationError(f"Filter value too long for key: {key}")
            elif isinstance(value, list):
                if len(value) > 50:
                    raise ValidationError(f"Too many values in filter list for key: {key}")
                for item in value:
                    if isinstance(item, str) and len(item) > 500:
                        raise ValidationError(f"Filter value too long in list for key: {key}")

        return True

    @staticmethod
    def validate_calculation_inputs(
        age: Optional[int] = None,
        premium_amount: Optional[float] = None,
        policy_term: Optional[str] = None,
        payment_term: Optional[str] = None
    ) -> bool:
        """
        Validate inputs for benefit calculations.

        Args:
            age: Age in whole years (0-120) when provided
            premium_amount: Premium in rupees (1,000 - 1,00,00,000) when provided
            policy_term: Term string containing a year count (1-100) when provided
            payment_term: Payment term string containing a year count (1-100) when provided

        Raises:
            ValidationError: If any provided value is out of range

        Returns:
            True if valid
        """
        if age is not None:
            # NOTE: bool is a subclass of int and would pass isinstance here;
            # callers are expected to pass real integers.
            if not isinstance(age, int) or age < 0 or age > 120:
                raise ValidationError(f"Invalid age: {age}. Age must be between 0 and 120")

        if premium_amount is not None:
            if not isinstance(premium_amount, (int, float)) or premium_amount <= 0:
                raise ValidationError(f"Invalid premium amount: {premium_amount}. Must be positive")

            # Reasonable bounds (1000 to 1 crore)
            if premium_amount < 1000 or premium_amount > 10000000:
                raise ValidationError(
                    f"Premium amount {premium_amount} outside reasonable range (₹1,000 - ₹1,00,00,000)"
                )

        if policy_term is not None:
            # Extract the first number from the policy term string
            pt_match = re.search(r'\d+', str(policy_term))
            if pt_match:
                pt_years = int(pt_match.group())
                if pt_years < 1 or pt_years > 100:
                    raise ValidationError(f"Invalid policy term: {pt_years} years. Must be between 1 and 100")

        if payment_term is not None:
            # Extract the first number from the payment term string
            ppt_match = re.search(r'\d+', str(payment_term))
            if ppt_match:
                ppt_years = int(ppt_match.group())
                if ppt_years < 1 or ppt_years > 100:
                    raise ValidationError(f"Invalid payment term: {ppt_years} years. Must be between 1 and 100")

        return True

    @staticmethod
    def validate_api_key(provided_key: Optional[str]) -> bool:
        """
        Validate API key if authentication is enabled.

        Args:
            provided_key: API key provided by client

        Raises:
            ValidationError: If the key is missing or wrong while
                config.ENABLE_API_KEY_AUTH is on

        Returns:
            True if valid or auth disabled
        """
        if not config.ENABLE_API_KEY_AUTH:
            return True

        if not provided_key:
            raise ValidationError("API key required but not provided")

        if provided_key != config.API_KEY:
            raise ValidationError("Invalid API key")

        return True
+ return True