Commit 690700c
Initial deployment to HuggingFace Spaces
- .env.example +177 -0
- .gitignore +479 -0
- Dockerfile +27 -0
- README_SPACE.md +28 -0
- app/__init__.py +11 -0
- app/api/__init__.py +6 -0
- app/api/v1/__init__.py +8 -0
- app/api/v1/auth.py +193 -0
- app/api/v1/chat.py +1346 -0
- app/config.py +236 -0
- app/core/__init__.py +6 -0
- app/core/llm_manager.py +445 -0
- app/db/__init__.py +6 -0
- app/db/mongodb.py +390 -0
- app/db/repositories/__init__.py +6 -0
- app/db/repositories/conversation_repository.py +1049 -0
- app/db/repositories/user_repository.py +155 -0
- app/main.py +301 -0
- app/ml/__init__.py +6 -0
- app/ml/policy_network.py +610 -0
- app/ml/retriever.py +522 -0
- app/models/__init__.py +3 -0
- app/models/user.py +69 -0
- app/services/__init__.py +6 -0
- app/services/chat_service.py +335 -0
- app/utils/dependencies.py +87 -0
- app/utils/security.py +190 -0
- backups/backup_chat_service.py +340 -0
- backups/backup_config.py +640 -0
- backups/backup_llm_manager.py +430 -0
- backups/backup_main.py +275 -0
- backups/backup_requirements.txt +182 -0
- build_faiss_index.py +339 -0
- folder_structure.txt +40 -0
- requirements.txt +38 -0
.env.example
ADDED
@@ -0,0 +1,177 @@
+# ================================================================================
+# BANKING RAG CHATBOT API - ENVIRONMENT VARIABLES
+# Copy this file to .env and fill in your actual values
+# ================================================================================
+
+# ============================================================================
+# APPLICATION SETTINGS
+# ============================================================================
+DEBUG=False
+ENVIRONMENT=production
+
+# ============================================================================
+# MONGODB (Get from: https://www.mongodb.com/cloud/atlas)
+# ============================================================================
+# Connection string format:
+# example string here
+MONGODB_URI=example
+DATABASE_NAME=banking_rag_db
+
+# ============================================================================
+# SECURITY
+# ============================================================================
+# Generate a secure secret key with:
+# python -c "import secrets; print(secrets.token_urlsafe(32))"
+SECRET_KEY=your-secret-key-here-change-this-in-production-min-32-characters
+ALGORITHM=HS256
+ACCESS_TOKEN_EXPIRE_MINUTES=1440
+
+# ============================================================================
+# LLM API KEYS - ALL THREE CO-EXIST (No fallback logic)
+# ============================================================================
+
+# --- GOOGLE GEMINI API (PRIMARY) ---
+# Get from: https://aistudio.google.com/app/apikey
+# You have Google Pro - this is your main LLM for response generation
+GOOGLE_API_KEY=AIzaSyXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+# Which Gemini model to use
+# Options: gemini-2.0-flash-lite, gemini-1.5-flash
+GEMINI_MODEL=gemini-2.0-flash-lite
+
+# Gemini rate limits (Pro tier)
+GEMINI_REQUESTS_PER_MINUTE=60
+GEMINI_TOKENS_PER_MINUTE=60000
+
+
+# --- GROQ API (SECONDARY) ---
+# Get from: https://console.groq.com/keys
+# Single key for specific fast inference tasks (llama models)
+GROQ_API_KEY=gsk_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+# Groq model (fast inference for policy evaluations)
+GROQ_MODEL=llama3-70b-8192
+
+# Groq rate limits (Free tier)
+GROQ_REQUESTS_PER_MINUTE=30
+GROQ_TOKENS_PER_MINUTE=30000
+
+
+# --- HUGGING FACE TOKEN (REQUIRED) ---
+# Get from: https://huggingface.co/settings/tokens
+# Required for: Model downloads (e5-base-v2, BERT), embeddings
+HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+# ============================================================================
+# MODEL PATHS (Local storage)
+# ============================================================================
+RETRIEVER_MODEL_PATH=models/best_retriever_model.pth
+POLICY_MODEL_PATH=models/policy_network.pt
+FAISS_INDEX_PATH=models/faiss_index.pkl
+KB_PATH=data/final_knowledge_base.jsonl
+
+# ============================================================================
+# RAG PARAMETERS
+# ============================================================================
+# Number of documents to retrieve from FAISS
+TOP_K=5
+
+# Minimum similarity threshold for retrieval
+SIMILARITY_THRESHOLD=0.5
+
+# Maximum context length to send to LLM (in characters)
+MAX_CONTEXT_LENGTH=2000
+
+# ============================================================================
+# POLICY NETWORK PARAMETERS
+# ============================================================================
+# Maximum sequence length for policy input
+POLICY_MAX_LEN=256
+
+# Confidence threshold for policy decisions
+CONFIDENCE_THRESHOLD=0.7
+
+# ============================================================================
+# LLM GENERATION PARAMETERS
+# ============================================================================
+# Temperature for response generation (0.0 = deterministic, 1.0 = creative)
+LLM_TEMPERATURE=0.7
+
+# Maximum tokens to generate in response
+LLM_MAX_TOKENS=512
+
+# System prompt template
+SYSTEM_PROMPT=You are a helpful banking assistant. Answer questions clearly and concisely.
+
+# ============================================================================
+# LLM ROUTING STRATEGY
+# ============================================================================
+# Define which LLM to use for which task
+# Options: gemini, groq
+
+# Main chat responses (user-facing) - Use Gemini Pro (best quality)
+CHAT_LLM=gemini
+
+# Response evaluation (RL training) - Use Groq (fast, good enough)
+EVALUATION_LLM=groq
+
+# Policy network inference - Local BERT model (no API call)
+POLICY_LLM=local
+
+# ============================================================================
+# CORS SETTINGS (for frontend)
+# ============================================================================
+# Comma-separated list of allowed origins
+# Use "*" for development (allows all origins)
+# For production, specify exact domains:
+# ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com
+ALLOWED_ORIGINS=*
+
+# ============================================================================
+# LOGGING
+# ============================================================================
+LOG_LEVEL=INFO
+
+# ============================================================================
+# OPTIONAL: Advanced Settings
+# ============================================================================
+# Maximum conversation history to include in context
+MAX_HISTORY_TURNS=4
+
+# Enable/disable response caching
+ENABLE_CACHE=True
+
+# Cache TTL in seconds (1 hour)
+CACHE_TTL=3600
+
+# Environment
+ENVIRONMENT=production
+DEBUG=False
+
+# MongoDB
+MONGODB_URI=your_mongodb_uri_here
+
+# JWT
+SECRET_KEY=your-secret-key-here
+ALGORITHM=HS256
+ACCESS_TOKEN_EXPIRE_MINUTES=1440
+
+# Groq API Keys
+GROQ_API_KEY_1=your_groq_key_1
+GROQ_API_KEY_2=your_groq_key_2
+GROQ_API_KEY_3=your_groq_key_3
+
+# HuggingFace Tokens
+HF_TOKEN_1=your_hf_token_1
+HF_TOKEN_2=your_hf_token_2
+HF_TOKEN_3=your_hf_token_3
+
+# HuggingFace Model Repository
+HF_MODEL_REPO=YOUR_USERNAME/questrag-models
+
+# CORS
+ALLOWED_ORIGINS=*
+
+# Device
+DEVICE=cpu
+
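Two things stand out when consuming this file. Several keys are defined twice (`ENVIRONMENT`, `DEBUG`, `MONGODB_URI`, `SECRET_KEY`, `ALGORITHM`, `ACCESS_TOKEN_EXPIRE_MINUTES`, `ALLOWED_ORIGINS`), and most dotenv loaders keep the last occurrence, so the values near the bottom of the file win. The commit's `app/config.py` (+236 lines, not shown in this excerpt) presumably maps these variables onto a `settings` object, since other modules do `from app.config import settings`; below is a minimal sketch of that pattern with pydantic-settings, with field names assumed to mirror the env keys above, not the committed implementation.

```python
# Minimal sketch (not the committed app/config.py): loading a subset of
# these variables with pydantic-settings. Field names are assumptions
# mirroring the env keys in .env.example.
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    # Read .env if present; ignore keys this class doesn't declare.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    DEBUG: bool = False
    ENVIRONMENT: str = "production"
    MONGODB_URI: str
    DATABASE_NAME: str = "banking_rag_db"
    SECRET_KEY: str
    ALGORITHM: str = "HS256"
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 1440
    TOP_K: int = 5
    SIMILARITY_THRESHOLD: float = 0.5
    CONFIDENCE_THRESHOLD: float = 0.7


# Reads .env, falling back to the process environment; raises a validation
# error if a required key (e.g. MONGODB_URI) is missing from both.
settings = Settings()
```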
.gitignore
ADDED
@@ -0,0 +1,479 @@
+# # ================================================================================
+# # BANKING RAG CHATBOT - .gitignore
+# # Prevents committing sensitive files, large models, and temporary files
+# # IMPORTANT: This protects your API keys and credentials!
+# # ================================================================================
+
+# # ============================================================================
+# # ENVIRONMENT VARIABLES (MOST IMPORTANT)
+# # ============================================================================
+# .env
+# .env.local
+# .env.production
+# .env.development
+# .env.*.local
+
+# # ============================================================================
+# # PYTHON
+# # ============================================================================
+# # Byte-compiled / optimized / DLL files
+# __pycache__/
+# *.py[cod]
+# *$py.class
+# *.so
+
+# # C extensions
+# *.so
+
+# # Distribution / packaging
+# .Python
+# build/
+# develop-eggs/
+# dist/
+# downloads/
+# eggs/
+# .eggs/
+# lib/
+# lib64/
+# parts/
+# sdist/
+# var/
+# wheels/
+# pip-wheel-metadata/
+# share/python-wheels/
+# *.egg-info/
+# .installed.cfg
+# *.egg
+# MANIFEST
+
+# # PyInstaller
+# *.manifest
+# *.spec
+
+# # Unit test / coverage reports
+# htmlcov/
+# .tox/
+# .nox/
+# .coverage
+# .coverage.*
+# .cache
+# nosetests.xml
+# coverage.xml
+# *.cover
+# *.py,cover
+# .hypothesis/
+# .pytest_cache/
+
+# # Jupyter Notebook
+# .ipynb_checkpoints
+# *.ipynb
+
+# # IPython
+# profile_default/
+# ipython_config.py
+
+# # Virtual environments
+# venv/
+# env/
+# ENV/
+# env.bak/
+# venv.bak/
+# .venv
+# .env/
+
+# # pyenv
+# .python-version
+
+# # pipenv
+# Pipfile.lock
+
+# # Poetry
+# poetry.lock
+
+# # ============================================================================
+# # ML MODELS (Optional - depends on Git LFS usage)
+# # ============================================================================
+# # If NOT using Git LFS, uncomment these to avoid committing large models:
+# # models/*.pth
+# # models/*.pt
+# # models/*.pkl
+# # models/*.bin
+# # models/*.h5
+
+# # If using Git LFS, comment out the above and models will be tracked by LFS
+
+# # Temporary model files
+# models/temp/
+# models/checkpoints/
+# *.ckpt
+
+# # ============================================================================
+# # DATA FILES
+# # ============================================================================
+# # Prevent committing large datasets
+# data/*.csv
+# data/*.json
+# data/*.jsonl
+# data/*.parquet
+# data/*.feather
+# data/raw/
+# data/processed/
+# data/temp/
+
+# # Keep .gitkeep files to preserve directory structure
+# !data/.gitkeep
+# !models/.gitkeep
+
+# # ============================================================================
+# # LOGS
+# # ============================================================================
+# # Log files
+# *.log
+# logs/
+# *.log.*
+
+# # ============================================================================
+# # DATABASES
+# # ============================================================================
+# # Local database files
+# *.db
+# *.sqlite
+# *.sqlite3
+
+# # ============================================================================
+# # IDEs & EDITORS
+# # ============================================================================
+# # VSCode
+# .vscode/
+# *.code-workspace
+
+# # PyCharm
+# .idea/
+# *.iml
+# *.iws
+
+# # Sublime Text
+# *.sublime-project
+# *.sublime-workspace
+
+# # Vim
+# *.swp
+# *.swo
+# *~
+
+# # Emacs
+# *~
+# \#*\#
+# .\#*
+
+# # ============================================================================
+# # MACOS
+# # ============================================================================
+# .DS_Store
+# .AppleDouble
+# .LSOverride
+
+# # Thumbnails
+# ._*
+
+# # ============================================================================
+# # WINDOWS
+# # ============================================================================
+# Thumbs.db
+# Thumbs.db:encryptable
+# ehthumbs.db
+# ehthumbs_vista.db
+# Desktop.ini
+
+# # ============================================================================
+# # LINUX
+# # ============================================================================
+# *~
+
+# # ============================================================================
+# # DOCKER
+# # ============================================================================
+# # Docker files (if we decide to add Docker later)
+# # .dockerignore is separate
+# docker-compose.override.yml
+# .docker/
+
+# # ============================================================================
+# # TEMPORARY FILES
+# # ============================================================================
+# *.tmp
+# *.temp
+# *.bak
+# *.backup
+# *.old
+# tmp/
+# temp/
+
+# # ============================================================================
+# # SECRETS & CREDENTIALS
+# # ============================================================================
+# # Any file with "secret" or "credential" in name
+# *secret*
+# *credential*
+# *password*
+# *.pem
+# *.key
+# *.crt
+# *.cer
+
+# # AWS credentials
+# .aws/
+
+# # Google Cloud credentials
+# *-credentials.json
+# service-account*.json
+
+# # ============================================================================
+# # CACHE
+# # ============================================================================
+# .cache/
+# *.cache
+
+# # Hugging Face cache
+# .huggingface/
+# transformers_cache/
+
+# # ============================================================================
+# # MONITORING & PROFILING
+# # ============================================================================
+# *.prof
+# *.lprof
+
+# # ============================================================================
+# # FRONTEND (if you add frontend to this repo)
+# # ============================================================================
+# node_modules/
+# .next/
+# out/
+# build/
+# dist/
+
+# # ============================================================================
+# # MISC
+# # ============================================================================
+# # System files
+# .Trash-*
+
+# # Backup files
+# *~
+# *.orig
+# *.rej
+
+# # Compressed files
+# *.zip
+# *.tar.gz
+# *.rar
+# *.7z
+
+# # ============================================================================
+# # PROJECT-SPECIFIC
+# # ============================================================================
+# # Add any project-specific patterns here
+# uploads/
+# downloads/
+# exports/
+
+# # Test outputs
+# test_outputs/
+# test_results/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# ================================================================================
+# BANKING RAG CHATBOT - Backend .gitignore
+# Prevents committing sensitive files, large models, and temporary files
+# ================================================================================
+
+# ============================================================================
+# ENVIRONMENT VARIABLES (MOST IMPORTANT - PROTECTS API KEYS)
+# ============================================================================
+.env
+.env.local
+.env.production
+.env.development
+.env.*.local
+
+# ============================================================================
+# PYTHON
+# ============================================================================
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+.venv
+.env/
+
+# Testing
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb
+
+# ============================================================================
+# ML MODELS & DATA (DO NOT COMMIT - TOO LARGE 1GB+)
+# ============================================================================
+# Model files (PyTorch, TensorFlow, etc.)
+app/models/*.pth
+app/models/*.pt
+app/models/*.pkl
+app/models/*.bin
+app/models/*.h5
+app/models/*.onnx
+app/models/*.safetensors
+models/*.pth
+models/*.pt
+models/*.pkl
+models/*.bin
+
+# FAISS indices (large vector databases)
+app/models/*.index
+app/models/*.faiss
+*.index
+*.faiss
+
+# Knowledge base and data files
+app/data/*.jsonl
+app/data/*.json
+app/data/*.csv
+app/data/*.parquet
+data/*.jsonl
+data/*.json
+data/*.csv
+
+# Temporary model files
+models/temp/
+models/checkpoints/
+*.ckpt
+
+# ============================================================================
+# LOGS
+# ============================================================================
+*.log
+logs/
+*.log.*
+
+# ============================================================================
+# DATABASES
+# ============================================================================
+*.db
+*.sqlite
+*.sqlite3
+
+# ============================================================================
+# IDEs & EDITORS
+# ============================================================================
+# VSCode
+.vscode/
+*.code-workspace
+
+# PyCharm
+.idea/
+*.iml
+*.iws
+
+# ============================================================================
+# OS FILES
+# ============================================================================
+# macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+._*
+
+# Windows
+Thumbs.db
+ehthumbs.db
+Desktop.ini
+
+# Linux
+*~
+
+# ============================================================================
+# CACHE & TEMPORARY
+# ============================================================================
+.cache/
+*.cache
+.huggingface/
+transformers_cache/
+sentence_transformers/
+*.tmp
+*.temp
+*.bak
+tmp/
+temp/
+
+# ============================================================================
+# SECRETS & CREDENTIALS (DOUBLE CHECK)
+# ============================================================================
+*secret*
+*credential*
+*password*
+*.pem
+*.key
+*.crt
+*.cer
+.aws/
+*-credentials.json
+service-account*.json
+
+# ============================================================================
+# KEEP DIRECTORY STRUCTURE
+# ============================================================================
+# These .gitkeep files preserve empty directories
+!app/models/.gitkeep
+!app/data/.gitkeep
+!logs/.gitkeep
Dockerfile
ADDED
@@ -0,0 +1,27 @@
+# Use Python 3.12 slim
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements
+COPY requirements.txt .
+
+# Install Python packages
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy entire app
+COPY . .
+
+# Environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Expose HuggingFace Spaces port (7860)
+EXPOSE 7860
+
+# Run FastAPI
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README_SPACE.md
ADDED
@@ -0,0 +1,28 @@
+---
+title: QUESTRAG Backend
+emoji: 🏦
+colorFrom: blue
+colorTo: green
+sdk: docker
+pinned: false
+---
+
+# QUESTRAG Banking Chatbot Backend
+
+FastAPI backend for QUESTRAG - Banking RAG Chatbot with Reinforcement Learning.
+
+## Features
+- 🤖 RAG Pipeline with FAISS
+- 🧠 RL-based Policy Network
+- ⚡ Groq (Llama 3) + HuggingFace fallback
+- 🔐 JWT Authentication
+- 📊 MongoDB Atlas
+
+## API Documentation
+Visit `/docs` for interactive Swagger UI.
+
+## Endpoints
+- `POST /api/v1/auth/register` - Register new user
+- `POST /api/v1/auth/login` - Login
+- `POST /api/v1/chat/` - Send message (requires auth)
+- `GET /health` - Health check
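A hypothetical client sketch for the endpoints listed in the README (not part of the commit). `BASE_URL` is a placeholder for the deployed Space URL; the request/response field names follow the `UserRegister`/`Token` models used in `app/api/v1/auth.py` and the `ChatRequest`/`ChatResponse` models in `app/api/v1/chat.py` further down this diff.

```python
# Sketch of the register -> chat flow against the documented endpoints.
# BASE_URL and the credentials are placeholders, not real values.
import requests

BASE_URL = "https://YOUR_USERNAME-questrag-backend.hf.space"  # placeholder

# Register a new user; per auth.py, this returns a JWT immediately.
token = requests.post(
    f"{BASE_URL}/api/v1/auth/register",
    json={"email": "user@example.com", "password": "s3cret-pass", "full_name": "Test User"},
).json()["access_token"]

# Send an authenticated chat message (all chat endpoints require the bearer token).
reply = requests.post(
    f"{BASE_URL}/api/v1/chat/",
    json={"query": "What is my account balance?"},
    headers={"Authorization": f"Bearer {token}"},
).json()

print(reply["response"], reply["policy_action"], reply["documents_retrieved"])
```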
app/__init__.py
ADDED
@@ -0,0 +1,11 @@
+"""
+Banking RAG Chatbot API - Application Package
+This file marks the 'app' directory as a Python package.
+"""
+
+# Package version
+__version__ = "1.0.0"
+
+# Package metadata
+__author__ = "Eeshanya Joshi"
+__description__ = "Banking RAG Chatbot with RL-Optimized Retrieval"
app/api/__init__.py
ADDED
@@ -0,0 +1,6 @@
+"""
+API package - Contains all API endpoints
+"""
+
+__version__ = "1.0.0"
+__author__ = "Eeshanya Joshi"
app/api/v1/__init__.py
ADDED
@@ -0,0 +1,8 @@
+"""
+API Version 1 - All v1 endpoints
+"""
+
+# This makes it easier to import routers
+# Example: from app.api.v1 import chat, auth
+
+__version__ = "1.0.0"
app/api/v1/auth.py
ADDED
@@ -0,0 +1,193 @@
+"""
+Authentication API Endpoints
+User registration, login, and token management
+"""
+
+from fastapi import APIRouter, HTTPException, status, Depends
+from datetime import timedelta
+
+from app.models.user import UserRegister, UserLogin, Token, UserResponse, TokenData
+from app.db.repositories.user_repository import UserRepository
+from app.utils.security import verify_password, create_access_token
+from app.utils.dependencies import get_current_user
+from app.config import settings
+
+
+router = APIRouter()
+
+
+@router.post("/register", response_model=Token, status_code=status.HTTP_201_CREATED)
+async def register_user(user_data: UserRegister):
+    """
+    Register a new user.
+
+    Creates a new user account with hashed password and returns
+    an access token for immediate login.
+
+    Args:
+        user_data: User registration data (email, password, full_name)
+
+    Returns:
+        Token: JWT access token and user info
+
+    Raises:
+        HTTPException: If email already exists
+    """
+    user_repo = UserRepository()
+
+    try:
+        # Create user
+        user_id = await user_repo.create_user(
+            email=user_data.email,
+            password=user_data.password,
+            full_name=user_data.full_name
+        )
+
+        # Get created user
+        user = await user_repo.get_user_by_id(user_id)
+
+        # Generate access token
+        access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
+        access_token = create_access_token(
+            data={"user_id": user["user_id"], "email": user["email"]},
+            expires_delta=access_token_expires
+        )
+
+        # Return token and user info
+        return Token(
+            access_token=access_token,
+            token_type="bearer",
+            user=UserResponse(
+                user_id=user["user_id"],
+                email=user["email"],
+                full_name=user["full_name"],
+                created_at=user["created_at"]
+            )
+        )
+
+    except ValueError as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e)
+        )
+    except Exception as e:
+        print(f"❌ Registration error: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to register user"
+        )
+
+
+@router.post("/login", response_model=Token)
+async def login_user(user_data: UserLogin):
+    """
+    Login user and get access token.
+
+    Validates user credentials and returns JWT access token.
+
+    Args:
+        user_data: User login data (email, password)
+
+    Returns:
+        Token: JWT access token and user info
+
+    Raises:
+        HTTPException: If credentials are invalid
+    """
+    user_repo = UserRepository()
+
+    # Get user by email
+    user = await user_repo.get_user_by_email(user_data.email)
+
+    if not user:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid email or password",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+
+    # Verify password
+    if not verify_password(user_data.password, user["hashed_password"]):
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid email or password",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+
+    # Check if user is active
+    if not user.get("is_active", False):
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="User account is inactive"
+        )
+
+    # Generate access token
+    access_token_expires = timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
+    access_token = create_access_token(
+        data={"user_id": user["user_id"], "email": user["email"]},
+        expires_delta=access_token_expires
+    )
+
+    # Return token and user info
+    return Token(
+        access_token=access_token,
+        token_type="bearer",
+        user=UserResponse(
+            user_id=user["user_id"],
+            email=user["email"],
+            full_name=user["full_name"],
+            created_at=user["created_at"]
+        )
+    )
+
+
+@router.get("/me", response_model=UserResponse)
+async def get_current_user_info(current_user: TokenData = Depends(get_current_user)):
+    """
+    Get current authenticated user information.
+
+    Protected route that requires valid JWT token.
+
+    Args:
+        current_user: Current authenticated user (from token)
+
+    Returns:
+        UserResponse: Current user information
+    """
+    user_repo = UserRepository()
+    user = await user_repo.get_user_by_id(current_user.user_id)
+
+    if not user:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="User not found"
+        )
+
+    return UserResponse(
+        user_id=user["user_id"],
+        email=user["email"],
+        full_name=user["full_name"],
+        created_at=user["created_at"]
+    )
+
+
+@router.post("/logout")
+async def logout_user(current_user: TokenData = Depends(get_current_user)):
+    """
+    Logout user (client-side token deletion).
+
+    In JWT-based auth, logout is handled client-side by
+    deleting the token. This endpoint is for logging purposes.
+
+    Args:
+        current_user: Current authenticated user (from token)
+
+    Returns:
+        dict: Success message
+    """
+    print(f"👋 User logged out: {current_user.email}")
+
+    return {
+        "message": "Successfully logged out",
+        "user_id": current_user.user_id
+    }
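`auth.py` imports `verify_password` and `create_access_token` from `app/utils/security.py` (+190 lines in this commit, contents not shown in this excerpt). As a rough sketch only: such helpers are conventionally built on passlib's bcrypt context and a JWT library such as python-jose. The code below is an assumed shape consistent with how `auth.py` calls them, not the committed implementation, and the choice of passlib/python-jose is itself an assumption.

```python
# Sketch only -- NOT the committed app/utils/security.py.
# Assumes passlib (bcrypt) and python-jose; signatures match the calls in auth.py.
from datetime import datetime, timedelta
from typing import Optional

from jose import jwt
from passlib.context import CryptContext

from app.config import settings

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check a plaintext password against its bcrypt hash."""
    return pwd_context.verify(plain_password, hashed_password)


def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Sign a JWT carrying `data`, expiring after `expires_delta` (default 15 min)."""
    to_encode = data.copy()
    to_encode["exp"] = datetime.utcnow() + (expires_delta or timedelta(minutes=15))
    return jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM)
```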
app/api/v1/chat.py
ADDED
@@ -0,0 +1,1346 @@
+"""
+Chat API Endpoints (WITH AUTHENTICATION)
+RESTful API for the Banking RAG Chatbot
+
+NOW REQUIRES JWT TOKEN FOR ALL ENDPOINTS!
+
+Endpoints:
+- POST /chat - Send a message and get response (PROTECTED)
+- GET /chat/history/{conversation_id} - Get conversation history (PROTECTED)
+- POST /chat/conversation - Create new conversation (PROTECTED)
+- GET /chat/conversations - List user's conversations (PROTECTED)
+- DELETE /chat/conversation/{conversation_id} - Delete conversation (PROTECTED)
+- GET /chat/health - Health check (PUBLIC)
+"""
+
+from fastapi import APIRouter, HTTPException, status, Depends
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional
+from datetime import datetime
+
+from app.services.chat_service import chat_service
+from app.db.repositories.conversation_repository import ConversationRepository
+from app.utils.dependencies import get_current_user  # AUTH DEPENDENCY
+from app.models.user import TokenData  # USER DATA FROM TOKEN
+
+
+# ============================================================================
+# CREATE ROUTER
+# ============================================================================
+router = APIRouter()
+
+
+# ============================================================================
+# DEPENDENCY: Get ConversationRepository instance
+# ============================================================================
+def get_conversation_repo() -> ConversationRepository:
+    """
+    Dependency that provides ConversationRepository instance.
+    This ensures MongoDB is connected before repository is used.
+    """
+    return ConversationRepository()
+
+
+# ============================================================================
+# PYDANTIC MODELS (Request/Response schemas)
+# ============================================================================
+
+class ChatRequest(BaseModel):
+    """Request model for chat endpoint"""
+    query: str = Field(..., description="User query text", min_length=1, max_length=1000)
+    conversation_id: Optional[str] = Field(None, description="Optional conversation ID")
+
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "query": "What is my account balance?",
+                "conversation_id": "conv-123"
+            }
+        }
+
+
+class ChatResponse(BaseModel):
+    """Response model for chat endpoint"""
+    response: str = Field(..., description="Generated response text")
+    conversation_id: str = Field(..., description="Conversation ID")
+    policy_action: str = Field(..., description="Policy decision: FETCH or NO_FETCH")
+    policy_confidence: float = Field(..., description="Policy confidence score (0-1)")
+    documents_retrieved: int = Field(..., description="Number of documents retrieved")
+    top_doc_score: Optional[float] = Field(None, description="Best document similarity score")
+    total_time_ms: float = Field(..., description="Total processing time in milliseconds")
+    timestamp: str = Field(..., description="Response timestamp (ISO format)")
+
+
+class ConversationCreateResponse(BaseModel):
+    """Response after creating a conversation"""
+    conversation_id: str = Field(..., description="Created conversation ID")
+    created_at: str = Field(..., description="Creation timestamp")
+
+
+class MessageModel(BaseModel):
+    """Single message in conversation history"""
+    role: str = Field(..., description="Message role: user or assistant")
+    content: str = Field(..., description="Message content")
+    timestamp: str = Field(..., description="Message timestamp")
+    metadata: Optional[Dict] = Field(None, description="Optional metadata")
+
+
+class ConversationHistoryResponse(BaseModel):
+    """Response containing conversation history"""
+    conversation_id: str
+    messages: List[MessageModel]
+    message_count: int
+
+
+# ============================================================================
+# ENDPOINTS (ALL PROTECTED WITH JWT)
+# ============================================================================
+
+@router.post("/", response_model=ChatResponse, status_code=status.HTTP_200_OK)
+async def chat(
+    request: ChatRequest,
+    current_user: TokenData = Depends(get_current_user),
+    repo: ConversationRepository = Depends(get_conversation_repo)  # ← INJECT REPO
+):
+    """
+    Main chat endpoint - Send a query and get a response.
+
+    **REQUIRES AUTHENTICATION** - JWT token must be provided in Authorization header.
+    """
+    try:
+        # Get user_id from token
+        user_id = current_user.user_id
+
+        # If no conversation_id provided, create a new conversation
+        conversation_id = request.conversation_id
+        if not conversation_id:
+            conversation_id = await repo.create_conversation(user_id=user_id)
+        else:
+            # Verify user owns this conversation
+            conversation = await repo.get_conversation(conversation_id)
+            if not conversation:
+                raise HTTPException(
+                    status_code=status.HTTP_404_NOT_FOUND,
+                    detail="Conversation not found"
+                )
+            if conversation["user_id"] != user_id:
+                raise HTTPException(
+                    status_code=status.HTTP_403_FORBIDDEN,
+                    detail="Access denied - you don't own this conversation"
+                )
+
+        # Get conversation history
+        history = await repo.get_conversation_history(
+            conversation_id=conversation_id,
+            max_messages=10
+        )
+
+        # Save user message
+        await repo.add_message(
+            conversation_id=conversation_id,
+            message={
+                'role': 'user',
+                'content': request.query,
+                'timestamp': datetime.now()
+            }
+        )
+
+        # Process query through RAG pipeline
+        result = await chat_service.process_query(
+            query=request.query,
+            conversation_history=history,
+            user_id=user_id
+        )
+
+        # Save assistant message
+        await repo.add_message(
+            conversation_id=conversation_id,
+            message={
+                'role': 'assistant',
+                'content': result['response'],
+                'timestamp': datetime.now(),
+                'metadata': {
+                    'policy_action': result['policy_action'],
+                    'policy_confidence': result['policy_confidence'],
+                    'documents_retrieved': result['documents_retrieved'],
+                    'top_doc_score': result['top_doc_score']
+                }
+            }
+        )
+
+        # Log retrieval data for RL training
+        await repo.log_retrieval({
+            'conversation_id': conversation_id,
+            'user_id': user_id,
+            'query': request.query,
+            'policy_action': result['policy_action'],
+            'policy_confidence': result['policy_confidence'],
+            'should_retrieve': result['should_retrieve'],
+            'documents_retrieved': result['documents_retrieved'],
+            'top_doc_score': result['top_doc_score'],
+            'response': result['response'],
+            'retrieval_time_ms': result['retrieval_time_ms'],
+            'generation_time_ms': result['generation_time_ms'],
+            'total_time_ms': result['total_time_ms'],
+            'retrieved_docs_metadata': result.get('retrieved_docs_metadata', []),
+            'timestamp': datetime.now()
+        })
+
+        # Return response
+        return ChatResponse(
+            response=result['response'],
+            conversation_id=conversation_id,
+            policy_action=result['policy_action'],
+            policy_confidence=result['policy_confidence'],
+            documents_retrieved=result['documents_retrieved'],
+            top_doc_score=result['top_doc_score'],
+            total_time_ms=result['total_time_ms'],
+            timestamp=result['timestamp']
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        print(f"❌ Chat endpoint error: {e}")
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to process chat request: {str(e)}"
+        )
+
+
+@router.post("/conversation", response_model=ConversationCreateResponse, status_code=status.HTTP_201_CREATED)
+async def create_conversation(
+    current_user: TokenData = Depends(get_current_user),
+    repo: ConversationRepository = Depends(get_conversation_repo)
+):
+    """Create a new conversation"""
+    try:
+        conversation_id = await repo.create_conversation(user_id=current_user.user_id)
+        return ConversationCreateResponse(
+            conversation_id=conversation_id,
+            created_at=datetime.now().isoformat()
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to create conversation: {str(e)}"
+        )
+
+
+@router.get("/history/{conversation_id}", response_model=ConversationHistoryResponse)
+async def get_conversation_history(
+    conversation_id: str,
+    current_user: TokenData = Depends(get_current_user),
+    repo: ConversationRepository = Depends(get_conversation_repo)
+):
+    """Get conversation history by ID"""
+    try:
+        conversation = await repo.get_conversation(conversation_id)
+
+        if not conversation:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail=f"Conversation {conversation_id} not found"
+            )
+
+        if conversation["user_id"] != current_user.user_id:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Access denied - you don't own this conversation"
+            )
+
+        messages = []
+        for msg in conversation.get('messages', []):
+            messages.append(MessageModel(
+                role=msg['role'],
+                content=msg['content'],
+                timestamp=msg['timestamp'].isoformat() if isinstance(msg['timestamp'], datetime) else msg['timestamp'],
+                metadata=msg.get('metadata')
+            ))
+
+        return ConversationHistoryResponse(
+            conversation_id=conversation_id,
+            messages=messages,
+            message_count=len(messages)
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to fetch conversation history: {str(e)}"
+        )
+
+
+@router.get("/conversations")
+async def list_user_conversations(
+    limit: int = 10,
+    skip: int = 0,
+    current_user: TokenData = Depends(get_current_user),
+    repo: ConversationRepository = Depends(get_conversation_repo)
+):
+    """List all conversations for the authenticated user"""
+    try:
+        conversations = await repo.get_user_conversations(
+            user_id=current_user.user_id,
+            limit=limit,
+            skip=skip
)
|
| 292 |
+
|
| 293 |
+
return {
|
| 294 |
+
"user_id": current_user.user_id,
|
| 295 |
+
"user_email": current_user.email,
|
| 296 |
+
"conversations": [
|
| 297 |
+
{
|
| 298 |
+
"conversation_id": conv['conversation_id'],
|
| 299 |
+
"created_at": conv['created_at'].isoformat() if isinstance(conv['created_at'], datetime) else conv['created_at'],
|
| 300 |
+
"updated_at": conv['updated_at'].isoformat() if isinstance(conv['updated_at'], datetime) else conv['updated_at'],
|
| 301 |
+
"message_count": len(conv.get('messages', []))
|
| 302 |
+
}
|
| 303 |
+
for conv in conversations
|
| 304 |
+
],
|
| 305 |
+
"total": len(conversations)
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
except Exception as e:
|
| 309 |
+
raise HTTPException(
|
| 310 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 311 |
+
detail=f"Failed to fetch conversations: {str(e)}"
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
@router.delete("/conversation/{conversation_id}")
|
| 316 |
+
async def delete_conversation(
|
| 317 |
+
conversation_id: str,
|
| 318 |
+
current_user: TokenData = Depends(get_current_user),
|
| 319 |
+
repo: ConversationRepository = Depends(get_conversation_repo)
|
| 320 |
+
):
|
| 321 |
+
"""Delete a conversation"""
|
| 322 |
+
try:
|
| 323 |
+
conversation = await repo.get_conversation(conversation_id)
|
| 324 |
+
|
| 325 |
+
if not conversation:
|
| 326 |
+
raise HTTPException(
|
| 327 |
+
status_code=status.HTTP_404_NOT_FOUND,
|
| 328 |
+
detail=f"Conversation {conversation_id} not found"
|
| 329 |
+
)
|
| 330 |
+
|
| 331 |
+
if conversation["user_id"] != current_user.user_id:
|
| 332 |
+
raise HTTPException(
|
| 333 |
+
status_code=status.HTTP_403_FORBIDDEN,
|
| 334 |
+
detail="Access denied - you don't own this conversation"
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
success = await repo.delete_conversation(conversation_id)
|
| 338 |
+
|
| 339 |
+
if success:
|
| 340 |
+
return {
|
| 341 |
+
"message": "Conversation deleted successfully",
|
| 342 |
+
"conversation_id": conversation_id
|
| 343 |
+
}
|
| 344 |
+
else:
|
| 345 |
+
raise HTTPException(
|
| 346 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 347 |
+
detail="Failed to delete conversation"
|
| 348 |
+
)
|
| 349 |
+
|
| 350 |
+
except HTTPException:
|
| 351 |
+
raise
|
| 352 |
+
except Exception as e:
|
| 353 |
+
raise HTTPException(
|
| 354 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 355 |
+
detail=f"Failed to delete conversation: {str(e)}"
|
| 356 |
+
)
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
@router.get("/health")
|
| 360 |
+
async def chat_health():
|
| 361 |
+
"""Health check for chat service (PUBLIC)"""
|
| 362 |
+
try:
|
| 363 |
+
health = await chat_service.health_check()
|
| 364 |
+
|
| 365 |
+
return {
|
| 366 |
+
"status": "healthy",
|
| 367 |
+
"service": "chat",
|
| 368 |
+
"components": health['components'],
|
| 369 |
+
"timestamp": datetime.now().isoformat()
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
except Exception as e:
|
| 373 |
+
return {
|
| 374 |
+
"status": "unhealthy",
|
| 375 |
+
"service": "chat",
|
| 376 |
+
"error": str(e),
|
| 377 |
+
"timestamp": datetime.now().isoformat()
|
| 378 |
+
}
|
| 379 |
+
# ============================================================================

# ============================================================================
# USAGE DOCUMENTATION
# ============================================================================
"""
=== API USAGE EXAMPLES (WITH AUTHENTICATION) ===

All endpoints (except /health) require a JWT token in the Authorization header.

1. Register user:
   POST /api/v1/auth/register
   Body: {
       "email": "user@example.com",
       "password": "SecurePass123",
       "full_name": "John Doe"
   }
   Response: { "access_token": "eyJ...", "user": {...} }

2. Login:
   POST /api/v1/auth/login
   Body: {
       "email": "user@example.com",
       "password": "SecurePass123"
   }
   Response: { "access_token": "eyJ...", "user": {...} }

3. Send chat message (WITH TOKEN):
   POST /api/v1/chat/
   Headers: { "Authorization": "Bearer eyJ..." }
   Body: {
       "query": "What is my account balance?",
       "conversation_id": "conv_abc"   // optional
   }

4. Get conversation history (WITH TOKEN):
   GET /api/v1/chat/history/conv_abc
   Headers: { "Authorization": "Bearer eyJ..." }

5. List conversations (WITH TOKEN):
   GET /api/v1/chat/conversations?limit=10
   Headers: { "Authorization": "Bearer eyJ..." }

=== TESTING WITH CURL ===

# 1. Register and capture the access token
TOKEN=$(curl -X POST "http://localhost:8000/api/v1/auth/register" \
    -H "Content-Type: application/json" \
    -d '{"email":"test@test.com","password":"test123","full_name":"Test User"}' \
    | jq -r '.access_token')

# 2. Send a chat message with the token
curl -X POST "http://localhost:8000/api/v1/chat/" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $TOKEN" \
    -d '{"query": "What is my balance?"}'
"""
# ============================================================================
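
# ----------------------------------------------------------------------------
# PYTHON CLIENT EXAMPLE (illustrative sketch)
# ----------------------------------------------------------------------------
"""
A minimal Python client for the flow documented above - a sketch, assuming the
API is running locally on port 8000 and that the `requests` package is
installed; credentials and queries are placeholders.

    import requests

    BASE = "http://localhost:8000/api/v1"

    # 1. Register (or login) to obtain a JWT access token
    auth = requests.post(f"{BASE}/auth/register", json={
        "email": "test@test.com",
        "password": "test123",
        "full_name": "Test User",
    }).json()
    headers = {"Authorization": f"Bearer {auth['access_token']}"}

    # 2. Send a chat message; omitting conversation_id starts a new conversation
    chat = requests.post(
        f"{BASE}/chat/",
        json={"query": "What is my balance?"},
        headers=headers,
    ).json()
    print(chat["response"], chat["conversation_id"])

    # 3. Read the conversation history back
    history = requests.get(
        f"{BASE}/chat/history/{chat['conversation_id']}",
        headers=headers,
    ).json()
    print(history["message_count"], "messages")
"""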
app/config.py
ADDED
@@ -0,0 +1,236 @@
"""
Application Configuration
Settings for Banking RAG Chatbot with JWT Authentication
Updated to support multiple Groq API keys and HuggingFace tokens with fallback logic
"""

import os
import shutil
from typing import List
from dotenv import load_dotenv


load_dotenv()

class Settings:
    """Application settings loaded from environment variables"""

    # ========================================================================
    # ENVIRONMENT
    # ========================================================================
    ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
    DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"

    # ========================================================================
    # MONGODB
    # ========================================================================
    MONGODB_URI: str = os.getenv("MONGODB_URI", "")
    DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")

    # ========================================================================
    # JWT AUTHENTICATION
    # ========================================================================
    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
    ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
    ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))  # 1440 minutes = 24 hours

    # ========================================================================
    # CORS (for frontend)
    # ========================================================================
    ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

    # ========================================================================
    # GROQ API KEYS (multiple, for fallback)
    # ========================================================================
    GROQ_API_KEY_1: str = os.getenv("GROQ_API_KEY_1", "")  # Primary
    GROQ_API_KEY_2: str = os.getenv("GROQ_API_KEY_2", "")  # Fallback 1
    GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "")  # Fallback 2

    # Model names for Groq (using correct GroqCloud naming)
    GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama3-8b-8192")    # For chat interface
    GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama3-70b-8192")   # For evaluation

    # ========================================================================
    # RATE LIMITING (disabled for now; re-enable if needed)
    # ========================================================================
    # GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))

    # ========================================================================
    # HUGGING FACE TOKENS (multiple, for fallback)
    # ========================================================================
    HF_TOKEN_1: str = os.getenv("HF_TOKEN_1", "")  # Primary
    HF_TOKEN_2: str = os.getenv("HF_TOKEN_2", "")  # Fallback 1
    HF_TOKEN_3: str = os.getenv("HF_TOKEN_3", "")  # Fallback 2

    # HuggingFace models for inference (fallback from Groq)
    HF_CHAT_MODEL: str = os.getenv("HF_CHAT_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
    HF_EVAL_MODEL: str = os.getenv("HF_EVAL_MODEL", "meta-llama/Meta-Llama-3-70B-Instruct")

    # ========================================================================
    # MODEL PATHS (for RL Policy Network and RAG models)
    # ========================================================================
    POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "app/models/best_policy_model.pth")
    RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "app/models/best_retriever_model.pth")
    FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "app/models/faiss_index.pkl")
    KB_PATH: str = os.getenv("KB_PATH", "app/data/final_knowledge_base.jsonl")

    # ========================================================================
    # DEVICE SETTINGS (for PyTorch/TensorFlow models)
    # ========================================================================
    DEVICE: str = os.getenv("DEVICE", "cpu")

    # ========================================================================
    # LLM PARAMETERS
    # ========================================================================
    LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
    LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024"))

    # ========================================================================
    # RAG PARAMETERS
    # ========================================================================
    TOP_K: int = int(os.getenv("TOP_K", "5"))
    SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
    MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))

    # ========================================================================
    # POLICY NETWORK PARAMETERS
    # ========================================================================
    POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
    CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))

    # ========================================================================
    # HUGGING FACE MODEL REPOSITORY (for deployment)
    # ========================================================================
    HF_MODEL_REPO: str = os.getenv("HF_MODEL_REPO", "eeshanyaj/questrag_models")

    def download_model_if_needed(self, hf_filename: str, local_path: str):
        """
        Download a model from the HuggingFace Hub if it does not exist locally.
        This runs on startup for deployment.

        Args:
            hf_filename: Path in the HF repo (e.g., "models/best_policy_model.pth")
            local_path: Where to save locally (e.g., "app/models/best_policy_model.pth")
        """
        if not os.path.exists(local_path):
            print(f"📥 Downloading {hf_filename} from HuggingFace Hub...")
            os.makedirs(os.path.dirname(local_path), exist_ok=True)

            try:
                from huggingface_hub import hf_hub_download

                # Download from the HF Hub into a local cache
                downloaded_path = hf_hub_download(
                    repo_id=self.HF_MODEL_REPO,
                    filename=hf_filename,
                    repo_type="model",
                    cache_dir=".cache"
                )

                # Copy to the expected location (shutil is imported at module level)
                shutil.copy(downloaded_path, local_path)
                print(f"✅ Downloaded {hf_filename}")
            except Exception as e:
                print(f"❌ Error downloading {hf_filename}: {e}")
                raise
        else:
            print(f"✓ Model already exists: {local_path}")

        return local_path
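
    # Illustrative startup usage - a sketch, not the committed call site, which
    # may differ in app/main.py. The repo-side filenames follow the docstring
    # example above:
    #
    #   settings.download_model_if_needed("models/best_policy_model.pth", settings.POLICY_MODEL_PATH)
    #   settings.download_model_if_needed("models/best_retriever_model.pth", settings.RETRIEVER_MODEL_PATH)
    #   settings.download_model_if_needed("models/faiss_index.pkl", settings.FAISS_INDEX_PATH)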
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# ========================================================================
|
| 145 |
+
# HELPER METHODS
|
| 146 |
+
# ========================================================================
|
| 147 |
+
def get_groq_api_keys(self) -> List[str]:
|
| 148 |
+
"""Get all configured Groq API keys in priority order"""
|
| 149 |
+
keys = []
|
| 150 |
+
if self.GROQ_API_KEY_1:
|
| 151 |
+
keys.append(self.GROQ_API_KEY_1)
|
| 152 |
+
if self.GROQ_API_KEY_2:
|
| 153 |
+
keys.append(self.GROQ_API_KEY_2)
|
| 154 |
+
if self.GROQ_API_KEY_3:
|
| 155 |
+
keys.append(self.GROQ_API_KEY_3)
|
| 156 |
+
return keys
|
| 157 |
+
|
| 158 |
+
def get_hf_tokens(self) -> List[str]:
|
| 159 |
+
"""Get all configured HuggingFace tokens in priority order"""
|
| 160 |
+
tokens = []
|
| 161 |
+
if self.HF_TOKEN_1:
|
| 162 |
+
tokens.append(self.HF_TOKEN_1)
|
| 163 |
+
if self.HF_TOKEN_2:
|
| 164 |
+
tokens.append(self.HF_TOKEN_2)
|
| 165 |
+
if self.HF_TOKEN_3:
|
| 166 |
+
tokens.append(self.HF_TOKEN_3)
|
| 167 |
+
return tokens
|
| 168 |
+
|
| 169 |
+
def is_groq_enabled(self) -> bool:
|
| 170 |
+
"""Check if at least one Groq API key is configured"""
|
| 171 |
+
return bool(self.get_groq_api_keys())
|
| 172 |
+
|
| 173 |
+
def is_hf_enabled(self) -> bool:
|
| 174 |
+
"""Check if at least one HuggingFace token is configured"""
|
| 175 |
+
return bool(self.get_hf_tokens())
|
| 176 |
+
|
| 177 |
+
def get_allowed_origins(self) -> List[str]:
|
| 178 |
+
"""Parse allowed origins from comma-separated string"""
|
| 179 |
+
if self.ALLOWED_ORIGINS == "*":
|
| 180 |
+
return ["*"]
|
| 181 |
+
return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 182 |
+
|
| 183 |
+
def get_llm_for_task(self, task: str = "chat") -> str:
|
| 184 |
+
"""
|
| 185 |
+
Get LLM model name for a specific task.
|
| 186 |
+
|
| 187 |
+
Args:
|
| 188 |
+
task: Task type ('chat' or 'evaluation')
|
| 189 |
+
|
| 190 |
+
Returns:
|
| 191 |
+
str: Model name for the task
|
| 192 |
+
"""
|
| 193 |
+
if task == "evaluation":
|
| 194 |
+
return self.GROQ_EVAL_MODEL # llama3-70b-8192
|
| 195 |
+
else:
|
| 196 |
+
return self.GROQ_CHAT_MODEL # llama3-8b-8192
|
| 197 |
+
|
| 198 |
+
# ============================================================================
|
| 199 |
+
# CREATE GLOBAL SETTINGS INSTANCE
|
| 200 |
+
# ============================================================================
|
| 201 |
+
settings = Settings()
|
| 202 |
+
|
| 203 |
+
# ============================================================================
|
| 204 |
+
# PRINT CONFIGURATION ON LOAD
|
| 205 |
+
# ============================================================================
|
| 206 |
+
print("=" * 80)
|
| 207 |
+
print("✅ Configuration Loaded")
|
| 208 |
+
print("=" * 80)
|
| 209 |
+
print(f"Environment: {settings.ENVIRONMENT}")
|
| 210 |
+
print(f"Debug Mode: {settings.DEBUG}")
|
| 211 |
+
print(f"Database: {settings.DATABASE_NAME}")
|
| 212 |
+
print(f"Device: {settings.DEVICE}")
|
| 213 |
+
print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 214 |
+
print()
|
| 215 |
+
print("🔑 API Keys:")
|
| 216 |
+
groq_keys = settings.get_groq_api_keys()
|
| 217 |
+
print(f" Groq Keys: {len(groq_keys)} configured")
|
| 218 |
+
for i, key in enumerate(groq_keys, 1):
|
| 219 |
+
print(f" - Key {i}: {'✅ Set' if key else '❌ Missing'}")
|
| 220 |
+
hf_tokens = settings.get_hf_tokens()
|
| 221 |
+
print(f" HuggingFace Tokens: {len(hf_tokens)} configured")
|
| 222 |
+
for i, token in enumerate(hf_tokens, 1):
|
| 223 |
+
print(f" - Token {i}: {'✅ Set' if token else '❌ Missing'}")
|
| 224 |
+
print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 225 |
+
print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 226 |
+
print()
|
| 227 |
+
print("🤖 LLM Models:")
|
| 228 |
+
print(f" Chat Model: {settings.GROQ_CHAT_MODEL} (Llama 3 8B)")
|
| 229 |
+
print(f" Eval Model: {settings.GROQ_EVAL_MODEL} (Llama 3 70B)")
|
| 230 |
+
print()
|
| 231 |
+
print("🤖 Model Paths:")
|
| 232 |
+
print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
|
| 233 |
+
print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
|
| 234 |
+
print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
|
| 235 |
+
print(f" Knowledge Base: {settings.KB_PATH}")
|
| 236 |
+
print("=" * 80)
|
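# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# A minimal sketch of the startup flow (not from the original source — the
# actual call sites live in app/main.py, which is not shown in this section).
# The hf_filename below is illustrative; POLICY_MODEL_PATH is the setting
# printed in the configuration banner above:

settings.download_model_if_needed(
    hf_filename="models/best_policy_model.pth",   # path inside HF_MODEL_REPO
    local_path=settings.POLICY_MODEL_PATH
)

# Select the right Groq model for a task:
model_name = settings.get_llm_for_task("evaluation")  # -> llama3-70b-8192
"""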
app/core/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Core package - Core utilities and helpers
Contains security, authentication, API key management, and shared utilities
"""

__version__ = "1.0.0"
app/core/llm_manager.py
ADDED
@@ -0,0 +1,445 @@
"""
Multi-LLM Manager with Groq (ChatGroq) and HuggingFace Fallback Logic

Architecture:
- Primary: Groq API with 3 keys (sequential fallback)
- Fallback: HuggingFace Inference API with 3 tokens (sequential fallback)
- Llama 3 8B for the chat interface
- Llama 3 70B for evaluation

Fallback Logic:
1. Try GROQ_API_KEY_1
2. If it fails, try GROQ_API_KEY_2
3. If it fails, try GROQ_API_KEY_3
4. If all Groq keys fail, try HF_TOKEN_1
5. If it fails, try HF_TOKEN_2
6. If it fails, try HF_TOKEN_3
"""

import time
from typing import List, Dict, Optional, Literal
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from huggingface_hub import InferenceClient
from app.config import settings

# ============================================================================
# GROQ MANAGER WITH FALLBACK
# ============================================================================
class GroqManager:
    """
    Groq API manager with multiple-API-key fallback support.
    Uses ChatGroq from langchain_groq.
    """

    def __init__(self):
        """Initialize Groq manager with all available API keys"""
        self.api_keys = settings.get_groq_api_keys()
        self.chat_model_name = settings.GROQ_CHAT_MODEL  # llama3-8b-8192
        self.eval_model_name = settings.GROQ_EVAL_MODEL  # llama3-70b-8192

        # Track current key index
        self.current_key_index = 0

        # Rate limiting tracking
        self.requests_this_minute = 0
        self.last_reset = time.time()

        if not self.api_keys:
            raise ValueError("No Groq API keys configured. Set GROQ_API_KEY_1 in .env")

        print(f"✅ Groq Manager initialized with {len(self.api_keys)} API key(s)")
        print(f"   Chat Model: {self.chat_model_name}")
        print(f"   Eval Model: {self.eval_model_name}")

    def _check_rate_limits(self):
        """
        Check and reset rate limit counters.
        Groq Free tier: 30 requests/min.
        """
        current_time = time.time()

        # Reset counters every minute
        if current_time - self.last_reset > 60:
            self.requests_this_minute = 0
            self.last_reset = current_time

        # Check if limits exceeded
        # =================================================================
        # Uncomment below if rate limiting enforcement is needed
        # =================================================================

        # if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
        #     wait_time = 60 - (current_time - self.last_reset)
        #     print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
        #     time.sleep(wait_time)
        #     self._check_rate_limits()

    def _create_llm(self, api_key: str, model_name: str) -> ChatGroq:
        """Create a ChatGroq instance with the given API key and model"""
        return ChatGroq(
            api_key=api_key,
            model_name=model_name,
            temperature=settings.LLM_TEMPERATURE,
            max_tokens=settings.LLM_MAX_TOKENS,
            max_retries=0  # Disable automatic retries; we handle fallback manually
        )

    async def generate(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        task: Literal["chat", "evaluation"] = "chat"
    ) -> str:
        """
        Generate a response using Groq with fallback logic.

        Args:
            messages: List of conversation messages
            system_prompt: Optional system prompt
            task: Task type that selects the model (chat uses 8B, evaluation uses 70B)

        Returns:
            str: Generated response text

        Raises:
            Exception: If all Groq API keys fail
        """
        self._check_rate_limits()

        # Select model based on task
        model_name = self.eval_model_name if task == "evaluation" else self.chat_model_name

        # Format messages for LangChain
        formatted_messages = []

        # Add system message if provided
        if system_prompt:
            formatted_messages.append(SystemMessage(content=system_prompt))

        # Convert conversation messages
        for msg in messages:
            if msg['role'] == 'user':
                formatted_messages.append(HumanMessage(content=msg['content']))
            elif msg['role'] == 'assistant':
                formatted_messages.append(AIMessage(content=msg['content']))

        # Try each Groq API key sequentially
        for key_index, api_key in enumerate(self.api_keys, 1):
            try:
                print(f"🔑 Trying Groq API Key {key_index}/{len(self.api_keys)} with {model_name}...")

                # Create LLM instance with current key
                llm = self._create_llm(api_key, model_name)

                # Generate response
                response = await llm.ainvoke(formatted_messages)

                # Track rate limits
                self.requests_this_minute += 1

                print(f"✅ Groq API Key {key_index} succeeded")
                return response.content

            except Exception as e:
                print(f"❌ Groq API Key {key_index} failed: {e}")

                # If this was the last key, raise an exception
                if key_index == len(self.api_keys):
                    print(f"❌ All {len(self.api_keys)} Groq API keys exhausted")
                    raise Exception(f"All Groq API keys failed. Last error: {e}")

                # Otherwise, continue to the next key
                print("⏭️ Falling back to next Groq API key...")
                continue

# ============================================================================
# HUGGINGFACE MANAGER WITH FALLBACK
# ============================================================================
class HuggingFaceManager:
    """
    HuggingFace Inference API manager with multiple-token fallback support.
    Uses InferenceClient from huggingface_hub.
    """

    def __init__(self):
        """Initialize HuggingFace manager with all available tokens"""
        self.tokens = settings.get_hf_tokens()
        self.chat_model_name = settings.HF_CHAT_MODEL
        self.eval_model_name = settings.HF_EVAL_MODEL

        if not self.tokens:
            raise ValueError("No HuggingFace tokens configured. Set HF_TOKEN_1 in .env")

        print(f"✅ HuggingFace Manager initialized with {len(self.tokens)} token(s)")
        print(f"   Chat Model: {self.chat_model_name}")
        print(f"   Eval Model: {self.eval_model_name}")

    def _create_client(self, token: str, model_name: str) -> InferenceClient:
        """Create an InferenceClient instance with the given token and model"""
        return InferenceClient(
            model=model_name,
            token=token
        )

    async def generate(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        task: Literal["chat", "evaluation"] = "chat"
    ) -> str:
        """
        Generate a response using the HuggingFace Inference API with fallback logic.

        Args:
            messages: List of conversation messages
            system_prompt: Optional system prompt
            task: Task type that selects the model

        Returns:
            str: Generated response text

        Raises:
            Exception: If all HuggingFace tokens fail
        """
        # Select model based on task
        model_name = self.eval_model_name if task == "evaluation" else self.chat_model_name

        # Format messages for the HuggingFace chat API
        formatted_messages = []

        # Add system message if provided
        if system_prompt:
            formatted_messages.append({
                "role": "system",
                "content": system_prompt
            })

        # Convert conversation messages
        for msg in messages:
            formatted_messages.append({
                "role": msg['role'],
                "content": msg['content']
            })

        # Try each HuggingFace token sequentially
        for token_index, token in enumerate(self.tokens, 1):
            try:
                print(f"🔑 Trying HuggingFace Token {token_index}/{len(self.tokens)} with {model_name}...")

                # Create client with current token
                client = self._create_client(token, model_name)

                # Generate response using chat completion
                response = client.chat_completion(
                    messages=formatted_messages,
                    max_tokens=settings.LLM_MAX_TOKENS,
                    temperature=settings.LLM_TEMPERATURE
                )

                # Extract content from response
                content = response.choices[0].message.content

                print(f"✅ HuggingFace Token {token_index} succeeded")
                return content

            except Exception as e:
                print(f"❌ HuggingFace Token {token_index} failed: {e}")

                # If this was the last token, raise an exception
                if token_index == len(self.tokens):
                    print(f"❌ All {len(self.tokens)} HuggingFace tokens exhausted")
                    raise Exception(f"All HuggingFace tokens failed. Last error: {e}")

                # Otherwise, continue to the next token
                print("⏭️ Falling back to next HuggingFace token...")
                continue

# ============================================================================
# UNIFIED LLM MANAGER (Groq Primary, HuggingFace Fallback)
# ============================================================================
class LLMManager:
    """
    Unified LLM manager with cascading fallback logic:
    1. Try all Groq API keys (primary)
    2. If all fail, try all HuggingFace tokens (fallback)

    Models:
    - Chat: Llama 3 8B (for user-facing chat responses)
    - Evaluation: Llama 3 70B (for response evaluation)
    """

    def __init__(self):
        """Initialize all LLM managers"""
        self.groq = None
        self.huggingface = None

        # Initialize Groq if configured
        if settings.is_groq_enabled():
            try:
                self.groq = GroqManager()
            except Exception as e:
                print(f"⚠️ Failed to initialize Groq: {e}")

        # Initialize HuggingFace if configured
        if settings.is_hf_enabled():
            try:
                self.huggingface = HuggingFaceManager()
            except Exception as e:
                print(f"⚠️ Failed to initialize HuggingFace: {e}")

        # Check that at least one provider is available
        if not self.groq and not self.huggingface:
            raise ValueError("No LLM provider configured. Set either Groq or HuggingFace credentials in .env")

        print("✅ LLM Manager initialized with fallback logic")

    async def generate(
        self,
        messages: List[Dict[str, str]],
        system_prompt: Optional[str] = None,
        task: Literal["chat", "evaluation"] = "chat"
    ) -> str:
        """
        Generate a response with cascading fallback logic.

        Fallback order:
        1. Try all Groq API keys (3 keys)
        2. If all Groq keys fail, try all HuggingFace tokens (3 tokens)

        Args:
            messages: Conversation messages
            system_prompt: Optional system prompt
            task: Task type - "chat" (8B) or "evaluation" (70B)

        Returns:
            str: Generated response

        Raises:
            ValueError: If all providers fail
        """
        # Try Groq first (if available)
        if self.groq:
            try:
                print("🚀 Attempting Groq API (Primary)...")
                response = await self.groq.generate(messages, system_prompt, task)
                return response
            except Exception as groq_error:
                print(f"❌ All Groq API keys failed: {groq_error}")

                # Fall back to HuggingFace if available
                if self.huggingface:
                    print("🔄 Falling back to HuggingFace Inference API...")
                else:
                    raise ValueError(f"Groq failed and no HuggingFace fallback configured: {groq_error}")

        # Try HuggingFace (if Groq failed or is not available)
        if self.huggingface:
            try:
                print("🚀 Attempting HuggingFace API (Fallback)...")
                response = await self.huggingface.generate(messages, system_prompt, task)
                return response
            except Exception as hf_error:
                raise ValueError(f"All LLM providers exhausted. HuggingFace error: {hf_error}")

        raise ValueError("No LLM provider available")

    async def generate_chat_response(
        self,
        query: str,
        context: str,
        history: List[Dict[str, str]]
    ) -> str:
        """
        Generate a chat response (uses Llama 3 8B).

        Args:
            query: User query
            context: Retrieved context (from FAISS)
            history: Conversation history

        Returns:
            str: Chat response
        """
        # Import the detailed prompt
        from app.services.chat_service import BANKING_SYSTEM_PROMPT

        # Build an enhanced system prompt with context
        system_prompt = BANKING_SYSTEM_PROMPT
        if context:
            system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
        else:
            system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."

        # Build messages
        messages = history + [{'role': 'user', 'content': query}]

        # Generate using the chat task (Llama 3 8B)
        return await self.generate(messages, system_prompt, task="chat")

    async def evaluate_response(
        self,
        query: str,
        response: str,
        context: str = ""
    ) -> Dict:
        """
        Evaluate response quality (uses Llama 3 70B for better evaluation).
        Used during RL training.

        Args:
            query: User query
            response: Generated response
            context: Retrieved context (if any)

        Returns:
            dict: Evaluation results
                  {'quality': 'Good'/'Bad', 'explanation': '...'}
        """
        eval_prompt = f"""Evaluate this response:

Query: {query}
Response: {response}
Context used: {context if context else 'None'}

Is this response Good or Bad? Respond with just "Good" or "Bad" and a brief explanation."""

        messages = [{'role': 'user', 'content': eval_prompt}]

        # Generate using the evaluation task (Llama 3 70B)
        result = await self.generate(messages, task="evaluation")

        # Parse the result
        quality = "Good" if "Good" in result else "Bad"

        return {
            'quality': quality,
            'explanation': result
        }

# ============================================================================
# GLOBAL LLM MANAGER INSTANCE
# ============================================================================
llm_manager = LLMManager()

# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# In your service file:
from app.core.llm_manager import llm_manager

# Generate chat response (uses Llama 3 8B with Groq → HF fallback)
response = await llm_manager.generate_chat_response(
    query="What is my account balance?",
    context="Your balance is $1000",
    history=[]
)

# Evaluate response (uses Llama 3 70B with Groq → HF fallback)
evaluation = await llm_manager.evaluate_response(
    query="What is my balance?",
    response="Your balance is $1000",
    context="Balance: $1000"
)
"""
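# ============================================================================
# ERROR HANDLING EXAMPLE (for reference)
# ============================================================================
"""
# A minimal sketch (not part of the original file): generate() raises
# ValueError only after every Groq key and HuggingFace token is exhausted,
# so callers such as the chat endpoint should catch it and degrade
# gracefully. The fallback message below is illustrative:

try:
    reply = await llm_manager.generate(
        messages=[{'role': 'user', 'content': 'How do I reset my PIN?'}],
        task="chat"
    )
except ValueError:
    reply = "Sorry, the assistant is temporarily unavailable. Please try again."
"""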
app/db/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Database package - Database connections and repositories
Contains MongoDB connection and repository pattern implementations
"""

__version__ = "1.0.0"
app/db/mongodb.py
ADDED
@@ -0,0 +1,390 @@
"""
MongoDB Connection with Motor (Async Driver)
Handles the async connection to MongoDB Atlas for conversation storage
"""

import motor.motor_asyncio
from app.config import settings


# ============================================================================
# GLOBAL VARIABLES
# ============================================================================
mongodb_client = None
mongodb_database = None


# ============================================================================
# CONNECTION FUNCTIONS
# ============================================================================

async def connect_to_mongo():
    """
    Connect to MongoDB Atlas on application startup.

    This is called from main.py during FastAPI lifespan startup.
    Uses Motor for async MongoDB operations.

    Returns:
        database: MongoDB database instance, or None if the connection fails
    """
    global mongodb_client, mongodb_database

    try:
        print("\n🔌 Connecting to MongoDB Atlas...")

        # Hide password in logs
        # uri_display = settings.MONGODB_URI[:50] + "..." if len(settings.MONGODB_URI) > 50 else settings.MONGODB_URI
        # print(f"   URI: {uri_display}")
        print(f"   Database: {settings.DATABASE_NAME}")

        # Create Motor client (async MongoDB driver)
        mongodb_client = motor.motor_asyncio.AsyncIOMotorClient(
            settings.MONGODB_URI,
            serverSelectionTimeoutMS=5000,   # 5 second server selection timeout
            connectTimeoutMS=10000,          # 10 second connection timeout
            socketTimeoutMS=10000            # 10 second socket timeout
        )

        # Get database reference
        mongodb_database = mongodb_client[settings.DATABASE_NAME]

        # Test connection with a ping
        await mongodb_client.admin.command('ping')

        print("✅ MongoDB connected successfully!")
        print(f"   Database: {settings.DATABASE_NAME}")

        return mongodb_database

    except Exception as e:
        print("\n❌ MongoDB connection FAILED!")
        print(f"   Error: {str(e)}")
        print("\n💡 Troubleshooting:")
        print("   1. Check MONGODB_URI in the .env file")
        print("   2. Verify the MongoDB Atlas cluster is running")
        print("   3. Check network access settings (allow your IP)")
        print("   4. Verify database user credentials")
        print("\n⚠️ Backend will start, but MongoDB features won't work!\n")

        # Set to None (the app can still start for debugging)
        mongodb_database = None
        return None


async def close_mongo_connection():
    """
    Close the MongoDB connection on application shutdown.

    This is called from main.py during FastAPI lifespan shutdown.
    """
    global mongodb_client

    if mongodb_client:
        print("\n🔌 Closing MongoDB connection...")
        mongodb_client.close()
        print("✅ MongoDB connection closed")
    else:
        print("ℹ️ No MongoDB connection to close")


def get_database():
    """
    Get the MongoDB database instance.

    This is used by repositories to access the database.
    Returns None if MongoDB is not connected (for graceful degradation).

    Returns:
        database: MongoDB database instance or None
    """
    if mongodb_database is None:
        print("\n⚠️ WARNING: MongoDB database not available!")
        print("   Attempting to use database features without a connection")
        print("   Make sure the MongoDB connection succeeded during startup\n")

    return mongodb_database


# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# In main.py (FastAPI lifespan):

from app.db.mongodb import connect_to_mongo, close_mongo_connection

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup
    await connect_to_mongo()
    yield
    # Shutdown
    await close_mongo_connection()

# In repositories:

from app.db.mongodb import get_database

class SomeRepository:
    def __init__(self):
        self.db = get_database()
        if self.db is not None:  # Motor databases don't support truth testing
            self.collection = self.db["my_collection"]
"""


# """
# MongoDB Connection Handler
# Manages connection to MongoDB Atlas (cloud database)

# This uses Motor - an async MongoDB driver for Python
# Works perfectly with FastAPI's async/await
# """

# from motor.motor_asyncio import AsyncIOMotorClient
# from app.config import settings


# # ============================================================================
# # MONGODB CLIENT SINGLETON
# # ============================================================================
# class MongoDB:
#     """
#     MongoDB client singleton.
#     Stores the connection and database instance.

#     Attributes:
#         client: Motor async client connection
#         db: Database instance
#     """
#     client: AsyncIOMotorClient = None
#     db = None


# # Create global instance
# mongodb = MongoDB()


# # ============================================================================
# # CONNECTION FUNCTIONS
# # ============================================================================

# async def connect_to_mongo():
#     """
#     Connect to MongoDB Atlas on application startup.

#     This establishes a connection pool that will be reused
#     for all database operations throughout the app's lifetime.

#     Connection string format (from .env):

#     Raises:
#         Exception: If connection fails
#     """
#     try:
#         # Create async MongoDB client
#         # serverSelectionTimeoutMS: How long to wait before giving up (5 seconds)
#         mongodb.client = AsyncIOMotorClient(
#             settings.MONGODB_URI,
#             serverSelectionTimeoutMS=5000
#         )

#         # Get database instance
#         mongodb.db = mongodb.client[settings.DATABASE_NAME]

#         # Verify connection by pinging the database
#         await mongodb.client.admin.command('ping')

#         print(f"✅ Connected to MongoDB Atlas")
#         print(f"   Database: {settings.DATABASE_NAME}")

#     except Exception as e:
#         print(f"❌ MongoDB connection failed: {e}")
#         raise


# async def close_mongo_connection():
#     """
#     Close MongoDB connection on application shutdown.

#     This properly closes the connection pool and releases resources.
#     """
#     if mongodb.client:
#         mongodb.client.close()
#         print("✅ MongoDB connection closed")


# def get_database():
#     """
#     Get the current database instance.

#     This function is used by repositories to access the database.

#     Returns:
#         AsyncIOMotorDatabase: Database instance

#     Example:
#         from app.db.mongodb import get_database

#         db = get_database()
#         collection = db["conversations"]
#         result = await collection.find_one({"user_id": "123"})
#     """
#     return mongodb.db


# # ============================================================================
# # HELPER FUNCTIONS
# # ============================================================================

# async def check_connection() -> bool:
#     """
#     Check if the MongoDB connection is alive.

#     Returns:
#         bool: True if connected, False otherwise
#     """
#     try:
#         if mongodb.client is None:
#             return False

#         # Try to ping the database
#         await mongodb.client.admin.command('ping')
#         return True
#     except Exception:
#         return False


# async def get_collection_names():
#     """
#     Get a list of all collection names in the database.
#     Useful for debugging and admin operations.

#     Returns:
#         list: List of collection names
#     """
#     if mongodb.db is None:
#         return []

#     return await mongodb.db.list_collection_names()


# async def create_indexes():
#     """
#     Create database indexes for better query performance.

#     This should be called once after first deployment.
#     Indexes speed up queries on specific fields.

#     Collections and their indexes:
#     1. conversations:
#        - conversation_id (unique)
#        - user_id (for user queries)
#        - created_at (for sorting)

#     2. users:
#        - user_id (unique)
#        - email (unique)

#     3. retrieval_logs:
#        - log_id (unique)
#        - timestamp (for time-series queries)
#     """
#     db = get_database()

#     # Conversations collection
#     conversations = db["conversations"]
#     await conversations.create_index("conversation_id", unique=True)
#     await conversations.create_index("user_id")
#     await conversations.create_index("created_at")
#     print("✅ Created indexes for 'conversations' collection")

#     # Users collection
#     users = db["users"]
#     await users.create_index("user_id", unique=True)
#     await users.create_index("email", unique=True)
#     print("✅ Created indexes for 'users' collection")

#     # Retrieval logs collection
#     retrieval_logs = db["retrieval_logs"]
#     await retrieval_logs.create_index("log_id", unique=True)
#     await retrieval_logs.create_index("timestamp")
#     await retrieval_logs.create_index("user_id")
#     print("✅ Created indexes for 'retrieval_logs' collection")

#     print("✅ All database indexes created successfully")


# # ============================================================================
# # USAGE EXAMPLES (for reference)
# # ============================================================================
# """
# # In your repository or service:

# from app.db.mongodb import get_database

# async def get_user_conversations(user_id: str):
#     db = get_database()
#     conversations = db["conversations"]

#     cursor = conversations.find({"user_id": user_id})
#     results = await cursor.to_list(length=10)

#     return results

# # In main.py startup:
# from app.db.mongodb import connect_to_mongo, create_indexes

# await connect_to_mongo()
# await create_indexes()  # Run once on first deployment
# """


# # Key Features:
# # Async/Await - Works with FastAPI's async nature
# # Connection Pooling - Reuses connections efficiently
# # Singleton Pattern - One connection for entire app
# # MongoDB Atlas Compatible - Works with cloud MongoDB
# # Index Creation - Optimizes query performance
# # Health Check - check_connection() for monitoring

# # How to Use:
# # python
# # # In any repository/service file:

# # from app.db.mongodb import get_database

# # async def save_conversation(data: dict):
# #     db = get_database()
# #     collection = db["conversations"]
# #     result = await collection.insert_one(data)
# #     return str(result.inserted_id)
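# ============================================================================
# HEALTH CHECK EXAMPLE (for reference)
# ============================================================================
"""
# A minimal sketch (not part of the original file): the active module keeps
# its client in the module-level `mongodb_client`, so a health probe can
# reuse the same `ping` command the startup code uses. The helper name is
# illustrative:

from app.db import mongodb

async def mongo_is_alive() -> bool:
    if mongodb.mongodb_client is None:
        return False
    try:
        await mongodb.mongodb_client.admin.command('ping')
        return True
    except Exception:
        return False
"""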
app/db/repositories/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Repositories package - Data access layer
Contains repository classes for database operations (CRUD)
"""

__version__ = "1.0.0"
app/db/repositories/conversation_repository.py
ADDED
@@ -0,0 +1,1049 @@
# """
# Conversation Repository - MongoDB CRUD operations
# Handles storing and retrieving conversations from MongoDB Atlas

# Repository Pattern: Separates database logic from business logic
# This makes code cleaner and easier to test
# """

# import uuid
# from datetime import datetime
# from typing import List, Dict, Optional
# from bson import ObjectId

# from app.db.mongodb import get_database


# # ============================================================================
# # CONVERSATION REPOSITORY
# # ============================================================================

# class ConversationRepository:
#     """
#     Repository for conversation data in MongoDB.

#     Collections used:
#     - conversations: Stores complete conversations with messages
#     - retrieval_logs: Logs each retrieval operation (for RL training)
#     """

#     def __init__(self):
#         """Initialize repository with database connection"""
#         self.db = get_database()
#         self.conversations = self.db["conversations"]
#         self.retrieval_logs = self.db["retrieval_logs"]

#     # ========================================================================
#     # CONVERSATION CRUD OPERATIONS
#     # ========================================================================

#     async def create_conversation(
#         self,
#         user_id: str,
#         conversation_id: Optional[str] = None
#     ) -> str:
#         """
#         Create a new conversation.

#         Args:
#             user_id: User ID who owns this conversation
#             conversation_id: Optional custom conversation ID (auto-generated if None)

#         Returns:
#             str: Conversation ID
#         """
#         if conversation_id is None:
#             conversation_id = str(uuid.uuid4())

#         conversation = {
#             "conversation_id": conversation_id,
#             "user_id": user_id,
#             "messages": [],  # Will store all messages
#             "created_at": datetime.now(),
#             "updated_at": datetime.now(),
#             "status": "active"  # active, archived, deleted
#         }

#         await self.conversations.insert_one(conversation)

#         return conversation_id

#     async def get_conversation(self, conversation_id: str) -> Optional[Dict]:
#         """
#         Get a conversation by ID.

#         Args:
#             conversation_id: Conversation ID

#         Returns:
#             dict or None: Conversation document
#         """
#         conversation = await self.conversations.find_one(
#             {"conversation_id": conversation_id}
#         )

#         # Convert MongoDB ObjectId to string for JSON serialization
#         if conversation and "_id" in conversation:
#             conversation["_id"] = str(conversation["_id"])

#         return conversation

#     async def get_user_conversations(
#         self,
#         user_id: str,
#         limit: int = 10,
#         skip: int = 0
#     ) -> List[Dict]:
#         """
#         Get all conversations for a user.

#         Args:
#             user_id: User ID
#             limit: Maximum number of conversations to return
#             skip: Number of conversations to skip (for pagination)

#         Returns:
#             list: List of conversation documents
#         """
#         cursor = self.conversations.find(
#             {"user_id": user_id, "status": "active"}
#         ).sort("updated_at", -1).skip(skip).limit(limit)

#         conversations = await cursor.to_list(length=limit)

#         # Convert ObjectIds to strings
#         for conv in conversations:
#             if "_id" in conv:
#                 conv["_id"] = str(conv["_id"])

#         return conversations

#     async def add_message(
#         self,
#         conversation_id: str,
#         message: Dict
#     ) -> bool:
#         """
#         Add a message to a conversation.

#         Args:
#             conversation_id: Conversation ID
#             message: Message dict
#                 {
#                     'role': 'user' or 'assistant',
#                     'content': str,
#                     'timestamp': datetime,
#                     'metadata': dict (optional - policy_action, docs_retrieved, etc.)
#                 }

#         Returns:
#             bool: Success status
#         """
#         # Ensure timestamp exists
#         if "timestamp" not in message:
#             message["timestamp"] = datetime.now()

#         # Add message to conversation
#         result = await self.conversations.update_one(
#             {"conversation_id": conversation_id},
#             {
#                 "$push": {"messages": message},
#                 "$set": {"updated_at": datetime.now()}
#             }
#         )

#         return result.modified_count > 0

#     async def get_conversation_history(
#         self,
#         conversation_id: str,
#         max_messages: int = None
#     ) -> List[Dict]:
#         """
#         Get conversation history (messages only).

#         Args:
#             conversation_id: Conversation ID
#             max_messages: Optional limit on number of messages

#         Returns:
#             list: List of messages
#         """
#         conversation = await self.get_conversation(conversation_id)

#         if not conversation:
#             return []

#         messages = conversation.get("messages", [])

#         if max_messages:
#             messages = messages[-max_messages:]

#         return messages

#     async def delete_conversation(self, conversation_id: str) -> bool:
#         """
#         Soft delete a conversation (mark as deleted, don't actually delete).

#         Args:
#             conversation_id: Conversation ID

#         Returns:
#             bool: Success status
#         """
#         result = await self.conversations.update_one(
#             {"conversation_id": conversation_id},
#             {
#                 "$set": {
#                     "status": "deleted",
#                     "deleted_at": datetime.now()
#                 }
#             }
#         )

#         return result.modified_count > 0

#     # ========================================================================
#     # RETRIEVAL LOGS (for RL training)
#     # ========================================================================

#     async def log_retrieval(
#         self,
#         log_data: Dict
#     ) -> str:
#         """
#         Log a retrieval operation (for RL training and analysis).

#         Args:
#             log_data: Log data dict
#                 {
#                     'conversation_id': str,
#                     'user_id': str,
#                     'query': str,
#                     'policy_action': 'FETCH' or 'NO_FETCH',
#                     'policy_confidence': float,
#                     'documents_retrieved': int,
#                     'top_doc_score': float or None,
#                     'retrieved_docs_metadata': list,
#                     'response': str,
#                     'retrieval_time_ms': float,
#                     'generation_time_ms': float,
#                     'total_time_ms': float,
#                     'timestamp': datetime
#                 }

#         Returns:
#             str: Log ID
#         """
#         # Add timestamp if not present
#         if "timestamp" not in log_data:
#             log_data["timestamp"] = datetime.now()

#         # Generate log ID
#         log_id = str(uuid.uuid4())
#         log_data["log_id"] = log_id

#         # Insert log
#         await self.retrieval_logs.insert_one(log_data)

#         return log_id

#     async def get_retrieval_logs(
#         self,
#         conversation_id: Optional[str] = None,
#         user_id: Optional[str] = None,
#         limit: int = 100,
#         skip: int = 0
#     ) -> List[Dict]:
#         """
#         Get retrieval logs (for analysis and RL training).

#         Args:
#             conversation_id: Optional filter by conversation
#             user_id: Optional filter by user
#             limit: Maximum number of logs
#             skip: Number of logs to skip

#         Returns:
#             list: List of log documents
#         """
#         # Build query
#         query = {}
#         if conversation_id:
#             query["conversation_id"] = conversation_id
#         if user_id:
#             query["user_id"] = user_id

#         # Fetch logs
#         cursor = self.retrieval_logs.find(query).sort("timestamp", -1).skip(skip).limit(limit)
#         logs = await cursor.to_list(length=limit)

#         # Convert ObjectIds to strings
#         for log in logs:
#             if "_id" in log:
#                 log["_id"] = str(log["_id"])

#         return logs

#     async def get_logs_for_rl_training(
#         self,
#         min_date: Optional[datetime] = None,
#         limit: int = 1000
#     ) -> List[Dict]:
#         """
#         Get logs specifically for RL training.
#         Filters for logs with both a policy decision and retrieval results.

#         Args:
#             min_date: Optional minimum date for logs
#             limit: Maximum number of logs

#         Returns:
#             list: List of log documents suitable for RL training
#         """
#         # Build query
#         query = {
#             "policy_action": {"$exists": True},
#             "response": {"$exists": True}
#         }

#         if min_date:
#             query["timestamp"] = {"$gte": min_date}

#         # Fetch logs
#         cursor = self.retrieval_logs.find(query).sort("timestamp", -1).limit(limit)
#         logs = await cursor.to_list(length=limit)

#         # Convert ObjectIds
#         for log in logs:
#             if "_id" in log:
#                 log["_id"] = str(log["_id"])

#         return logs

#     # ========================================================================
#     # ANALYTICS QUERIES
#     # ========================================================================

#     async def get_conversation_stats(self, user_id: str) -> Dict:
#         """
#         Get conversation statistics for a user.

#         Args:
#             user_id: User ID

#         Returns:
#             dict: Statistics
#         """
#         # Count total conversations
#         total_conversations = await self.conversations.count_documents({
#             "user_id": user_id,
#             "status": "active"
#         })

#         # Count total messages
#         pipeline = [
#             {"$match": {"user_id": user_id, "status": "active"}},
#             {"$project": {"message_count": {"$size": "$messages"}}}
#         ]

#         result = await self.conversations.aggregate(pipeline).to_list(length=None)
#         total_messages = sum(doc.get("message_count", 0) for doc in result)

#         return {
#             "total_conversations": total_conversations,
#             "total_messages": total_messages,
#             "avg_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
#         }

#     async def get_policy_stats(self, user_id: Optional[str] = None) -> Dict:
#         """
#         Get policy decision statistics.

#         Args:
#             user_id: Optional user ID filter

#         Returns:
#             dict: Policy statistics
#         """
#         # Build query
#         query = {}
#         if user_id:
#             query["user_id"] = user_id

#         # Count FETCH vs NO_FETCH
#         fetch_count = await self.retrieval_logs.count_documents({
#             **query,
#             "policy_action": "FETCH"
+
# })
|
| 379 |
+
|
| 380 |
+
# no_fetch_count = await self.retrieval_logs.count_documents({
|
| 381 |
+
# **query,
|
| 382 |
+
# "policy_action": "NO_FETCH"
|
| 383 |
+
# })
|
| 384 |
+
|
| 385 |
+
# total = fetch_count + no_fetch_count
|
| 386 |
+
|
| 387 |
+
# return {
|
| 388 |
+
# "fetch_count": fetch_count,
|
| 389 |
+
# "no_fetch_count": no_fetch_count,
|
| 390 |
+
# "total": total,
|
| 391 |
+
# "fetch_rate": fetch_count / total if total > 0 else 0,
|
| 392 |
+
# "no_fetch_rate": no_fetch_count / total if total > 0 else 0
|
| 393 |
+
# }
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
# # ============================================================================
|
| 397 |
+
# # USAGE EXAMPLE (for reference)
|
| 398 |
+
# # ============================================================================
|
| 399 |
+
# """
|
| 400 |
+
# # In your service or API endpoint:
|
| 401 |
+
|
| 402 |
+
# from app.db.repositories.conversation_repository import ConversationRepository
|
| 403 |
+
|
| 404 |
+
# repo = ConversationRepository()
|
| 405 |
+
|
| 406 |
+
# # Create conversation
|
| 407 |
+
# conv_id = await repo.create_conversation(user_id="user_123")
|
| 408 |
+
|
| 409 |
+
# # Add user message
|
| 410 |
+
# await repo.add_message(conv_id, {
|
| 411 |
+
# 'role': 'user',
|
| 412 |
+
# 'content': 'What is my balance?',
|
| 413 |
+
# 'timestamp': datetime.now()
|
| 414 |
+
# })
|
| 415 |
+
|
| 416 |
+
# # Add assistant message
|
| 417 |
+
# await repo.add_message(conv_id, {
|
| 418 |
+
# 'role': 'assistant',
|
| 419 |
+
# 'content': 'Your balance is $1000',
|
| 420 |
+
# 'timestamp': datetime.now(),
|
| 421 |
+
# 'metadata': {
|
| 422 |
+
# 'policy_action': 'FETCH',
|
| 423 |
+
# 'documents_retrieved': 3
|
| 424 |
+
# }
|
| 425 |
+
# })
|
| 426 |
+
|
| 427 |
+
# # Get conversation history
|
| 428 |
+
# history = await repo.get_conversation_history(conv_id)
|
| 429 |
+
|
| 430 |
+
# # Log retrieval for RL training
|
| 431 |
+
# await repo.log_retrieval({
|
| 432 |
+
# 'conversation_id': conv_id,
|
| 433 |
+
# 'user_id': 'user_123',
|
| 434 |
+
# 'query': 'What is my balance?',
|
| 435 |
+
# 'policy_action': 'FETCH',
|
| 436 |
+
# 'documents_retrieved': 3,
|
| 437 |
+
# 'response': 'Your balance is $1000'
|
| 438 |
+
# })
|
| 439 |
+
# """
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
|
"""
Conversation Repository - MongoDB CRUD operations
Handles storing and retrieving conversations from MongoDB Atlas

Repository Pattern: Separates database logic from business logic
This makes code cleaner and easier to test

Collections:
- conversations: Stores complete conversations with messages
- retrieval_logs: Logs each retrieval operation (for RL training data)
"""

import uuid
from datetime import datetime
from typing import List, Dict, Optional

from app.db.mongodb import get_database


# ============================================================================
# CONVERSATION REPOSITORY
# ============================================================================

class ConversationRepository:
    """
    Repository for conversation data in MongoDB.

    Provides CRUD operations for:
    1. Conversations (user chat sessions)
    2. Retrieval logs (for RL training and analytics)
    """

    def __init__(self):
        """
        Initialize repository with database connection.

        Gracefully handles the case where MongoDB is not connected.
        """
        self.db = get_database()

        # Graceful handling if MongoDB not connected
        if self.db is None:
            print("⚠️ ConversationRepository: MongoDB not connected")
            print("   Repository will not function until database is connected")
            self.conversations = None
            self.retrieval_logs = None
        else:
            self.conversations = self.db["conversations"]
            self.retrieval_logs = self.db["retrieval_logs"]
            print("✅ ConversationRepository initialized with MongoDB")

    def _check_connection(self):
        """
        Check if MongoDB is connected.

        Raises:
            RuntimeError: If MongoDB is not connected
        """
        if self.db is None or self.conversations is None:
            raise RuntimeError(
                "MongoDB not connected. Cannot perform database operations. "
                "Check MONGODB_URI in .env file."
            )

    # ========================================================================
    # CONVERSATION CRUD OPERATIONS
    # ========================================================================

    async def create_conversation(
        self,
        user_id: str,
        conversation_id: Optional[str] = None
    ) -> str:
        """
        Create a new conversation.

        Args:
            user_id: User ID who owns this conversation
            conversation_id: Optional custom conversation ID (auto-generated if None)

        Returns:
            str: Conversation ID

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        if conversation_id is None:
            conversation_id = str(uuid.uuid4())

        conversation = {
            "conversation_id": conversation_id,
            "user_id": user_id,
            "messages": [],  # Will store all messages
            "created_at": datetime.now(),
            "updated_at": datetime.now(),
            "status": "active"  # active, archived, deleted
        }

        await self.conversations.insert_one(conversation)

        return conversation_id

    async def get_conversation(self, conversation_id: str) -> Optional[Dict]:
        """
        Get a conversation by ID.

        Args:
            conversation_id: Conversation ID

        Returns:
            dict or None: Conversation document

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        conversation = await self.conversations.find_one(
            {"conversation_id": conversation_id}
        )

        # Convert MongoDB ObjectId to string for JSON serialization
        if conversation and "_id" in conversation:
            conversation["_id"] = str(conversation["_id"])

        return conversation

    async def get_user_conversations(
        self,
        user_id: str,
        limit: int = 10,
        skip: int = 0
    ) -> List[Dict]:
        """Get all conversations for a user."""
        # Gracefully return empty list if not connected
        if self.db is None or self.conversations is None:
            print("⚠️ MongoDB not connected - returning empty conversations list")
            return []

        cursor = self.conversations.find(
            {"user_id": user_id, "status": "active"}
        ).sort("updated_at", -1).skip(skip).limit(limit)

        conversations = await cursor.to_list(length=limit)

        # Convert ObjectIds to strings
        for conv in conversations:
            if "_id" in conv:
                conv["_id"] = str(conv["_id"])

        return conversations

    async def add_message(
        self,
        conversation_id: str,
        message: Dict
    ) -> bool:
        """
        Add a message to a conversation.

        Args:
            conversation_id: Conversation ID
            message: Message dict
                {
                    'role': 'user' or 'assistant',
                    'content': str,
                    'timestamp': datetime,
                    'metadata': dict (optional - policy_action, docs_retrieved, etc.)
                }

        Returns:
            bool: Success status

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        # Ensure timestamp exists
        if "timestamp" not in message:
            message["timestamp"] = datetime.now()

        # Add message to conversation
        result = await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$push": {"messages": message},
                "$set": {"updated_at": datetime.now()}
            }
        )

        return result.modified_count > 0

    async def get_conversation_history(
        self,
        conversation_id: str,
        max_messages: Optional[int] = None
    ) -> List[Dict]:
        """
        Get conversation history (messages only).

        Args:
            conversation_id: Conversation ID
            max_messages: Optional limit on number of messages

        Returns:
            list: List of messages

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        conversation = await self.get_conversation(conversation_id)

        if not conversation:
            return []

        messages = conversation.get("messages", [])

        if max_messages:
            messages = messages[-max_messages:]

        return messages

    async def delete_conversation(self, conversation_id: str) -> bool:
        """
        Soft delete a conversation (mark as deleted, don't actually delete).

        Args:
            conversation_id: Conversation ID

        Returns:
            bool: Success status

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        result = await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {
                    "status": "deleted",
                    "deleted_at": datetime.now()
                }
            }
        )

        return result.modified_count > 0

    # ========================================================================
    # RETRIEVAL LOGS (for RL training)
    # ========================================================================

    async def log_retrieval(
        self,
        log_data: Dict
    ) -> str:
        """
        Log a retrieval operation (for RL training and analysis).

        Args:
            log_data: Log data dict
                {
                    'conversation_id': str,
                    'user_id': str,
                    'query': str,
                    'policy_action': 'FETCH' or 'NO_FETCH',
                    'policy_confidence': float,
                    'documents_retrieved': int,
                    'top_doc_score': float or None,
                    'retrieved_docs_metadata': list,
                    'response': str,
                    'retrieval_time_ms': float,
                    'generation_time_ms': float,
                    'total_time_ms': float,
                    'timestamp': datetime
                }

        Returns:
            str: Log ID

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        # Add timestamp if not present
        if "timestamp" not in log_data:
            log_data["timestamp"] = datetime.now()

        # Generate log ID
        log_id = str(uuid.uuid4())
        log_data["log_id"] = log_id

        # Insert log
        await self.retrieval_logs.insert_one(log_data)

        return log_id

    async def get_retrieval_logs(
        self,
        conversation_id: Optional[str] = None,
        user_id: Optional[str] = None,
        limit: int = 100,
        skip: int = 0
    ) -> List[Dict]:
        """
        Get retrieval logs (for analysis and RL training).

        Args:
            conversation_id: Optional filter by conversation
            user_id: Optional filter by user
            limit: Maximum number of logs
            skip: Number of logs to skip

        Returns:
            list: List of log documents

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        # Build query
        query = {}
        if conversation_id:
            query["conversation_id"] = conversation_id
        if user_id:
            query["user_id"] = user_id

        # Fetch logs
        cursor = self.retrieval_logs.find(query).sort("timestamp", -1).skip(skip).limit(limit)
        logs = await cursor.to_list(length=limit)

        # Convert ObjectIds to strings
        for log in logs:
            if "_id" in log:
                log["_id"] = str(log["_id"])

        return logs

    async def get_logs_for_rl_training(
        self,
        min_date: Optional[datetime] = None,
        limit: int = 1000
    ) -> List[Dict]:
        """
        Get logs specifically for RL training.
        Filters for logs with both policy decision and retrieval results.

        Args:
            min_date: Optional minimum date for logs
            limit: Maximum number of logs

        Returns:
            list: List of log documents suitable for RL training

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        # Build query
        query = {
            "policy_action": {"$exists": True},
            "response": {"$exists": True}
        }

        if min_date:
            query["timestamp"] = {"$gte": min_date}

        # Fetch logs
        cursor = self.retrieval_logs.find(query).sort("timestamp", -1).limit(limit)
        logs = await cursor.to_list(length=limit)

        # Convert ObjectIds
        for log in logs:
            if "_id" in log:
                log["_id"] = str(log["_id"])

        return logs

    # ========================================================================
    # ANALYTICS QUERIES
    # ========================================================================

    async def get_conversation_stats(self, user_id: str) -> Dict:
        """
        Get conversation statistics for a user.

        Args:
            user_id: User ID

        Returns:
            dict: Statistics

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        # Count total conversations
        total_conversations = await self.conversations.count_documents({
            "user_id": user_id,
            "status": "active"
        })

        # Count total messages
        pipeline = [
            {"$match": {"user_id": user_id, "status": "active"}},
            {"$project": {"message_count": {"$size": "$messages"}}}
        ]

        result = await self.conversations.aggregate(pipeline).to_list(length=None)
        total_messages = sum(doc.get("message_count", 0) for doc in result)

        return {
            "total_conversations": total_conversations,
            "total_messages": total_messages,
            "avg_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
        }

    async def get_policy_stats(self, user_id: Optional[str] = None) -> Dict:
        """
        Get policy decision statistics.

        Args:
            user_id: Optional user ID filter

        Returns:
            dict: Policy statistics

        Raises:
            RuntimeError: If MongoDB not connected
        """
        self._check_connection()

        # Build query
        query = {}
        if user_id:
            query["user_id"] = user_id

        # Count FETCH vs NO_FETCH
        fetch_count = await self.retrieval_logs.count_documents({
            **query,
            "policy_action": "FETCH"
        })

        no_fetch_count = await self.retrieval_logs.count_documents({
            **query,
            "policy_action": "NO_FETCH"
        })

        total = fetch_count + no_fetch_count

        return {
            "fetch_count": fetch_count,
            "no_fetch_count": no_fetch_count,
            "total": total,
            "fetch_rate": fetch_count / total if total > 0 else 0,
            "no_fetch_rate": no_fetch_count / total if total > 0 else 0
        }


# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# In your service or API endpoint:

from app.db.repositories.conversation_repository import ConversationRepository

repo = ConversationRepository()

# Create conversation
conv_id = await repo.create_conversation(user_id="user_123")

# Add user message
await repo.add_message(conv_id, {
    'role': 'user',
    'content': 'What is my balance?',
    'timestamp': datetime.now()
})

# Add assistant message
await repo.add_message(conv_id, {
    'role': 'assistant',
    'content': 'Your balance is $1000',
    'timestamp': datetime.now(),
    'metadata': {
        'policy_action': 'FETCH',
        'documents_retrieved': 3
    }
})

# Get conversation history
history = await repo.get_conversation_history(conv_id)

# Log retrieval for RL training
await repo.log_retrieval({
    'conversation_id': conv_id,
    'user_id': 'user_123',
    'query': 'What is my balance?',
    'policy_action': 'FETCH',
    'documents_retrieved': 3,
    'response': 'Your balance is $1000'
})
"""
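
The queries above hit conversations by conversation_id and by (user_id, status) sorted on updated_at, and retrieval_logs by conversation_id, user_id, and timestamp. A one-time setup coroutine along these lines would keep those lookups indexed; this is an illustrative sketch (ensure_indexes is not part of this commit), and Motor's create_index mirrors PyMongo's:

    from app.db.mongodb import get_database

    async def ensure_indexes():
        # Assumes connect_to_mongo() has already run, so get_database() is not None
        db = get_database()
        await db["conversations"].create_index("conversation_id", unique=True)
        await db["conversations"].create_index(
            [("user_id", 1), ("status", 1), ("updated_at", -1)]
        )
        await db["retrieval_logs"].create_index([("timestamp", -1)])
        await db["retrieval_logs"].create_index("conversation_id")
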
app/db/repositories/user_repository.py
ADDED
@@ -0,0 +1,155 @@
"""
User Repository - MongoDB CRUD for Users
Handles user registration, retrieval, and management
"""

import uuid
from datetime import datetime
from typing import Optional, Dict
from app.db.mongodb import get_database
from app.utils.security import hash_password


class UserRepository:
    """Repository for user data in MongoDB"""

    def __init__(self):
        """Initialize repository with database connection"""
        self.db = get_database()

        if self.db is None:
            print("⚠️ UserRepository: MongoDB not connected")
            self.users = None
        else:
            self.users = self.db["users"]
            print("✅ UserRepository initialized")

    def _check_connection(self):
        """Check if MongoDB is connected"""
        if self.db is None or self.users is None:
            raise RuntimeError("MongoDB not connected")

    async def create_user(
        self,
        email: str,
        password: str,
        full_name: str
    ) -> str:
        """
        Create a new user.

        Args:
            email: User email (unique)
            password: Plain text password (will be hashed)
            full_name: User's full name

        Returns:
            str: User ID

        Raises:
            ValueError: If email already exists
        """
        self._check_connection()

        # Check if user already exists
        existing_user = await self.users.find_one({"email": email})
        if existing_user:
            raise ValueError("Email already registered")

        # Create user document
        user_id = str(uuid.uuid4())
        user = {
            "user_id": user_id,
            "email": email,
            "hashed_password": hash_password(password),
            "full_name": full_name,
            "created_at": datetime.now(),
            "is_active": True
        }

        await self.users.insert_one(user)
        print(f"✅ Created user: {email}")

        return user_id

    async def get_user_by_email(self, email: str) -> Optional[Dict]:
        """
        Get user by email.

        Args:
            email: User email

        Returns:
            dict or None: User document
        """
        self._check_connection()

        user = await self.users.find_one({"email": email})

        if user and "_id" in user:
            user["_id"] = str(user["_id"])

        return user

    async def get_user_by_id(self, user_id: str) -> Optional[Dict]:
        """
        Get user by ID.

        Args:
            user_id: User ID

        Returns:
            dict or None: User document
        """
        self._check_connection()

        user = await self.users.find_one({"user_id": user_id})

        if user and "_id" in user:
            user["_id"] = str(user["_id"])

        return user

    async def update_user(self, user_id: str, updates: Dict) -> bool:
        """
        Update user information.

        Args:
            user_id: User ID
            updates: Dictionary of fields to update

        Returns:
            bool: Success status
        """
        self._check_connection()

        # Don't allow updating certain fields
        forbidden_fields = ["user_id", "email", "hashed_password", "created_at"]
        for field in forbidden_fields:
            updates.pop(field, None)

        result = await self.users.update_one(
            {"user_id": user_id},
            {"$set": updates}
        )

        return result.modified_count > 0

    async def delete_user(self, user_id: str) -> bool:
        """
        Soft delete a user (mark as inactive).

        Args:
            user_id: User ID

        Returns:
            bool: Success status
        """
        self._check_connection()

        result = await self.users.update_one(
            {"user_id": user_id},
            {"$set": {"is_active": False, "deleted_at": datetime.now()}}
        )

        return result.modified_count > 0
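
Registration with this repository is a check-then-insert, so concurrent signups with the same email can race past the find_one check; a unique index on email closes that gap at the database level. A hedged usage sketch (the endpoint shape and error mapping are illustrative, not copied from this project's auth router):

    from fastapi import HTTPException
    from app.db.repositories.user_repository import UserRepository

    async def register(email: str, password: str, full_name: str) -> dict:
        repo = UserRepository()
        try:
            user_id = await repo.create_user(email, password, full_name)
        except ValueError:
            # Raised by create_user when the email already exists
            raise HTTPException(status_code=400, detail="Email already registered")
        return {"user_id": user_id}

    # One-time hardening (optional, not in this commit):
    #     await repo.users.create_index("email", unique=True)
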
app/main.py
ADDED
@@ -0,0 +1,301 @@
"""
FastAPI Main Application Entry Point

Banking RAG Chatbot API with JWT Authentication

This file:
1. Creates the FastAPI app
2. Configures CORS middleware
3. Connects to MongoDB on startup/shutdown
4. Includes API routers (auth + chat)
5. Provides health check endpoints
"""

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from contextlib import asynccontextmanager

from app.config import settings
from app.db.mongodb import connect_to_mongo, close_mongo_connection

# ============================================================================
# LIFESPAN MANAGER (Startup & Shutdown)
# ============================================================================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage application lifespan events.

    Startup:
    - Connect to MongoDB Atlas
    - ML models load lazily on first use

    Shutdown:
    - Close MongoDB connection
    - Cleanup resources
    """
    # ========================================================================
    # STARTUP
    # ========================================================================
    print("\n" + "=" * 80)
    print("🚀 STARTING BANKING RAG CHATBOT API")
    print("=" * 80)
    print(f"Environment: {settings.ENVIRONMENT}")
    print(f"Debug Mode: {settings.DEBUG}")
    print("=" * 80)

    # Connect to MongoDB
    await connect_to_mongo()

    print("\n💡 ML Models Info:")
    print("   Policy Network: Loads on first chat request (lazy loading)")
    print("   Retriever Model: Loads on first retrieval (lazy loading)")
    print("   LLM: Groq (ChatGroq) with HuggingFace fallback")
    print("\n🤖 LLM Configuration:")
    print(f"   Chat Model: {settings.GROQ_CHAT_MODEL} (Llama 3 8B)")
    print(f"   Eval Model: {settings.GROQ_EVAL_MODEL} (Llama 3 70B)")
    print(f"   Groq API Keys: {len(settings.get_groq_api_keys())} configured")
    print(f"   HuggingFace Tokens: {len(settings.get_hf_tokens())} configured")
    print("   Fallback: Groq → HuggingFace")

    print("\n✅ Backend startup complete!")
    print("=" * 80)
    print("📖 API Docs: http://localhost:8000/docs")
    print("🏥 Health Check: http://localhost:8000/health")
    print("🔐 Register: POST http://localhost:8000/api/v1/auth/register")
    print("🔑 Login: POST http://localhost:8000/api/v1/auth/login")
    print("=" * 80 + "\n")

    yield  # Application runs here

    # ========================================================================
    # SHUTDOWN
    # ========================================================================
    print("\n" + "=" * 80)
    print("🛑 SHUTTING DOWN API")
    print("=" * 80)

    # Close MongoDB connection
    await close_mongo_connection()

    print("✅ Shutdown complete")
    print("=" * 80 + "\n")

# ============================================================================
# CREATE FASTAPI APPLICATION
# ============================================================================

app = FastAPI(
    title="Banking RAG Chatbot API",
    description="""
    🤖 AI-powered Banking Assistant with:

    **Features:**
    - 🔐 JWT Authentication (Sign up, Login, Protected routes)
    - 💬 RAG (Retrieval-Augmented Generation)
    - 🧠 RL-based Policy Network (BERT)
    - 🔍 Custom E5 Retriever
    - ⚡ Groq LLM with HuggingFace Fallback (Llama 3 models)

    **Capabilities:**
    - Intelligent document retrieval
    - Context-aware responses
    - Conversation history
    - Real-time chat
    - User authentication & authorization
    - Multi-provider LLM with automatic fallback
    """,
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan
)

# ============================================================================
# CORS MIDDLEWARE
# ============================================================================

allowed_origins = settings.get_allowed_origins()
print("\n🌐 CORS Configuration:")
print(f"   Allowed Origins: {allowed_origins}")

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ============================================================================
# INCLUDE API ROUTERS
# ============================================================================

from app.api.v1 import chat, auth

# Auth router (public endpoints - register, login)
app.include_router(
    auth.router,
    prefix="/api/v1/auth",
    tags=["🔐 Authentication"]
)

# Chat router (protected endpoints - requires JWT token)
app.include_router(
    chat.router,
    prefix="/api/v1/chat",
    tags=["💬 Chat"]
)

# ============================================================================
# ROOT ENDPOINTS
# ============================================================================

@app.get("/", tags=["📍 Root"])
async def root():
    """
    Root endpoint - API information and available endpoints
    """
    return {
        "message": "Banking RAG Chatbot API with Authentication",
        "version": "1.0.0",
        "status": "online",
        "authentication": "JWT Bearer Token Required for chat endpoints",
        "llm_provider": "Groq (ChatGroq) with HuggingFace fallback",
        "models": {
            "chat": settings.GROQ_CHAT_MODEL,
            "evaluation": settings.GROQ_EVAL_MODEL
        },
        "documentation": {
            "swagger_ui": "/docs",
            "redoc": "/redoc"
        },
        "endpoints": {
            "auth": {
                "register": "POST /api/v1/auth/register",
                "login": "POST /api/v1/auth/login",
                "me": "GET /api/v1/auth/me (requires token)",
                "logout": "POST /api/v1/auth/logout (requires token)"
            },
            "chat": {
                "send_message": "POST /api/v1/chat/ (requires token)",
                "get_history": "GET /api/v1/chat/history/{conversation_id} (requires token)",
                "list_conversations": "GET /api/v1/chat/conversations (requires token)",
                "delete_conversation": "DELETE /api/v1/chat/conversation/{conversation_id} (requires token)"
            },
            "health": "GET /health"
        }
    }

@app.get("/health", tags=["🏥 Health"])
async def health_check():
    """
    Comprehensive health check endpoint

    Checks status of:
    - API service
    - MongoDB connection
    - ML models (lazy loaded)
    - Authentication system
    - LLM providers (Groq & HuggingFace)

    Returns:
        dict: Health status of all components
    """
    from app.db.mongodb import get_database

    # Check MongoDB
    mongodb_status = "connected" if get_database() is not None else "disconnected"

    # Check ML models (don't load them, just check readiness)
    ml_models_status = {
        "policy_network": "ready (lazy load)",
        "retriever": "ready (lazy load)",
        "llm": "ready (API-based)"
    }

    # Check LLM providers
    llm_providers = {
        "groq": {
            "enabled": settings.is_groq_enabled(),
            "api_keys_configured": len(settings.get_groq_api_keys()),
            "chat_model": settings.GROQ_CHAT_MODEL,
            "eval_model": settings.GROQ_EVAL_MODEL
        },
        "huggingface": {
            "enabled": settings.is_hf_enabled(),
            "tokens_configured": len(settings.get_hf_tokens()),
            "chat_model": settings.HF_CHAT_MODEL,
            "eval_model": settings.HF_EVAL_MODEL
        }
    }

    # Check authentication
    auth_status = {
        "jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
        "algorithm": settings.ALGORITHM,
        "token_expiry_minutes": settings.ACCESS_TOKEN_EXPIRE_MINUTES
    }

    # Overall health
    is_healthy = (
        mongodb_status == "connected" and
        auth_status["jwt_enabled"] and
        (llm_providers["groq"]["enabled"] or llm_providers["huggingface"]["enabled"])
    )

    return {
        "status": "healthy" if is_healthy else "degraded",
        "api": "online",
        "mongodb": mongodb_status,
        "authentication": auth_status,
        "llm_providers": llm_providers,
        "ml_models": ml_models_status,
        "environment": settings.ENVIRONMENT,
        "debug_mode": settings.DEBUG
    }

# ============================================================================
# GLOBAL EXCEPTION HANDLER
# ============================================================================

@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Global exception handler for unhandled errors
    """
    print("\n❌ Unhandled Exception:")
    print(f"   Path: {request.url.path}")
    print(f"   Error: {str(exc)}")

    if settings.DEBUG:
        import traceback
        traceback.print_exc()

    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal Server Error",
            "detail": str(exc) if settings.DEBUG else "An unexpected error occurred",
            "path": str(request.url.path)
        }
    )

# ============================================================================
# MAIN ENTRY POINT (for direct execution)
# ============================================================================

if __name__ == "__main__":
    import uvicorn

    print("\n🚀 Starting server directly...")
    print("   Note: For production, use: uvicorn app.main:app --host 0.0.0.0 --port 8000")

    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=settings.DEBUG  # Auto-reload only in debug mode
    )
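
Once the server is running, the /health endpoint doubles as a smoke test for the whole stack (MongoDB, JWT config, LLM keys). A minimal client-side check against a local instance, illustrative only:

    import httpx

    resp = httpx.get("http://localhost:8000/health", timeout=10.0)
    body = resp.json()
    assert resp.status_code == 200
    print(body["status"])   # "healthy" or "degraded"
    print(body["mongodb"])  # "connected" or "disconnected"
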
app/ml/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Machine Learning package - ML models and inference
Contains retriever, policy network, and model loading utilities
"""

__version__ = "1.0.0"
app/ml/policy_network.py
ADDED
@@ -0,0 +1,610 @@
"""
BERT-based Policy Network for FETCH/NO_FETCH decisions
Trained with Reinforcement Learning (Policy Gradient + Entropy Regularization)

Adapted from the original RL.py training script, with:
- PolicyNetwork class (BERT-based)
- State encoding from conversation history
- Action prediction (FETCH vs NO_FETCH)
- Module-level caching (load once on startup)
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from typing import List, Dict, Optional, Tuple
from transformers import AutoTokenizer, AutoModel

from app.config import settings


# ============================================================================
# POLICY NETWORK (From RL.py)
# ============================================================================

class PolicyNetwork(nn.Module):
    """
    BERT-based Policy Network for deciding FETCH vs NO_FETCH actions.

    Architecture:
    - Base: BERT-base-uncased (pre-trained)
    - Input: Current query + conversation history + previous actions
    - Output: 2-class softmax (FETCH=0, NO_FETCH=1)
    - Special tokens: [FETCH], [NO_FETCH] for action encoding

    Training Details:
    - Loss: Policy Gradient + Entropy Regularization
    - Optimizer: AdamW
    - Reward structure:
      * FETCH: +0.5 (always)
      * NO_FETCH + Good: +2.0
      * NO_FETCH + Bad: -0.5
    """

    def __init__(self, model_name: str = "bert-base-uncased", dropout_rate: float = 0.1):
        super(PolicyNetwork, self).__init__()

        # Load pre-trained BERT
        self.bert = AutoModel.from_pretrained(model_name)

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Add special tokens for actions: [FETCH] and [NO_FETCH]
        special_tokens = {"additional_special_tokens": ["[FETCH]", "[NO_FETCH]"]}
        self.tokenizer.add_special_tokens(special_tokens)

        # Resize BERT embeddings to accommodate new tokens
        self.bert.resize_token_embeddings(len(self.tokenizer))

        # Initialize random embeddings for special tokens
        self._init_action_embeddings()

        # Classification head: BERT hidden size (768) → 2 classes
        self.classifier = nn.Linear(self.bert.config.hidden_size, 2)

        # Dropout for regularization
        self.dropout = nn.Dropout(dropout_rate)

    def _init_action_embeddings(self):
        """
        Initialize random embeddings for [FETCH] and [NO_FETCH] tokens.
        These are learned during training.
        """
        with torch.no_grad():
            # Get token IDs for special tokens
            fetch_id = self.tokenizer.convert_tokens_to_ids("[FETCH]")
            no_fetch_id = self.tokenizer.convert_tokens_to_ids("[NO_FETCH]")

            # Get embedding dimension
            embedding_dim = self.bert.config.hidden_size

            # Initialize with small random values (same as BERT initialization)
            self.bert.embeddings.word_embeddings.weight[fetch_id] = torch.randn(embedding_dim) * 0.02
            self.bert.embeddings.word_embeddings.weight[no_fetch_id] = torch.randn(embedding_dim) * 0.02

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Forward pass through BERT + classifier.

        Args:
            input_ids: Tokenized input IDs (shape: [batch_size, seq_len])
            attention_mask: Attention mask (shape: [batch_size, seq_len])

        Returns:
            logits: Raw logits (shape: [batch_size, 2])
            probs: Softmax probabilities (shape: [batch_size, 2])
        """
        # Pass through BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Extract [CLS] token representation (first token)
        cls_output = outputs.last_hidden_state[:, 0, :]

        # Apply dropout
        cls_output = self.dropout(cls_output)

        # Classification
        logits = self.classifier(cls_output)

        # Softmax for probabilities
        probs = F.softmax(logits, dim=-1)

        return logits, probs

    def encode_state(
        self,
        state: Dict,
        max_length: Optional[int] = None
    ) -> Dict[str, torch.Tensor]:
        """
        Encode conversation state into BERT input format.

        State structure:
        {
            'previous_queries': [query1, query2, ...],
            'previous_actions': ['FETCH', 'NO_FETCH', ...],
            'current_query': 'user query'
        }

        Encoding format:
        "Previous query 1: <text> [Action: [FETCH]] Previous query 2: <text> [Action: [NO_FETCH]] Current query: <text>"

        Args:
            state: State dictionary
            max_length: Maximum sequence length (default from config)

        Returns:
            dict: Tokenized inputs (input_ids, attention_mask)
        """
        if max_length is None:
            max_length = settings.POLICY_MAX_LEN

        # Build state text from conversation history
        state_text = ""

        # Add previous queries and their actions
        prev_queries = state.get('previous_queries', [])
        prev_actions = state.get('previous_actions', [])

        if prev_queries and prev_actions:
            for i, (prev_query, prev_action) in enumerate(zip(prev_queries, prev_actions)):
                state_text += f"Previous query {i+1}: {prev_query} [Action: [{prev_action}]] "

        # Add current query
        current_query = state.get('current_query', '')
        state_text += f"Current query: {current_query}"

        # Tokenize
        encoding = self.tokenizer(
            state_text,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )

        return encoding
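
    # Illustrative trace of encode_state (example values are made up): for
    #     {'previous_queries': ['How do I block my card?'],
    #      'previous_actions': ['FETCH'],
    #      'current_query': 'And how do I order a new one?'}
    # the text handed to the tokenizer is
    #     "Previous query 1: How do I block my card? [Action: [FETCH]] Current query: And how do I order a new one?"
    # where [FETCH] tokenizes to a single special token, and the whole string is
    # truncated/padded to POLICY_MAX_LEN.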
| 169 |
+
|
| 170 |
+
def predict_action(
|
| 171 |
+
self,
|
| 172 |
+
state: Dict,
|
| 173 |
+
use_dropout: bool = False,
|
| 174 |
+
num_samples: int = 10
|
| 175 |
+
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
|
| 176 |
+
"""
|
| 177 |
+
Predict action probabilities for a given state.
|
| 178 |
+
|
| 179 |
+
Args:
|
| 180 |
+
state: Conversation state dictionary
|
| 181 |
+
use_dropout: Whether to use MC Dropout for uncertainty estimation
|
| 182 |
+
num_samples: Number of MC Dropout samples (if use_dropout=True)
|
| 183 |
+
|
| 184 |
+
Returns:
|
| 185 |
+
probs: Action probabilities (shape: [1, 2]) - [P(FETCH), P(NO_FETCH)]
|
| 186 |
+
uncertainty: Standard deviation across samples (if use_dropout=True)
|
| 187 |
+
"""
|
| 188 |
+
device = next(self.parameters()).device
|
| 189 |
+
|
| 190 |
+
if use_dropout:
|
| 191 |
+
# MC Dropout for uncertainty estimation
|
| 192 |
+
self.train() # Enable dropout during inference
|
| 193 |
+
all_probs = []
|
| 194 |
+
|
| 195 |
+
for _ in range(num_samples):
|
| 196 |
+
with torch.no_grad():
|
| 197 |
+
encoding = self.encode_state(state)
|
| 198 |
+
input_ids = encoding['input_ids'].to(device)
|
| 199 |
+
attention_mask = encoding['attention_mask'].to(device)
|
| 200 |
+
|
| 201 |
+
_, probs = self.forward(input_ids, attention_mask)
|
| 202 |
+
all_probs.append(probs.cpu().numpy())
|
| 203 |
+
|
| 204 |
+
# Average probabilities across samples
|
| 205 |
+
avg_probs = np.mean(all_probs, axis=0)
|
| 206 |
+
|
| 207 |
+
# Calculate uncertainty (standard deviation)
|
| 208 |
+
uncertainty = np.std(all_probs, axis=0)
|
| 209 |
+
|
| 210 |
+
return avg_probs, uncertainty
|
| 211 |
+
|
| 212 |
+
else:
|
| 213 |
+
# Standard inference (no uncertainty estimation)
|
| 214 |
+
self.eval()
|
| 215 |
+
|
| 216 |
+
with torch.no_grad():
|
| 217 |
+
encoding = self.encode_state(state)
|
| 218 |
+
input_ids = encoding['input_ids'].to(device)
|
| 219 |
+
attention_mask = encoding['attention_mask'].to(device)
|
| 220 |
+
|
| 221 |
+
_, probs = self.forward(input_ids, attention_mask)
|
| 222 |
+
|
| 223 |
+
return probs.cpu().numpy(), None
|
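
# A minimal usage sketch (not part of the original file) showing how
# predict_action's MC Dropout mode yields an uncertainty estimate; the
# state dict follows the format documented in encode_state:
#
#   model = load_policy_model()
#   state = {
#       'previous_queries': ['What is my balance?'],
#       'previous_actions': ['FETCH'],
#       'current_query': 'And how do I close my account?'
#   }
#   probs, uncertainty = model.predict_action(state, use_dropout=True, num_samples=10)
#   # probs[0] = [P(FETCH), P(NO_FETCH)] averaged over 10 stochastic passes;
#   # uncertainty[0] is the per-class standard deviation across those passes.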

# ============================================================================
# MODULE-LEVEL CACHING (Load once on import)
# ============================================================================

# Global variables for caching
POLICY_MODEL: Optional[PolicyNetwork] = None
POLICY_TOKENIZER: Optional[AutoTokenizer] = None


# =============================================================================================
# Latest version (suggested by Perplexity); should work — if not, use one of the versions below.
# =============================================================================================

def load_policy_model() -> PolicyNetwork:
    """
    Load trained policy model (called once on startup).
    Downloads from HuggingFace Hub if not present locally.
    Uses module-level caching - model stays in RAM.

    Returns:
        PolicyNetwork: Loaded policy model
    """
    global POLICY_MODEL, POLICY_TOKENIZER

    if POLICY_MODEL is None:
        # Download model from HF Hub if needed (for deployment)
        settings.download_model_if_needed(
            hf_filename="models/best_policy_model.pth",
            local_path=settings.POLICY_MODEL_PATH
        )

        print(f"Loading policy network from {settings.POLICY_MODEL_PATH}...")

        try:
            # Load checkpoint first to get vocab size
            checkpoint = torch.load(settings.POLICY_MODEL_PATH, map_location=settings.DEVICE)

            # Support both raw state dicts and full checkpoints with metadata
            # (unwrapping here also avoids a KeyError below when the weights
            # are nested under 'model_state_dict')
            state_dict = (
                checkpoint['model_state_dict']
                if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint
                else checkpoint
            )

            # Create model instance
            POLICY_MODEL = PolicyNetwork(
                model_name="bert-base-uncased",
                dropout_rate=0.1
            )

            # **KEY FIX**: Resize model embeddings to match saved checkpoint BEFORE loading weights
            saved_vocab_size = state_dict['bert.embeddings.word_embeddings.weight'].shape[0]
            current_vocab_size = len(POLICY_MODEL.tokenizer)

            if saved_vocab_size != current_vocab_size:
                print(f"⚠️ Vocab size mismatch: saved={saved_vocab_size}, current={current_vocab_size}")
                print(f"✅ Resizing tokenizer and embeddings to match saved model...")
                # Resize model to match saved checkpoint
                POLICY_MODEL.bert.resize_token_embeddings(saved_vocab_size)

            # Move to device
            POLICY_MODEL = POLICY_MODEL.to(settings.DEVICE)

            # Now load trained weights (sizes will match!)
            POLICY_MODEL.load_state_dict(state_dict)

            # Set to evaluation mode
            POLICY_MODEL.eval()

            # Cache tokenizer
            POLICY_TOKENIZER = POLICY_MODEL.tokenizer

            print("✅ Policy network loaded and cached")

        except FileNotFoundError:
            print(f"❌ Policy model file not found: {settings.POLICY_MODEL_PATH}")
            print(f"⚠️ Make sure models are uploaded to HuggingFace Hub: {settings.HF_MODEL_REPO}")
            raise
        except Exception as e:
            print(f"❌ Failed to load policy model: {e}")
            raise

    return POLICY_MODEL
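
# A hypothetical startup hook (illustrative; the hook name and app object are
# assumptions, not from this file) showing where load_policy_model() would
# typically be called so the checkpoint is downloaded and cached once before
# the first request:
#
#   @app.on_event("startup")
#   async def warm_up_models():
#       load_policy_model()  # BERT policy network stays cached in RAM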


# ===========================================================================
# This version is used in the code, at least for localhost testing
# ===========================================================================

# def load_policy_model() -> PolicyNetwork:
#     """
#     Load trained policy model (called once on startup).
#     Uses module-level caching - model stays in RAM.
#
#     Returns:
#         PolicyNetwork: Loaded policy model
#     """
#     global POLICY_MODEL, POLICY_TOKENIZER
#
#     if POLICY_MODEL is None:
#         print(f"Loading policy network from {settings.POLICY_MODEL_PATH}...")
#
#         try:
#             # Load checkpoint first to get vocab size
#             checkpoint = torch.load(settings.POLICY_MODEL_PATH, map_location=settings.DEVICE)
#
#             # Create model instance
#             POLICY_MODEL = PolicyNetwork(
#                 model_name="bert-base-uncased",
#                 dropout_rate=0.1
#             )
#
#             # **KEY FIX**: Resize model embeddings to match saved checkpoint BEFORE loading weights
#             saved_vocab_size = checkpoint['bert.embeddings.word_embeddings.weight'].shape[0]
#             current_vocab_size = len(POLICY_MODEL.tokenizer)
#
#             if saved_vocab_size != current_vocab_size:
#                 print(f"⚠️ Vocab size mismatch: saved={saved_vocab_size}, current={current_vocab_size}")
#                 print(f"✅ Resizing tokenizer and embeddings to match saved model...")
#
#                 # Resize model to match saved checkpoint
#                 POLICY_MODEL.bert.resize_token_embeddings(saved_vocab_size)
#
#             # Move to device
#             POLICY_MODEL = POLICY_MODEL.to(settings.DEVICE)
#
#             # Now load trained weights (sizes will match!)
#             if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
#                 POLICY_MODEL.load_state_dict(checkpoint['model_state_dict'])
#             else:
#                 POLICY_MODEL.load_state_dict(checkpoint)
#
#             # Set to evaluation mode
#             POLICY_MODEL.eval()
#
#             # Cache tokenizer
#             POLICY_TOKENIZER = POLICY_MODEL.tokenizer
#
#             print("✅ Policy network loaded and cached")
#
#         except FileNotFoundError:
#             print(f"❌ Policy model file not found: {settings.POLICY_MODEL_PATH}")
#             print("⚠️ You need to train the policy network first!")
#             raise
#
#         except Exception as e:
#             print(f"❌ Failed to load policy model: {e}")
#             raise
#
#     return POLICY_MODEL


# =====================================================================================
# This is the older version (or probably a different version); potentially still useful
# =====================================================================================

# def load_policy_model() -> PolicyNetwork:
#     """
#     Load trained policy model (called once on startup).
#     Uses module-level caching - model stays in RAM.
#
#     Returns:
#         PolicyNetwork: Loaded policy model
#     """
#     global POLICY_MODEL, POLICY_TOKENIZER
#
#     if POLICY_MODEL is None:
#         print(f"Loading policy network from {settings.POLICY_MODEL_PATH}...")
#
#         try:
#             # Create model instance
#             POLICY_MODEL = PolicyNetwork(
#                 model_name="bert-base-uncased",
#                 dropout_rate=0.1
#             ).to(settings.DEVICE)
#
#             # Load trained weights
#             checkpoint = torch.load(settings.POLICY_MODEL_PATH, map_location=settings.DEVICE)
#
#             # Handle different checkpoint formats
#             if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
#                 # Full checkpoint with metadata
#                 POLICY_MODEL.load_state_dict(checkpoint['model_state_dict'])
#             else:
#                 # Just state dict
#                 POLICY_MODEL.load_state_dict(checkpoint)
#
#             # Set to evaluation mode
#             POLICY_MODEL.eval()
#
#             # Cache tokenizer
#             POLICY_TOKENIZER = POLICY_MODEL.tokenizer
#
#             print("✅ Policy network loaded and cached")
#
#         except FileNotFoundError:
#             print(f"❌ Policy model file not found: {settings.POLICY_MODEL_PATH}")
#             print("⚠️ You need to train the policy network first!")
#             raise
#
#         except Exception as e:
#             print(f"❌ Failed to load policy model: {e}")
#             raise
#
#     return POLICY_MODEL

# ============================================================================
# PREDICTION FUNCTIONS
# ============================================================================

def create_state_from_history(
    current_query: str,
    conversation_history: List[Dict],
    max_history: int = 2
) -> Dict:
    """
    Create state dictionary from conversation history.
    Extracts last N query-action pairs.

    Args:
        current_query: Current user query
        conversation_history: List of conversation turns
            Each turn: {'role': 'user'/'assistant', 'content': '...', 'metadata': {...}}
        max_history: Maximum number of previous turns to include (default: 2)

    Returns:
        dict: State dictionary for policy network
    """
    state = {
        'current_query': current_query,
        'previous_queries': [],
        'previous_actions': []
    }

    if not conversation_history:
        return state

    # Extract last N conversation turns (user + assistant pairs)
    relevant_history = conversation_history[-(max_history * 2):]

    for i, turn in enumerate(relevant_history):
        # User turns
        if turn.get('role') == 'user':
            query = turn.get('content', '')
            state['previous_queries'].append(query)

            # Look for corresponding assistant turn
            if i + 1 < len(relevant_history):
                bot_turn = relevant_history[i + 1]
                if bot_turn.get('role') == 'assistant':
                    metadata = bot_turn.get('metadata', {})
                    action = metadata.get('policy_action', 'FETCH')
                    state['previous_actions'].append(action)

    return state
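
# A worked example (values illustrative) of the extraction above with
# max_history=2, i.e. the last two user/assistant pairs:
#
#   history = [
#       {'role': 'user', 'content': 'What is my balance?'},
#       {'role': 'assistant', 'content': '$1000', 'metadata': {'policy_action': 'FETCH'}},
#       {'role': 'user', 'content': 'Thanks!'},
#       {'role': 'assistant', 'content': 'Welcome!', 'metadata': {'policy_action': 'NO_FETCH'}},
#   ]
#   create_state_from_history('Can I get a loan?', history)
#   # -> {'current_query': 'Can I get a loan?',
#   #     'previous_queries': ['What is my balance?', 'Thanks!'],
#   #     'previous_actions': ['FETCH', 'NO_FETCH']}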


def predict_policy_action(
    query: str,
    history: List[Dict] = None,
    return_probs: bool = False
) -> Dict:
    """
    Predict FETCH/NO_FETCH action for a query.

    Args:
        query: User query text
        history: Conversation history (optional)
        return_probs: Whether to return full probability distribution

    Returns:
        dict: Prediction results
        {
            'action': 'FETCH' or 'NO_FETCH',
            'confidence': float (0-1),
            'fetch_prob': float,
            'no_fetch_prob': float,
            'should_retrieve': bool
        }
    """
    # Load model (cached after first call)
    model = load_policy_model()

    # Create state from history
    if history is None:
        history = []

    state = create_state_from_history(query, history)

    # Predict action
    probs, _ = model.predict_action(state, use_dropout=False)

    # Extract probabilities
    fetch_prob = float(probs[0][0])
    no_fetch_prob = float(probs[0][1])

    # Determine action (argmax)
    action_idx = np.argmax(probs[0])
    action = "FETCH" if action_idx == 0 else "NO_FETCH"
    confidence = float(probs[0][action_idx])

    # Check confidence threshold
    should_retrieve = (action == "FETCH") or (action == "NO_FETCH" and confidence < settings.CONFIDENCE_THRESHOLD)

    result = {
        'action': action,
        'confidence': confidence,
        'should_retrieve': should_retrieve,
        'policy_decision': action
    }

    if return_probs:
        result['fetch_prob'] = fetch_prob
        result['no_fetch_prob'] = no_fetch_prob

    return result
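
# Worked example of the threshold gate above (threshold value hypothetical):
# with settings.CONFIDENCE_THRESHOLD = 0.7, a NO_FETCH prediction at
# confidence 0.55 still sets should_retrieve=True (the policy is unsure, so
# we retrieve anyway), while NO_FETCH at 0.95 skips retrieval entirely; any
# FETCH prediction retrieves regardless of confidence.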


# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# In your service file:

from app.ml.policy_network import predict_policy_action

# Predict action
history = [
    {'role': 'user', 'content': 'What is my balance?'},
    {'role': 'assistant', 'content': '$1000', 'metadata': {'policy_action': 'FETCH'}}
]

result = predict_policy_action(
    query="Thank you!",
    history=history,
    return_probs=True
)

print(result)
# {
#     'action': 'NO_FETCH',
#     'confidence': 0.95,
#     'should_retrieve': False,
#     'fetch_prob': 0.05,
#     'no_fetch_prob': 0.95
# }
"""
app/ml/retriever.py
ADDED
@@ -0,0 +1,522 @@
"""
Custom Retriever with E5-Base-V2 and FAISS
Trained with InfoNCE + Triplet Loss for banking domain

This is adapted from your RAG.py with:
- CustomSentenceTransformer (e5-base-v2)
- Mean pooling + L2 normalization
- FAISS vector search
- Module-level caching (load once on startup)
"""

import os
import json
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import faiss
import numpy as np
from typing import List, Dict, Optional
from transformers import AutoTokenizer, AutoModel

from app.config import settings


# ============================================================================
# CUSTOM SENTENCE TRANSFORMER (From RAG.py)
# ============================================================================

class CustomSentenceTransformer(nn.Module):
    """
    Custom SentenceTransformer matching your training code.
    Uses e5-base-v2 with mean pooling and L2 normalization.

    Training Details:
    - Base model: intfloat/e5-base-v2
    - Loss: InfoNCE + Triplet Loss
    - Pooling: Mean pooling on last hidden state
    - Normalization: L2 normalization
    """

    def __init__(self, model_name: str = "intfloat/e5-base-v2"):
        super().__init__()
        # Load pre-trained e5-base-v2 encoder
        self.encoder = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.config = self.encoder.config

    def forward(self, input_ids, attention_mask):
        """
        Forward pass through BERT encoder.

        Args:
            input_ids: Tokenized input IDs
            attention_mask: Attention mask for padding

        Returns:
            torch.Tensor: L2-normalized embeddings (shape: [batch_size, 768])
        """
        # Get BERT outputs
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # Mean pooling - same as training
        # Take hidden states from last layer
        token_embeddings = outputs.last_hidden_state

        # Expand attention mask to match token embeddings shape
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()

        # Sum embeddings (weighted by attention mask) and divide by sum of mask
        embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

        # L2 normalize embeddings - same as training
        embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings

    def encode(
        self,
        sentences: List[str],
        batch_size: int = 32,
        convert_to_numpy: bool = True,
        show_progress_bar: bool = False
    ) -> np.ndarray:
        """
        Encode sentences using the same method as training.
        Adds 'query: ' prefix for e5-base-v2 compatibility.

        Args:
            sentences: List of sentences to encode
            batch_size: Batch size for encoding
            convert_to_numpy: Whether to convert to numpy array
            show_progress_bar: Whether to show progress bar

        Returns:
            np.ndarray: Encoded embeddings (shape: [num_sentences, 768])
        """
        self.eval()  # Set model to evaluation mode

        # Handle single string input
        if isinstance(sentences, str):
            sentences = [sentences]

        # Add 'query: ' prefix for e5-base-v2 (required by model)
        # Handle None values and empty strings
        processed_sentences = []
        for sentence in sentences:
            if sentence is None:
                processed_sentences.append("query: ")  # Default empty query
            elif isinstance(sentence, str):
                processed_sentences.append(f"query: {sentence.strip()}")
            else:
                processed_sentences.append(f"query: {str(sentence)}")

        all_embeddings = []

        # Encode in batches
        with torch.no_grad():  # No gradient computation
            for i in range(0, len(processed_sentences), batch_size):
                batch_sentences = processed_sentences[i:i + batch_size]

                # Tokenize batch
                tokens = self.tokenizer(
                    batch_sentences,
                    truncation=True,
                    padding=True,
                    max_length=128,  # Same as training
                    return_tensors='pt'
                ).to(next(self.parameters()).device)

                # Get embeddings
                embeddings = self.forward(tokens['input_ids'], tokens['attention_mask'])

                # Convert to numpy if requested
                if convert_to_numpy:
                    embeddings = embeddings.cpu().numpy()

                all_embeddings.append(embeddings)

        # Combine all batches
        if convert_to_numpy:
            all_embeddings = np.vstack(all_embeddings)
        else:
            all_embeddings = torch.cat(all_embeddings, dim=0)

        return all_embeddings
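
# A quick sanity-check sketch (not part of the original file): because
# forward() L2-normalizes, every embedding has unit norm, so a plain dot
# product between two embeddings equals their cosine similarity:
#
#   model = CustomSentenceTransformer()
#   embs = model.encode(["how do I reset my PIN?", "PIN reset procedure"])
#   assert np.allclose(np.linalg.norm(embs, axis=1), 1.0, atol=1e-5)
#   cosine = float(embs[0] @ embs[1])  # inner product == cosine similarity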


# ============================================================================
# CUSTOM RETRIEVER MODEL (Wrapper)
# ============================================================================

class CustomRetrieverModel:
    """
    Wrapper for your custom trained retriever model.
    Handles both knowledge base documents and query encoding.
    """

    def __init__(self, model_path: str, device: str = "cpu"):
        """
        Initialize retriever model.

        Args:
            model_path: Path to trained model weights (.pth file)
            device: Device to load model on ('cpu' or 'cuda')
        """
        self.device = device

        # Create model instance
        self.model = CustomSentenceTransformer("intfloat/e5-base-v2").to(device)

        # Load your trained weights
        try:
            state_dict = torch.load(model_path, map_location=device)
            self.model.load_state_dict(state_dict)
            print(f"✅ Custom retriever model loaded from {model_path}")
        except Exception as e:
            print(f"❌ Failed to load custom model: {e}")
            print("🔄 Using base e5-base-v2 model (not trained)...")

        # Set to evaluation mode
        self.model.eval()

    def encode_documents(self, documents: List[str], batch_size: int = 32) -> np.ndarray:
        """
        Encode knowledge base documents.
        These are the responses/instructions we're retrieving.

        Args:
            documents: List of document texts
            batch_size: Batch size for encoding

        Returns:
            np.ndarray: Document embeddings (shape: [num_docs, 768])
        """
        return self.model.encode(documents, batch_size=batch_size, convert_to_numpy=True)

    def encode_query(self, query: str) -> np.ndarray:
        """
        Encode user query for retrieval.

        Args:
            query: User query text

        Returns:
            np.ndarray: Query embedding (shape: [1, 768])
        """
        return self.model.encode([query], convert_to_numpy=True)
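
# A minimal sketch (paths illustrative) of scoring a query against a few
# documents with this wrapper; since embeddings are unit-normalized, the
# matrix product gives cosine similarities directly:
#
#   retriever = CustomRetrieverModel("models/best_retriever_model.pth")
#   doc_embs = retriever.encode_documents(["How to block a card", "Loan eligibility"])
#   q_emb = retriever.encode_query("my card was stolen")
#   scores = (q_emb @ doc_embs.T)[0]  # cosine similarity per document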

# ============================================================================
# MODULE-LEVEL CACHING (Load once on import)
# ============================================================================

# Global variables for caching
RETRIEVER_MODEL: Optional[CustomRetrieverModel] = None
FAISS_INDEX: Optional[faiss.Index] = None
KB_DATA: Optional[List[Dict]] = None


# =============================================================================================
# Latest version (suggested by Perplexity); should work — if not, use one of the versions below.
# =============================================================================================

def load_retriever() -> CustomRetrieverModel:
    """
    Load custom retriever model (called once on startup).
    Downloads from HuggingFace Hub if not present locally.
    Uses module-level caching - model stays in RAM.

    Returns:
        CustomRetrieverModel: Loaded retriever model
    """
    global RETRIEVER_MODEL

    if RETRIEVER_MODEL is None:
        # Download model from HF Hub if needed (for deployment)
        settings.download_model_if_needed(
            hf_filename="models/best_retriever_model.pth",
            local_path=settings.RETRIEVER_MODEL_PATH
        )

        print(f"Loading custom retriever from {settings.RETRIEVER_MODEL_PATH}...")

        RETRIEVER_MODEL = CustomRetrieverModel(
            model_path=settings.RETRIEVER_MODEL_PATH,
            device=settings.DEVICE
        )

        print("✅ Retriever model loaded and cached")

    return RETRIEVER_MODEL


# ===========================================================================
# This version is used in the code, at least for localhost testing
# ===========================================================================

# def load_retriever() -> CustomRetrieverModel:
#     """
#     Load custom retriever model (called once on startup).
#     Uses module-level caching - model stays in RAM.
#
#     Returns:
#         CustomRetrieverModel: Loaded retriever model
#     """
#     global RETRIEVER_MODEL
#
#     if RETRIEVER_MODEL is None:
#         print(f"Loading custom retriever from {settings.RETRIEVER_MODEL_PATH}...")
#         RETRIEVER_MODEL = CustomRetrieverModel(
#             model_path=settings.RETRIEVER_MODEL_PATH,
#             device=settings.DEVICE
#         )
#         print("✅ Retriever model loaded and cached")
#
#     return RETRIEVER_MODEL


# =============================================================================================
# Latest version (suggested by Perplexity); should work — if not, use one of the versions below.
# =============================================================================================

def load_faiss_index():
    """
    Load FAISS index + knowledge base from pickle file.
    Downloads from HuggingFace Hub if not present locally.
    Uses module-level caching - loaded once on startup.

    Returns:
        tuple: (faiss.Index, List[Dict]) - FAISS index and KB data
    """
    global FAISS_INDEX, KB_DATA

    if FAISS_INDEX is None or KB_DATA is None:
        # Download FAISS index from HF Hub if needed (for deployment)
        settings.download_model_if_needed(
            hf_filename="models/faiss_index.pkl",
            local_path=settings.FAISS_INDEX_PATH
        )

        # Download knowledge base from HF Hub if needed (for deployment)
        settings.download_model_if_needed(
            hf_filename="data/final_knowledge_base.jsonl",
            local_path=settings.KB_PATH
        )

        print(f"Loading FAISS index from {settings.FAISS_INDEX_PATH}...")

        try:
            # Load pickled FAISS index + KB data
            with open(settings.FAISS_INDEX_PATH, 'rb') as f:
                FAISS_INDEX, KB_DATA = pickle.load(f)

            print(f"✅ FAISS index loaded: {FAISS_INDEX.ntotal} vectors")
            print(f"✅ Knowledge base loaded: {len(KB_DATA)} documents")

        except FileNotFoundError:
            print(f"❌ FAISS index file not found: {settings.FAISS_INDEX_PATH}")
            print(f"⚠️ Make sure models are uploaded to HuggingFace Hub: {settings.HF_MODEL_REPO}")
            raise
        except Exception as e:
            print(f"❌ Failed to load FAISS index: {e}")
            raise

    return FAISS_INDEX, KB_DATA
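
# A minimal sketch of how such a pickle could be produced (presumably what
# build_faiss_index.py does; the IndexFlatIP choice and field names here are
# assumptions, not confirmed by this file):
#
#   kb = [json.loads(line) for line in open(settings.KB_PATH, encoding='utf-8')]
#   embs = load_retriever().encode_documents([d['instruction'] for d in kb])
#   faiss.normalize_L2(embs)                  # unit vectors -> inner product == cosine
#   index = faiss.IndexFlatIP(embs.shape[1])  # exact inner-product search
#   index.add(embs)
#   with open(settings.FAISS_INDEX_PATH, 'wb') as f:
#       pickle.dump((index, kb), f)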


# ===========================================================================
# This version is used in the code, at least for localhost testing
# ===========================================================================

# def load_faiss_index():
#     """
#     Load FAISS index + knowledge base from pickle file.
#     Uses module-level caching - loaded once on startup.
#
#     Returns:
#         tuple: (faiss.Index, List[Dict]) - FAISS index and KB data
#     """
#     global FAISS_INDEX, KB_DATA
#
#     if FAISS_INDEX is None or KB_DATA is None:
#         print(f"Loading FAISS index from {settings.FAISS_INDEX_PATH}...")
#
#         try:
#             # Load pickled FAISS index + KB data
#             with open(settings.FAISS_INDEX_PATH, 'rb') as f:
#                 FAISS_INDEX, KB_DATA = pickle.load(f)
#
#             print(f"✅ FAISS index loaded: {FAISS_INDEX.ntotal} vectors")
#             print(f"✅ Knowledge base loaded: {len(KB_DATA)} documents")
#
#         except FileNotFoundError:
#             print(f"❌ FAISS index file not found: {settings.FAISS_INDEX_PATH}")
#             print("⚠️ You need to create the FAISS index first!")
#             raise
#
#         except Exception as e:
#             print(f"❌ Failed to load FAISS index: {e}")
#             raise
#
#     return FAISS_INDEX, KB_DATA


# ============================================================================
# RETRIEVAL FUNCTIONS
# ============================================================================

def retrieve_documents(
    query: str,
    top_k: int = None,
    min_similarity: float = None
) -> List[Dict]:
    """
    Retrieve top-k documents for a query using custom retriever + FAISS.

    Args:
        query: User query text
        top_k: Number of documents to retrieve (default from config)
        min_similarity: Minimum similarity threshold (default from config)

    Returns:
        List[Dict]: Retrieved documents with scores
            Each dict contains:
            - instruction: FAQ question
            - response: FAQ answer
            - category: Document category
            - intent: Document intent
            - score: Similarity score (0-1)
            - rank: Rank in results (1-indexed)
            - faq_id: Document ID
    """
    # Use config defaults if not provided
    if top_k is None:
        top_k = settings.TOP_K
    if min_similarity is None:
        min_similarity = settings.SIMILARITY_THRESHOLD

    # Validate query
    if not query or query.strip() == "":
        print("⚠️ Empty query provided")
        return []

    # Load models (cached, no overhead after first call)
    retriever = load_retriever()
    index, kb = load_faiss_index()

    try:
        # Step 1: Encode query
        query_embedding = retriever.encode_query(query)

        # Step 2: Normalize for cosine similarity
        faiss.normalize_L2(query_embedding)

        # Step 3: Search in FAISS index
        similarities, indices = index.search(query_embedding, top_k)

        # Step 4: Check similarity threshold for top result
        if similarities[0][0] < min_similarity:
            print(f"🚫 NO_FETCH (similarity: {similarities[0][0]:.3f} < {min_similarity})")
            return []

        print(f"✅ FETCH (similarity: {similarities[0][0]:.3f} >= {min_similarity})")

        # Step 5: Format results
        results = []
        for rank, (similarity, idx) in enumerate(zip(similarities[0], indices[0])):
            if idx < len(kb):
                doc = kb[idx]
                results.append({
                    'instruction': doc.get('instruction', ''),
                    'response': doc.get('response', ''),
                    'category': doc.get('category', 'Unknown'),
                    'intent': doc.get('intent', 'Unknown'),
                    'score': float(similarity),
                    'rank': rank + 1,
                    'faq_id': doc.get('faq_id', f'doc_{idx}')
                })

        return results

    except Exception as e:
        print(f"❌ Retrieval error: {e}")
        import traceback
        traceback.print_exc()
        return []
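
# Worked example of the Step 4 gate (values hypothetical): with
# settings.SIMILARITY_THRESHOLD = 0.45, a top-1 similarity of 0.38 returns []
# (treated as NO_FETCH), while a top-1 similarity of 0.62 returns all top_k
# hits ranked by score. Note the threshold is applied to the best hit only;
# lower-ranked hits below the threshold are still included once it passes.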


def format_context(retrieved_docs: List[Dict], max_context_length: int = None) -> str:
    """
    Format retrieved documents into context string for LLM.
    Prioritizes by score and limits total length.

    Args:
        retrieved_docs: List of retrieved documents
        max_context_length: Maximum context length in characters

    Returns:
        str: Formatted context string
    """
    if max_context_length is None:
        max_context_length = settings.MAX_CONTEXT_LENGTH

    if not retrieved_docs:
        return ""

    context_parts = []
    current_length = 0

    for doc in retrieved_docs:
        # Create context entry with None checks
        instruction = doc.get('instruction', '') or ''
        response = doc.get('response', '') or ''
        category = doc.get('category', 'N/A') or 'N/A'

        context_entry = f"[Rank {doc['rank']}, Score: {doc['score']:.3f}]\n"
        context_entry += f"Q: {instruction}\n"
        context_entry += f"A: {response}\n"
        context_entry += f"Category: {category}\n\n"

        # Check length limit
        if current_length + len(context_entry) > max_context_length:
            break

        context_parts.append(context_entry)
        current_length += len(context_entry)

    return "".join(context_parts)
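
# Example of the resulting context string (content illustrative):
#
#   [Rank 1, Score: 0.812]
#   Q: How do I block a lost debit card?
#   A: Call the 24x7 helpline or block it from the mobile app...
#   Category: CARD
#
# followed by the rank-2 entry, and so on until max_context_length is hit.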


# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# In your service file:

from app.ml.retriever import retrieve_documents, format_context

# Retrieve documents
docs = retrieve_documents("What is my account balance?", top_k=5)

# Format context for LLM
context = format_context(docs)

# Use context in LLM prompt
prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
"""
app/models/__init__.py
ADDED
@@ -0,0 +1,3 @@
"""
Pydantic models package
"""
app/models/user.py
ADDED
@@ -0,0 +1,69 @@
"""
User Models for Authentication
Pydantic models for user registration, login, and responses
"""

from pydantic import BaseModel, EmailStr, Field
from typing import Optional
from datetime import datetime


class UserRegister(BaseModel):
    """User registration request"""
    email: EmailStr
    password: str = Field(..., min_length=6, max_length=100)
    full_name: str = Field(..., min_length=2, max_length=100)

    class Config:
        json_schema_extra = {
            "example": {
                "email": "user@example.com",
                "password": "SecurePass123",
                "full_name": "John Doe"
            }
        }


class UserLogin(BaseModel):
    """User login request"""
    email: EmailStr
    password: str

    class Config:
        json_schema_extra = {
            "example": {
                "email": "user@example.com",
                "password": "SecurePass123"
            }
        }


class UserResponse(BaseModel):
    """User response (without password)"""
    user_id: str
    email: str
    full_name: str
    created_at: datetime

    class Config:
        json_schema_extra = {
            "example": {
                "user_id": "abc-123",
                "email": "user@example.com",
                "full_name": "John Doe",
                "created_at": "2025-10-28T20:00:00"
            }
        }


class Token(BaseModel):
    """JWT Token response"""
    access_token: str
    token_type: str = "bearer"
    user: UserResponse


class TokenData(BaseModel):
    """Data stored in JWT token"""
    user_id: Optional[str] = None
    email: Optional[str] = None
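
# A hypothetical login response shaped by Token (values illustrative only):
#
#   {
#     "access_token": "eyJhbGciOiJIUzI1NiIs...",
#     "token_type": "bearer",
#     "user": {
#       "user_id": "abc-123",
#       "email": "user@example.com",
#       "full_name": "John Doe",
#       "created_at": "2025-10-28T20:00:00"
#     }
#   }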
app/services/__init__.py
ADDED
@@ -0,0 +1,6 @@
"""
Services package - Business logic layer
Contains all service classes that handle core application logic
"""

__version__ = "1.0.0"
app/services/chat_service.py
ADDED
@@ -0,0 +1,335 @@
"""
Chat Service - Main RAG Pipeline

Combines: Policy Network → Retriever → LLM Generator

This is the core service that orchestrates:
1. Policy decision (FETCH vs NO_FETCH)
2. Document retrieval (if FETCH)
3. Response generation (Groq/HuggingFace with Llama 3)
4. Logging to MongoDB

Adapted from your RAG.py workflow
"""

import time
from datetime import datetime
from typing import List, Dict, Any, Optional

from app.config import settings
from app.ml.policy_network import predict_policy_action
from app.ml.retriever import retrieve_documents, format_context
from app.core.llm_manager import llm_manager

# ============================================================================
# SYSTEM PROMPTS
# ============================================================================

BANKING_SYSTEM_PROMPT = """You are an expert banking assistant specialized in Indian financial regulations and banking practices. You have access to a comprehensive knowledge base of banking policies, procedures, and RBI regulations.

Instructions:
- Answer the user query accurately using the provided context when available
- If context is insufficient or query is outside banking domain, still respond helpfully but mention your banking specialization
- If no banking context is available, provide a general helpful response but acknowledge your expertise is in banking
- Never refuse to answer - always be helpful while being transparent about your specialization
- Cite relevant policy numbers or document references when available in context
- Never fabricate specific policies, rates, or eligibility criteria
- If uncertain about current rates or policies, acknowledge the limitation
- Maintain a helpful and professional tone
- Keep responses concise, clear, and actionable
"""

EVALUATION_PROMPT = """You are evaluating a banking assistant's response for quality and accuracy.

Criteria:
1. Accuracy: Is the response factually correct?
2. Relevance: Does it address the user's question?
3. Completeness: Are all aspects of the question covered?
4. Clarity: Is the response easy to understand?
5. Context Usage: Does it properly use the retrieved context?

Rate the response as:
- "Good": Accurate, relevant, complete, and clear
- "Bad": Inaccurate, irrelevant, incomplete, or unclear

Provide your rating and brief explanation."""

# ============================================================================
# CHAT SERVICE
# ============================================================================

class ChatService:
    """
    Main chat service that handles the complete RAG pipeline.

    Pipeline:
    1. User query comes in
    2. Policy network decides: FETCH or NO_FETCH
    3. If FETCH: Retrieve documents from FAISS
    4. Generate response using Groq/HuggingFace (with or without context)
    5. Return response + metadata
    """

    def __init__(self):
        """Initialize chat service"""
        print("🤖 ChatService initialized")

    async def process_query(
        self,
        query: str,
        conversation_history: List[Dict[str, str]] = None,
        user_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Process a user query through the complete RAG pipeline.

        This is the MAIN function that combines everything:
        - Policy decision
        - Retrieval
        - Generation

        Args:
            query: User query text
            conversation_history: Previous conversation turns
                Format: [{'role': 'user'/'assistant', 'content': '...', 'metadata': {...}}]
            user_id: Optional user ID for logging

        Returns:
            dict: Complete response with metadata
            {
                'response': str,                 # Generated response
                'policy_action': str,            # FETCH or NO_FETCH
                'policy_confidence': float,      # Confidence score
                'should_retrieve': bool,         # Whether retrieval was done
                'documents_retrieved': int,      # Number of docs retrieved
                'top_doc_score': float or None,  # Best similarity score
                'retrieval_time_ms': float,      # Time spent on retrieval
                'generation_time_ms': float,     # Time spent on generation
                'total_time_ms': float,          # Total processing time
                'timestamp': str                 # ISO timestamp
            }
        """
        start_time = time.time()

        # Initialize history if None
        if conversation_history is None:
            conversation_history = []

        # Validate query
        if not query or query.strip() == "":
            return {
                'response': "I didn't receive a valid question. Could you please try again?",
                'policy_action': 'NO_FETCH',
                'policy_confidence': 1.0,
                'should_retrieve': False,
                'documents_retrieved': 0,
                'top_doc_score': None,
                'retrieval_time_ms': 0,
                'generation_time_ms': 0,
                'total_time_ms': 0,
                'timestamp': datetime.now().isoformat()
            }

        # ====================================================================
        # STEP 1: POLICY DECISION (Local BERT model)
        # ====================================================================
        print(f"\n{'='*80}")
        print(f"🔍 Processing Query: {query[:50]}...")
        print(f"{'='*80}")

        policy_start = time.time()

        # Predict action using policy network
        policy_result = predict_policy_action(
            query=query,
            history=conversation_history,
            return_probs=True
        )

        policy_time = (time.time() - policy_start) * 1000

        print(f"\n📊 Policy Decision:")
        print(f"   Action: {policy_result['action']}")
        print(f"   Confidence: {policy_result['confidence']:.3f}")
        print(f"   Should Retrieve: {policy_result['should_retrieve']}")
        print(f"   Time: {policy_time:.2f}ms")

        # ====================================================================
        # STEP 2: RETRIEVAL (if FETCH or low confidence NO_FETCH)
        # ====================================================================
        retrieved_docs = []
        context = ""
        retrieval_time = 0

        if policy_result['should_retrieve']:
            print(f"\n🔎 Retrieving documents...")
            retrieval_start = time.time()

            try:
                # Retrieve documents using custom retriever + FAISS
                retrieved_docs = retrieve_documents(
                    query=query,
                    top_k=settings.TOP_K,
                    min_similarity=settings.SIMILARITY_THRESHOLD
                )

                retrieval_time = (time.time() - retrieval_start) * 1000

                if retrieved_docs:
                    print(f"   ✅ Retrieved {len(retrieved_docs)} documents")
                    print(f"   Top score: {retrieved_docs[0]['score']:.3f}")

                    # Format context for LLM
                    context = format_context(
                        retrieved_docs,
                        max_context_length=settings.MAX_CONTEXT_LENGTH
                    )
                else:
                    print(f"   ⚠️ No documents above threshold")

            except Exception as e:
                print(f"   ❌ Retrieval error: {e}")
                # Continue without retrieval

        else:
            print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")

        # ====================================================================
        # STEP 3: GENERATE RESPONSE (Groq/HuggingFace with fallback)
        # ====================================================================
        print(f"\n💬 Generating response...")
        generation_start = time.time()

        try:
            # Generate response using LLM manager (Groq → HuggingFace fallback)
            response = await llm_manager.generate_chat_response(
                query=query,
                context=context,
                history=conversation_history
            )

            generation_time = (time.time() - generation_start) * 1000

            print(f"   ✅ Response generated")
            print(f"   Length: {len(response)} chars")
            print(f"   Time: {generation_time:.2f}ms")

        except Exception as e:
            print(f"   ❌ Generation error: {e}")
            response = "I apologize, but I encountered an error generating a response. Please try again."
            generation_time = (time.time() - generation_start) * 1000

        # ====================================================================
        # STEP 4: COMPILE RESULTS
        # ====================================================================
        total_time = (time.time() - start_time) * 1000

        result = {
            'response': response,
            'policy_action': policy_result['action'],
            'policy_confidence': policy_result['confidence'],
            'should_retrieve': policy_result['should_retrieve'],
            'documents_retrieved': len(retrieved_docs),
            'top_doc_score': retrieved_docs[0]['score'] if retrieved_docs else None,
            'retrieval_time_ms': round(retrieval_time, 2),
            'generation_time_ms': round(generation_time, 2),
            'total_time_ms': round(total_time, 2),
            'timestamp': datetime.now().isoformat()
        }

        # Add retrieved docs metadata (for logging, not sent to user)
        if retrieved_docs:
            result['retrieved_docs_metadata'] = [
                {
                    'faq_id': doc['faq_id'],
                    'score': doc['score'],
                    'category': doc['category'],
                    'rank': doc['rank']
                }
                for doc in retrieved_docs
            ]

        print(f"\n{'='*80}")
        print(f"✅ Query processed successfully")
        print(f"   Total time: {total_time:.2f}ms")
        print(f"{'='*80}\n")

        return result

    async def health_check(self) -> Dict[str, Any]:
        """
        Check health of all service components.

        Returns:
            dict: Health status
        """
        health = {
            'service': 'chat_service',
            'status': 'healthy',
            'components': {}
        }

        # Check policy network
        try:
            from app.ml.policy_network import POLICY_MODEL
            health['components']['policy_network'] = 'loaded' if POLICY_MODEL else 'not_loaded'
        except Exception as e:
            health['components']['policy_network'] = f'error: {str(e)}'

        # Check retriever
        try:
            from app.ml.retriever import RETRIEVER_MODEL, FAISS_INDEX
            health['components']['retriever'] = 'loaded' if RETRIEVER_MODEL else 'not_loaded'
            health['components']['faiss_index'] = 'loaded' if FAISS_INDEX else 'not_loaded'
        except Exception as e:
            health['components']['retriever'] = f'error: {str(e)}'

        # Check LLM manager
        try:
            from app.core.llm_manager import llm_manager as llm
            health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
            health['components']['huggingface'] = 'enabled' if llm.huggingface else 'disabled'
        except Exception as e:
            health['components']['llm_manager'] = f'error: {str(e)}'

        # Overall status
        failed_components = [k for k, v in health['components'].items() if 'error' in str(v)]
        if failed_components:
            health['status'] = 'degraded'
            health['failed_components'] = failed_components

        return health
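
# Example health_check() output when everything is loaded (values illustrative):
#
#   {
#     'service': 'chat_service',
#     'status': 'healthy',
#     'components': {
#       'policy_network': 'loaded',
#       'retriever': 'loaded',
#       'faiss_index': 'loaded',
#       'groq': 'enabled',
#       'huggingface': 'enabled'
#     }
#   }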

# ============================================================================
# GLOBAL CHAT SERVICE INSTANCE
# ============================================================================

chat_service = ChatService()

# ============================================================================
# USAGE EXAMPLE (for reference)
# ============================================================================
"""
# In your API endpoint (chat.py):
from app.services.chat_service import chat_service

# Process user query
result = await chat_service.process_query(
    query="What is my account balance?",
    conversation_history=[
        {'role': 'user', 'content': 'Hello'},
        {'role': 'assistant', 'content': 'Hi! How can I help?', 'metadata': {'policy_action': 'NO_FETCH'}}
    ],
    user_id="user_123"
)

# Result contains:
# - response: "Your account balance is $1,234.56"
# - policy_action: "FETCH"
# - documents_retrieved: 3
# - total_time_ms: 450.23
# etc.

# Get service health
health = await chat_service.health_check()
"""
app/utils/dependencies.py
ADDED
@@ -0,0 +1,87 @@
"""
FastAPI Dependencies
Authentication and authorization dependencies
"""

from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from typing import Optional

from app.utils.security import decode_access_token
from app.db.repositories.user_repository import UserRepository
from app.models.user import TokenData


# HTTP Bearer token scheme (auto_error=True: missing credentials -> 403)
security = HTTPBearer()

# Non-erroring scheme for optional authentication. With the default
# HTTPBearer(), a request without an Authorization header is rejected
# before the dependency body runs, so the Optional[...] branch in
# get_optional_current_user would never be reached.
optional_security = HTTPBearer(auto_error=False)


async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security)
) -> TokenData:
    """
    Get current authenticated user from JWT token.

    This dependency extracts and validates the JWT token from the
    Authorization header and returns the user data.

    Args:
        credentials: HTTP Bearer credentials

    Returns:
        TokenData: User data from token

    Raises:
        HTTPException: If token is invalid or expired
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    # Decode token
    token = credentials.credentials
    payload = decode_access_token(token)

    if payload is None:
        raise credentials_exception

    # Extract user data
    user_id: str = payload.get("user_id")
    email: str = payload.get("email")

    if user_id is None or email is None:
        raise credentials_exception

    # Verify user exists and is active
    user_repo = UserRepository()
    user = await user_repo.get_user_by_id(user_id)

    if user is None or not user.get("is_active", False):
        raise credentials_exception

    return TokenData(user_id=user_id, email=email)


async def get_optional_current_user(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(optional_security)
) -> Optional[TokenData]:
    """
    Get current user if authenticated, None otherwise.

    This is a non-required version of get_current_user for optional auth.

    Args:
        credentials: Optional HTTP Bearer credentials

    Returns:
        TokenData or None: User data from token or None
    """
    if credentials is None:
        return None

    try:
        return await get_current_user(credentials)
    except HTTPException:
        return None
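In use, get_current_user sits behind any protected route as a Depends() parameter. A short illustrative sketch (the real routes live in app/api/v1/, so the /me path and response shape are assumptions, not the commit's actual endpoints):

# Hypothetical protected endpoint — path and response shape are illustrative.
from fastapi import APIRouter, Depends

from app.models.user import TokenData
from app.utils.dependencies import get_current_user

router = APIRouter()

@router.get("/me")
async def read_me(current_user: TokenData = Depends(get_current_user)):
    # By the time this body runs, get_current_user has already validated
    # the Bearer token and confirmed the user exists and is active.
    return {"user_id": current_user.user_id, "email": current_user.email}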
app/utils/security.py
ADDED
@@ -0,0 +1,190 @@
# """
# Security Utilities
# Password hashing and JWT token management
# """

# from datetime import datetime, timedelta
# from typing import Optional
# from jose import JWTError, jwt
# from passlib.context import CryptContext

# from app.config import settings


# # Password hashing context
# pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


# # ============================================================================
# # PASSWORD HASHING
# # ============================================================================

# def hash_password(password: str) -> str:
#     """
#     Hash a password using bcrypt.

#     Args:
#         password: Plain text password

#     Returns:
#         str: Hashed password
#     """
#     return pwd_context.hash(password)


# def verify_password(plain_password: str, hashed_password: str) -> bool:
#     """
#     Verify a password against a hash.

#     Args:
#         plain_password: Plain text password to verify
#         hashed_password: Hashed password from database

#     Returns:
#         bool: True if password matches, False otherwise
#     """
#     return pwd_context.verify(plain_password, hashed_password)


# # ============================================================================
# # JWT TOKEN MANAGEMENT
# # ============================================================================

# def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
#     """
#     Create a JWT access token.

#     Args:
#         data: Data to encode in token (user_id, email, etc.)
#         expires_delta: Optional custom expiration time

#     Returns:
#         str: Encoded JWT token
#     """
#     to_encode = data.copy()

#     if expires_delta:
#         expire = datetime.utcnow() + expires_delta
#     else:
#         expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)

#     to_encode.update({"exp": expire})
#     encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM)

#     return encoded_jwt


# def decode_access_token(token: str) -> Optional[dict]:
#     """
#     Decode and verify a JWT token.

#     Args:
#         token: JWT token to decode

#     Returns:
#         dict: Decoded token data or None if invalid
#     """
#     try:
#         payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM])
#         return payload
#     except JWTError:
#         return None


"""
Security utilities for password hashing and JWT tokens
"""

from passlib.context import CryptContext
from datetime import datetime, timedelta
from typing import Optional
from jose import JWTError, jwt
from app.config import settings

# Password hashing context
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


def hash_password(password: str) -> str:
    """
    Hash a password using bcrypt

    Args:
        password: Plain text password

    Returns:
        Hashed password
    """
    # Bcrypt has a 72 byte limit; truncate on a byte boundary so a
    # multi-byte UTF-8 character is never split mid-sequence.
    password_bytes = password.encode('utf-8')
    if len(password_bytes) > 72:
        password = password_bytes[:72].decode('utf-8', errors='ignore')

    return pwd_context.hash(password)


def verify_password(plain_password: str, hashed_password: str) -> bool:
    """
    Verify a password against its hash

    Args:
        plain_password: Plain text password to verify
        hashed_password: Stored hashed password

    Returns:
        True if password matches, False otherwise
    """
    # Truncate to 72 bytes for bcrypt, mirroring hash_password above
    plain_bytes = plain_password.encode('utf-8')
    if len(plain_bytes) > 72:
        plain_password = plain_bytes[:72].decode('utf-8', errors='ignore')

    return pwd_context.verify(plain_password, hashed_password)


def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """
    Create a JWT access token

    Args:
        data: Data to encode in the token (usually user_id, email)
        expires_delta: Token expiration time (default: from settings)

    Returns:
        Encoded JWT token string
    """
    to_encode = data.copy()

    if expires_delta:
        expire = datetime.utcnow() + expires_delta
    else:
        expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)

    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, settings.SECRET_KEY, algorithm=settings.ALGORITHM)

    return encoded_jwt


def decode_access_token(token: str) -> Optional[dict]:
    """
    Decode and verify a JWT token

    Args:
        token: JWT token string

    Returns:
        Decoded token data (dict), or None if the token is invalid or expired
    """
    try:
        payload = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM])
        return payload
    except JWTError:
        return None
backups/backup_chat_service.py
ADDED
@@ -0,0 +1,340 @@
# """
# Chat Service - Main RAG Pipeline
# Combines: Policy Network → Retriever → LLM Generator

# This is the core service that orchestrates:
# 1. Policy decision (FETCH vs NO_FETCH)
# 2. Document retrieval (if FETCH)
# 3. Response generation (Gemini)
# 4. Logging to MongoDB

# Adapted from your RAG.py workflow
# """

# import time
# from datetime import datetime
# from typing import List, Dict, Any, Optional

# from app.config import settings
# from app.ml.policy_network import predict_policy_action
# from app.ml.retriever import retrieve_documents, format_context
# from app.core.llm_manager import llm_manager

# # ============================================================================
# # SYSTEM PROMPTS
# # ============================================================================

# BANKING_SYSTEM_PROMPT = """You are an expert banking assistant specialized in Indian financial regulations and banking practices. You have access to a comprehensive knowledge base of banking policies, procedures, and RBI regulations.

# Instructions:
# - Answer the user query accurately using the provided context when available
# - If context is insufficient or query is outside banking domain, still respond helpfully but mention your banking specialization
# - If no banking context is available, provide a general helpful response but acknowledge your expertise is in banking
# - Never refuse to answer - always be helpful while being transparent about your specialization
# - Cite relevant policy numbers or document references when available in context
# - Never fabricate specific policies, rates, or eligibility criteria
# - If uncertain about current rates or policies, acknowledge the limitation
# - Maintain a helpful and professional tone
# - Keep responses concise, clear, and actionable
# """

# EVALUATION_PROMPT = """You are evaluating a banking assistant's response for quality and accuracy.

# Criteria:
# 1. Accuracy: Is the response factually correct?
# 2. Relevance: Does it address the user's question?
# 3. Completeness: Are all aspects of the question covered?
# 4. Clarity: Is the response easy to understand?
# 5. Context Usage: Does it properly use the retrieved context?

# Rate the response as:
# - "Good": Accurate, relevant, complete, and clear
# - "Bad": Inaccurate, irrelevant, incomplete, or unclear

# Provide your rating and brief explanation."""


# # ============================================================================
# # CHAT SERVICE
# # ============================================================================

# class ChatService:
#     """
#     Main chat service that handles the complete RAG pipeline.

#     Pipeline:
#     1. User query comes in
#     2. Policy network decides: FETCH or NO_FETCH
#     3. If FETCH: Retrieve documents from FAISS
#     4. Generate response using Gemini (with or without context)
#     5. Return response + metadata
#     """

#     def __init__(self):
#         """Initialize chat service"""
#         print("🤖 ChatService initialized")

#     async def process_query(
#         self,
#         query: str,
#         conversation_history: List[Dict[str, str]] = None,
#         user_id: Optional[str] = None
#     ) -> Dict[str, Any]:
#         """
#         Process a user query through the complete RAG pipeline.

#         This is the MAIN function that combines everything:
#         - Policy decision
#         - Retrieval
#         - Generation

#         Args:
#             query: User query text
#             conversation_history: Previous conversation turns
#                 Format: [{'role': 'user'/'assistant', 'content': '...', 'metadata': {...}}]
#             user_id: Optional user ID for logging

#         Returns:
#             dict: Complete response with metadata
#             {
#                 'response': str,                 # Generated response
#                 'policy_action': str,            # FETCH or NO_FETCH
#                 'policy_confidence': float,      # Confidence score
#                 'should_retrieve': bool,         # Whether retrieval was done
#                 'documents_retrieved': int,      # Number of docs retrieved
#                 'top_doc_score': float or None,  # Best similarity score
#                 'retrieval_time_ms': float,      # Time spent on retrieval
#                 'generation_time_ms': float,     # Time spent on generation
#                 'total_time_ms': float,          # Total processing time
#                 'timestamp': str                 # ISO timestamp
#             }
#         """
#         start_time = time.time()

#         # Initialize history if None
#         if conversation_history is None:
#             conversation_history = []

#         # Validate query
#         if not query or query.strip() == "":
#             return {
#                 'response': "I didn't receive a valid question. Could you please try again?",
#                 'policy_action': 'NO_FETCH',
#                 'policy_confidence': 1.0,
#                 'should_retrieve': False,
#                 'documents_retrieved': 0,
#                 'top_doc_score': None,
#                 'retrieval_time_ms': 0,
#                 'generation_time_ms': 0,
#                 'total_time_ms': 0,
#                 'timestamp': datetime.now().isoformat()
#             }

#         # ====================================================================
#         # STEP 1: POLICY DECISION (Local BERT model)
#         # ====================================================================
#         print(f"\n{'='*80}")
#         print(f"🔍 Processing Query: {query[:50]}...")
#         print(f"{'='*80}")

#         policy_start = time.time()

#         # Predict action using policy network
#         policy_result = predict_policy_action(
#             query=query,
#             history=conversation_history,
#             return_probs=True
#         )

#         policy_time = (time.time() - policy_start) * 1000

#         print(f"\n📊 Policy Decision:")
#         print(f"   Action: {policy_result['action']}")
#         print(f"   Confidence: {policy_result['confidence']:.3f}")
#         print(f"   Should Retrieve: {policy_result['should_retrieve']}")
#         print(f"   Time: {policy_time:.2f}ms")

#         # ====================================================================
#         # STEP 2: RETRIEVAL (if FETCH or low confidence NO_FETCH)
#         # ====================================================================
#         retrieved_docs = []
#         context = ""
#         retrieval_time = 0

#         if policy_result['should_retrieve']:
#             print(f"\n🔎 Retrieving documents...")
#             retrieval_start = time.time()

#             try:
#                 # Retrieve documents using custom retriever + FAISS
#                 retrieved_docs = retrieve_documents(
#                     query=query,
#                     top_k=settings.TOP_K,
#                     min_similarity=settings.SIMILARITY_THRESHOLD
#                 )

#                 retrieval_time = (time.time() - retrieval_start) * 1000

#                 if retrieved_docs:
#                     print(f"   ✅ Retrieved {len(retrieved_docs)} documents")
#                     print(f"   Top score: {retrieved_docs[0]['score']:.3f}")

#                     # Format context for LLM
#                     context = format_context(
#                         retrieved_docs,
#                         max_context_length=settings.MAX_CONTEXT_LENGTH
#                     )
#                 else:
#                     print(f"   ⚠️ No documents above threshold")

#             except Exception as e:
#                 print(f"   ❌ Retrieval error: {e}")
#                 # Continue without retrieval

#         else:
#             print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")

#         # ====================================================================
#         # STEP 3: GENERATE RESPONSE (Gemini)
#         # ====================================================================
#         print(f"\n💬 Generating response...")
#         generation_start = time.time()

#         try:
#             # Generate response using LLM manager (Gemini)
#             response = await llm_manager.generate_chat_response(
#                 query=query,
#                 context=context,
#                 history=conversation_history
#             )

#             generation_time = (time.time() - generation_start) * 1000

#             print(f"   ✅ Response generated")
#             print(f"   Length: {len(response)} chars")
#             print(f"   Time: {generation_time:.2f}ms")

#         except Exception as e:
#             print(f"   ❌ Generation error: {e}")
#             response = "I apologize, but I encountered an error generating a response. Please try again."
#             generation_time = (time.time() - generation_start) * 1000

#         # ====================================================================
#         # STEP 4: COMPILE RESULTS
#         # ====================================================================
#         total_time = (time.time() - start_time) * 1000

#         result = {
#             'response': response,
#             'policy_action': policy_result['action'],
#             'policy_confidence': policy_result['confidence'],
#             'should_retrieve': policy_result['should_retrieve'],
#             'documents_retrieved': len(retrieved_docs),
#             'top_doc_score': retrieved_docs[0]['score'] if retrieved_docs else None,
#             'retrieval_time_ms': round(retrieval_time, 2),
#             'generation_time_ms': round(generation_time, 2),
#             'total_time_ms': round(total_time, 2),
#             'timestamp': datetime.now().isoformat()
#         }

#         # Add retrieved docs metadata (for logging, not sent to user)
#         if retrieved_docs:
#             result['retrieved_docs_metadata'] = [
#                 {
#                     'faq_id': doc['faq_id'],
#                     'score': doc['score'],
#                     'category': doc['category'],
#                     'rank': doc['rank']
#                 }
#                 for doc in retrieved_docs
#             ]

#         print(f"\n{'='*80}")
#         print(f"✅ Query processed successfully")
#         print(f"   Total time: {total_time:.2f}ms")
#         print(f"{'='*80}\n")

#         return result

#     async def health_check(self) -> Dict[str, Any]:
#         """
#         Check health of all service components.

#         Returns:
#             dict: Health status
#         """
#         health = {
#             'service': 'chat_service',
#             'status': 'healthy',
#             'components': {}
#         }

#         # Check policy network
#         try:
#             from app.ml.policy_network import POLICY_MODEL
#             health['components']['policy_network'] = 'loaded' if POLICY_MODEL else 'not_loaded'
#         except Exception as e:
#             health['components']['policy_network'] = f'error: {str(e)}'

#         # Check retriever
#         try:
#             from app.ml.retriever import RETRIEVER_MODEL, FAISS_INDEX
#             health['components']['retriever'] = 'loaded' if RETRIEVER_MODEL else 'not_loaded'
#             health['components']['faiss_index'] = 'loaded' if FAISS_INDEX else 'not_loaded'
#         except Exception as e:
#             health['components']['retriever'] = f'error: {str(e)}'

#         # Check LLM manager
#         try:
#             from app.core.llm_manager import llm_manager as llm
#             health['components']['gemini'] = 'enabled' if llm.gemini else 'disabled'
#             health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
#         except Exception as e:
#             health['components']['llm_manager'] = f'error: {str(e)}'

#         # Overall status
#         failed_components = [k for k, v in health['components'].items() if 'error' in str(v)]
#         if failed_components:
#             health['status'] = 'degraded'
#             health['failed_components'] = failed_components

#         return health


# # ============================================================================
# # GLOBAL CHAT SERVICE INSTANCE
# # ============================================================================
# chat_service = ChatService()


# # ============================================================================
# # USAGE EXAMPLE (for reference)
# # ============================================================================
# """
# # In your API endpoint (chat.py):

# from app.services.chat_service import chat_service

# # Process user query
# result = await chat_service.process_query(
#     query="What is my account balance?",
#     conversation_history=[
#         {'role': 'user', 'content': 'Hello'},
#         {'role': 'assistant', 'content': 'Hi! How can I help?', 'metadata': {'policy_action': 'NO_FETCH'}}
#     ],
#     user_id="user_123"
# )

# # Result contains:
# # - response: "Your account balance is $1,234.56"
# # - policy_action: "FETCH"
# # - documents_retrieved: 3
# # - total_time_ms: 450.23
# # etc.

# # Get service health
# health = await chat_service.health_check()
# """
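Putting the auth and chat pieces together, an end-to-end client call against the deployed Space might look like the sketch below. The exact endpoint paths and field names are assumptions: the authoritative schemas live in app/api/v1/auth.py and app/api/v1/chat.py, which this excerpt does not show.

# Hypothetical client sketch — endpoint paths and payload fields are
# assumptions; check app/api/v1/auth.py and app/api/v1/chat.py for the
# real schemas.
import requests

BASE = "https://<your-space>.hf.space"  # placeholder URL

# 1. Log in to obtain a JWT access token
login = requests.post(
    f"{BASE}/api/v1/auth/login",
    json={"email": "a@b.com", "password": "s3cret-pass"},
)
token = login.json()["access_token"]

# 2. Call the chat endpoint with the Bearer token
resp = requests.post(
    f"{BASE}/api/v1/chat",
    json={"query": "What documents do I need to open a savings account?"},
    headers={"Authorization": f"Bearer {token}"},
)
print(resp.json())  # response text plus policy/retrieval/timing metadata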
backups/backup_config.py
ADDED
@@ -0,0 +1,640 @@
# LINE 80 VERY IMP CHANGE OF LLM MAX TOKENS FROM 512 TO 1024

"""
Application Configuration
Settings for Banking RAG Chatbot with JWT Authentication
Includes all settings needed by existing llm_manager.py
"""

import os
from typing import List
from dotenv import load_dotenv

load_dotenv()


class Settings:
    """Application settings loaded from environment variables"""

    # ========================================================================
    # ENVIRONMENT
    # ========================================================================
    ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
    DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"

    # ========================================================================
    # MONGODB
    # ========================================================================
    MONGODB_URI: str = os.getenv("MONGODB_URI", "")
    DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")

    # ========================================================================
    # JWT AUTHENTICATION
    # ========================================================================
    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
    ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
    ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))

    # ========================================================================
    # CORS (for frontend)
    # ========================================================================
    ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

    # ========================================================================
    # GOOGLE GEMINI API
    # ========================================================================
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
    GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
    GEMINI_REQUESTS_PER_MINUTE: int = int(os.getenv("GEMINI_REQUESTS_PER_MINUTE", "60"))

    # ========================================================================
    # GROQ API (Optional - for evaluation)
    # ========================================================================
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
    GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))

    # ========================================================================
    # HUGGING FACE (Optional - for model downloads)
    # ========================================================================
    HF_TOKEN: str = os.getenv("HF_TOKEN", "")

    # ========================================================================
    # MODEL PATHS (for RL Policy Network and RAG models)
    # ========================================================================
    POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "app/models/best_policy_model.pth")
    RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "app/models/best_retriever_model.pth")
    FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "app/models/faiss_index.pkl")
    KB_PATH: str = os.getenv("KB_PATH", "app/data/final_knowledge_base.jsonl")

    # ========================================================================
    # DEVICE SETTINGS (for PyTorch/TensorFlow models)
    # ========================================================================
    DEVICE: str = os.getenv("DEVICE", "cpu")

    # ========================================================================
    # LLM PARAMETERS
    # ========================================================================
    LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
    LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024"))  # VERY IMPORTANT CHANGE: raised from 512 to 1024

    # ========================================================================
    # RAG PARAMETERS
    # ========================================================================
    TOP_K: int = int(os.getenv("TOP_K", "5"))
    SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
    MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))

    # ========================================================================
    # POLICY NETWORK PARAMETERS
    # ========================================================================
    POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
    CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))

    # ========================================================================
    # HELPER METHODS (Required by llm_manager.py)
    # ========================================================================

    def is_gemini_enabled(self) -> bool:
        """Check if Google Gemini API is configured"""
        return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")

    def is_groq_enabled(self) -> bool:
        """Check if Groq API is configured"""
        return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")

    def is_hf_enabled(self) -> bool:
        """Check if HuggingFace token is configured"""
        return bool(self.HF_TOKEN and self.HF_TOKEN != "")

    def get_allowed_origins(self) -> List[str]:
        """Parse allowed origins from comma-separated string"""
        if self.ALLOWED_ORIGINS == "*":
            return ["*"]
        return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]

    def get_llm_for_task(self, task: str = "qa") -> str:
        """
        Get LLM name for a specific task.

        Args:
            task: Task type ('chat', 'evaluation', etc.)

        Returns:
            str: LLM name ('gemini' or 'groq')
        """
        # Use Gemini for chat, Groq for evaluation
        if task == "evaluation":
            return "groq" if self.is_groq_enabled() else "gemini"
        else:
            return "gemini"  # Default to Gemini for all tasks


# ============================================================================
# CREATE GLOBAL SETTINGS INSTANCE
# ============================================================================
settings = Settings()


# ============================================================================
# PRINT CONFIGURATION ON LOAD
# ============================================================================
print("=" * 80)
print("✅ Configuration Loaded")
print("=" * 80)
print(f"Environment: {settings.ENVIRONMENT}")
print(f"Debug Mode: {settings.DEBUG}")
print(f"Database: {settings.DATABASE_NAME}")
print(f"Device: {settings.DEVICE}")
print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
print()
print("🔑 API Keys:")
print(f"  Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
print(f"  Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
print(f"  HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
print(f"  MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
print(f"  JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
print()
print("🤖 Model Paths:")
print(f"  Policy Model: {settings.POLICY_MODEL_PATH}")
print(f"  Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
print(f"  FAISS Index: {settings.FAISS_INDEX_PATH}")
print(f"  Knowledge Base: {settings.KB_PATH}")
print("=" * 80)
# ============================================================================


# (First commented-out earlier revision of this config file follows.)
# """
# Application Configuration
# Settings for Banking RAG Chatbot with JWT Authentication
# Includes all settings needed by existing llm_manager.py
# """

# import os
# from typing import List
# from dotenv import load_dotenv

# load_dotenv()


# class Settings:
#     """Application settings loaded from environment variables"""

#     # ========================================================================
#     # ENVIRONMENT
#     # ========================================================================
#     ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
#     DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"

#     # ========================================================================
#     # MONGODB
#     # ========================================================================
#     MONGODB_URI: str = os.getenv("MONGODB_URI", "")
#     DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")

#     # ========================================================================
#     # JWT AUTHENTICATION
#     # ========================================================================
#     SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
#     ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
#     ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))

#     # ========================================================================
#     # CORS (for frontend)
#     # ========================================================================
#     ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

#     # ========================================================================
#     # GOOGLE GEMINI API
#     # ========================================================================
#     GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
#     GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")

#     # ========================================================================
#     # GROQ API (Optional - for your llm_manager)
#     # ========================================================================
#     GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
#     GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")

#     # ========================================================================
#     # HUGGING FACE (Optional - for model downloads)
#     # ========================================================================
#     HF_TOKEN: str = os.getenv("HF_TOKEN", "")

#     # ========================================================================
#     # MODEL PATHS (for RL Policy Network and RAG models)
#     # ========================================================================
#     POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
#     RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
#     FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
#     KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")

#     # ========================================================================
#     # DEVICE SETTINGS (for PyTorch/TensorFlow models)
#     # ========================================================================
#     DEVICE: str = os.getenv("DEVICE", "cpu")

#     # ========================================================================
#     # LLM PARAMETERS
#     # ========================================================================
#     LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
#     LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))

#     # ========================================================================
#     # RAG PARAMETERS
#     # ========================================================================
#     TOP_K: int = int(os.getenv("TOP_K", "5"))
#     SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
#     MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))

#     # ========================================================================
#     # POLICY NETWORK PARAMETERS
#     # ========================================================================
#     POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
#     CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))

#     # ========================================================================
#     # HELPER METHODS (Required by llm_manager.py)
#     # ========================================================================

#     def is_gemini_enabled(self) -> bool:
#         """Check if Google Gemini API is configured"""
#         return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")

#     def is_groq_enabled(self) -> bool:
#         """Check if Groq API is configured"""
#         return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")

#     def is_hf_enabled(self) -> bool:
#         """Check if HuggingFace token is configured"""
#         return bool(self.HF_TOKEN and self.HF_TOKEN != "")

#     def get_allowed_origins(self) -> List[str]:
#         """Parse allowed origins from comma-separated string"""
#         if self.ALLOWED_ORIGINS == "*":
#             return ["*"]
#         return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]

#     # def get_llm_for_task(self, task: str = "qa"):
#     #     """
#     #     Get LLM configuration for a specific task.
#     #     Returns a dict with model settings.

#     #     Args:
#     #         task: Task type ('qa', 'retrieval', 'summary', etc.)

#     #     Returns:
#     #         dict: LLM configuration
#     #     """
#     #     return {
#     #         'api_key': self.GOOGLE_API_KEY,
#     #         'model': self.GEMINI_MODEL,
#     #         'temperature': self.LLM_TEMPERATURE,
#     #         'max_tokens': self.LLM_MAX_TOKENS,
#     #         'task': task
#     #     }
#     def get_llm_for_task(self, task: str = "qa") -> str:
#         """
#         Get LLM name for a specific task.

#         Args:
#             task: Task type ('chat', 'evaluation', etc.)

#         Returns:
#             str: LLM name ('gemini' or 'groq')
#         """
#         # Use Gemini for chat, Groq for evaluation
#         if task == "evaluation":
#             return "groq" if self.is_groq_enabled() else "gemini"
#         else:
#             return "gemini"  # Default to Gemini for all other tasks


# # ============================================================================
# # CREATE GLOBAL SETTINGS INSTANCE
# # ============================================================================
# settings = Settings()


# # ============================================================================
# # PRINT CONFIGURATION ON LOAD
# # ============================================================================
# print("=" * 80)
# print("✅ Configuration Loaded")
# print("=" * 80)
# print(f"Environment: {settings.ENVIRONMENT}")
# print(f"Debug Mode: {settings.DEBUG}")
# print(f"Database: {settings.DATABASE_NAME}")
# print(f"Device: {settings.DEVICE}")
# print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
# print()
# print("🔑 API Keys:")
# print(f"  Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
# print(f"  Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
# print(f"  HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
# print(f"  MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
# print(f"  JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
# print("=" * 80)
# # # ============================================================================


# (Second, doubly commented-out revision of the same config follows.)
# # """
# # Application Configuration
# # Settings for Banking RAG Chatbot with JWT Authentication
# # Includes all settings needed by existing llm_manager.py
# # """

# # import os
# # from typing import List
# # from dotenv import load_dotenv

# # load_dotenv()


# # class Settings:
# #     """Application settings loaded from environment variables"""

# #     # ========================================================================
# #     # ENVIRONMENT
# #     # ========================================================================
# #     ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
# #     DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"

# #     # ========================================================================
# #     # MONGODB
# #     # ========================================================================
# #     MONGODB_URI: str = os.getenv("MONGODB_URI", "")
# #     DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")

# #     # ========================================================================
# #     # JWT AUTHENTICATION
# #     # ========================================================================
# #     SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
# #     ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
# #     ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))

# #     # ========================================================================
# #     # CORS (for frontend)
# #     # ========================================================================
# #     ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

# #     # ========================================================================
# #     # GOOGLE GEMINI API
# #     # ========================================================================
# #     GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
# #     GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")

# #     # ========================================================================
# #     # GROQ API (Optional - for your llm_manager)
# #     # ========================================================================
# #     GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
# #     GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")

# #     # ========================================================================
# #     # HUGGING FACE (Optional - for model downloads)
# #     # ========================================================================
# #     HF_TOKEN: str = os.getenv("HF_TOKEN", "")

# #     # ========================================================================
# #     # HELPER METHODS (Required by llm_manager.py)
# #     # ========================================================================

# #     def is_gemini_enabled(self) -> bool:
# #         """Check if Google Gemini API is configured"""
# #         return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")

# #     def is_groq_enabled(self) -> bool:
# #         """Check if Groq API is configured"""
# #         return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")

# #     def is_hf_enabled(self) -> bool:
# #         """Check if HuggingFace token is configured"""
# #         return bool(self.HF_TOKEN and self.HF_TOKEN != "")

# #     def get_allowed_origins(self) -> List[str]:
# #         """Parse allowed origins from comma-separated string"""
# #         if self.ALLOWED_ORIGINS == "*":
# #             return ["*"]
# #         return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]


# # # ============================================================================
# # # CREATE GLOBAL SETTINGS INSTANCE
# # # ============================================================================
# # settings = Settings()

# # # ============================================================================
# # # PRINT CONFIGURATION ON LOAD
# # # ============================================================================
# # print("=" * 80)
# # print("✅ Configuration Loaded")
# # print("=" * 80)
# # print(f"Environment: {settings.ENVIRONMENT}")
# # print(f"Debug Mode: {settings.DEBUG}")
# # print(f"Database: {settings.DATABASE_NAME}")
# # # print(f"JWT Algorithm: {settings.ALGORITHM}")
# # # print(f"Token Expiry: {settings.ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
# # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
# # print()
# # print("🔑 API Keys:")
# # print(f"  Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
# # print(f"  Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
# # print(f"  HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
# # print(f"  MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
# # print(f"  JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
# # print("=" * 80)


# (Third commented-out revision follows; the source excerpt is truncated here.)
# """
# Application Configuration
# Settings for Banking RAG Chatbot with JWT Authentication
# Includes all settings needed by existing llm_manager.py
# """

# import os
# from typing import List
# from dotenv import load_dotenv

# load_dotenv()


# class Settings:
#     """Application settings loaded from environment variables"""

#     # ========================================================================
#     # ENVIRONMENT
#     # ========================================================================
#     ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
#     DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"

#     # ========================================================================
#     # MONGODB
#     # ========================================================================
#     MONGODB_URI: str = os.getenv("MONGODB_URI", "")
#     DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")

#     # ========================================================================
#     # JWT AUTHENTICATION
#     # ========================================================================
#     SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
#     ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
#     ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))

#     # ========================================================================
#     # CORS (for frontend)
#     # ========================================================================
#     ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

#     # ========================================================================
#     # GOOGLE GEMINI API
+
# # ========================================================================
|
| 559 |
+
# GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 560 |
+
# GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 561 |
+
|
| 562 |
+
# # ========================================================================
|
| 563 |
+
# # GROQ API (Optional - for your llm_manager)
|
| 564 |
+
# # ========================================================================
|
| 565 |
+
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 566 |
+
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 567 |
+
|
| 568 |
+
# # ========================================================================
|
| 569 |
+
# # HUGGING FACE (Optional - for model downloads)
|
| 570 |
+
# # ========================================================================
|
| 571 |
+
# HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 572 |
+
|
| 573 |
+
# # ========================================================================
|
| 574 |
+
# # MODEL PATHS (for RL Policy Network and RAG models)
|
| 575 |
+
# # ========================================================================
|
| 576 |
+
# POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
|
| 577 |
+
# RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
|
| 578 |
+
# FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
|
| 579 |
+
# KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
|
| 580 |
+
|
| 581 |
+
# # ========================================================================
|
| 582 |
+
# # LLM PARAMETERS
|
| 583 |
+
# # ========================================================================
|
| 584 |
+
# LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 585 |
+
# LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
|
| 586 |
+
|
| 587 |
+
# # ========================================================================
|
| 588 |
+
# # RAG PARAMETERS
|
| 589 |
+
# # ========================================================================
|
| 590 |
+
# TOP_K: int = int(os.getenv("TOP_K", "5"))
|
| 591 |
+
# SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
|
| 592 |
+
# MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
|
| 593 |
+
|
| 594 |
+
# # ========================================================================
|
| 595 |
+
# # HELPER METHODS (Required by llm_manager.py)
|
| 596 |
+
# # ========================================================================
|
| 597 |
+
|
| 598 |
+
# def is_gemini_enabled(self) -> bool:
|
| 599 |
+
# """Check if Google Gemini API is configured"""
|
| 600 |
+
# return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 601 |
+
|
| 602 |
+
# def is_groq_enabled(self) -> bool:
|
| 603 |
+
# """Check if Groq API is configured"""
|
| 604 |
+
# return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 605 |
+
|
| 606 |
+
# def is_hf_enabled(self) -> bool:
|
| 607 |
+
# """Check if HuggingFace token is configured"""
|
| 608 |
+
# return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 609 |
+
|
| 610 |
+
# def get_allowed_origins(self) -> List[str]:
|
| 611 |
+
# """Parse allowed origins from comma-separated string"""
|
| 612 |
+
# if self.ALLOWED_ORIGINS == "*":
|
| 613 |
+
# return ["*"]
|
| 614 |
+
# return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
# # ============================================================================
|
| 618 |
+
# # CREATE GLOBAL SETTINGS INSTANCE
|
| 619 |
+
# # ============================================================================
|
| 620 |
+
# settings = Settings()
|
| 621 |
+
|
| 622 |
+
|
| 623 |
+
# # ============================================================================
|
| 624 |
+
# # PRINT CONFIGURATION ON LOAD
|
| 625 |
+
# # ============================================================================
|
| 626 |
+
# print("=" * 80)
|
| 627 |
+
# print("✅ Configuration Loaded")
|
| 628 |
+
# print("=" * 80)
|
| 629 |
+
# print(f"Environment: {settings.ENVIRONMENT}")
|
| 630 |
+
# print(f"Debug Mode: {settings.DEBUG}")
|
| 631 |
+
# print(f"Database: {settings.DATABASE_NAME}")
|
| 632 |
+
# print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 633 |
+
# print()
|
| 634 |
+
# print("🔑 API Keys:")
|
| 635 |
+
# print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 636 |
+
# print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 637 |
+
# print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 638 |
+
# print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 639 |
+
# print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 640 |
+
# print("=" * 80)
|
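Note: both iterations of this backup config warn when SECRET_KEY is left at its insecure default. A minimal sketch for generating a suitable value with Python's standard library; only the SECRET_KEY name comes from the config above, the rest is illustrative:

import secrets

# Run once and copy the output into your .env file
print(f"SECRET_KEY={secrets.token_hex(32)}")  # 64-character random hex key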
backups/backup_llm_manager.py
ADDED
@@ -0,0 +1,430 @@
# """
# Multi-LLM Manager for Google Gemini, Groq, and HuggingFace
# All three APIs co-exist for different purposes (no fallback logic)

# Architecture:
# - Google Gemini (Primary): User-facing chat responses (best quality)
# - Groq (Secondary): Fast inference for evaluation and specific tasks
# - HuggingFace: Model downloads and embeddings (always required)

# Each API has its designated purpose based on config settings.
# """

# import time
# import google.generativeai as genai
# from typing import List, Dict, Optional, Literal
# from langchain_groq import ChatGroq
# from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

# from app.config import settings


# # ============================================================================
# # GOOGLE GEMINI MANAGER
# # ============================================================================

# class GeminiManager:
#     """
#     Google Gemini API Manager (Primary LLM)
#     Handles Google Pro account with gemini-2.0-flash-lite model
#     """

#     def __init__(self):
#         """Initialize Gemini API with your Google API key"""
#         self.api_key = settings.GOOGLE_API_KEY
#         self.model_name = settings.GEMINI_MODEL

#         # Configure Gemini
#         genai.configure(api_key=self.api_key)

#         # Create model instance with safety settings
#         self.model = genai.GenerativeModel(
#             model_name=self.model_name,
#             generation_config={
#                 "temperature": settings.LLM_TEMPERATURE,
#                 "max_output_tokens": settings.LLM_MAX_TOKENS,
#             }
#         )

#         # Rate limiting tracking
#         self.requests_this_minute = 0
#         self.tokens_this_minute = 0
#         self.last_reset = time.time()

#         print(f"✅ Gemini Manager initialized: {self.model_name}")

#     def _check_rate_limits(self):
#         """
#         Check and reset rate limit counters.
#         Gemini Pro: 60 requests/min, 60,000 tokens/min
#         """
#         current_time = time.time()

#         # Reset counters every minute
#         if current_time - self.last_reset > 60:
#             self.requests_this_minute = 0
#             self.tokens_this_minute = 0
#             self.last_reset = current_time

#         # Check if limits exceeded
#         if self.requests_this_minute >= settings.GEMINI_REQUESTS_PER_MINUTE:
#             wait_time = 60 - (current_time - self.last_reset)
#             print(f"⚠️ Gemini rate limit hit. Waiting {wait_time:.1f}s...")
#             time.sleep(wait_time)
#             self._check_rate_limits()  # Recursive check after waiting

#     async def generate(
#         self,
#         messages: List[Dict[str, str]],
#         system_prompt: Optional[str] = None
#     ) -> str:
#         """
#         Generate response using Gemini.

#         Args:
#             messages: List of conversation messages
#                 Format: [{'role': 'user'/'assistant', 'content': '...'}]
#             system_prompt: Optional system prompt (prepended to first message)

#         Returns:
#             str: Generated response text
#         """
#         self._check_rate_limits()

#         try:
#             # Format messages for Gemini
#             # Gemini uses 'user' and 'model' roles
#             formatted_messages = []

#             # Add system prompt as first user message if provided
#             if system_prompt:
#                 formatted_messages.append({
#                     'role': 'user',
#                     'parts': [system_prompt]
#                 })

#             # Convert messages
#             for msg in messages:
#                 role = 'model' if msg['role'] == 'assistant' else 'user'
#                 formatted_messages.append({
#                     'role': role,
#                     'parts': [msg['content']]
#                 })

#             # Generate response
#             chat = self.model.start_chat(history=formatted_messages[:-1])
#             response = chat.send_message(formatted_messages[-1]['parts'][0])

#             # Track rate limits
#             self.requests_this_minute += 1
#             # Note: Token counting would require additional API call
#             # For now, estimate ~4 chars per token
#             estimated_tokens = len(response.text) // 4
#             self.tokens_this_minute += estimated_tokens

#             return response.text

#         except Exception as e:
#             print(f"❌ Gemini API error: {e}")
#             raise


# # ============================================================================
# # GROQ MANAGER
# # ============================================================================

# class GroqManager:
#     """
#     Groq API Manager (Secondary LLM)
#     Handles fast inference with Llama-3-70B
#     """

#     def __init__(self):
#         """Initialize Groq API with single API key"""
#         self.api_key = settings.GROQ_API_KEY
#         self.model_name = settings.GROQ_MODEL

#         # Create ChatGroq instance
#         self.llm = ChatGroq(
#             api_key=self.api_key,
#             model_name=self.model_name,
#             temperature=settings.LLM_TEMPERATURE,
#             max_tokens=settings.LLM_MAX_TOKENS
#         )

#         # Rate limiting tracking
#         self.requests_this_minute = 0
#         self.tokens_this_minute = 0
#         self.last_reset = time.time()

#         print(f"✅ Groq Manager initialized: {self.model_name}")

#     def _check_rate_limits(self):
#         """
#         Check and reset rate limit counters.
#         Groq Free: 30 requests/min, 30,000 tokens/min
#         """
#         current_time = time.time()

#         # Reset counters every minute
#         if current_time - self.last_reset > 60:
#             self.requests_this_minute = 0
#             self.tokens_this_minute = 0
#             self.last_reset = current_time

#         # Check if limits exceeded
#         if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
#             wait_time = 60 - (current_time - self.last_reset)
#             print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
#             time.sleep(wait_time)
#             self._check_rate_limits()

#     async def generate(
#         self,
#         messages: List[Dict[str, str]],
#         system_prompt: Optional[str] = None
#     ) -> str:
#         """
#         Generate response using Groq.

#         Args:
#             messages: List of conversation messages
#                 Format: [{'role': 'user'/'assistant', 'content': '...'}]
#             system_prompt: Optional system prompt

#         Returns:
#             str: Generated response text
#         """
#         self._check_rate_limits()

#         try:
#             # Format messages for LangChain
#             formatted_messages = []

#             # Add system message if provided
#             if system_prompt:
#                 formatted_messages.append(SystemMessage(content=system_prompt))

#             # Convert conversation messages
#             for msg in messages:
#                 if msg['role'] == 'user':
#                     formatted_messages.append(HumanMessage(content=msg['content']))
#                 elif msg['role'] == 'assistant':
#                     formatted_messages.append(AIMessage(content=msg['content']))

#             # Generate response
#             response = await self.llm.ainvoke(formatted_messages)

#             # Track rate limits
#             self.requests_this_minute += 1
#             # Estimate tokens (rough approximation)
#             estimated_tokens = len(response.content) // 4
#             self.tokens_this_minute += estimated_tokens

#             return response.content

#         except Exception as e:
#             print(f"❌ Groq API error: {e}")
#             raise


# # ============================================================================
# # UNIFIED LLM MANAGER (Routes to appropriate LLM)
# # ============================================================================

# class LLMManager:
#     """
#     Unified LLM Manager that routes requests to appropriate LLM.

#     Routing strategy (from config):
#     - Chat responses → Gemini (best quality for users)
#     - Evaluation → Groq (fast, good enough for RL)
#     - Policy → Local BERT (no API call)
#     """

#     def __init__(self):
#         """Initialize all LLM managers"""
#         self.gemini = None
#         self.groq = None

#         # Initialize Gemini if configured
#         if settings.is_gemini_enabled():
#             try:
#                 self.gemini = GeminiManager()
#             except Exception as e:
#                 print(f"⚠️ Failed to initialize Gemini: {e}")

#         # Initialize Groq if configured
#         if settings.is_groq_enabled():
#             try:
#                 self.groq = GroqManager()
#             except Exception as e:
#                 print(f"⚠️ Failed to initialize Groq: {e}")

#         print("✅ LLM Manager initialized")

#     async def generate(
#         self,
#         messages: List[Dict[str, str]],
#         system_prompt: Optional[str] = None,
#         task: Literal["chat", "evaluation"] = "chat"
#     ) -> str:
#         """
#         Generate response using appropriate LLM based on task.

#         Args:
#             messages: Conversation messages
#             system_prompt: Optional system prompt
#             task: Task type - "chat" (user-facing) or "evaluation" (RL training)

#         Returns:
#             str: Generated response

#         Raises:
#             ValueError: If appropriate LLM is not configured
#         """
#         # Determine which LLM to use based on task
#         llm_choice = settings.get_llm_for_task(task)

#         if llm_choice == "gemini":
#             if self.gemini is None:
#                 raise ValueError("Gemini API not configured. Set GOOGLE_API_KEY in .env")
#             return await self.gemini.generate(messages, system_prompt)

#         elif llm_choice == "groq":
#             if self.groq is None:
#                 raise ValueError("Groq API not configured. Set GROQ_API_KEY in .env")
#             return await self.groq.generate(messages, system_prompt)

#         else:
#             raise ValueError(f"Unknown LLM choice: {llm_choice}")

#     # async def generate_chat_response(
#     #     self,
#     #     query: str,
#     #     context: str,
#     #     history: List[Dict[str, str]]
#     # ) -> str:
#     #     """
#     #     Generate chat response (uses Gemini by default).

#     #     Args:
#     #         query: User query
#     #         context: Retrieved context (from FAISS)
#     #         history: Conversation history

#     #     Returns:
#     #         str: Chat response
#     #     """
#     #     # Build system prompt
#     #     system_prompt = settings.SYSTEM_PROMPT
#     #     if context:
#     #         system_prompt += f"\n\nRelevant Information:\n{context}"

#     #     # Build messages
#     #     messages = history + [{'role': 'user', 'content': query}]

#     #     # Generate using chat LLM (Gemini)
#     #     return await self.generate(messages, system_prompt, task="chat")

#     async def generate_chat_response(
#         self,
#         query: str,
#         context: str,
#         history: List[Dict[str, str]]
#     ) -> str:
#         """Generate chat response (uses Gemini by default)."""

#         # Import the detailed prompt
#         from app.services.chat_service import BANKING_SYSTEM_PROMPT

#         # Build enhanced system prompt with context
#         system_prompt = BANKING_SYSTEM_PROMPT

#         if context:
#             system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
#         else:
#             system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."

#         # Build messages
#         messages = history + [{'role': 'user', 'content': query}]

#         # Generate using chat LLM (Gemini)
#         return await self.generate(messages, system_prompt, task="chat")

#     async def evaluate_response(
#         self,
#         query: str,
#         response: str,
#         context: str = ""
#     ) -> Dict:
#         """
#         Evaluate response quality (uses Groq for speed).
#         Used during RL training.

#         Args:
#             query: User query
#             response: Generated response
#             context: Retrieved context (if any)

#         Returns:
#             dict: Evaluation results
#                 {'quality': 'Good'/'Bad', 'explanation': '...'}
#         """
#         eval_prompt = f"""Evaluate this response:
# Query: {query}
# Response: {response}
# Context used: {context if context else 'None'}

# Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explanation."""

#         messages = [{'role': 'user', 'content': eval_prompt}]

#         # Generate using evaluation LLM (Groq)
#         result = await self.generate(messages, task="evaluation")

#         # Parse result
#         quality = "Good" if "Good" in result else "Bad"

#         return {
#             'quality': quality,
#             'explanation': result
#         }


# # ============================================================================
# # GLOBAL LLM MANAGER INSTANCE
# # ============================================================================
# llm_manager = LLMManager()


# # ============================================================================
# # USAGE EXAMPLE (for reference)
# # ============================================================================
# """
# # In your service file:

# from app.core.llm_manager import llm_manager

# # Generate chat response (uses Gemini)
# response = await llm_manager.generate_chat_response(
#     query="What is my account balance?",
#     context="Your balance is $1000",
#     history=[]
# )

# # Evaluate response (uses Groq)
# evaluation = await llm_manager.evaluate_response(
#     query="What is my balance?",
#     response="Your balance is $1000",
#     context="Balance: $1000"
# )
# """
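Note: this backup references Settings members that the backup config shown earlier does not define (GEMINI_REQUESTS_PER_MINUTE, GROQ_REQUESTS_PER_MINUTE, get_llm_for_task). A minimal sketch of what they would need to look like, inferred only from how they are used here and from the limits quoted in the docstrings (60 req/min for Gemini, 30 req/min for Groq), not confirmed against the deployed app/config.py:

# Assumed additions to the Settings class (names and defaults inferred):
GEMINI_REQUESTS_PER_MINUTE: int = int(os.getenv("GEMINI_REQUESTS_PER_MINUTE", "60"))
GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))

def get_llm_for_task(self, task: str) -> str:
    """Assumed routing: 'chat' goes to Gemini, anything else to Groq."""
    return "gemini" if task == "chat" else "groq"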
backups/backup_main.py
ADDED
@@ -0,0 +1,275 @@
"""
FastAPI Main Application Entry Point
Banking RAG Chatbot API with JWT Authentication

This file:
1. Creates the FastAPI app
2. Configures CORS middleware
3. Connects to MongoDB on startup/shutdown
4. Includes API routers (auth + chat)
5. Provides health check endpoints
"""

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from contextlib import asynccontextmanager

from app.config import settings
from app.db.mongodb import connect_to_mongo, close_mongo_connection


# ============================================================================
# LIFESPAN MANAGER (Startup & Shutdown)
# ============================================================================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage application lifespan events.

    Startup:
    - Connect to MongoDB Atlas
    - ML models load lazily on first use

    Shutdown:
    - Close MongoDB connection
    - Cleanup resources
    """
    # ========================================================================
    # STARTUP
    # ========================================================================
    print("\n" + "=" * 80)
    print("🚀 STARTING BANKING RAG CHATBOT API")
    print("=" * 80)
    print(f"Environment: {settings.ENVIRONMENT}")
    print(f"Debug Mode: {settings.DEBUG}")
    print("=" * 80)

    # Connect to MongoDB
    await connect_to_mongo()

    print("\n💡 ML Models Info:")
    print("   Policy Network: Loads on first chat request (lazy loading)")
    print("   Retriever Model: Loads on first retrieval (lazy loading)")
    print("   LLM (Gemini): Connects on first generation")

    print("\n✅ Backend startup complete!")
    print("=" * 80)
    print(f"📖 API Docs: http://localhost:8000/docs")
    print(f"🏥 Health Check: http://localhost:8000/health")
    print(f"🔐 Register: POST http://localhost:8000/api/v1/auth/register")
    print(f"🔑 Login: POST http://localhost:8000/api/v1/auth/login")
    print("=" * 80 + "\n")

    yield  # Application runs here

    # ========================================================================
    # SHUTDOWN
    # ========================================================================
    print("\n" + "=" * 80)
    print("🛑 SHUTTING DOWN API")
    print("=" * 80)

    # Close MongoDB connection
    await close_mongo_connection()

    print("✅ Shutdown complete")
    print("=" * 80 + "\n")


# ============================================================================
# CREATE FASTAPI APPLICATION
# ============================================================================

app = FastAPI(
    title="Banking RAG Chatbot API",
    description="""
🤖 AI-powered Banking Assistant with:

**Features:**
- 🔐 JWT Authentication (Sign up, Login, Protected routes)
- 💬 RAG (Retrieval-Augmented Generation)
- 🧠 RL-based Policy Network (BERT)
- 🔍 Custom E5 Retriever
- ✨ Google Gemini LLM

**Capabilities:**
- Intelligent document retrieval
- Context-aware responses
- Conversation history
- Real-time chat
- User authentication & authorization
    """,
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan
)


# ============================================================================
# CORS MIDDLEWARE
# ============================================================================

allowed_origins = settings.get_allowed_origins()

print("\n🌐 CORS Configuration:")
print(f"   Allowed Origins: {allowed_origins}")

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ============================================================================
# INCLUDE API ROUTERS
# ============================================================================

from app.api.v1 import chat, auth

# Auth router (public endpoints - register, login)
app.include_router(
    auth.router,
    prefix="/api/v1/auth",
    tags=["🔐 Authentication"]
)

# Chat router (protected endpoints - requires JWT token)
app.include_router(
    chat.router,
    prefix="/api/v1/chat",
    tags=["💬 Chat"]
)


# ============================================================================
# ROOT ENDPOINTS
# ============================================================================

@app.get("/", tags=["📍 Root"])
async def root():
    """
    Root endpoint - API information and available endpoints
    """
    return {
        "message": "Banking RAG Chatbot API with Authentication",
        "version": "1.0.0",
        "status": "online",
        "authentication": "JWT Bearer Token Required for chat endpoints",
        "documentation": {
            "swagger_ui": "/docs",
            "redoc": "/redoc"
        },
        "endpoints": {
            "auth": {
                "register": "POST /api/v1/auth/register",
                "login": "POST /api/v1/auth/login",
                "me": "GET /api/v1/auth/me (requires token)",
                "logout": "POST /api/v1/auth/logout (requires token)"
            },
            "chat": {
                "send_message": "POST /api/v1/chat/ (requires token)",
                "get_history": "GET /api/v1/chat/history/{conversation_id} (requires token)",
                "list_conversations": "GET /api/v1/chat/conversations (requires token)",
                "delete_conversation": "DELETE /api/v1/chat/conversation/{conversation_id} (requires token)"
            },
            "health": "GET /health"
        }
    }


@app.get("/health", tags=["🏥 Health"])
async def health_check():
    """
    Comprehensive health check endpoint

    Checks status of:
    - API service
    - MongoDB connection
    - ML models (lazy loaded)
    - Authentication system

    Returns:
        dict: Health status of all components
    """
    from app.db.mongodb import get_database

    # Check MongoDB
    mongodb_status = "connected" if get_database() is not None else "disconnected"

    # Check ML models (don't load them, just check readiness)
    ml_models_status = {
        "policy_network": "ready (lazy load)",
        "retriever": "ready (lazy load)",
        "llm": "ready (API-based)"
    }

    # Check authentication
    auth_status = {
        "jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
        "algorithm": settings.ALGORITHM,
        "token_expiry_minutes": settings.ACCESS_TOKEN_EXPIRE_MINUTES
    }

    # Overall health
    is_healthy = mongodb_status == "connected" and auth_status["jwt_enabled"]

    return {
        "status": "healthy" if is_healthy else "degraded",
        "api": "online",
        "mongodb": mongodb_status,
        "authentication": auth_status,
        "ml_models": ml_models_status,
        "environment": settings.ENVIRONMENT,
        "debug_mode": settings.DEBUG
    }


# ============================================================================
# GLOBAL EXCEPTION HANDLER
# ============================================================================

@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Global exception handler for unhandled errors
    """
    print(f"\n❌ Unhandled Exception:")
    print(f"   Path: {request.url.path}")
    print(f"   Error: {str(exc)}")

    if settings.DEBUG:
        import traceback
        traceback.print_exc()

    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal Server Error",
            "detail": str(exc) if settings.DEBUG else "An unexpected error occurred",
            "path": str(request.url.path)
        }
    )


# ============================================================================
# MAIN ENTRY POINT (for direct execution)
# ============================================================================

if __name__ == "__main__":
    import uvicorn

    print("\n🚀 Starting server directly...")
    print("   Note: For production, use: uvicorn app.main:app --host 0.0.0.0 --port 8000")

    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=settings.DEBUG  # Auto-reload only in debug mode
    )
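For reference, a minimal client-side sketch of the auth flow this file wires up. The endpoint paths come from the routes above; the JSON field names (email, password, access_token) are assumptions, since the request/response schemas live in app/api/v1/auth.py and are not shown in this commit view:

import requests

BASE = "http://localhost:8000/api/v1"
creds = {"email": "user@example.com", "password": "s3cret-pass"}  # hypothetical schema

requests.post(f"{BASE}/auth/register", json=creds)  # create the account
login = requests.post(f"{BASE}/auth/login", json=creds).json()
token = login["access_token"]  # assumed response key

# Protected endpoints expect the JWT as a bearer token
me = requests.get(f"{BASE}/auth/me", headers={"Authorization": f"Bearer {token}"})
print(me.json())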
backups/backup_requirements.txt
ADDED
@@ -0,0 +1,182 @@
# # ================================================================================
# # BANKING RAG CHATBOT API - DEPENDENCIES
# # Python 3.10+ required
# # ================================================================================

# # ============================================================================
# # CORE WEB FRAMEWORK
# # ============================================================================
# # FastAPI - Modern async web framework
# fastapi==0.104.1

# # Uvicorn - ASGI server for FastAPI
# uvicorn[standard]==0.24.0

# # Python multipart for file uploads (if needed later)
# python-multipart==0.0.6

# # ============================================================================
# # CONFIGURATION & ENVIRONMENT
# # ============================================================================
# # Pydantic - Data validation and settings management
# pydantic==2.5.0
# pydantic-settings==2.1.0

# # Python-dotenv - Load environment variables from .env file
# python-dotenv==1.0.0

# # ============================================================================
# # DATABASE - MongoDB
# # ============================================================================
# # Motor - Async MongoDB driver for FastAPI
# motor==3.3.2

# # PyMongo - MongoDB Python driver (used by Motor)
# pymongo==4.6.0

# # ============================================================================
# # AUTHENTICATION & SECURITY
# # ============================================================================
# # Python-jose - JWT token handling
# python-jose[cryptography]==3.3.0

# # Passlib - Password hashing
# passlib[bcrypt]==1.7.4

# # ============================================================================
# # MACHINE LEARNING - PYTORCH & TRANSFORMERS
# # ============================================================================
# # PyTorch - Deep learning framework
# torch==2.1.0

# # Transformers - HuggingFace transformers library (BERT, e5-base-v2)
# transformers==4.35.0

# # Sentence-Transformers - Sentence embeddings
# sentence-transformers==2.2.2

# # ============================================================================
# # VECTOR SEARCH
# # ============================================================================
# # FAISS - Facebook AI Similarity Search (CPU version)
# faiss-cpu==1.7.4

# # ============================================================================
# # LLM INTEGRATIONS
# # ============================================================================
# # LangChain - LLM orchestration framework
# langchain==0.1.0

# # LangChain Groq integration
# langchain-groq==0.0.1

# # LangChain Google GenAI (for Gemini)
# langchain-google-genai==1.0.0

# # Google Generative AI - Direct Gemini API
# google-generativeai==0.3.2

# # ============================================================================
# # UTILITIES
# # ============================================================================
# # NumPy - Numerical computing
# numpy==1.24.3

# # Tiktoken - OpenAI tokenizer (for token counting)
# tiktoken==0.5.1

# # Rich - Beautiful terminal output (for logging)
# rich==13.7.0

# # Requests - HTTP library
# requests==2.31.0

# # ============================================================================
# # OPTIONAL: DEVELOPMENT TOOLS (comment out for production)
# # ============================================================================
# # Pytest - Testing framework
# # pytest==7.4.3

# # Black - Code formatter
# # black==23.12.0

# # Flake8 - Linter
# # flake8==6.1.0


# fastapi==0.104.1
# uvicorn[standard]==0.24.0
# pydantic==2.5.0
# pydantic-settings==2.1.0
# python-dotenv==1.0.0
# motor==3.3.2
# pymongo==4.6.0
# google-generativeai==0.3.1
# sentence-transformers==2.2.2
# faiss-cpu==1.7.4
# numpy==1.24.3
# torch==2.1.0
# transformers==4.35.2

# # AUTH DEPENDENCIES (NEW!)
# python-jose[cryptography]==3.3.0
# passlib[bcrypt]==1.7.4
# python-multipart==0.0.6
# bcrypt==4.1.1


# FastAPI & Server
fastapi==0.104.1
uvicorn[standard]==0.24.0

# Data Validation
pydantic==2.5.0
pydantic-settings==2.1.0
python-dotenv==1.0.0

# Database
motor==3.3.2
pymongo==4.6.0

# LLM & AI Libraries
langchain-groq==0.1.0
langchain-core==0.1.0
huggingface-hub==0.20.0

# Embeddings & Vector Search
sentence-transformers==2.2.2
faiss-cpu==1.7.4
numpy==1.24.3

# ML/Deep Learning
torch==2.1.0
transformers==4.35.2

# Authentication
python-jose[cryptography]==3.3.0
passlib[bcrypt]==1.7.4
python-multipart==0.0.6
bcrypt==4.1.1
build_faiss_index.py
ADDED
@@ -0,0 +1,339 @@
"""
Build FAISS Index from Scratch
Creates faiss_index.pkl from your knowledge base and trained retriever model

Run this ONCE before starting the backend:
    python build_faiss_index.py

Author: Banking RAG Chatbot
Date: October 2025
"""

# These lines go at the very top (after the docstring)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow info/warnings
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'  # Disable oneDNN messages

import warnings
warnings.filterwarnings('ignore')  # Suppress all warnings

import pickle
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import faiss
import numpy as np
from pathlib import Path
from transformers import AutoTokenizer, AutoModel
from typing import List


# ============================================================================
# CONFIGURATION - UPDATE THESE PATHS!
# ============================================================================

# Where is your knowledge base JSONL file?
KB_JSONL_FILE = "data/final_knowledge_base.jsonl"

# Where is your trained retriever model?
RETRIEVER_MODEL_PATH = "app/models/best_retriever_model.pth"

# Where to save the output FAISS pickle?
OUTPUT_PKL_FILE = "app/models/faiss_index.pkl"

# Device (auto-detect GPU/CPU)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Batch size for encoding (reduce if you get OOM errors)
BATCH_SIZE = 32


# ============================================================================
# CUSTOM SENTENCE TRANSFORMER (Same as retriever.py)
# ============================================================================

class CustomSentenceTransformer(nn.Module):
    """
    Custom SentenceTransformer - exact copy from retriever.py
    Uses e5-base-v2 with mean pooling and L2 normalization
    """

    def __init__(self, model_name: str = "intfloat/e5-base-v2"):
        super().__init__()
        print(f"   Loading base model: {model_name}...")
        self.encoder = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.config = self.encoder.config
        print(f"   ✅ Base model loaded")

    def forward(self, input_ids, attention_mask):
        """Forward pass through BERT encoder"""
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # Mean pooling over non-padding tokens
        token_embeddings = outputs.last_hidden_state
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

        # L2 normalize
        embeddings = F.normalize(embeddings, p=2, dim=1)
        return embeddings

    def encode(self, sentences: List[str], batch_size: int = 32) -> np.ndarray:
        """Encode sentences - same as training"""
        self.eval()

        if isinstance(sentences, str):
            sentences = [sentences]

        # Add 'query: ' prefix for e5-base-v2
        processed_sentences = [f"query: {s.strip()}" for s in sentences]

        all_embeddings = []

        with torch.no_grad():
            for i in range(0, len(processed_sentences), batch_size):
                batch_sentences = processed_sentences[i:i + batch_size]

                # Tokenize
                tokens = self.tokenizer(
                    batch_sentences,
                    truncation=True,
                    padding=True,
                    max_length=128,
                    return_tensors='pt'
                ).to(self.encoder.device)

                # Get embeddings
                embeddings = self.forward(tokens['input_ids'], tokens['attention_mask'])
                all_embeddings.append(embeddings.cpu().numpy())

        return np.vstack(all_embeddings)
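# Note on the 'query: ' prefix in encode() above: the intfloat/e5 model cards
# recommend 'query: ' for search queries and 'passage: ' for documents. This
# script applies 'query: ' to documents as well; since the class is described
# as an exact copy of retriever.py, queries and documents are at least embedded
# consistently, which is what the cosine scores ultimately depend on.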
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# ============================================================================
# RETRIEVER MODEL (Wrapper)
# ============================================================================

class RetrieverModel:
    """Wrapper for trained retriever model"""

    def __init__(self, model_path: str, device: str = "cpu"):
        print(f"\n🤖 Loading retriever model...")
        print(f"   Device: {device}")

        self.device = device
        self.model = CustomSentenceTransformer("intfloat/e5-base-v2").to(device)

        # Load trained weights
        print(f"   Loading weights from: {model_path}")
        try:
            state_dict = torch.load(model_path, map_location=device)
            self.model.load_state_dict(state_dict)
            print(f"   ✅ Trained weights loaded")
        except Exception as e:
            print(f"   ⚠️ Warning: Could not load trained weights: {e}")
            print(f"   Using base e5-base-v2 model instead")

        self.model.eval()

    def encode_documents(self, documents: List[str], batch_size: int = 32) -> np.ndarray:
        """Encode documents"""
        return self.model.encode(documents, batch_size=batch_size)


# ============================================================================
# MAIN: BUILD FAISS INDEX
# ============================================================================

def build_faiss_index():
    """Main function to build FAISS index from scratch"""

    print("=" * 80)
    print("🏗️ BUILDING FAISS INDEX FROM SCRATCH")
    print("=" * 80)

    # ========================================================================
    # STEP 1: LOAD KNOWLEDGE BASE
    # ========================================================================
    print(f"\n📖 STEP 1: Loading knowledge base...")
    print(f"   File: {KB_JSONL_FILE}")

    if not os.path.exists(KB_JSONL_FILE):
        print(f"   ❌ ERROR: File not found!")
        print(f"   Please copy your knowledge base to: {KB_JSONL_FILE}")
        return False

    kb_data = []
    with open(KB_JSONL_FILE, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            try:
                kb_data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"   ⚠️ Warning: Skipping invalid JSON on line {line_num}: {e}")

    print(f"   ✅ Loaded {len(kb_data)} documents")

    if len(kb_data) == 0:
        print(f"   ❌ ERROR: Knowledge base is empty!")
        return False

    # ========================================================================
    # STEP 2: PREPARE DOCUMENTS FOR ENCODING
    # ========================================================================
    print(f"\n📝 STEP 2: Preparing documents for encoding...")

    documents = []
    for i, item in enumerate(kb_data):
        # Combine instruction + response for embedding (same as training)
        instruction = item.get('instruction', '')
        response = item.get('response', '')

        # Create combined text
        if instruction and response:
            text = f"{instruction} {response}"
        elif instruction:
            text = instruction
        elif response:
            text = response
        else:
            print(f"   ⚠️ Warning: Document {i} has no content, using placeholder")
            text = "empty document"

        documents.append(text)

    print(f"   ✅ Prepared {len(documents)} documents for encoding")
    print(f"   Average length: {sum(len(d) for d in documents) / len(documents):.1f} chars")

    # ========================================================================
    # STEP 3: LOAD RETRIEVER AND ENCODE DOCUMENTS
    # ========================================================================
    print(f"\n🔮 STEP 3: Encoding documents with trained retriever...")

    if not os.path.exists(RETRIEVER_MODEL_PATH):
        print(f"   ❌ ERROR: Retriever model not found!")
        print(f"   Please copy your trained model to: {RETRIEVER_MODEL_PATH}")
        return False

    # Load retriever
    retriever = RetrieverModel(RETRIEVER_MODEL_PATH, device=DEVICE)

    # Encode all documents
    print(f"   Encoding {len(documents)} documents...")
    print(f"   Batch size: {BATCH_SIZE}")
    print(f"   This may take a few minutes... ☕")

    try:
        embeddings = retriever.encode_documents(documents, batch_size=BATCH_SIZE)
        print(f"   ✅ Encoded {embeddings.shape[0]} documents")
        print(f"   Embedding dimension: {embeddings.shape[1]}")
    except Exception as e:
        print(f"   ❌ ERROR during encoding: {e}")
        import traceback
        traceback.print_exc()
        return False

    # ========================================================================
    # STEP 4: BUILD FAISS INDEX
    # ========================================================================
    print(f"\n🔍 STEP 4: Building FAISS index...")

    dimension = embeddings.shape[1]
    print(f"   Dimension: {dimension}")

    # Create FAISS index (Inner Product = Cosine similarity after normalization)
    index = faiss.IndexFlatIP(dimension)

    # Normalize embeddings for cosine similarity
    print(f"   Normalizing embeddings...")
    faiss.normalize_L2(embeddings)

    # Add to index
    print(f"   Adding {embeddings.shape[0]} vectors to FAISS index...")
    index.add(embeddings.astype('float32'))

    print(f"   ✅ FAISS index built successfully")
    print(f"   Total vectors: {index.ntotal}")

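    # NOTE: IndexFlatIP ranks by inner product; after faiss.normalize_L2 every
    # vector has unit length, so the inner product equals cosine similarity.
    # Tiny worked example: u = [1.0, 0.0] and v = [0.6, 0.8] are unit vectors,
    # and u . v = 1.0*0.6 + 0.0*0.8 = 0.6 = cos(angle between u and v).
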
    # ========================================================================
    # STEP 5: SAVE AS PICKLE FILE
    # ========================================================================
    print(f"\n💾 STEP 5: Saving as pickle file...")

    # Create models directory if it doesn't exist
    os.makedirs(os.path.dirname(OUTPUT_PKL_FILE), exist_ok=True)

    # Save tuple of (index, kb_data)
    print(f"   Pickling (index, kb_data) tuple...")
    try:
        with open(OUTPUT_PKL_FILE, 'wb') as f:
            pickle.dump((index, kb_data), f)

        file_size_mb = Path(OUTPUT_PKL_FILE).stat().st_size / (1024 * 1024)
        print(f"   ✅ Saved: {OUTPUT_PKL_FILE}")
        print(f"   File size: {file_size_mb:.2f} MB")
    except Exception as e:
        print(f"   ❌ ERROR saving pickle: {e}")
        return False

    # ========================================================================
    # STEP 6: VERIFY SAVED FILE
    # ========================================================================
    print(f"\n✅ STEP 6: Verifying saved file...")

    try:
        with open(OUTPUT_PKL_FILE, 'rb') as f:
            loaded_index, loaded_kb = pickle.load(f)

        print(f"   ✅ Verification successful")
        print(f"   Index vectors: {loaded_index.ntotal}")
        print(f"   KB documents: {len(loaded_kb)}")

        if loaded_index.ntotal != len(loaded_kb):
            print(f"   ⚠️ WARNING: Size mismatch detected!")

    except Exception as e:
        print(f"   ❌ ERROR verifying file: {e}")
        return False

    # ========================================================================
    # SUCCESS!
    # ========================================================================
    print("\n" + "=" * 80)
    print("🎉 SUCCESS! FAISS INDEX BUILT AND SAVED")
    print("=" * 80)
    print(f"\n📊 Summary:")
    print(f"   Documents: {len(kb_data)}")
    print(f"   Vectors: {index.ntotal}")
    print(f"   Dimension: {dimension}")
    print(f"   File: {OUTPUT_PKL_FILE} ({file_size_mb:.2f} MB)")
    print(f"\n🚀 You can now start the backend:")
    print(f"   cd backend")
    print(f"   uvicorn app.main:app --reload")
    print("=" * 80 + "\n")

    return True


# ============================================================================
# RUN SCRIPT
# ============================================================================

if __name__ == "__main__":
    success = build_faiss_index()

    if not success:
        print("\n" + "=" * 80)
        print("❌ FAILED TO BUILD FAISS INDEX")
        print("=" * 80)
        print("\nPlease check:")
        print("1. Knowledge base file exists: data/final_knowledge_base.jsonl")
        print("2. Retriever model exists: models/best_retriever_model.pth")
        print("3. You have enough RAM (loading the encoder plus the embeddings can take ~1 GB)")
        print("=" * 80 + "\n")
        exit(1)
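
A minimal query-time sketch of how a consumer (e.g. the backend's retriever service) can use the artifact this script produces: unpickle the (index, kb_data) tuple, encode the incoming question with the same retriever, and L2-normalize it so inner product equals cosine similarity. The "models/faiss_index.pkl" path is an assumption standing in for OUTPUT_PKL_FILE, and search() is an illustrative helper, not a function from the repo:

import pickle
import faiss

# Load the (index, kb_data) tuple written by build_faiss_index()
with open("models/faiss_index.pkl", "rb") as f:  # assumed OUTPUT_PKL_FILE path
    index, kb_data = pickle.load(f)

# Reuse the same retriever so query vectors live in the same space as the index
retriever = RetrieverModel("models/best_retriever_model.pth", device="cpu")

def search(question: str, top_k: int = 3):
    # encode() adds the same "query: " prefix used at index-build time
    query_vec = retriever.model.encode([question]).astype("float32")
    faiss.normalize_L2(query_vec)  # unit-normalize so IP == cosine, matching the index
    scores, ids = index.search(query_vec, top_k)
    return [(float(s), kb_data[i]) for s, i in zip(scores[0], ids[0]) if i != -1]

print(search("How do I block a lost debit card?"))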
folder_structure.txt
ADDED
@@ -0,0 +1,40 @@
AML-IA-IMP/
├── backend/
│   ├── app/
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── main.py
│   │   ├── api/
│   │   │   ├── __init__.py
│   │   │   └── v1/
│   │   │       ├── __init__.py
│   │   │       ├── chat.py
│   │   │       ├── auth.py
│   │   │       └── admin.py
│   │   ├── services/
│   │   │   ├── __init__.py
│   │   │   └── chat_service.py
│   │   ├── ml/
│   │   │   ├── __init__.py
│   │   │   ├── retriever.py
│   │   │   └── policy_network.py
│   │   ├── db/
│   │   │   ├── __init__.py
│   │   │   ├── mongodb.py
│   │   │   └── repositories/
│   │   │       ├── __init__.py
│   │   │       └── conversation_repository.py
│   │   └── core/
│   │       ├── __init__.py
│   │       ├── security.py
│   │       └── api_key_manager.py
│   ├── models/
│   │   └── (your .pth, .pt, .pkl files go here)
│   ├── data/
│   │   └── final_knowledge_base.jsonl
│   ├── .env
│   ├── .env.example
│   ├── requirements.txt
│   ├── Dockerfile
│   ├── .gitignore
│   └── README.md
requirements.txt
ADDED
@@ -0,0 +1,38 @@
# FastAPI & Server
fastapi==0.104.1
uvicorn[standard]==0.24.0


# Data Validation
pydantic==2.5.0
pydantic-settings==2.1.0
python-dotenv==1.0.0


# Database
motor==3.3.2
pymongo==4.6.0


# LLM & AI Libraries
langchain-groq==0.1.9
langchain-core==0.2.38
huggingface-hub==0.24.6


# Embeddings & Vector Search
sentence-transformers==2.2.2
faiss-cpu==1.7.4
numpy==1.24.3


# ML/Deep Learning
torch==2.1.0
transformers==4.35.2


# Authentication
python-jose[cryptography]==3.3.0
passlib[bcrypt]==1.7.4
python-multipart==0.0.6
bcrypt==4.1.1
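
A quick import smoke test (a sketch; run after installing the pinned requirements) to confirm the ML stack resolves together before building the index:

import faiss
import numpy
import torch
import transformers

print("torch", torch.__version__)
print("transformers", transformers.__version__)
print("numpy", numpy.__version__)
print("faiss IndexFlatIP dim:", faiss.IndexFlatIP(768).d)  # 768 = e5-base-v2 embedding size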