# ================================================================================
# BANKING RAG CHATBOT API - ENVIRONMENT VARIABLES
# Copy this file to .env and fill in your actual values
# ================================================================================

# ============================================================================
# APPLICATION SETTINGS
# ============================================================================
DEBUG=False
ENVIRONMENT=production

# ============================================================================
# MONGODB (Get from: https://www.mongodb.com/cloud/atlas)
# ============================================================================
# Connection string format:
# mongodb+srv://<username>:<password>@<cluster>.mongodb.net/?retryWrites=true&w=majority
MONGODB_URI=your_mongodb_uri_here
DATABASE_NAME=banking_rag_db

# ============================================================================
# SECURITY
# ============================================================================
# Generate a secure secret key with:
# python -c "import secrets; print(secrets.token_urlsafe(32))"
SECRET_KEY=your-secret-key-here-change-this-in-production-min-32-characters
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440

# ============================================================================
# LLM API KEYS - ALL THREE CO-EXIST (No fallback logic)
# ============================================================================

# --- GOOGLE GEMINI API (PRIMARY) ---
# Get from: https://aistudio.google.com/app/apikey
# You have Google Pro - this is your main LLM for response generation
GOOGLE_API_KEY=your_google_api_key_here

# Which Gemini model to use
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
GEMINI_MODEL=gemini-2.0-flash-lite

# Gemini rate limits (Pro tier)
GEMINI_REQUESTS_PER_MINUTE=60
GEMINI_TOKENS_PER_MINUTE=60000


# --- GROQ API (SECONDARY) ---
# Get from: https://console.groq.com/keys
# Single key for specific fast inference tasks (llama models)
GROQ_API_KEY=your_groq_api_key_here

# Groq model (fast inference for policy evaluations)
GROQ_MODEL=llama-3.3-70b-versatile

# Groq rate limits (Free tier)
GROQ_REQUESTS_PER_MINUTE=30
GROQ_TOKENS_PER_MINUTE=30000


# --- HUGGING FACE TOKEN (REQUIRED) ---
# Get from: https://huggingface.co/settings/tokens
# Required for: Model downloads (e5-base-v2, BERT), embeddings
HF_TOKEN=your_hf_token_here

# ============================================================================
# MODEL PATHS (Local storage)
# ============================================================================
RETRIEVER_MODEL_PATH=models/best_retriever_model.pth
POLICY_MODEL_PATH=models/policy_network.pt
FAISS_INDEX_PATH=models/faiss_index.pkl
KB_PATH=data/final_knowledge_base.jsonl

# ============================================================================
# RAG PARAMETERS
# ============================================================================
# Number of documents to retrieve from FAISS
TOP_K=5

# Minimum similarity threshold for retrieval
SIMILARITY_THRESHOLD=0.5

# Maximum context length to send to LLM (in characters)
MAX_CONTEXT_LENGTH=2000

# ============================================================================
# POLICY NETWORK PARAMETERS
# ============================================================================
# Maximum sequence length for policy input
POLICY_MAX_LEN=256

# Confidence threshold for policy decisions
CONFIDENCE_THRESHOLD=0.7

# ============================================================================
# LLM GENERATION PARAMETERS
# ============================================================================
# Temperature for response generation (0.0 = deterministic, 1.0 = creative)
LLM_TEMPERATURE=0.7

# Maximum tokens to generate in response
LLM_MAX_TOKENS=512

# System prompt template
SYSTEM_PROMPT=You are a helpful banking assistant. Answer questions clearly and concisely.

# ============================================================================
# LLM ROUTING STRATEGY
# ============================================================================
# Define which LLM to use for which task
# Options: gemini, groq

# Main chat responses (user-facing) - Use Gemini Pro (best quality)
CHAT_LLM=gemini

# Response evaluation (RL training) - Use Groq (fast, good enough)
EVALUATION_LLM=groq

# Policy network inference - Local BERT model (no API call)
POLICY_LLM=local

# ============================================================================
# CORS SETTINGS (for frontend)
# ============================================================================
# Comma-separated list of allowed origins
# Use "*" for development (allows all origins)
# For production, specify exact domains:
# ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com
ALLOWED_ORIGINS=*

# ============================================================================
# LOGGING
# ============================================================================
LOG_LEVEL=INFO

# ============================================================================
# OPTIONAL: Advanced Settings
# ============================================================================
# Maximum conversation history to include in context
MAX_HISTORY_TURNS=4

# Enable/disable response caching
ENABLE_CACHE=True

# Cache TTL in seconds (1 hour)
CACHE_TTL=3600

# ============================================================================
# KEY ROTATION POOLS & MODEL HOSTING
# ============================================================================
# NOTE: An earlier revision of this block re-declared ENVIRONMENT, DEBUG,
# MONGODB_URI, SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES, and
# ALLOWED_ORIGINS. Most dotenv parsers are last-wins, so those duplicates
# silently overrode the documented values near the top of this file.
# The duplicates have been removed — edit the single definitions above.

# Groq API key rotation pool (in addition to GROQ_API_KEY above)
GROQ_API_KEY_1=your_groq_key_1
GROQ_API_KEY_2=your_groq_key_2
GROQ_API_KEY_3=your_groq_key_3

# HuggingFace token rotation pool (in addition to HF_TOKEN above)
HF_TOKEN_1=your_hf_token_1
HF_TOKEN_2=your_hf_token_2
HF_TOKEN_3=your_hf_token_3

# HuggingFace repository to pull pre-trained model artifacts from
HF_MODEL_REPO=YOUR_USERNAME/questrag-models

# Device
DEVICE=cpu