# questrag-backend / .env.example
# ================================================================================
# BANKING RAG CHATBOT API - ENVIRONMENT VARIABLES
# Copy this file to .env and fill in your actual values
# ================================================================================
# ============================================================================
# APPLICATION SETTINGS
# ============================================================================
DEBUG=False
ENVIRONMENT=production
# ============================================================================
# MONGODB (Get from: https://www.mongodb.com/cloud/atlas)
# ============================================================================
# Connection string format:
# mongodb+srv://<username>:<password>@<cluster>.mongodb.net/<database>?retryWrites=true&w=majority
MONGODB_URI=your_mongodb_uri_here
DATABASE_NAME=banking_rag_db
# ============================================================================
# SECURITY
# ============================================================================
# Generate a secure secret key with:
# python -c "import secrets; print(secrets.token_urlsafe(32))"
SECRET_KEY=your-secret-key-here-change-this-in-production-min-32-characters
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440
# ============================================================================
# LLM API KEYS - ALL THREE CO-EXIST (No fallback logic)
# ============================================================================
# --- GOOGLE GEMINI API (PRIMARY) ---
# Get from: https://aistudio.google.com/app/apikey
# You have Google Pro - this is your main LLM for response generation
GOOGLE_API_KEY=your_google_api_key_here
# Which Gemini model to use
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
GEMINI_MODEL=gemini-2.0-flash-lite
# Gemini rate limits (Pro tier)
GEMINI_REQUESTS_PER_MINUTE=60
GEMINI_TOKENS_PER_MINUTE=60000
# --- GROQ API (SECONDARY) ---
# Get from: https://console.groq.com/keys
# Single key for specific fast inference tasks (llama models)
GROQ_API_KEY=your_groq_api_key_here
# Groq model (fast inference for policy evaluations)
GROQ_MODEL=llama-3.3-70b-versatile
# Groq rate limits (Free tier)
GROQ_REQUESTS_PER_MINUTE=30
GROQ_TOKENS_PER_MINUTE=30000
# --- HUGGING FACE TOKEN (REQUIRED) ---
# Get from: https://huggingface.co/settings/tokens
# Required for: Model downloads (e5-base-v2, BERT), embeddings
HF_TOKEN=your_hf_token_here
# ============================================================================
# MODEL PATHS (Local storage)
# ============================================================================
RETRIEVER_MODEL_PATH=models/best_retriever_model.pth
POLICY_MODEL_PATH=models/policy_network.pt
FAISS_INDEX_PATH=models/faiss_index.pkl
KB_PATH=data/final_knowledge_base.jsonl
# ============================================================================
# RAG PARAMETERS
# ============================================================================
# Number of documents to retrieve from FAISS
TOP_K=5
# Minimum similarity threshold for retrieval
SIMILARITY_THRESHOLD=0.5
# Maximum context length to send to LLM (in characters)
MAX_CONTEXT_LENGTH=2000
# ============================================================================
# POLICY NETWORK PARAMETERS
# ============================================================================
# Maximum sequence length for policy input
POLICY_MAX_LEN=256
# Confidence threshold for policy decisions
CONFIDENCE_THRESHOLD=0.7
# ============================================================================
# LLM GENERATION PARAMETERS
# ============================================================================
# Temperature for response generation (0.0 = deterministic, 1.0 = creative)
LLM_TEMPERATURE=0.7
# Maximum tokens to generate in response
LLM_MAX_TOKENS=512
# System prompt template
SYSTEM_PROMPT=You are a helpful banking assistant. Answer questions clearly and concisely.
# ============================================================================
# LLM ROUTING STRATEGY
# ============================================================================
# Define which LLM to use for which task
# Options: gemini, groq
# Main chat responses (user-facing) - Use Gemini Pro (best quality)
CHAT_LLM=gemini
# Response evaluation (RL training) - Use Groq (fast, good enough)
EVALUATION_LLM=groq
# Policy network inference - Local BERT model (no API call)
POLICY_LLM=local
# ============================================================================
# CORS SETTINGS (for frontend)
# ============================================================================
# Comma-separated list of allowed origins
# Use "*" for development (allows all origins)
# For production, specify exact domains:
# ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com
ALLOWED_ORIGINS=*
# ============================================================================
# LOGGING
# ============================================================================
LOG_LEVEL=INFO
# ============================================================================
# OPTIONAL: Advanced Settings
# ============================================================================
# Maximum conversation history to include in context
MAX_HISTORY_TURNS=4
# Enable/disable response caching
ENABLE_CACHE=True
# Cache TTL in seconds (1 hour)
CACHE_TTL=3600
# ============================================================================
# MULTI-KEY ROTATION (Optional)
# Additional Groq keys and HF tokens used for rotation; these supplement
# GROQ_API_KEY and HF_TOKEN defined above.
# ============================================================================
# Groq API Keys
GROQ_API_KEY_1=your_groq_key_1
GROQ_API_KEY_2=your_groq_key_2
GROQ_API_KEY_3=your_groq_key_3
# HuggingFace Tokens
HF_TOKEN_1=your_hf_token_1
HF_TOKEN_2=your_hf_token_2
HF_TOKEN_3=your_hf_token_3
# HuggingFace Model Repository
HF_MODEL_REPO=YOUR_USERNAME/questrag-models
# Device for model inference
# Options: cpu, cuda
DEVICE=cpu