# questrag-backend / .env.example
# ================================================================================
# BANKING RAG CHATBOT API - ENVIRONMENT VARIABLES
# Copy this file to .env and fill in your actual values
# ================================================================================
# ============================================================================
# APPLICATION SETTINGS
# ============================================================================
DEBUG=False
ENVIRONMENT=production
# ============================================================================
# MONGODB (Get from: https://www.mongodb.com/cloud/atlas)
# ============================================================================
# Connection string format:
# mongodb+srv://<username>:<password>@<cluster>.mongodb.net/<database>?retryWrites=true&w=majority
MONGODB_URI=your_mongodb_uri_here
DATABASE_NAME=banking_rag_db
# ============================================================================
# SECURITY
# ============================================================================
# Generate a secure secret key with:
# python -c "import secrets; print(secrets.token_urlsafe(32))"
SECRET_KEY=your-secret-key-here-change-this-in-production-min-32-characters
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440
# ============================================================================
# LLM API KEYS - ALL THREE CO-EXIST (No fallback logic)
# ============================================================================
# --- GOOGLE GEMINI API (PRIMARY) ---
# Get from: https://aistudio.google.com/app/apikey
# You have Google Pro - this is your main LLM for response generation
GOOGLE_API_KEY=your_google_api_key_here
# Which Gemini model to use
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
GEMINI_MODEL=gemini-2.0-flash-lite
# Gemini rate limits (Pro tier)
GEMINI_REQUESTS_PER_MINUTE=60
GEMINI_TOKENS_PER_MINUTE=60000
# --- GROQ API (SECONDARY) ---
# Get from: https://console.groq.com/keys
# Single key for specific fast inference tasks (llama models)
GROQ_API_KEY=your_groq_api_key_here
# Groq model (fast inference for policy evaluations)
GROQ_MODEL=llama-3.3-70b-versatile
# Groq rate limits (Free tier)
GROQ_REQUESTS_PER_MINUTE=30
GROQ_TOKENS_PER_MINUTE=30000
# --- HUGGING FACE TOKEN (REQUIRED) ---
# Get from: https://huggingface.co/settings/tokens
# Required for: Model downloads (e5-base-v2, BERT), embeddings
HF_TOKEN=your_hf_token_here
# ============================================================================
# MODEL PATHS (Local storage)
# ============================================================================
RETRIEVER_MODEL_PATH=models/best_retriever_model.pth
POLICY_MODEL_PATH=models/policy_network.pt
FAISS_INDEX_PATH=models/faiss_index.pkl
KB_PATH=data/final_knowledge_base.jsonl
# ============================================================================
# RAG PARAMETERS
# ============================================================================
# Number of documents to retrieve from FAISS
TOP_K=5
# Minimum similarity threshold for retrieval
SIMILARITY_THRESHOLD=0.5
# Maximum context length to send to LLM (in characters)
MAX_CONTEXT_LENGTH=2000
# ============================================================================
# POLICY NETWORK PARAMETERS
# ============================================================================
# Maximum sequence length for policy input
POLICY_MAX_LEN=256
# Confidence threshold for policy decisions
CONFIDENCE_THRESHOLD=0.7
# ============================================================================
# LLM GENERATION PARAMETERS
# ============================================================================
# Temperature for response generation (0.0 = deterministic, 1.0 = creative)
LLM_TEMPERATURE=0.7
# Maximum tokens to generate in response
LLM_MAX_TOKENS=512
# System prompt template
SYSTEM_PROMPT=You are a helpful banking assistant. Answer questions clearly and concisely.
# ============================================================================
# LLM ROUTING STRATEGY
# ============================================================================
# Define which LLM to use for which task
# Options: gemini, groq
# Main chat responses (user-facing) - Use Gemini Pro (best quality)
CHAT_LLM=gemini
# Response evaluation (RL training) - Use Groq (fast, good enough)
EVALUATION_LLM=groq
# Policy network inference - Local BERT model (no API call)
POLICY_LLM=local
# ============================================================================
# CORS SETTINGS (for frontend)
# ============================================================================
# Comma-separated list of allowed origins
# Use "*" for development (allows all origins)
# For production, specify exact domains:
# ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com
ALLOWED_ORIGINS=*
# ============================================================================
# LOGGING
# ============================================================================
LOG_LEVEL=INFO
# ============================================================================
# OPTIONAL: Advanced Settings
# ============================================================================
# Maximum conversation history to include in context
MAX_HISTORY_TURNS=4
# Enable/disable response caching
ENABLE_CACHE=True
# Cache TTL in seconds (1 hour)
CACHE_TTL=3600
# ============================================================================
# MULTI-KEY ROTATION (Optional)
# Additional Groq keys and HF tokens used for rotation; these supplement
# GROQ_API_KEY and HF_TOKEN defined above.
# ============================================================================
# Groq API Keys
GROQ_API_KEY_1=your_groq_key_1
GROQ_API_KEY_2=your_groq_key_2
GROQ_API_KEY_3=your_groq_key_3
# HuggingFace Tokens
HF_TOKEN_1=your_hf_token_1
HF_TOKEN_2=your_hf_token_2
HF_TOKEN_3=your_hf_token_3
# HuggingFace Model Repository
HF_MODEL_REPO=YOUR_USERNAME/questrag-models
# Device for model inference
# Options: cpu, cuda
DEVICE=cpu