# ================================================================================
# BANKING RAG CHATBOT API - ENVIRONMENT VARIABLES
# Copy this file to .env and fill in your actual values
# ================================================================================
# ============================================================================
# APPLICATION SETTINGS
# ============================================================================
DEBUG=False
ENVIRONMENT=production
# ============================================================================
# MONGODB (Get from: https://www.mongodb.com/cloud/atlas)
# ============================================================================
# Connection string format (MongoDB Atlas):
# mongodb+srv://<username>:<password>@<cluster>.mongodb.net/<database>?retryWrites=true&w=majority
MONGODB_URI=your_mongodb_connection_string_here
DATABASE_NAME=banking_rag_db
# ============================================================================
# SECURITY
# ============================================================================
# Generate a secure secret key with:
# python -c "import secrets; print(secrets.token_urlsafe(32))"
SECRET_KEY=your-secret-key-here-change-this-in-production-min-32-characters
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440
# ============================================================================
# LLM API KEYS - ALL THREE CO-EXIST (No fallback logic)
# ============================================================================
# --- GOOGLE GEMINI API (PRIMARY) ---
# Get from: https://aistudio.google.com/app/apikey
# You have Google Pro - this is your main LLM for response generation
GOOGLE_API_KEY=your_google_api_key_here
# Which Gemini model to use
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
GEMINI_MODEL=gemini-2.0-flash-lite
# Gemini rate limits (Pro tier)
GEMINI_REQUESTS_PER_MINUTE=60
GEMINI_TOKENS_PER_MINUTE=60000
# --- GROQ API (SECONDARY) ---
# Get from: https://console.groq.com/keys
# Single key for specific fast inference tasks (llama models)
GROQ_API_KEY=your_groq_api_key_here
# Groq model (fast inference for policy evaluations)
GROQ_MODEL=llama-3.3-70b-versatile
# Groq rate limits (Free tier)
GROQ_REQUESTS_PER_MINUTE=30
GROQ_TOKENS_PER_MINUTE=30000
# --- HUGGING FACE TOKEN (REQUIRED) ---
# Get from: https://huggingface.co/settings/tokens
# Required for: Model downloads (e5-base-v2, BERT), embeddings
HF_TOKEN=your_hf_token_here
# ============================================================================
# MODEL PATHS (Local storage)
# ============================================================================
RETRIEVER_MODEL_PATH=models/best_retriever_model.pth
POLICY_MODEL_PATH=models/policy_network.pt
FAISS_INDEX_PATH=models/faiss_index.pkl
KB_PATH=data/final_knowledge_base.jsonl
# ============================================================================
# RAG PARAMETERS
# ============================================================================
# Number of documents to retrieve from FAISS
TOP_K=5
# Minimum similarity threshold for retrieval
SIMILARITY_THRESHOLD=0.5
# Maximum context length to send to LLM (in characters)
MAX_CONTEXT_LENGTH=2000
# ============================================================================
# POLICY NETWORK PARAMETERS
# ============================================================================
# Maximum sequence length for policy input
POLICY_MAX_LEN=256
# Confidence threshold for policy decisions
CONFIDENCE_THRESHOLD=0.7
# ============================================================================
# LLM GENERATION PARAMETERS
# ============================================================================
# Temperature for response generation (0.0 = deterministic, 1.0 = creative)
LLM_TEMPERATURE=0.7
# Maximum tokens to generate in response
LLM_MAX_TOKENS=512
# System prompt template
SYSTEM_PROMPT=You are a helpful banking assistant. Answer questions clearly and concisely.
# ============================================================================
# LLM ROUTING STRATEGY
# ============================================================================
# Define which LLM to use for which task
# Options: gemini, groq
# Main chat responses (user-facing) - Use Gemini Pro (best quality)
CHAT_LLM=gemini
# Response evaluation (RL training) - Use Groq (fast, good enough)
EVALUATION_LLM=groq
# Policy network inference - Local BERT model (no API call)
POLICY_LLM=local
# ============================================================================
# CORS SETTINGS (for frontend)
# ============================================================================
# Comma-separated list of allowed origins
# Use "*" for development (allows all origins)
# For production, specify exact domains:
# ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com
ALLOWED_ORIGINS=*
# ============================================================================
# LOGGING
# ============================================================================
LOG_LEVEL=INFO
# ============================================================================
# OPTIONAL: Advanced Settings
# ============================================================================
# Maximum conversation history to include in context
MAX_HISTORY_TURNS=4
# Enable/disable response caching
ENABLE_CACHE=True
# Cache TTL in seconds (1 hour)
CACHE_TTL=3600
# NOTE: The keys below duplicated settings already defined above. Most dotenv
# loaders are last-wins, so these would silently override the documented values
# at the top of this file. They are kept commented out for reference only.
# Environment (see APPLICATION SETTINGS above)
# ENVIRONMENT=production
# DEBUG=False
# MongoDB (see MONGODB section above)
# MONGODB_URI=your_mongodb_uri_here
# JWT (see SECURITY section above)
# SECRET_KEY=your-secret-key-here
# ALGORITHM=HS256
# ACCESS_TOKEN_EXPIRE_MINUTES=1440
# Groq API Keys
GROQ_API_KEY_1=your_groq_key_1
GROQ_API_KEY_2=your_groq_key_2
GROQ_API_KEY_3=your_groq_key_3
# HuggingFace Tokens
HF_TOKEN_1=your_hf_token_1
HF_TOKEN_2=your_hf_token_2
HF_TOKEN_3=your_hf_token_3
# HuggingFace Model Repository
HF_MODEL_REPO=YOUR_USERNAME/questrag-models
# CORS (duplicate of CORS SETTINGS above -- kept commented out so the
# ALLOWED_ORIGINS value defined earlier in this file takes effect)
# ALLOWED_ORIGINS=*
# Device
DEVICE=cpu