# ================================================================================
# BANKING RAG CHATBOT API - ENVIRONMENT VARIABLES
# Copy this file to .env and fill in your actual values
# ================================================================================

# ============================================================================
# APPLICATION SETTINGS
# ============================================================================
DEBUG=False
ENVIRONMENT=production

# ============================================================================
# MONGODB (Get from: https://www.mongodb.com/cloud/atlas)
# ============================================================================
# Connection string format:
# mongodb+srv://<username>:<password>@<cluster>.mongodb.net/?retryWrites=true&w=majority
MONGODB_URI=your_mongodb_uri_here
DATABASE_NAME=banking_rag_db

# ============================================================================
# SECURITY
# ============================================================================
# Generate a secure secret key with:
# python -c "import secrets; print(secrets.token_urlsafe(32))"
SECRET_KEY=your-secret-key-here-change-this-in-production-min-32-characters
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440

# ============================================================================
# LLM API KEYS - ALL THREE CO-EXIST (No fallback logic)
# ============================================================================

# --- GOOGLE GEMINI API (PRIMARY) ---
# Get from: https://aistudio.google.com/app/apikey
# You have Google Pro - this is your main LLM for response generation
GOOGLE_API_KEY=your_google_api_key_here

# Which Gemini model to use
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
GEMINI_MODEL=gemini-2.0-flash-lite

# Gemini rate limits (Pro tier)
GEMINI_REQUESTS_PER_MINUTE=60
GEMINI_TOKENS_PER_MINUTE=60000


# --- GROQ API (SECONDARY) ---
# Get from: https://console.groq.com/keys
# Single key for specific fast inference tasks (llama models)
GROQ_API_KEY=your_groq_api_key_here

# Groq model (fast inference for policy evaluations)
GROQ_MODEL=llama-3.3-70b-versatile

# Groq rate limits (Free tier)
GROQ_REQUESTS_PER_MINUTE=30
GROQ_TOKENS_PER_MINUTE=30000


# --- HUGGING FACE TOKEN (REQUIRED) ---
# Get from: https://huggingface.co/settings/tokens
# Required for: Model downloads (e5-base-v2, BERT), embeddings
HF_TOKEN=your_hf_token_here

# ============================================================================
# MODEL PATHS (Local storage)
# ============================================================================
RETRIEVER_MODEL_PATH=models/best_retriever_model.pth
POLICY_MODEL_PATH=models/policy_network.pt
FAISS_INDEX_PATH=models/faiss_index.pkl
KB_PATH=data/final_knowledge_base.jsonl

# ============================================================================
# RAG PARAMETERS
# ============================================================================
# Number of documents to retrieve from FAISS
TOP_K=5

# Minimum similarity threshold for retrieval
SIMILARITY_THRESHOLD=0.5

# Maximum context length to send to LLM (in characters)
MAX_CONTEXT_LENGTH=2000

# ============================================================================
# POLICY NETWORK PARAMETERS
# ============================================================================
# Maximum sequence length for policy input
POLICY_MAX_LEN=256

# Confidence threshold for policy decisions
CONFIDENCE_THRESHOLD=0.7

# ============================================================================
# LLM GENERATION PARAMETERS
# ============================================================================
# Temperature for response generation (0.0 = deterministic, 1.0 = creative)
LLM_TEMPERATURE=0.7

# Maximum tokens to generate in response
LLM_MAX_TOKENS=512

# System prompt template
SYSTEM_PROMPT=You are a helpful banking assistant. Answer questions clearly and concisely.

# ============================================================================
# LLM ROUTING STRATEGY
# ============================================================================
# Define which LLM to use for which task
# Options: gemini, groq

# Main chat responses (user-facing) - Use Gemini Pro (best quality)
CHAT_LLM=gemini

# Response evaluation (RL training) - Use Groq (fast, good enough)
EVALUATION_LLM=groq

# Policy network inference - Local BERT model (no API call)
POLICY_LLM=local

# ============================================================================
# CORS SETTINGS (for frontend)
# ============================================================================
# Comma-separated list of allowed origins
# Use "*" for development (allows all origins)
# For production, specify exact domains:
# ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com
ALLOWED_ORIGINS=*

# ============================================================================
# LOGGING
# ============================================================================
LOG_LEVEL=INFO

# ============================================================================
# OPTIONAL: Advanced Settings
# ============================================================================
# Maximum conversation history to include in context
MAX_HISTORY_TURNS=4

# Enable/disable response caching
ENABLE_CACHE=True

# Cache TTL in seconds (1 hour)
CACHE_TTL=3600

# ============================================================================
# KEY ROTATION POOLS & MODEL HOSTING
# ============================================================================
# NOTE: An earlier revision of this block re-declared ENVIRONMENT, DEBUG,
# MONGODB_URI, SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES, and
# ALLOWED_ORIGINS. Most dotenv parsers are last-wins, so those duplicates
# silently overrode the documented values near the top of this file.
# The duplicates have been removed — edit the single definitions above.

# Groq API key rotation pool (in addition to GROQ_API_KEY above)
GROQ_API_KEY_1=your_groq_key_1
GROQ_API_KEY_2=your_groq_key_2
GROQ_API_KEY_3=your_groq_key_3

# HuggingFace token rotation pool (in addition to HF_TOKEN above)
HF_TOKEN_1=your_hf_token_1
HF_TOKEN_2=your_hf_token_2
HF_TOKEN_3=your_hf_token_3

# HuggingFace repository to pull pre-trained model artifacts from
HF_MODEL_REPO=YOUR_USERNAME/questrag-models

# Device
DEVICE=cpu