Spaces:
Sleeping
Sleeping
major changes
Browse files- app/config.py +67 -515
- app/core/llm_manager.py +254 -235
- app/main.py +53 -27
- app/services/chat_service.py +20 -23
- backups/backup_chat_service.py +340 -0
- backups/backup_config.py +640 -0
- backups/backup_llm_manager.py +430 -0
- backups/backup_main.py +275 -0
- backups/backup_requirements.txt +182 -0
- requirements.txt +20 -121
app/config.py
CHANGED
|
@@ -1,10 +1,7 @@
|
|
| 1 |
-
# LINE 80 VERY IMP CHANGE OF LLM MAX TOKENS FROM 512 TO 1024
|
| 2 |
-
|
| 3 |
-
|
| 4 |
"""
|
| 5 |
Application Configuration
|
| 6 |
Settings for Banking RAG Chatbot with JWT Authentication
|
| 7 |
-
|
| 8 |
"""
|
| 9 |
|
| 10 |
import os
|
|
@@ -13,7 +10,6 @@ from dotenv import load_dotenv
|
|
| 13 |
|
| 14 |
load_dotenv()
|
| 15 |
|
| 16 |
-
|
| 17 |
class Settings:
|
| 18 |
"""Application settings loaded from environment variables"""
|
| 19 |
|
|
@@ -42,23 +38,32 @@ class Settings:
|
|
| 42 |
ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 43 |
|
| 44 |
# ========================================================================
|
| 45 |
-
#
|
| 46 |
# ========================================================================
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# ========================================================================
|
| 52 |
-
#
|
| 53 |
# ========================================================================
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
|
| 57 |
|
| 58 |
# ========================================================================
|
| 59 |
-
# HUGGING FACE (
|
| 60 |
# ========================================================================
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# ========================================================================
|
| 64 |
# MODEL PATHS (for RL Policy Network and RAG models)
|
|
@@ -77,8 +82,7 @@ class Settings:
|
|
| 77 |
# LLM PARAMETERS
|
| 78 |
# ========================================================================
|
| 79 |
LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 80 |
-
LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024"))
|
| 81 |
-
# ============================================================================
|
| 82 |
|
| 83 |
# ========================================================================
|
| 84 |
# RAG PARAMETERS
|
|
@@ -94,20 +98,37 @@ class Settings:
|
|
| 94 |
CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
|
| 95 |
|
| 96 |
# ========================================================================
|
| 97 |
-
# HELPER METHODS
|
| 98 |
-
# ========================================================================
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
def is_groq_enabled(self) -> bool:
|
| 105 |
-
"""Check if Groq API is configured"""
|
| 106 |
-
return bool(self.
|
| 107 |
|
| 108 |
def is_hf_enabled(self) -> bool:
|
| 109 |
-
"""Check if HuggingFace token is configured"""
|
| 110 |
-
return bool(self.
|
| 111 |
|
| 112 |
def get_allowed_origins(self) -> List[str]:
|
| 113 |
"""Parse allowed origins from comma-separated string"""
|
|
@@ -115,29 +136,26 @@ class Settings:
|
|
| 115 |
return ["*"]
|
| 116 |
return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 117 |
|
| 118 |
-
def get_llm_for_task(self, task: str = "
|
| 119 |
"""
|
| 120 |
-
Get LLM name for a specific task.
|
| 121 |
|
| 122 |
Args:
|
| 123 |
-
task: Task type ('chat'
|
| 124 |
|
| 125 |
Returns:
|
| 126 |
-
str:
|
| 127 |
"""
|
| 128 |
-
# Use Gemini for chat, Groq for evaluation
|
| 129 |
if task == "evaluation":
|
| 130 |
-
return
|
| 131 |
else:
|
| 132 |
-
return
|
| 133 |
-
|
| 134 |
|
| 135 |
# ============================================================================
|
| 136 |
# CREATE GLOBAL SETTINGS INSTANCE
|
| 137 |
# ============================================================================
|
| 138 |
settings = Settings()
|
| 139 |
|
| 140 |
-
|
| 141 |
# ============================================================================
|
| 142 |
# PRINT CONFIGURATION ON LOAD
|
| 143 |
# ============================================================================
|
|
@@ -151,11 +169,20 @@ print(f"Device: {settings.DEVICE}")
|
|
| 151 |
print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 152 |
print()
|
| 153 |
print("🔑 API Keys:")
|
| 154 |
-
|
| 155 |
-
print(f" Groq
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 158 |
-
print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
print()
|
| 160 |
print("🤖 Model Paths:")
|
| 161 |
print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
|
|
@@ -163,478 +190,3 @@ print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
|
|
| 163 |
print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
|
| 164 |
print(f" Knowledge Base: {settings.KB_PATH}")
|
| 165 |
print("=" * 80)
|
| 166 |
-
# ============================================================================
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
# """
|
| 185 |
-
# Application Configuration
|
| 186 |
-
# Settings for Banking RAG Chatbot with JWT Authentication
|
| 187 |
-
# Includes all settings needed by existing llm_manager.py
|
| 188 |
-
# """
|
| 189 |
-
|
| 190 |
-
# import os
|
| 191 |
-
# from typing import List
|
| 192 |
-
# from dotenv import load_dotenv
|
| 193 |
-
|
| 194 |
-
# load_dotenv()
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
# class Settings:
|
| 198 |
-
# """Application settings loaded from environment variables"""
|
| 199 |
-
|
| 200 |
-
# # ========================================================================
|
| 201 |
-
# # ENVIRONMENT
|
| 202 |
-
# # ========================================================================
|
| 203 |
-
# ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
|
| 204 |
-
# DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 205 |
-
|
| 206 |
-
# # ========================================================================
|
| 207 |
-
# # MONGODB
|
| 208 |
-
# # ========================================================================
|
| 209 |
-
# MONGODB_URI: str = os.getenv("MONGODB_URI", "")
|
| 210 |
-
# DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
|
| 211 |
-
|
| 212 |
-
# # ========================================================================
|
| 213 |
-
# # JWT AUTHENTICATION
|
| 214 |
-
# # ========================================================================
|
| 215 |
-
# SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
|
| 216 |
-
# ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
|
| 217 |
-
# ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
|
| 218 |
-
|
| 219 |
-
# # ========================================================================
|
| 220 |
-
# # CORS (for frontend)
|
| 221 |
-
# # ========================================================================
|
| 222 |
-
# ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 223 |
-
|
| 224 |
-
# # ========================================================================
|
| 225 |
-
# # GOOGLE GEMINI API
|
| 226 |
-
# # ========================================================================
|
| 227 |
-
# GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 228 |
-
# GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 229 |
-
|
| 230 |
-
# # ========================================================================
|
| 231 |
-
# # GROQ API (Optional - for your llm_manager)
|
| 232 |
-
# # ========================================================================
|
| 233 |
-
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 234 |
-
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 235 |
-
|
| 236 |
-
# # ========================================================================
|
| 237 |
-
# # HUGGING FACE (Optional - for model downloads)
|
| 238 |
-
# # ========================================================================
|
| 239 |
-
# HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 240 |
-
|
| 241 |
-
# # ========================================================================
|
| 242 |
-
# # MODEL PATHS (for RL Policy Network and RAG models)
|
| 243 |
-
# # ========================================================================
|
| 244 |
-
# POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
|
| 245 |
-
# RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
|
| 246 |
-
# FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
|
| 247 |
-
# KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
|
| 248 |
-
|
| 249 |
-
# # ========================================================================
|
| 250 |
-
# # DEVICE SETTINGS (for PyTorch/TensorFlow models)
|
| 251 |
-
# # ========================================================================
|
| 252 |
-
# DEVICE: str = os.getenv("DEVICE", "cpu")
|
| 253 |
-
|
| 254 |
-
# # ========================================================================
|
| 255 |
-
# # LLM PARAMETERS
|
| 256 |
-
# # ========================================================================
|
| 257 |
-
# LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 258 |
-
# LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
|
| 259 |
-
|
| 260 |
-
# # ========================================================================
|
| 261 |
-
# # RAG PARAMETERS
|
| 262 |
-
# # ========================================================================
|
| 263 |
-
# TOP_K: int = int(os.getenv("TOP_K", "5"))
|
| 264 |
-
# SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
|
| 265 |
-
# MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
|
| 266 |
-
|
| 267 |
-
# # ========================================================================
|
| 268 |
-
# # POLICY NETWORK PARAMETERS
|
| 269 |
-
# # ========================================================================
|
| 270 |
-
# POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
|
| 271 |
-
# CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
# # ========================================================================
|
| 275 |
-
# # HELPER METHODS (Required by llm_manager.py)
|
| 276 |
-
# # ========================================================================
|
| 277 |
-
|
| 278 |
-
# def is_gemini_enabled(self) -> bool:
|
| 279 |
-
# """Check if Google Gemini API is configured"""
|
| 280 |
-
# return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 281 |
-
|
| 282 |
-
# def is_groq_enabled(self) -> bool:
|
| 283 |
-
# """Check if Groq API is configured"""
|
| 284 |
-
# return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 285 |
-
|
| 286 |
-
# def is_hf_enabled(self) -> bool:
|
| 287 |
-
# """Check if HuggingFace token is configured"""
|
| 288 |
-
# return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 289 |
-
|
| 290 |
-
# def get_allowed_origins(self) -> List[str]:
|
| 291 |
-
# """Parse allowed origins from comma-separated string"""
|
| 292 |
-
# if self.ALLOWED_ORIGINS == "*":
|
| 293 |
-
# return ["*"]
|
| 294 |
-
# return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 295 |
-
|
| 296 |
-
# # def get_llm_for_task(self, task: str = "qa"):
|
| 297 |
-
# # """
|
| 298 |
-
# # Get LLM configuration for a specific task.
|
| 299 |
-
# # Returns a dict with model settings.
|
| 300 |
-
|
| 301 |
-
# # Args:
|
| 302 |
-
# # task: Task type ('qa', 'retrieval', 'summary', etc.)
|
| 303 |
-
|
| 304 |
-
# # Returns:
|
| 305 |
-
# # dict: LLM configuration
|
| 306 |
-
# # """
|
| 307 |
-
# # return {
|
| 308 |
-
# # 'api_key': self.GOOGLE_API_KEY,
|
| 309 |
-
# # 'model': self.GEMINI_MODEL,
|
| 310 |
-
# # 'temperature': self.LLM_TEMPERATURE,
|
| 311 |
-
# # 'max_tokens': self.LLM_MAX_TOKENS,
|
| 312 |
-
# # 'task': task
|
| 313 |
-
# # }
|
| 314 |
-
# def get_llm_for_task(self, task: str = "qa") -> str:
|
| 315 |
-
# """
|
| 316 |
-
# Get LLM name for a specific task.
|
| 317 |
-
|
| 318 |
-
# Args:
|
| 319 |
-
# task: Task type ('chat', 'evaluation', etc.)
|
| 320 |
-
|
| 321 |
-
# Returns:
|
| 322 |
-
# str: LLM name ('gemini' or 'groq')
|
| 323 |
-
# """
|
| 324 |
-
# # Use Gemini for chat, Groq for evaluation
|
| 325 |
-
# if task == "evaluation":
|
| 326 |
-
# return "groq" if self.is_groq_enabled() else "gemini"
|
| 327 |
-
# else:
|
| 328 |
-
# return "gemini" # Default to Gemini for all other tasks
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
# # ============================================================================
|
| 334 |
-
# # CREATE GLOBAL SETTINGS INSTANCE
|
| 335 |
-
# # ============================================================================
|
| 336 |
-
# settings = Settings()
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
# # ============================================================================
|
| 340 |
-
# # PRINT CONFIGURATION ON LOAD
|
| 341 |
-
# # ============================================================================
|
| 342 |
-
# print("=" * 80)
|
| 343 |
-
# print("✅ Configuration Loaded")
|
| 344 |
-
# print("=" * 80)
|
| 345 |
-
# print(f"Environment: {settings.ENVIRONMENT}")
|
| 346 |
-
# print(f"Debug Mode: {settings.DEBUG}")
|
| 347 |
-
# print(f"Database: {settings.DATABASE_NAME}")
|
| 348 |
-
# print(f"Device: {settings.DEVICE}")
|
| 349 |
-
# print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 350 |
-
# print()
|
| 351 |
-
# print("🔑 API Keys:")
|
| 352 |
-
# print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 353 |
-
# print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 354 |
-
# print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 355 |
-
# print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 356 |
-
# print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 357 |
-
# print()
|
| 358 |
-
# print("🤖 Model Paths:")
|
| 359 |
-
# print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
|
| 360 |
-
# print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
|
| 361 |
-
# print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
|
| 362 |
-
# print(f" Knowledge Base: {settings.KB_PATH}")
|
| 363 |
-
# print("=" * 80)
|
| 364 |
-
# # # ============================================================================
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
# # """
|
| 387 |
-
# # Application Configuration
|
| 388 |
-
# # Settings for Banking RAG Chatbot with JWT Authentication
|
| 389 |
-
# # Includes all settings needed by existing llm_manager.py
|
| 390 |
-
# # """
|
| 391 |
-
|
| 392 |
-
# # import os
|
| 393 |
-
# # from typing import List
|
| 394 |
-
# # from dotenv import load_dotenv
|
| 395 |
-
|
| 396 |
-
# # load_dotenv()
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
# # class Settings:
|
| 400 |
-
# # """Application settings loaded from environment variables"""
|
| 401 |
-
|
| 402 |
-
# # # ========================================================================
|
| 403 |
-
# # # ENVIRONMENT
|
| 404 |
-
# # # ========================================================================
|
| 405 |
-
# # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
|
| 406 |
-
# # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 407 |
-
|
| 408 |
-
# # # ========================================================================
|
| 409 |
-
# # # MONGODB
|
| 410 |
-
# # # ========================================================================
|
| 411 |
-
# # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
|
| 412 |
-
# # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
|
| 413 |
-
|
| 414 |
-
# # # ========================================================================
|
| 415 |
-
# # # JWT AUTHENTICATION
|
| 416 |
-
# # # ========================================================================
|
| 417 |
-
# # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
|
| 418 |
-
# # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
|
| 419 |
-
# # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
|
| 420 |
-
|
| 421 |
-
# # # ========================================================================
|
| 422 |
-
# # # CORS (for frontend)
|
| 423 |
-
# # # ========================================================================
|
| 424 |
-
# # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 425 |
-
|
| 426 |
-
# # # ========================================================================
|
| 427 |
-
# # # GOOGLE GEMINI API
|
| 428 |
-
# # # ========================================================================
|
| 429 |
-
# # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 430 |
-
# # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 431 |
-
|
| 432 |
-
# # # ========================================================================
|
| 433 |
-
# # # GROQ API (Optional - for your llm_manager)
|
| 434 |
-
# # # ========================================================================
|
| 435 |
-
# # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 436 |
-
# # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 437 |
-
|
| 438 |
-
# # # ========================================================================
|
| 439 |
-
# # # HUGGING FACE (Optional - for model downloads)
|
| 440 |
-
# # # ========================================================================
|
| 441 |
-
# # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 442 |
-
|
| 443 |
-
# # # ========================================================================
|
| 444 |
-
# # # HELPER METHODS (Required by llm_manager.py)
|
| 445 |
-
# # # ========================================================================
|
| 446 |
-
|
| 447 |
-
# # def is_gemini_enabled(self) -> bool:
|
| 448 |
-
# # """Check if Google Gemini API is configured"""
|
| 449 |
-
# # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 450 |
-
|
| 451 |
-
# # def is_groq_enabled(self) -> bool:
|
| 452 |
-
# # """Check if Groq API is configured"""
|
| 453 |
-
# # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 454 |
-
|
| 455 |
-
# # def is_hf_enabled(self) -> bool:
|
| 456 |
-
# # """Check if HuggingFace token is configured"""
|
| 457 |
-
# # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 458 |
-
|
| 459 |
-
# # def get_allowed_origins(self) -> List[str]:
|
| 460 |
-
# # """Parse allowed origins from comma-separated string"""
|
| 461 |
-
# # if self.ALLOWED_ORIGINS == "*":
|
| 462 |
-
# # return ["*"]
|
| 463 |
-
# # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
# # # ============================================================================
|
| 467 |
-
# # # CREATE GLOBAL SETTINGS INSTANCE
|
| 468 |
-
# # # ============================================================================
|
| 469 |
-
# # settings = Settings()
|
| 470 |
-
|
| 471 |
-
# # # ============================================================================
|
| 472 |
-
# # # PRINT CONFIGURATION ON LOAD
|
| 473 |
-
# # # ============================================================================
|
| 474 |
-
# # print("=" * 80)
|
| 475 |
-
# # print("✅ Configuration Loaded")
|
| 476 |
-
# # print("=" * 80)
|
| 477 |
-
# # print(f"Environment: {settings.ENVIRONMENT}")
|
| 478 |
-
# # print(f"Debug Mode: {settings.DEBUG}")
|
| 479 |
-
# # print(f"Database: {settings.DATABASE_NAME}")
|
| 480 |
-
# # # print(f"JWT Algorithm: {settings.ALGORITHM}")
|
| 481 |
-
# # # print(f"Token Expiry: {settings.ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
|
| 482 |
-
# # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 483 |
-
# # print()
|
| 484 |
-
# # print("🔑 API Keys:")
|
| 485 |
-
# # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 486 |
-
# # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 487 |
-
# # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 488 |
-
# # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 489 |
-
# # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 490 |
-
# # print("=" * 80)
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
# """
|
| 517 |
-
# Application Configuration
|
| 518 |
-
# Settings for Banking RAG Chatbot with JWT Authentication
|
| 519 |
-
# Includes all settings needed by existing llm_manager.py
|
| 520 |
-
# """
|
| 521 |
-
|
| 522 |
-
# import os
|
| 523 |
-
# from typing import List
|
| 524 |
-
# from dotenv import load_dotenv
|
| 525 |
-
|
| 526 |
-
# load_dotenv()
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
# class Settings:
|
| 530 |
-
# """Application settings loaded from environment variables"""
|
| 531 |
-
|
| 532 |
-
# # ========================================================================
|
| 533 |
-
# # ENVIRONMENT
|
| 534 |
-
# # ========================================================================
|
| 535 |
-
# ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
|
| 536 |
-
# DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 537 |
-
|
| 538 |
-
# # ========================================================================
|
| 539 |
-
# # MONGODB
|
| 540 |
-
# # ========================================================================
|
| 541 |
-
# MONGODB_URI: str = os.getenv("MONGODB_URI", "")
|
| 542 |
-
# DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
|
| 543 |
-
|
| 544 |
-
# # ========================================================================
|
| 545 |
-
# # JWT AUTHENTICATION
|
| 546 |
-
# # ========================================================================
|
| 547 |
-
# SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
|
| 548 |
-
# ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
|
| 549 |
-
# ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
|
| 550 |
-
|
| 551 |
-
# # ========================================================================
|
| 552 |
-
# # CORS (for frontend)
|
| 553 |
-
# # ========================================================================
|
| 554 |
-
# ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 555 |
-
|
| 556 |
-
# # ========================================================================
|
| 557 |
-
# # GOOGLE GEMINI API
|
| 558 |
-
# # ========================================================================
|
| 559 |
-
# GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 560 |
-
# GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 561 |
-
|
| 562 |
-
# # ========================================================================
|
| 563 |
-
# # GROQ API (Optional - for your llm_manager)
|
| 564 |
-
# # ========================================================================
|
| 565 |
-
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 566 |
-
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 567 |
-
|
| 568 |
-
# # ========================================================================
|
| 569 |
-
# # HUGGING FACE (Optional - for model downloads)
|
| 570 |
-
# # ========================================================================
|
| 571 |
-
# HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 572 |
-
|
| 573 |
-
# # ========================================================================
|
| 574 |
-
# # MODEL PATHS (for RL Policy Network and RAG models)
|
| 575 |
-
# # ========================================================================
|
| 576 |
-
# POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
|
| 577 |
-
# RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
|
| 578 |
-
# FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
|
| 579 |
-
# KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
|
| 580 |
-
|
| 581 |
-
# # ========================================================================
|
| 582 |
-
# # LLM PARAMETERS
|
| 583 |
-
# # ========================================================================
|
| 584 |
-
# LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 585 |
-
# LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
|
| 586 |
-
|
| 587 |
-
# # ========================================================================
|
| 588 |
-
# # RAG PARAMETERS
|
| 589 |
-
# # ========================================================================
|
| 590 |
-
# TOP_K: int = int(os.getenv("TOP_K", "5"))
|
| 591 |
-
# SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
|
| 592 |
-
# MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
|
| 593 |
-
|
| 594 |
-
# # ========================================================================
|
| 595 |
-
# # HELPER METHODS (Required by llm_manager.py)
|
| 596 |
-
# # ========================================================================
|
| 597 |
-
|
| 598 |
-
# def is_gemini_enabled(self) -> bool:
|
| 599 |
-
# """Check if Google Gemini API is configured"""
|
| 600 |
-
# return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 601 |
-
|
| 602 |
-
# def is_groq_enabled(self) -> bool:
|
| 603 |
-
# """Check if Groq API is configured"""
|
| 604 |
-
# return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 605 |
-
|
| 606 |
-
# def is_hf_enabled(self) -> bool:
|
| 607 |
-
# """Check if HuggingFace token is configured"""
|
| 608 |
-
# return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 609 |
-
|
| 610 |
-
# def get_allowed_origins(self) -> List[str]:
|
| 611 |
-
# """Parse allowed origins from comma-separated string"""
|
| 612 |
-
# if self.ALLOWED_ORIGINS == "*":
|
| 613 |
-
# return ["*"]
|
| 614 |
-
# return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
# # ============================================================================
|
| 618 |
-
# # CREATE GLOBAL SETTINGS INSTANCE
|
| 619 |
-
# # ============================================================================
|
| 620 |
-
# settings = Settings()
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
# # ============================================================================
|
| 624 |
-
# # PRINT CONFIGURATION ON LOAD
|
| 625 |
-
# # ============================================================================
|
| 626 |
-
# print("=" * 80)
|
| 627 |
-
# print("✅ Configuration Loaded")
|
| 628 |
-
# print("=" * 80)
|
| 629 |
-
# print(f"Environment: {settings.ENVIRONMENT}")
|
| 630 |
-
# print(f"Debug Mode: {settings.DEBUG}")
|
| 631 |
-
# print(f"Database: {settings.DATABASE_NAME}")
|
| 632 |
-
# print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 633 |
-
# print()
|
| 634 |
-
# print("🔑 API Keys:")
|
| 635 |
-
# print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 636 |
-
# print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 637 |
-
# print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 638 |
-
# print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 639 |
-
# print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 640 |
-
# print("=" * 80)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Application Configuration
|
| 3 |
Settings for Banking RAG Chatbot with JWT Authentication
|
| 4 |
+
Updated to support multiple Groq API keys and HuggingFace tokens with fallback logic
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
|
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
|
|
|
|
| 13 |
class Settings:
|
| 14 |
"""Application settings loaded from environment variables"""
|
| 15 |
|
|
|
|
| 38 |
ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 39 |
|
| 40 |
# ========================================================================
|
| 41 |
+
# GROQ API KEYS (Multiple for fallback)
|
| 42 |
# ========================================================================
|
| 43 |
+
GROQ_API_KEY_1: str = os.getenv("GROQ_API_KEY_1", "") # Primary
|
| 44 |
+
GROQ_API_KEY_2: str = os.getenv("GROQ_API_KEY_2", "") # Fallback 1
|
| 45 |
+
GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "") # Fallback 2
|
| 46 |
+
|
| 47 |
+
# Model names for Groq (using correct GroqCloud naming)
|
| 48 |
+
GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama3-8b-8192") # For chat interface
|
| 49 |
+
GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama3-70b-8192") # For evaluation
|
| 50 |
|
| 51 |
# ========================================================================
|
| 52 |
+
# Commented as of now, can be re-enabled if rate limiting is needed
|
| 53 |
# ========================================================================
|
| 54 |
+
|
| 55 |
+
# GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
|
|
|
|
| 56 |
|
| 57 |
# ========================================================================
|
| 58 |
+
# HUGGING FACE TOKENS (Multiple for fallback)
|
| 59 |
# ========================================================================
|
| 60 |
+
HF_TOKEN_1: str = os.getenv("HF_TOKEN_1", "") # Primary
|
| 61 |
+
HF_TOKEN_2: str = os.getenv("HF_TOKEN_2", "") # Fallback 1
|
| 62 |
+
HF_TOKEN_3: str = os.getenv("HF_TOKEN_3", "") # Fallback 2
|
| 63 |
+
|
| 64 |
+
# HuggingFace model for inference (fallback from Groq)
|
| 65 |
+
HF_CHAT_MODEL: str = os.getenv("HF_CHAT_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
|
| 66 |
+
HF_EVAL_MODEL: str = os.getenv("HF_EVAL_MODEL", "meta-llama/Meta-Llama-3-70B-Instruct")
|
| 67 |
|
| 68 |
# ========================================================================
|
| 69 |
# MODEL PATHS (for RL Policy Network and RAG models)
|
|
|
|
| 82 |
# LLM PARAMETERS
|
| 83 |
# ========================================================================
|
| 84 |
LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 85 |
+
LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024"))
|
|
|
|
| 86 |
|
| 87 |
# ========================================================================
|
| 88 |
# RAG PARAMETERS
|
|
|
|
| 98 |
CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
|
| 99 |
|
| 100 |
# ========================================================================
|
| 101 |
+
# HELPER METHODS
|
| 102 |
+
# ========================================================================
|
| 103 |
+
def get_groq_api_keys(self) -> List[str]:
|
| 104 |
+
"""Get all configured Groq API keys in priority order"""
|
| 105 |
+
keys = []
|
| 106 |
+
if self.GROQ_API_KEY_1:
|
| 107 |
+
keys.append(self.GROQ_API_KEY_1)
|
| 108 |
+
if self.GROQ_API_KEY_2:
|
| 109 |
+
keys.append(self.GROQ_API_KEY_2)
|
| 110 |
+
if self.GROQ_API_KEY_3:
|
| 111 |
+
keys.append(self.GROQ_API_KEY_3)
|
| 112 |
+
return keys
|
| 113 |
+
|
| 114 |
+
def get_hf_tokens(self) -> List[str]:
|
| 115 |
+
"""Get all configured HuggingFace tokens in priority order"""
|
| 116 |
+
tokens = []
|
| 117 |
+
if self.HF_TOKEN_1:
|
| 118 |
+
tokens.append(self.HF_TOKEN_1)
|
| 119 |
+
if self.HF_TOKEN_2:
|
| 120 |
+
tokens.append(self.HF_TOKEN_2)
|
| 121 |
+
if self.HF_TOKEN_3:
|
| 122 |
+
tokens.append(self.HF_TOKEN_3)
|
| 123 |
+
return tokens
|
| 124 |
|
| 125 |
def is_groq_enabled(self) -> bool:
|
| 126 |
+
"""Check if at least one Groq API key is configured"""
|
| 127 |
+
return bool(self.get_groq_api_keys())
|
| 128 |
|
| 129 |
def is_hf_enabled(self) -> bool:
|
| 130 |
+
"""Check if at least one HuggingFace token is configured"""
|
| 131 |
+
return bool(self.get_hf_tokens())
|
| 132 |
|
| 133 |
def get_allowed_origins(self) -> List[str]:
|
| 134 |
"""Parse allowed origins from comma-separated string"""
|
|
|
|
| 136 |
return ["*"]
|
| 137 |
return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 138 |
|
| 139 |
+
def get_llm_for_task(self, task: str = "chat") -> str:
|
| 140 |
"""
|
| 141 |
+
Get LLM model name for a specific task.
|
| 142 |
|
| 143 |
Args:
|
| 144 |
+
task: Task type ('chat' or 'evaluation')
|
| 145 |
|
| 146 |
Returns:
|
| 147 |
+
str: Model name for the task
|
| 148 |
"""
|
|
|
|
| 149 |
if task == "evaluation":
|
| 150 |
+
return self.GROQ_EVAL_MODEL # llama3-70b-8192
|
| 151 |
else:
|
| 152 |
+
return self.GROQ_CHAT_MODEL # llama3-8b-8192
|
|
|
|
| 153 |
|
| 154 |
# ============================================================================
|
| 155 |
# CREATE GLOBAL SETTINGS INSTANCE
|
| 156 |
# ============================================================================
|
| 157 |
settings = Settings()
|
| 158 |
|
|
|
|
| 159 |
# ============================================================================
|
| 160 |
# PRINT CONFIGURATION ON LOAD
|
| 161 |
# ============================================================================
|
|
|
|
| 169 |
print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 170 |
print()
|
| 171 |
print("🔑 API Keys:")
|
| 172 |
+
groq_keys = settings.get_groq_api_keys()
|
| 173 |
+
print(f" Groq Keys: {len(groq_keys)} configured")
|
| 174 |
+
for i, key in enumerate(groq_keys, 1):
|
| 175 |
+
print(f" - Key {i}: {'✅ Set' if key else '❌ Missing'}")
|
| 176 |
+
hf_tokens = settings.get_hf_tokens()
|
| 177 |
+
print(f" HuggingFace Tokens: {len(hf_tokens)} configured")
|
| 178 |
+
for i, token in enumerate(hf_tokens, 1):
|
| 179 |
+
print(f" - Token {i}: {'✅ Set' if token else '❌ Missing'}")
|
| 180 |
print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 181 |
+
print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 182 |
+
print()
|
| 183 |
+
print("🤖 LLM Models:")
|
| 184 |
+
print(f" Chat Model: {settings.GROQ_CHAT_MODEL} (Llama 3 8B)")
|
| 185 |
+
print(f" Eval Model: {settings.GROQ_EVAL_MODEL} (Llama 3 70B)")
|
| 186 |
print()
|
| 187 |
print("🤖 Model Paths:")
|
| 188 |
print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
|
|
|
|
| 190 |
print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
|
| 191 |
print(f" Knowledge Base: {settings.KB_PATH}")
|
| 192 |
print("=" * 80)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/core/llm_manager.py
CHANGED
|
@@ -1,258 +1,278 @@
|
|
| 1 |
"""
|
| 2 |
-
Multi-LLM Manager
|
| 3 |
-
All three APIs co-exist for different purposes (no fallback logic)
|
| 4 |
|
| 5 |
Architecture:
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
-
|
|
|
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
import time
|
| 14 |
-
import google.generativeai as genai
|
| 15 |
from typing import List, Dict, Optional, Literal
|
| 16 |
from langchain_groq import ChatGroq
|
| 17 |
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
|
| 18 |
-
|
| 19 |
from app.config import settings
|
| 20 |
|
| 21 |
-
|
| 22 |
# ============================================================================
|
| 23 |
-
#
|
| 24 |
# ============================================================================
|
| 25 |
-
|
| 26 |
-
class GeminiManager:
|
| 27 |
"""
|
| 28 |
-
|
| 29 |
-
|
| 30 |
"""
|
| 31 |
|
| 32 |
def __init__(self):
|
| 33 |
-
"""Initialize
|
| 34 |
-
self.
|
| 35 |
-
self.
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
# Create model instance with safety settings
|
| 41 |
-
self.model = genai.GenerativeModel(
|
| 42 |
-
model_name=self.model_name,
|
| 43 |
-
generation_config={
|
| 44 |
-
"temperature": settings.LLM_TEMPERATURE,
|
| 45 |
-
"max_output_tokens": settings.LLM_MAX_TOKENS,
|
| 46 |
-
}
|
| 47 |
-
)
|
| 48 |
|
| 49 |
# Rate limiting tracking
|
| 50 |
self.requests_this_minute = 0
|
| 51 |
-
self.tokens_this_minute = 0
|
| 52 |
self.last_reset = time.time()
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def _check_rate_limits(self):
|
| 57 |
"""
|
| 58 |
Check and reset rate limit counters.
|
| 59 |
-
|
| 60 |
"""
|
| 61 |
current_time = time.time()
|
| 62 |
|
| 63 |
# Reset counters every minute
|
| 64 |
if current_time - self.last_reset > 60:
|
| 65 |
self.requests_this_minute = 0
|
| 66 |
-
self.tokens_this_minute = 0
|
| 67 |
self.last_reset = current_time
|
| 68 |
|
| 69 |
# Check if limits exceeded
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
async def generate(
|
| 77 |
self,
|
| 78 |
messages: List[Dict[str, str]],
|
| 79 |
-
system_prompt: Optional[str] = None
|
|
|
|
| 80 |
) -> str:
|
| 81 |
"""
|
| 82 |
-
Generate response using
|
| 83 |
|
| 84 |
Args:
|
| 85 |
messages: List of conversation messages
|
| 86 |
-
|
| 87 |
-
|
| 88 |
|
| 89 |
Returns:
|
| 90 |
str: Generated response text
|
|
|
|
|
|
|
|
|
|
| 91 |
"""
|
| 92 |
self._check_rate_limits()
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
formatted_messages.append(
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
# ============================================================================
|
| 133 |
-
#
|
| 134 |
# ============================================================================
|
| 135 |
-
|
| 136 |
-
class GroqManager:
|
| 137 |
"""
|
| 138 |
-
|
| 139 |
-
|
| 140 |
"""
|
| 141 |
|
| 142 |
def __init__(self):
|
| 143 |
-
"""Initialize
|
| 144 |
-
self.
|
| 145 |
-
self.
|
| 146 |
-
|
| 147 |
-
# Create ChatGroq instance
|
| 148 |
-
self.llm = ChatGroq(
|
| 149 |
-
api_key=self.api_key,
|
| 150 |
-
model_name=self.model_name,
|
| 151 |
-
temperature=settings.LLM_TEMPERATURE,
|
| 152 |
-
max_tokens=settings.LLM_MAX_TOKENS
|
| 153 |
-
)
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
self.tokens_this_minute = 0
|
| 158 |
-
self.last_reset = time.time()
|
| 159 |
|
| 160 |
-
print(f"✅
|
|
|
|
|
|
|
| 161 |
|
| 162 |
-
def
|
| 163 |
-
"""
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
# Reset counters every minute
|
| 170 |
-
if current_time - self.last_reset > 60:
|
| 171 |
-
self.requests_this_minute = 0
|
| 172 |
-
self.tokens_this_minute = 0
|
| 173 |
-
self.last_reset = current_time
|
| 174 |
-
|
| 175 |
-
# Check if limits exceeded
|
| 176 |
-
if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
|
| 177 |
-
wait_time = 60 - (current_time - self.last_reset)
|
| 178 |
-
print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
|
| 179 |
-
time.sleep(wait_time)
|
| 180 |
-
self._check_rate_limits()
|
| 181 |
|
| 182 |
async def generate(
|
| 183 |
self,
|
| 184 |
messages: List[Dict[str, str]],
|
| 185 |
-
system_prompt: Optional[str] = None
|
|
|
|
| 186 |
) -> str:
|
| 187 |
"""
|
| 188 |
-
Generate response using
|
| 189 |
|
| 190 |
Args:
|
| 191 |
messages: List of conversation messages
|
| 192 |
-
Format: [{'role': 'user'/'assistant', 'content': '...'}]
|
| 193 |
system_prompt: Optional system prompt
|
|
|
|
| 194 |
|
| 195 |
Returns:
|
| 196 |
str: Generated response text
|
| 197 |
-
"""
|
| 198 |
-
self._check_rate_limits()
|
| 199 |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
# ============================================================================
|
| 232 |
-
# UNIFIED LLM MANAGER (
|
| 233 |
# ============================================================================
|
| 234 |
-
|
| 235 |
class LLMManager:
|
| 236 |
"""
|
| 237 |
-
Unified LLM Manager
|
|
|
|
|
|
|
| 238 |
|
| 239 |
-
|
| 240 |
-
- Chat
|
| 241 |
-
- Evaluation
|
| 242 |
-
- Policy → Local BERT (no API call)
|
| 243 |
"""
|
| 244 |
|
| 245 |
def __init__(self):
|
| 246 |
"""Initialize all LLM managers"""
|
| 247 |
-
self.gemini = None
|
| 248 |
self.groq = None
|
| 249 |
-
|
| 250 |
-
# Initialize Gemini if configured
|
| 251 |
-
if settings.is_gemini_enabled():
|
| 252 |
-
try:
|
| 253 |
-
self.gemini = GeminiManager()
|
| 254 |
-
except Exception as e:
|
| 255 |
-
print(f"⚠️ Failed to initialize Gemini: {e}")
|
| 256 |
|
| 257 |
# Initialize Groq if configured
|
| 258 |
if settings.is_groq_enabled():
|
|
@@ -261,7 +281,18 @@ class LLMManager:
|
|
| 261 |
except Exception as e:
|
| 262 |
print(f"⚠️ Failed to initialize Groq: {e}")
|
| 263 |
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
async def generate(
|
| 267 |
self,
|
|
@@ -270,62 +301,48 @@ class LLMManager:
|
|
| 270 |
task: Literal["chat", "evaluation"] = "chat"
|
| 271 |
) -> str:
|
| 272 |
"""
|
| 273 |
-
Generate response
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
Args:
|
| 276 |
messages: Conversation messages
|
| 277 |
system_prompt: Optional system prompt
|
| 278 |
-
task: Task type - "chat" (
|
| 279 |
|
| 280 |
Returns:
|
| 281 |
str: Generated response
|
| 282 |
|
| 283 |
Raises:
|
| 284 |
-
ValueError: If
|
| 285 |
"""
|
| 286 |
-
#
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
-
|
| 300 |
-
raise ValueError(f"Unknown LLM choice: {llm_choice}")
|
| 301 |
-
|
| 302 |
-
# async def generate_chat_response(
|
| 303 |
-
# self,
|
| 304 |
-
# query: str,
|
| 305 |
-
# context: str,
|
| 306 |
-
# history: List[Dict[str, str]]
|
| 307 |
-
# ) -> str:
|
| 308 |
-
# """
|
| 309 |
-
# Generate chat response (uses Gemini by default).
|
| 310 |
-
|
| 311 |
-
# Args:
|
| 312 |
-
# query: User query
|
| 313 |
-
# context: Retrieved context (from FAISS)
|
| 314 |
-
# history: Conversation history
|
| 315 |
-
|
| 316 |
-
# Returns:
|
| 317 |
-
# str: Chat response
|
| 318 |
-
# """
|
| 319 |
-
# # Build system prompt
|
| 320 |
-
# system_prompt = settings.SYSTEM_PROMPT
|
| 321 |
-
# if context:
|
| 322 |
-
# system_prompt += f"\n\nRelevant Information:\n{context}"
|
| 323 |
-
|
| 324 |
-
# # Build messages
|
| 325 |
-
# messages = history + [{'role': 'user', 'content': query}]
|
| 326 |
-
|
| 327 |
-
# # Generate using chat LLM (Gemini)
|
| 328 |
-
# return await self.generate(messages, system_prompt, task="chat")
|
| 329 |
|
| 330 |
async def generate_chat_response(
|
| 331 |
self,
|
|
@@ -333,28 +350,32 @@ class LLMManager:
|
|
| 333 |
context: str,
|
| 334 |
history: List[Dict[str, str]]
|
| 335 |
) -> str:
|
| 336 |
-
"""
|
| 337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
# Import the detailed prompt
|
| 339 |
from app.services.chat_service import BANKING_SYSTEM_PROMPT
|
| 340 |
-
|
| 341 |
# Build enhanced system prompt with context
|
| 342 |
system_prompt = BANKING_SYSTEM_PROMPT
|
| 343 |
-
|
| 344 |
if context:
|
| 345 |
system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
|
| 346 |
else:
|
| 347 |
system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."
|
| 348 |
-
|
| 349 |
# Build messages
|
| 350 |
messages = history + [{'role': 'user', 'content': query}]
|
| 351 |
-
|
| 352 |
-
# Generate using chat
|
| 353 |
return await self.generate(messages, system_prompt, task="chat")
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
|
| 359 |
async def evaluate_response(
|
| 360 |
self,
|
|
@@ -363,7 +384,7 @@ class LLMManager:
|
|
| 363 |
context: str = ""
|
| 364 |
) -> Dict:
|
| 365 |
"""
|
| 366 |
-
Evaluate response quality (uses
|
| 367 |
Used during RL training.
|
| 368 |
|
| 369 |
Args:
|
|
@@ -373,9 +394,10 @@ class LLMManager:
|
|
| 373 |
|
| 374 |
Returns:
|
| 375 |
dict: Evaluation results
|
| 376 |
-
|
| 377 |
"""
|
| 378 |
eval_prompt = f"""Evaluate this response:
|
|
|
|
| 379 |
Query: {query}
|
| 380 |
Response: {response}
|
| 381 |
Context used: {context if context else 'None'}
|
|
@@ -384,7 +406,7 @@ Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explan
|
|
| 384 |
|
| 385 |
messages = [{'role': 'user', 'content': eval_prompt}]
|
| 386 |
|
| 387 |
-
# Generate using evaluation
|
| 388 |
result = await self.generate(messages, task="evaluation")
|
| 389 |
|
| 390 |
# Parse result
|
|
@@ -395,32 +417,29 @@ Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explan
|
|
| 395 |
'explanation': result
|
| 396 |
}
|
| 397 |
|
| 398 |
-
|
| 399 |
# ============================================================================
|
| 400 |
# GLOBAL LLM MANAGER INSTANCE
|
| 401 |
# ============================================================================
|
| 402 |
llm_manager = LLMManager()
|
| 403 |
|
| 404 |
-
|
| 405 |
# ============================================================================
|
| 406 |
# USAGE EXAMPLE (for reference)
|
| 407 |
# ============================================================================
|
| 408 |
"""
|
| 409 |
# In your service file:
|
| 410 |
-
|
| 411 |
from app.core.llm_manager import llm_manager
|
| 412 |
|
| 413 |
-
# Generate chat response (uses
|
| 414 |
response = await llm_manager.generate_chat_response(
|
| 415 |
query="What is my account balance?",
|
| 416 |
context="Your balance is $1000",
|
| 417 |
history=[]
|
| 418 |
)
|
| 419 |
|
| 420 |
-
# Evaluate response (uses Groq)
|
| 421 |
evaluation = await llm_manager.evaluate_response(
|
| 422 |
query="What is my balance?",
|
| 423 |
response="Your balance is $1000",
|
| 424 |
context="Balance: $1000"
|
| 425 |
)
|
| 426 |
-
"""
|
|
|
|
| 1 |
"""
|
| 2 |
+
Multi-LLM Manager with Groq (ChatGroq) and HuggingFace Fallback Logic
|
|
|
|
| 3 |
|
| 4 |
Architecture:
|
| 5 |
+
- Primary: Groq API with 3 keys (sequential fallback)
|
| 6 |
+
- Fallback: HuggingFace Inference API with 3 tokens (sequential fallback)
|
| 7 |
+
- Llama 3 8B for chat interface
|
| 8 |
+
- Llama 3 70B for evaluation
|
| 9 |
|
| 10 |
+
Fallback Logic:
|
| 11 |
+
1. Try GROQ_API_KEY_1
|
| 12 |
+
2. If fails, try GROQ_API_KEY_2
|
| 13 |
+
3. If fails, try GROQ_API_KEY_3
|
| 14 |
+
4. If all Groq keys fail, try HF_TOKEN_1
|
| 15 |
+
5. If fails, try HF_TOKEN_2
|
| 16 |
+
6. If fails, try HF_TOKEN_3
|
| 17 |
"""
|
| 18 |
|
| 19 |
import time
|
|
|
|
| 20 |
from typing import List, Dict, Optional, Literal
|
| 21 |
from langchain_groq import ChatGroq
|
| 22 |
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
|
| 23 |
+
from huggingface_hub import InferenceClient
|
| 24 |
from app.config import settings
|
| 25 |
|
|
|
|
| 26 |
# ============================================================================
|
| 27 |
+
# GROQ MANAGER WITH FALLBACK
|
| 28 |
# ============================================================================
|
| 29 |
+
class GroqManager:
|
|
|
|
| 30 |
"""
|
| 31 |
+
Groq API Manager with multiple API key fallback support
|
| 32 |
+
Uses ChatGroq from langchain_groq
|
| 33 |
"""
|
| 34 |
|
| 35 |
def __init__(self):
|
| 36 |
+
"""Initialize Groq manager with all available API keys"""
|
| 37 |
+
self.api_keys = settings.get_groq_api_keys()
|
| 38 |
+
self.chat_model_name = settings.GROQ_CHAT_MODEL # llama3-8b-8192
|
| 39 |
+
self.eval_model_name = settings.GROQ_EVAL_MODEL # llama3-70b-8192
|
| 40 |
+
|
| 41 |
+
# Track current key index
|
| 42 |
+
self.current_key_index = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# Rate limiting tracking
|
| 45 |
self.requests_this_minute = 0
|
|
|
|
| 46 |
self.last_reset = time.time()
|
| 47 |
|
| 48 |
+
if not self.api_keys:
|
| 49 |
+
raise ValueError("No Groq API keys configured. Set GROQ_API_KEY_1 in .env")
|
| 50 |
+
|
| 51 |
+
print(f"✅ Groq Manager initialized with {len(self.api_keys)} API key(s)")
|
| 52 |
+
print(f" Chat Model: {self.chat_model_name}")
|
| 53 |
+
print(f" Eval Model: {self.eval_model_name}")
|
| 54 |
|
| 55 |
def _check_rate_limits(self):
|
| 56 |
"""
|
| 57 |
Check and reset rate limit counters.
|
| 58 |
+
Groq Free: 30 requests/min
|
| 59 |
"""
|
| 60 |
current_time = time.time()
|
| 61 |
|
| 62 |
# Reset counters every minute
|
| 63 |
if current_time - self.last_reset > 60:
|
| 64 |
self.requests_this_minute = 0
|
|
|
|
| 65 |
self.last_reset = current_time
|
| 66 |
|
| 67 |
# Check if limits exceeded
|
| 68 |
+
# =================================================================
|
| 69 |
+
# Uncomment below if rate limiting enforcement is needed
|
| 70 |
+
# =================================================================
|
| 71 |
+
|
| 72 |
+
# if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
|
| 73 |
+
# wait_time = 60 - (current_time - self.last_reset)
|
| 74 |
+
# print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
|
| 75 |
+
# time.sleep(wait_time)
|
| 76 |
+
# self._check_rate_limits()
|
| 77 |
+
|
| 78 |
+
def _create_llm(self, api_key: str, model_name: str) -> ChatGroq:
|
| 79 |
+
"""Create ChatGroq instance with given API key and model"""
|
| 80 |
+
return ChatGroq(
|
| 81 |
+
api_key=api_key,
|
| 82 |
+
model_name=model_name,
|
| 83 |
+
temperature=settings.LLM_TEMPERATURE,
|
| 84 |
+
max_tokens=settings.LLM_MAX_TOKENS,
|
| 85 |
+
max_retries=0 # Disable automatic retries, we handle fallback manually
|
| 86 |
+
)
|
| 87 |
|
| 88 |
async def generate(
|
| 89 |
self,
|
| 90 |
messages: List[Dict[str, str]],
|
| 91 |
+
system_prompt: Optional[str] = None,
|
| 92 |
+
task: Literal["chat", "evaluation"] = "chat"
|
| 93 |
) -> str:
|
| 94 |
"""
|
| 95 |
+
Generate response using Groq with fallback logic.
|
| 96 |
|
| 97 |
Args:
|
| 98 |
messages: List of conversation messages
|
| 99 |
+
system_prompt: Optional system prompt
|
| 100 |
+
task: Task type to determine model (chat uses 8B, evaluation uses 70B)
|
| 101 |
|
| 102 |
Returns:
|
| 103 |
str: Generated response text
|
| 104 |
+
|
| 105 |
+
Raises:
|
| 106 |
+
Exception: If all Groq API keys fail
|
| 107 |
"""
|
| 108 |
self._check_rate_limits()
|
| 109 |
|
| 110 |
+
# Select model based on task
|
| 111 |
+
model_name = self.eval_model_name if task == "evaluation" else self.chat_model_name
|
| 112 |
+
|
| 113 |
+
# Format messages for LangChain
|
| 114 |
+
formatted_messages = []
|
| 115 |
+
|
| 116 |
+
# Add system message if provided
|
| 117 |
+
if system_prompt:
|
| 118 |
+
formatted_messages.append(SystemMessage(content=system_prompt))
|
| 119 |
+
|
| 120 |
+
# Convert conversation messages
|
| 121 |
+
for msg in messages:
|
| 122 |
+
if msg['role'] == 'user':
|
| 123 |
+
formatted_messages.append(HumanMessage(content=msg['content']))
|
| 124 |
+
elif msg['role'] == 'assistant':
|
| 125 |
+
formatted_messages.append(AIMessage(content=msg['content']))
|
| 126 |
+
|
| 127 |
+
# Try each Groq API key sequentially
|
| 128 |
+
for key_index, api_key in enumerate(self.api_keys, 1):
|
| 129 |
+
try:
|
| 130 |
+
print(f"🔑 Trying Groq API Key {key_index}/{len(self.api_keys)} with {model_name}...")
|
| 131 |
+
|
| 132 |
+
# Create LLM instance with current key
|
| 133 |
+
llm = self._create_llm(api_key, model_name)
|
| 134 |
+
|
| 135 |
+
# Generate response
|
| 136 |
+
response = await llm.ainvoke(formatted_messages)
|
| 137 |
+
|
| 138 |
+
# Track rate limits
|
| 139 |
+
self.requests_this_minute += 1
|
| 140 |
+
|
| 141 |
+
print(f"✅ Groq API Key {key_index} succeeded")
|
| 142 |
+
return response.content
|
| 143 |
+
|
| 144 |
+
except Exception as e:
|
| 145 |
+
print(f"❌ Groq API Key {key_index} failed: {e}")
|
| 146 |
+
|
| 147 |
+
# If this was the last key, raise exception
|
| 148 |
+
if key_index == len(self.api_keys):
|
| 149 |
+
print(f"❌ All {len(self.api_keys)} Groq API keys exhausted")
|
| 150 |
+
raise Exception(f"All Groq API keys failed. Last error: {e}")
|
| 151 |
+
|
| 152 |
+
# Otherwise, continue to next key
|
| 153 |
+
print(f"⏭️ Falling back to next Groq API key...")
|
| 154 |
+
continue
|
| 155 |
|
| 156 |
# ============================================================================
|
| 157 |
+
# HUGGINGFACE MANAGER WITH FALLBACK
|
| 158 |
# ============================================================================
|
| 159 |
+
class HuggingFaceManager:
|
|
|
|
| 160 |
"""
|
| 161 |
+
HuggingFace Inference API Manager with multiple token fallback support
|
| 162 |
+
Uses InferenceClient from huggingface_hub
|
| 163 |
"""
|
| 164 |
|
| 165 |
def __init__(self):
|
| 166 |
+
"""Initialize HuggingFace manager with all available tokens"""
|
| 167 |
+
self.tokens = settings.get_hf_tokens()
|
| 168 |
+
self.chat_model_name = settings.HF_CHAT_MODEL
|
| 169 |
+
self.eval_model_name = settings.HF_EVAL_MODEL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
if not self.tokens:
|
| 172 |
+
raise ValueError("No HuggingFace tokens configured. Set HF_TOKEN_1 in .env")
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
print(f"✅ HuggingFace Manager initialized with {len(self.tokens)} token(s)")
|
| 175 |
+
print(f" Chat Model: {self.chat_model_name}")
|
| 176 |
+
print(f" Eval Model: {self.eval_model_name}")
|
| 177 |
|
| 178 |
+
def _create_client(self, token: str, model_name: str) -> InferenceClient:
|
| 179 |
+
"""Create InferenceClient instance with given token and model"""
|
| 180 |
+
return InferenceClient(
|
| 181 |
+
model=model_name,
|
| 182 |
+
token=token
|
| 183 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
async def generate(
|
| 186 |
self,
|
| 187 |
messages: List[Dict[str, str]],
|
| 188 |
+
system_prompt: Optional[str] = None,
|
| 189 |
+
task: Literal["chat", "evaluation"] = "chat"
|
| 190 |
) -> str:
|
| 191 |
"""
|
| 192 |
+
Generate response using HuggingFace Inference API with fallback logic.
|
| 193 |
|
| 194 |
Args:
|
| 195 |
messages: List of conversation messages
|
|
|
|
| 196 |
system_prompt: Optional system prompt
|
| 197 |
+
task: Task type to determine model
|
| 198 |
|
| 199 |
Returns:
|
| 200 |
str: Generated response text
|
|
|
|
|
|
|
| 201 |
|
| 202 |
+
Raises:
|
| 203 |
+
Exception: If all HuggingFace tokens fail
|
| 204 |
+
"""
|
| 205 |
+
# Select model based on task
|
| 206 |
+
model_name = self.eval_model_name if task == "evaluation" else self.chat_model_name
|
| 207 |
+
|
| 208 |
+
# Format messages for HuggingFace chat API
|
| 209 |
+
formatted_messages = []
|
| 210 |
+
|
| 211 |
+
# Add system message if provided
|
| 212 |
+
if system_prompt:
|
| 213 |
+
formatted_messages.append({
|
| 214 |
+
"role": "system",
|
| 215 |
+
"content": system_prompt
|
| 216 |
+
})
|
| 217 |
+
|
| 218 |
+
# Convert conversation messages
|
| 219 |
+
for msg in messages:
|
| 220 |
+
formatted_messages.append({
|
| 221 |
+
"role": msg['role'],
|
| 222 |
+
"content": msg['content']
|
| 223 |
+
})
|
| 224 |
+
|
| 225 |
+
# Try each HuggingFace token sequentially
|
| 226 |
+
for token_index, token in enumerate(self.tokens, 1):
|
| 227 |
+
try:
|
| 228 |
+
print(f"🔑 Trying HuggingFace Token {token_index}/{len(self.tokens)} with {model_name}...")
|
| 229 |
+
|
| 230 |
+
# Create client with current token
|
| 231 |
+
client = self._create_client(token, model_name)
|
| 232 |
+
|
| 233 |
+
# Generate response using chat completion
|
| 234 |
+
response = client.chat_completion(
|
| 235 |
+
messages=formatted_messages,
|
| 236 |
+
max_tokens=settings.LLM_MAX_TOKENS,
|
| 237 |
+
temperature=settings.LLM_TEMPERATURE
|
| 238 |
+
)
|
| 239 |
+
|
| 240 |
+
# Extract content from response
|
| 241 |
+
content = response.choices[0].message.content
|
| 242 |
+
|
| 243 |
+
print(f"✅ HuggingFace Token {token_index} succeeded")
|
| 244 |
+
return content
|
| 245 |
+
|
| 246 |
+
except Exception as e:
|
| 247 |
+
print(f"❌ HuggingFace Token {token_index} failed: {e}")
|
| 248 |
+
|
| 249 |
+
# If this was the last token, raise exception
|
| 250 |
+
if token_index == len(self.tokens):
|
| 251 |
+
print(f"❌ All {len(self.tokens)} HuggingFace tokens exhausted")
|
| 252 |
+
raise Exception(f"All HuggingFace tokens failed. Last error: {e}")
|
| 253 |
+
|
| 254 |
+
# Otherwise, continue to next token
|
| 255 |
+
print(f"⏭️ Falling back to next HuggingFace token...")
|
| 256 |
+
continue
|
| 257 |
|
| 258 |
# ============================================================================
|
| 259 |
+
# UNIFIED LLM MANAGER (Groq Primary, HuggingFace Fallback)
|
| 260 |
# ============================================================================
|
|
|
|
| 261 |
class LLMManager:
|
| 262 |
"""
|
| 263 |
+
Unified LLM Manager with cascading fallback logic:
|
| 264 |
+
1. Try all Groq API keys (primary)
|
| 265 |
+
2. If all fail, try all HuggingFace tokens (fallback)
|
| 266 |
|
| 267 |
+
Models:
|
| 268 |
+
- Chat: Llama 3 8B (for user-facing chat responses)
|
| 269 |
+
- Evaluation: Llama 3 70B (for response evaluation)
|
|
|
|
| 270 |
"""
|
| 271 |
|
| 272 |
def __init__(self):
|
| 273 |
"""Initialize all LLM managers"""
|
|
|
|
| 274 |
self.groq = None
|
| 275 |
+
self.huggingface = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
# Initialize Groq if configured
|
| 278 |
if settings.is_groq_enabled():
|
|
|
|
| 281 |
except Exception as e:
|
| 282 |
print(f"⚠️ Failed to initialize Groq: {e}")
|
| 283 |
|
| 284 |
+
# Initialize HuggingFace if configured
|
| 285 |
+
if settings.is_hf_enabled():
|
| 286 |
+
try:
|
| 287 |
+
self.huggingface = HuggingFaceManager()
|
| 288 |
+
except Exception as e:
|
| 289 |
+
print(f"⚠️ Failed to initialize HuggingFace: {e}")
|
| 290 |
+
|
| 291 |
+
# Check if at least one is available
|
| 292 |
+
if not self.groq and not self.huggingface:
|
| 293 |
+
raise ValueError("No LLM provider configured. Set either Groq or HuggingFace credentials in .env")
|
| 294 |
+
|
| 295 |
+
print("✅ LLM Manager initialized with fallback logic")
|
| 296 |
|
| 297 |
async def generate(
|
| 298 |
self,
|
|
|
|
| 301 |
task: Literal["chat", "evaluation"] = "chat"
|
| 302 |
) -> str:
|
| 303 |
"""
|
| 304 |
+
Generate response with cascading fallback logic.
|
| 305 |
+
|
| 306 |
+
Fallback order:
|
| 307 |
+
1. Try all Groq API keys (3 keys)
|
| 308 |
+
2. If all Groq keys fail, try all HuggingFace tokens (3 tokens)
|
| 309 |
|
| 310 |
Args:
|
| 311 |
messages: Conversation messages
|
| 312 |
system_prompt: Optional system prompt
|
| 313 |
+
task: Task type - "chat" (8B) or "evaluation" (70B)
|
| 314 |
|
| 315 |
Returns:
|
| 316 |
str: Generated response
|
| 317 |
|
| 318 |
Raises:
|
| 319 |
+
ValueError: If all providers fail
|
| 320 |
"""
|
| 321 |
+
# Try Groq first (if available)
|
| 322 |
+
if self.groq:
|
| 323 |
+
try:
|
| 324 |
+
print("🚀 Attempting Groq API (Primary)...")
|
| 325 |
+
response = await self.groq.generate(messages, system_prompt, task)
|
| 326 |
+
return response
|
| 327 |
+
except Exception as groq_error:
|
| 328 |
+
print(f"❌ All Groq API keys failed: {groq_error}")
|
| 329 |
+
|
| 330 |
+
# Fall back to HuggingFace if available
|
| 331 |
+
if self.huggingface:
|
| 332 |
+
print("🔄 Falling back to HuggingFace Inference API...")
|
| 333 |
+
else:
|
| 334 |
+
raise ValueError(f"Groq failed and no HuggingFace fallback configured: {groq_error}")
|
| 335 |
+
|
| 336 |
+
# Try HuggingFace (if Groq failed or not available)
|
| 337 |
+
if self.huggingface:
|
| 338 |
+
try:
|
| 339 |
+
print("🚀 Attempting HuggingFace API (Fallback)...")
|
| 340 |
+
response = await self.huggingface.generate(messages, system_prompt, task)
|
| 341 |
+
return response
|
| 342 |
+
except Exception as hf_error:
|
| 343 |
+
raise ValueError(f"All LLM providers exhausted. HuggingFace error: {hf_error}")
|
| 344 |
|
| 345 |
+
raise ValueError("No LLM provider available")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
async def generate_chat_response(
|
| 348 |
self,
|
|
|
|
| 350 |
context: str,
|
| 351 |
history: List[Dict[str, str]]
|
| 352 |
) -> str:
|
| 353 |
+
"""
|
| 354 |
+
Generate chat response (uses Llama 3 8B).
|
| 355 |
+
|
| 356 |
+
Args:
|
| 357 |
+
query: User query
|
| 358 |
+
context: Retrieved context (from FAISS)
|
| 359 |
+
history: Conversation history
|
| 360 |
+
|
| 361 |
+
Returns:
|
| 362 |
+
str: Chat response
|
| 363 |
+
"""
|
| 364 |
# Import the detailed prompt
|
| 365 |
from app.services.chat_service import BANKING_SYSTEM_PROMPT
|
| 366 |
+
|
| 367 |
# Build enhanced system prompt with context
|
| 368 |
system_prompt = BANKING_SYSTEM_PROMPT
|
|
|
|
| 369 |
if context:
|
| 370 |
system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
|
| 371 |
else:
|
| 372 |
system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."
|
| 373 |
+
|
| 374 |
# Build messages
|
| 375 |
messages = history + [{'role': 'user', 'content': query}]
|
| 376 |
+
|
| 377 |
+
# Generate using chat task (Llama 3 8B)
|
| 378 |
return await self.generate(messages, system_prompt, task="chat")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
async def evaluate_response(
|
| 381 |
self,
|
|
|
|
| 384 |
context: str = ""
|
| 385 |
) -> Dict:
|
| 386 |
"""
|
| 387 |
+
Evaluate response quality (uses Llama 3 70B for better evaluation).
|
| 388 |
Used during RL training.
|
| 389 |
|
| 390 |
Args:
|
|
|
|
| 394 |
|
| 395 |
Returns:
|
| 396 |
dict: Evaluation results
|
| 397 |
+
{'quality': 'Good'/'Bad', 'explanation': '...'}
|
| 398 |
"""
|
| 399 |
eval_prompt = f"""Evaluate this response:
|
| 400 |
+
|
| 401 |
Query: {query}
|
| 402 |
Response: {response}
|
| 403 |
Context used: {context if context else 'None'}
|
|
|
|
| 406 |
|
| 407 |
messages = [{'role': 'user', 'content': eval_prompt}]
|
| 408 |
|
| 409 |
+
# Generate using evaluation task (Llama 3 70B)
|
| 410 |
result = await self.generate(messages, task="evaluation")
|
| 411 |
|
| 412 |
# Parse result
|
|
|
|
| 417 |
'explanation': result
|
| 418 |
}
|
| 419 |
|
|
|
|
| 420 |
# ============================================================================
|
| 421 |
# GLOBAL LLM MANAGER INSTANCE
|
| 422 |
# ============================================================================
|
| 423 |
llm_manager = LLMManager()
|
| 424 |
|
|
|
|
| 425 |
# ============================================================================
|
| 426 |
# USAGE EXAMPLE (for reference)
|
| 427 |
# ============================================================================
|
| 428 |
"""
|
| 429 |
# In your service file:
|
|
|
|
| 430 |
from app.core.llm_manager import llm_manager
|
| 431 |
|
| 432 |
+
# Generate chat response (uses Llama 3 8B with Groq → HF fallback)
|
| 433 |
response = await llm_manager.generate_chat_response(
|
| 434 |
query="What is my account balance?",
|
| 435 |
context="Your balance is $1000",
|
| 436 |
history=[]
|
| 437 |
)
|
| 438 |
|
| 439 |
+
# Evaluate response (uses Llama 3 70B with Groq → HF fallback)
|
| 440 |
evaluation = await llm_manager.evaluate_response(
|
| 441 |
query="What is my balance?",
|
| 442 |
response="Your balance is $1000",
|
| 443 |
context="Balance: $1000"
|
| 444 |
)
|
| 445 |
+
"""
|
app/main.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
"""
|
| 2 |
FastAPI Main Application Entry Point
|
|
|
|
| 3 |
Banking RAG Chatbot API with JWT Authentication
|
| 4 |
|
| 5 |
This file:
|
| 6 |
1. Creates the FastAPI app
|
| 7 |
-
2. Configures CORS middleware
|
| 8 |
3. Connects to MongoDB on startup/shutdown
|
| 9 |
4. Includes API routers (auth + chat)
|
| 10 |
5. Provides health check endpoints
|
|
@@ -18,7 +19,6 @@ from contextlib import asynccontextmanager
|
|
| 18 |
from app.config import settings
|
| 19 |
from app.db.mongodb import connect_to_mongo, close_mongo_connection
|
| 20 |
|
| 21 |
-
|
| 22 |
# ============================================================================
|
| 23 |
# LIFESPAN MANAGER (Startup & Shutdown)
|
| 24 |
# ============================================================================
|
|
@@ -52,7 +52,13 @@ async def lifespan(app: FastAPI):
|
|
| 52 |
print("\n💡 ML Models Info:")
|
| 53 |
print(" Policy Network: Loads on first chat request (lazy loading)")
|
| 54 |
print(" Retriever Model: Loads on first retrieval (lazy loading)")
|
| 55 |
-
print(" LLM (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
print("\n✅ Backend startup complete!")
|
| 58 |
print("=" * 80)
|
|
@@ -77,7 +83,6 @@ async def lifespan(app: FastAPI):
|
|
| 77 |
print("✅ Shutdown complete")
|
| 78 |
print("=" * 80 + "\n")
|
| 79 |
|
| 80 |
-
|
| 81 |
# ============================================================================
|
| 82 |
# CREATE FASTAPI APPLICATION
|
| 83 |
# ============================================================================
|
|
@@ -85,21 +90,22 @@ async def lifespan(app: FastAPI):
|
|
| 85 |
app = FastAPI(
|
| 86 |
title="Banking RAG Chatbot API",
|
| 87 |
description="""
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
""",
|
| 104 |
version="1.0.0",
|
| 105 |
docs_url="/docs",
|
|
@@ -107,13 +113,11 @@ app = FastAPI(
|
|
| 107 |
lifespan=lifespan
|
| 108 |
)
|
| 109 |
|
| 110 |
-
|
| 111 |
# ============================================================================
|
| 112 |
# CORS MIDDLEWARE
|
| 113 |
# ============================================================================
|
| 114 |
|
| 115 |
allowed_origins = settings.get_allowed_origins()
|
| 116 |
-
|
| 117 |
print("\n🌐 CORS Configuration:")
|
| 118 |
print(f" Allowed Origins: {allowed_origins}")
|
| 119 |
|
|
@@ -125,7 +129,6 @@ app.add_middleware(
|
|
| 125 |
allow_headers=["*"],
|
| 126 |
)
|
| 127 |
|
| 128 |
-
|
| 129 |
# ============================================================================
|
| 130 |
# INCLUDE API ROUTERS
|
| 131 |
# ============================================================================
|
|
@@ -146,7 +149,6 @@ app.include_router(
|
|
| 146 |
tags=["💬 Chat"]
|
| 147 |
)
|
| 148 |
|
| 149 |
-
|
| 150 |
# ============================================================================
|
| 151 |
# ROOT ENDPOINTS
|
| 152 |
# ============================================================================
|
|
@@ -161,6 +163,11 @@ async def root():
|
|
| 161 |
"version": "1.0.0",
|
| 162 |
"status": "online",
|
| 163 |
"authentication": "JWT Bearer Token Required for chat endpoints",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
"documentation": {
|
| 165 |
"swagger_ui": "/docs",
|
| 166 |
"redoc": "/redoc"
|
|
@@ -182,7 +189,6 @@ async def root():
|
|
| 182 |
}
|
| 183 |
}
|
| 184 |
|
| 185 |
-
|
| 186 |
@app.get("/health", tags=["🏥 Health"])
|
| 187 |
async def health_check():
|
| 188 |
"""
|
|
@@ -193,6 +199,7 @@ async def health_check():
|
|
| 193 |
- MongoDB connection
|
| 194 |
- ML models (lazy loaded)
|
| 195 |
- Authentication system
|
|
|
|
| 196 |
|
| 197 |
Returns:
|
| 198 |
dict: Health status of all components
|
|
@@ -209,6 +216,22 @@ async def health_check():
|
|
| 209 |
"llm": "ready (API-based)"
|
| 210 |
}
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
# Check authentication
|
| 213 |
auth_status = {
|
| 214 |
"jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
|
|
@@ -217,19 +240,23 @@ async def health_check():
|
|
| 217 |
}
|
| 218 |
|
| 219 |
# Overall health
|
| 220 |
-
is_healthy =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
return {
|
| 223 |
"status": "healthy" if is_healthy else "degraded",
|
| 224 |
"api": "online",
|
| 225 |
"mongodb": mongodb_status,
|
| 226 |
"authentication": auth_status,
|
|
|
|
| 227 |
"ml_models": ml_models_status,
|
| 228 |
"environment": settings.ENVIRONMENT,
|
| 229 |
"debug_mode": settings.DEBUG
|
| 230 |
}
|
| 231 |
|
| 232 |
-
|
| 233 |
# ============================================================================
|
| 234 |
# GLOBAL EXCEPTION HANDLER
|
| 235 |
# ============================================================================
|
|
@@ -256,7 +283,6 @@ async def global_exception_handler(request: Request, exc: Exception):
|
|
| 256 |
}
|
| 257 |
)
|
| 258 |
|
| 259 |
-
|
| 260 |
# ============================================================================
|
| 261 |
# MAIN ENTRY POINT (for direct execution)
|
| 262 |
# ============================================================================
|
|
|
|
| 1 |
"""
|
| 2 |
FastAPI Main Application Entry Point
|
| 3 |
+
|
| 4 |
Banking RAG Chatbot API with JWT Authentication
|
| 5 |
|
| 6 |
This file:
|
| 7 |
1. Creates the FastAPI app
|
| 8 |
+
2. Configures CORS middleware
|
| 9 |
3. Connects to MongoDB on startup/shutdown
|
| 10 |
4. Includes API routers (auth + chat)
|
| 11 |
5. Provides health check endpoints
|
|
|
|
| 19 |
from app.config import settings
|
| 20 |
from app.db.mongodb import connect_to_mongo, close_mongo_connection
|
| 21 |
|
|
|
|
| 22 |
# ============================================================================
|
| 23 |
# LIFESPAN MANAGER (Startup & Shutdown)
|
| 24 |
# ============================================================================
|
|
|
|
| 52 |
print("\n💡 ML Models Info:")
|
| 53 |
print(" Policy Network: Loads on first chat request (lazy loading)")
|
| 54 |
print(" Retriever Model: Loads on first retrieval (lazy loading)")
|
| 55 |
+
print(" LLM: Groq (ChatGroq) with HuggingFace fallback")
|
| 56 |
+
print("\n🤖 LLM Configuration:")
|
| 57 |
+
print(f" Chat Model: {settings.GROQ_CHAT_MODEL} (Llama 3 8B)")
|
| 58 |
+
print(f" Eval Model: {settings.GROQ_EVAL_MODEL} (Llama 3 70B)")
|
| 59 |
+
print(f" Groq API Keys: {len(settings.get_groq_api_keys())} configured")
|
| 60 |
+
print(f" HuggingFace Tokens: {len(settings.get_hf_tokens())} configured")
|
| 61 |
+
print(f" Fallback: Groq → HuggingFace")
|
| 62 |
|
| 63 |
print("\n✅ Backend startup complete!")
|
| 64 |
print("=" * 80)
|
|
|
|
| 83 |
print("✅ Shutdown complete")
|
| 84 |
print("=" * 80 + "\n")
|
| 85 |
|
|
|
|
| 86 |
# ============================================================================
|
| 87 |
# CREATE FASTAPI APPLICATION
|
| 88 |
# ============================================================================
|
|
|
|
| 90 |
app = FastAPI(
|
| 91 |
title="Banking RAG Chatbot API",
|
| 92 |
description="""
|
| 93 |
+
🤖 AI-powered Banking Assistant with:
|
| 94 |
+
|
| 95 |
+
**Features:**
|
| 96 |
+
- 🔐 JWT Authentication (Sign up, Login, Protected routes)
|
| 97 |
+
- 💬 RAG (Retrieval-Augmented Generation)
|
| 98 |
+
- 🧠 RL-based Policy Network (BERT)
|
| 99 |
+
- 🔍 Custom E5 Retriever
|
| 100 |
+
- ⚡ Groq LLM with HuggingFace Fallback (Llama 3 models)
|
| 101 |
+
|
| 102 |
+
**Capabilities:**
|
| 103 |
+
- Intelligent document retrieval
|
| 104 |
+
- Context-aware responses
|
| 105 |
+
- Conversation history
|
| 106 |
+
- Real-time chat
|
| 107 |
+
- User authentication & authorization
|
| 108 |
+
- Multi-provider LLM with automatic fallback
|
| 109 |
""",
|
| 110 |
version="1.0.0",
|
| 111 |
docs_url="/docs",
|
|
|
|
| 113 |
lifespan=lifespan
|
| 114 |
)
|
| 115 |
|
|
|
|
| 116 |
# ============================================================================
|
| 117 |
# CORS MIDDLEWARE
|
| 118 |
# ============================================================================
|
| 119 |
|
| 120 |
allowed_origins = settings.get_allowed_origins()
|
|
|
|
| 121 |
print("\n🌐 CORS Configuration:")
|
| 122 |
print(f" Allowed Origins: {allowed_origins}")
|
| 123 |
|
|
|
|
| 129 |
allow_headers=["*"],
|
| 130 |
)
|
| 131 |
|
|
|
|
| 132 |
# ============================================================================
|
| 133 |
# INCLUDE API ROUTERS
|
| 134 |
# ============================================================================
|
|
|
|
| 149 |
tags=["💬 Chat"]
|
| 150 |
)
|
| 151 |
|
|
|
|
| 152 |
# ============================================================================
|
| 153 |
# ROOT ENDPOINTS
|
| 154 |
# ============================================================================
|
|
|
|
| 163 |
"version": "1.0.0",
|
| 164 |
"status": "online",
|
| 165 |
"authentication": "JWT Bearer Token Required for chat endpoints",
|
| 166 |
+
"llm_provider": "Groq (ChatGroq) with HuggingFace fallback",
|
| 167 |
+
"models": {
|
| 168 |
+
"chat": settings.GROQ_CHAT_MODEL,
|
| 169 |
+
"evaluation": settings.GROQ_EVAL_MODEL
|
| 170 |
+
},
|
| 171 |
"documentation": {
|
| 172 |
"swagger_ui": "/docs",
|
| 173 |
"redoc": "/redoc"
|
|
|
|
| 189 |
}
|
| 190 |
}
|
| 191 |
|
|
|
|
| 192 |
@app.get("/health", tags=["🏥 Health"])
|
| 193 |
async def health_check():
|
| 194 |
"""
|
|
|
|
| 199 |
- MongoDB connection
|
| 200 |
- ML models (lazy loaded)
|
| 201 |
- Authentication system
|
| 202 |
+
- LLM providers (Groq & HuggingFace)
|
| 203 |
|
| 204 |
Returns:
|
| 205 |
dict: Health status of all components
|
|
|
|
| 216 |
"llm": "ready (API-based)"
|
| 217 |
}
|
| 218 |
|
| 219 |
+
# Check LLM providers
|
| 220 |
+
llm_providers = {
|
| 221 |
+
"groq": {
|
| 222 |
+
"enabled": settings.is_groq_enabled(),
|
| 223 |
+
"api_keys_configured": len(settings.get_groq_api_keys()),
|
| 224 |
+
"chat_model": settings.GROQ_CHAT_MODEL,
|
| 225 |
+
"eval_model": settings.GROQ_EVAL_MODEL
|
| 226 |
+
},
|
| 227 |
+
"huggingface": {
|
| 228 |
+
"enabled": settings.is_hf_enabled(),
|
| 229 |
+
"tokens_configured": len(settings.get_hf_tokens()),
|
| 230 |
+
"chat_model": settings.HF_CHAT_MODEL,
|
| 231 |
+
"eval_model": settings.HF_EVAL_MODEL
|
| 232 |
+
}
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
# Check authentication
|
| 236 |
auth_status = {
|
| 237 |
"jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
|
|
|
|
| 240 |
}
|
| 241 |
|
| 242 |
# Overall health
|
| 243 |
+
is_healthy = (
|
| 244 |
+
mongodb_status == "connected" and
|
| 245 |
+
auth_status["jwt_enabled"] and
|
| 246 |
+
(llm_providers["groq"]["enabled"] or llm_providers["huggingface"]["enabled"])
|
| 247 |
+
)
|
| 248 |
|
| 249 |
return {
|
| 250 |
"status": "healthy" if is_healthy else "degraded",
|
| 251 |
"api": "online",
|
| 252 |
"mongodb": mongodb_status,
|
| 253 |
"authentication": auth_status,
|
| 254 |
+
"llm_providers": llm_providers,
|
| 255 |
"ml_models": ml_models_status,
|
| 256 |
"environment": settings.ENVIRONMENT,
|
| 257 |
"debug_mode": settings.DEBUG
|
| 258 |
}
|
| 259 |
|
|
|
|
| 260 |
# ============================================================================
|
| 261 |
# GLOBAL EXCEPTION HANDLER
|
| 262 |
# ============================================================================
|
|
|
|
| 283 |
}
|
| 284 |
)
|
| 285 |
|
|
|
|
| 286 |
# ============================================================================
|
| 287 |
# MAIN ENTRY POINT (for direct execution)
|
| 288 |
# ============================================================================
|
app/services/chat_service.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
"""
|
| 2 |
Chat Service - Main RAG Pipeline
|
|
|
|
| 3 |
Combines: Policy Network → Retriever → LLM Generator
|
| 4 |
|
| 5 |
This is the core service that orchestrates:
|
| 6 |
1. Policy decision (FETCH vs NO_FETCH)
|
| 7 |
2. Document retrieval (if FETCH)
|
| 8 |
-
3. Response generation (
|
| 9 |
4. Logging to MongoDB
|
| 10 |
|
| 11 |
Adapted from your RAG.py workflow
|
|
@@ -53,8 +54,6 @@ Rate the response as:
|
|
| 53 |
|
| 54 |
Provide your rating and brief explanation."""
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
# ============================================================================
|
| 59 |
# CHAT SERVICE
|
| 60 |
# ============================================================================
|
|
@@ -67,7 +66,7 @@ class ChatService:
|
|
| 67 |
1. User query comes in
|
| 68 |
2. Policy network decides: FETCH or NO_FETCH
|
| 69 |
3. If FETCH: Retrieve documents from FAISS
|
| 70 |
-
4. Generate response using
|
| 71 |
5. Return response + metadata
|
| 72 |
"""
|
| 73 |
|
|
@@ -97,18 +96,18 @@ class ChatService:
|
|
| 97 |
|
| 98 |
Returns:
|
| 99 |
dict: Complete response with metadata
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
"""
|
| 113 |
start_time = time.time()
|
| 114 |
|
|
@@ -196,13 +195,13 @@ class ChatService:
|
|
| 196 |
print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")
|
| 197 |
|
| 198 |
# ====================================================================
|
| 199 |
-
# STEP 3: GENERATE RESPONSE (
|
| 200 |
# ====================================================================
|
| 201 |
print(f"\n💬 Generating response...")
|
| 202 |
generation_start = time.time()
|
| 203 |
|
| 204 |
try:
|
| 205 |
-
# Generate response using LLM manager (
|
| 206 |
response = await llm_manager.generate_chat_response(
|
| 207 |
query=query,
|
| 208 |
context=context,
|
|
@@ -288,8 +287,8 @@ class ChatService:
|
|
| 288 |
# Check LLM manager
|
| 289 |
try:
|
| 290 |
from app.core.llm_manager import llm_manager as llm
|
| 291 |
-
health['components']['gemini'] = 'enabled' if llm.gemini else 'disabled'
|
| 292 |
health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
|
|
|
|
| 293 |
except Exception as e:
|
| 294 |
health['components']['llm_manager'] = f'error: {str(e)}'
|
| 295 |
|
|
@@ -301,19 +300,17 @@ class ChatService:
|
|
| 301 |
|
| 302 |
return health
|
| 303 |
|
| 304 |
-
|
| 305 |
# ============================================================================
|
| 306 |
# GLOBAL CHAT SERVICE INSTANCE
|
| 307 |
# ============================================================================
|
| 308 |
-
chat_service = ChatService()
|
| 309 |
|
|
|
|
| 310 |
|
| 311 |
# ============================================================================
|
| 312 |
# USAGE EXAMPLE (for reference)
|
| 313 |
# ============================================================================
|
| 314 |
"""
|
| 315 |
# In your API endpoint (chat.py):
|
| 316 |
-
|
| 317 |
from app.services.chat_service import chat_service
|
| 318 |
|
| 319 |
# Process user query
|
|
@@ -335,4 +332,4 @@ result = await chat_service.process_query(
|
|
| 335 |
|
| 336 |
# Get service health
|
| 337 |
health = await chat_service.health_check()
|
| 338 |
-
"""
|
|
|
|
| 1 |
"""
|
| 2 |
Chat Service - Main RAG Pipeline
|
| 3 |
+
|
| 4 |
Combines: Policy Network → Retriever → LLM Generator
|
| 5 |
|
| 6 |
This is the core service that orchestrates:
|
| 7 |
1. Policy decision (FETCH vs NO_FETCH)
|
| 8 |
2. Document retrieval (if FETCH)
|
| 9 |
+
3. Response generation (Groq/HuggingFace with Llama 3)
|
| 10 |
4. Logging to MongoDB
|
| 11 |
|
| 12 |
Adapted from your RAG.py workflow
|
|
|
|
| 54 |
|
| 55 |
Provide your rating and brief explanation."""
|
| 56 |
|
|
|
|
|
|
|
| 57 |
# ============================================================================
|
| 58 |
# CHAT SERVICE
|
| 59 |
# ============================================================================
|
|
|
|
| 66 |
1. User query comes in
|
| 67 |
2. Policy network decides: FETCH or NO_FETCH
|
| 68 |
3. If FETCH: Retrieve documents from FAISS
|
| 69 |
+
4. Generate response using Groq/HuggingFace (with or without context)
|
| 70 |
5. Return response + metadata
|
| 71 |
"""
|
| 72 |
|
|
|
|
| 96 |
|
| 97 |
Returns:
|
| 98 |
dict: Complete response with metadata
|
| 99 |
+
{
|
| 100 |
+
'response': str, # Generated response
|
| 101 |
+
'policy_action': str, # FETCH or NO_FETCH
|
| 102 |
+
'policy_confidence': float, # Confidence score
|
| 103 |
+
'should_retrieve': bool, # Whether retrieval was done
|
| 104 |
+
'documents_retrieved': int, # Number of docs retrieved
|
| 105 |
+
'top_doc_score': float or None, # Best similarity score
|
| 106 |
+
'retrieval_time_ms': float, # Time spent on retrieval
|
| 107 |
+
'generation_time_ms': float, # Time spent on generation
|
| 108 |
+
'total_time_ms': float, # Total processing time
|
| 109 |
+
'timestamp': str # ISO timestamp
|
| 110 |
+
}
|
| 111 |
"""
|
| 112 |
start_time = time.time()
|
| 113 |
|
|
|
|
| 195 |
print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")
|
| 196 |
|
| 197 |
# ====================================================================
|
| 198 |
+
# STEP 3: GENERATE RESPONSE (Groq/HuggingFace with fallback)
|
| 199 |
# ====================================================================
|
| 200 |
print(f"\n💬 Generating response...")
|
| 201 |
generation_start = time.time()
|
| 202 |
|
| 203 |
try:
|
| 204 |
+
# Generate response using LLM manager (Groq → HuggingFace fallback)
|
| 205 |
response = await llm_manager.generate_chat_response(
|
| 206 |
query=query,
|
| 207 |
context=context,
|
|
|
|
| 287 |
# Check LLM manager
|
| 288 |
try:
|
| 289 |
from app.core.llm_manager import llm_manager as llm
|
|
|
|
| 290 |
health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
|
| 291 |
+
health['components']['huggingface'] = 'enabled' if llm.huggingface else 'disabled'
|
| 292 |
except Exception as e:
|
| 293 |
health['components']['llm_manager'] = f'error: {str(e)}'
|
| 294 |
|
|
|
|
| 300 |
|
| 301 |
return health
|
| 302 |
|
|
|
|
| 303 |
# ============================================================================
|
| 304 |
# GLOBAL CHAT SERVICE INSTANCE
|
| 305 |
# ============================================================================
|
|
|
|
| 306 |
|
| 307 |
+
chat_service = ChatService()
|
| 308 |
|
| 309 |
# ============================================================================
|
| 310 |
# USAGE EXAMPLE (for reference)
|
| 311 |
# ============================================================================
|
| 312 |
"""
|
| 313 |
# In your API endpoint (chat.py):
|
|
|
|
| 314 |
from app.services.chat_service import chat_service
|
| 315 |
|
| 316 |
# Process user query
|
|
|
|
| 332 |
|
| 333 |
# Get service health
|
| 334 |
health = await chat_service.health_check()
|
| 335 |
+
"""
|
backups/backup_chat_service.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# """
|
| 2 |
+
# Chat Service - Main RAG Pipeline
|
| 3 |
+
# Combines: Policy Network → Retriever → LLM Generator
|
| 4 |
+
|
| 5 |
+
# This is the core service that orchestrates:
|
| 6 |
+
# 1. Policy decision (FETCH vs NO_FETCH)
|
| 7 |
+
# 2. Document retrieval (if FETCH)
|
| 8 |
+
# 3. Response generation (Gemini)
|
| 9 |
+
# 4. Logging to MongoDB
|
| 10 |
+
|
| 11 |
+
# Adapted from your RAG.py workflow
|
| 12 |
+
# """
|
| 13 |
+
|
| 14 |
+
# import time
|
| 15 |
+
# from datetime import datetime
|
| 16 |
+
# from typing import List, Dict, Any, Optional
|
| 17 |
+
|
| 18 |
+
# from app.config import settings
|
| 19 |
+
# from app.ml.policy_network import predict_policy_action
|
| 20 |
+
# from app.ml.retriever import retrieve_documents, format_context
|
| 21 |
+
# from app.core.llm_manager import llm_manager
|
| 22 |
+
|
| 23 |
+
# # ============================================================================
|
| 24 |
+
# # SYSTEM PROMPTS
|
| 25 |
+
# # ============================================================================
|
| 26 |
+
|
| 27 |
+
# BANKING_SYSTEM_PROMPT = """You are an expert banking assistant specialized in Indian financial regulations and banking practices. You have access to a comprehensive knowledge base of banking policies, procedures, and RBI regulations.
|
| 28 |
+
|
| 29 |
+
# Instructions:
|
| 30 |
+
# - Answer the user query accurately using the provided context when available
|
| 31 |
+
# - If context is insufficient or query is outside banking domain, still respond helpfully but mention your banking specialization
|
| 32 |
+
# - If no banking context is available, provide a general helpful response but acknowledge your expertise is in banking
|
| 33 |
+
# - Never refuse to answer - always be helpful while being transparent about your specialization
|
| 34 |
+
# - Cite relevant policy numbers or document references when available in context
|
| 35 |
+
# - Never fabricate specific policies, rates, or eligibility criteria
|
| 36 |
+
# - If uncertain about current rates or policies, acknowledge the limitation
|
| 37 |
+
# - Maintain a helpful and professional tone
|
| 38 |
+
# - Keep responses concise, clear, and actionable
|
| 39 |
+
# """
|
| 40 |
+
|
| 41 |
+
# EVALUATION_PROMPT = """You are evaluating a banking assistant's response for quality and accuracy.
|
| 42 |
+
|
| 43 |
+
# Criteria:
|
| 44 |
+
# 1. Accuracy: Is the response factually correct?
|
| 45 |
+
# 2. Relevance: Does it address the user's question?
|
| 46 |
+
# 3. Completeness: Are all aspects of the question covered?
|
| 47 |
+
# 4. Clarity: Is the response easy to understand?
|
| 48 |
+
# 5. Context Usage: Does it properly use the retrieved context?
|
| 49 |
+
|
| 50 |
+
# Rate the response as:
|
| 51 |
+
# - "Good": Accurate, relevant, complete, and clear
|
| 52 |
+
# - "Bad": Inaccurate, irrelevant, incomplete, or unclear
|
| 53 |
+
|
| 54 |
+
# Provide your rating and brief explanation."""
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# # ============================================================================
|
| 59 |
+
# # CHAT SERVICE
|
| 60 |
+
# # ============================================================================
|
| 61 |
+
|
| 62 |
+
# class ChatService:
|
| 63 |
+
# """
|
| 64 |
+
# Main chat service that handles the complete RAG pipeline.
|
| 65 |
+
|
| 66 |
+
# Pipeline:
|
| 67 |
+
# 1. User query comes in
|
| 68 |
+
# 2. Policy network decides: FETCH or NO_FETCH
|
| 69 |
+
# 3. If FETCH: Retrieve documents from FAISS
|
| 70 |
+
# 4. Generate response using Gemini (with or without context)
|
| 71 |
+
# 5. Return response + metadata
|
| 72 |
+
# """
|
| 73 |
+
|
| 74 |
+
# def __init__(self):
|
| 75 |
+
# """Initialize chat service"""
|
| 76 |
+
# print("🤖 ChatService initialized")
|
| 77 |
+
|
| 78 |
+
# async def process_query(
|
| 79 |
+
# self,
|
| 80 |
+
# query: str,
|
| 81 |
+
# conversation_history: List[Dict[str, str]] = None,
|
| 82 |
+
# user_id: Optional[str] = None
|
| 83 |
+
# ) -> Dict[str, Any]:
|
| 84 |
+
# """
|
| 85 |
+
# Process a user query through the complete RAG pipeline.
|
| 86 |
+
|
| 87 |
+
# This is the MAIN function that combines everything:
|
| 88 |
+
# - Policy decision
|
| 89 |
+
# - Retrieval
|
| 90 |
+
# - Generation
|
| 91 |
+
|
| 92 |
+
# Args:
|
| 93 |
+
# query: User query text
|
| 94 |
+
# conversation_history: Previous conversation turns
|
| 95 |
+
# Format: [{'role': 'user'/'assistant', 'content': '...', 'metadata': {...}}]
|
| 96 |
+
# user_id: Optional user ID for logging
|
| 97 |
+
|
| 98 |
+
# Returns:
|
| 99 |
+
# dict: Complete response with metadata
|
| 100 |
+
# {
|
| 101 |
+
# 'response': str, # Generated response
|
| 102 |
+
# 'policy_action': str, # FETCH or NO_FETCH
|
| 103 |
+
# 'policy_confidence': float, # Confidence score
|
| 104 |
+
# 'should_retrieve': bool, # Whether retrieval was done
|
| 105 |
+
# 'documents_retrieved': int, # Number of docs retrieved
|
| 106 |
+
# 'top_doc_score': float or None, # Best similarity score
|
| 107 |
+
# 'retrieval_time_ms': float, # Time spent on retrieval
|
| 108 |
+
# 'generation_time_ms': float, # Time spent on generation
|
| 109 |
+
# 'total_time_ms': float, # Total processing time
|
| 110 |
+
# 'timestamp': str # ISO timestamp
|
| 111 |
+
# }
|
| 112 |
+
# """
|
| 113 |
+
# start_time = time.time()
|
| 114 |
+
|
| 115 |
+
# # Initialize history if None
|
| 116 |
+
# if conversation_history is None:
|
| 117 |
+
# conversation_history = []
|
| 118 |
+
|
| 119 |
+
# # Validate query
|
| 120 |
+
# if not query or query.strip() == "":
|
| 121 |
+
# return {
|
| 122 |
+
# 'response': "I didn't receive a valid question. Could you please try again?",
|
| 123 |
+
# 'policy_action': 'NO_FETCH',
|
| 124 |
+
# 'policy_confidence': 1.0,
|
| 125 |
+
# 'should_retrieve': False,
|
| 126 |
+
# 'documents_retrieved': 0,
|
| 127 |
+
# 'top_doc_score': None,
|
| 128 |
+
# 'retrieval_time_ms': 0,
|
| 129 |
+
# 'generation_time_ms': 0,
|
| 130 |
+
# 'total_time_ms': 0,
|
| 131 |
+
# 'timestamp': datetime.now().isoformat()
|
| 132 |
+
# }
|
| 133 |
+
|
| 134 |
+
# # ====================================================================
|
| 135 |
+
# # STEP 1: POLICY DECISION (Local BERT model)
|
| 136 |
+
# # ====================================================================
|
| 137 |
+
# print(f"\n{'='*80}")
|
| 138 |
+
# print(f"🔍 Processing Query: {query[:50]}...")
|
| 139 |
+
# print(f"{'='*80}")
|
| 140 |
+
|
| 141 |
+
# policy_start = time.time()
|
| 142 |
+
|
| 143 |
+
# # Predict action using policy network
|
| 144 |
+
# policy_result = predict_policy_action(
|
| 145 |
+
# query=query,
|
| 146 |
+
# history=conversation_history,
|
| 147 |
+
# return_probs=True
|
| 148 |
+
# )
|
| 149 |
+
|
| 150 |
+
# policy_time = (time.time() - policy_start) * 1000
|
| 151 |
+
|
| 152 |
+
# print(f"\n📊 Policy Decision:")
|
| 153 |
+
# print(f" Action: {policy_result['action']}")
|
| 154 |
+
# print(f" Confidence: {policy_result['confidence']:.3f}")
|
| 155 |
+
# print(f" Should Retrieve: {policy_result['should_retrieve']}")
|
| 156 |
+
# print(f" Time: {policy_time:.2f}ms")
|
| 157 |
+
|
| 158 |
+
# # ====================================================================
|
| 159 |
+
# # STEP 2: RETRIEVAL (if FETCH or low confidence NO_FETCH)
|
| 160 |
+
# # ====================================================================
|
| 161 |
+
# retrieved_docs = []
|
| 162 |
+
# context = ""
|
| 163 |
+
# retrieval_time = 0
|
| 164 |
+
|
| 165 |
+
# if policy_result['should_retrieve']:
|
| 166 |
+
# print(f"\n🔎 Retrieving documents...")
|
| 167 |
+
# retrieval_start = time.time()
|
| 168 |
+
|
| 169 |
+
# try:
|
| 170 |
+
# # Retrieve documents using custom retriever + FAISS
|
| 171 |
+
# retrieved_docs = retrieve_documents(
|
| 172 |
+
# query=query,
|
| 173 |
+
# top_k=settings.TOP_K,
|
| 174 |
+
# min_similarity=settings.SIMILARITY_THRESHOLD
|
| 175 |
+
# )
|
| 176 |
+
|
| 177 |
+
# retrieval_time = (time.time() - retrieval_start) * 1000
|
| 178 |
+
|
| 179 |
+
# if retrieved_docs:
|
| 180 |
+
# print(f" ✅ Retrieved {len(retrieved_docs)} documents")
|
| 181 |
+
# print(f" Top score: {retrieved_docs[0]['score']:.3f}")
|
| 182 |
+
|
| 183 |
+
# # Format context for LLM
|
| 184 |
+
# context = format_context(
|
| 185 |
+
# retrieved_docs,
|
| 186 |
+
# max_context_length=settings.MAX_CONTEXT_LENGTH
|
| 187 |
+
# )
|
| 188 |
+
# else:
|
| 189 |
+
# print(f" ⚠️ No documents above threshold")
|
| 190 |
+
|
| 191 |
+
# except Exception as e:
|
| 192 |
+
# print(f" ❌ Retrieval error: {e}")
|
| 193 |
+
# # Continue without retrieval
|
| 194 |
+
|
| 195 |
+
# else:
|
| 196 |
+
# print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")
|
| 197 |
+
|
| 198 |
+
# # ====================================================================
|
| 199 |
+
# # STEP 3: GENERATE RESPONSE (Gemini)
|
| 200 |
+
# # ====================================================================
|
| 201 |
+
# print(f"\n💬 Generating response...")
|
| 202 |
+
# generation_start = time.time()
|
| 203 |
+
|
| 204 |
+
# try:
|
| 205 |
+
# # Generate response using LLM manager (Gemini)
|
| 206 |
+
# response = await llm_manager.generate_chat_response(
|
| 207 |
+
# query=query,
|
| 208 |
+
# context=context,
|
| 209 |
+
# history=conversation_history
|
| 210 |
+
# )
|
| 211 |
+
|
| 212 |
+
# generation_time = (time.time() - generation_start) * 1000
|
| 213 |
+
|
| 214 |
+
# print(f" ✅ Response generated")
|
| 215 |
+
# print(f" Length: {len(response)} chars")
|
| 216 |
+
# print(f" Time: {generation_time:.2f}ms")
|
| 217 |
+
|
| 218 |
+
# except Exception as e:
|
| 219 |
+
# print(f" ❌ Generation error: {e}")
|
| 220 |
+
# response = "I apologize, but I encountered an error generating a response. Please try again."
|
| 221 |
+
# generation_time = (time.time() - generation_start) * 1000
|
| 222 |
+
|
| 223 |
+
# # ====================================================================
|
| 224 |
+
# # STEP 4: COMPILE RESULTS
|
| 225 |
+
# # ====================================================================
|
| 226 |
+
# total_time = (time.time() - start_time) * 1000
|
| 227 |
+
|
| 228 |
+
# result = {
|
| 229 |
+
# 'response': response,
|
| 230 |
+
# 'policy_action': policy_result['action'],
|
| 231 |
+
# 'policy_confidence': policy_result['confidence'],
|
| 232 |
+
# 'should_retrieve': policy_result['should_retrieve'],
|
| 233 |
+
# 'documents_retrieved': len(retrieved_docs),
|
| 234 |
+
# 'top_doc_score': retrieved_docs[0]['score'] if retrieved_docs else None,
|
| 235 |
+
# 'retrieval_time_ms': round(retrieval_time, 2),
|
| 236 |
+
# 'generation_time_ms': round(generation_time, 2),
|
| 237 |
+
# 'total_time_ms': round(total_time, 2),
|
| 238 |
+
# 'timestamp': datetime.now().isoformat()
|
| 239 |
+
# }
|
| 240 |
+
|
| 241 |
+
# # Add retrieved docs metadata (for logging, not sent to user)
|
| 242 |
+
# if retrieved_docs:
|
| 243 |
+
# result['retrieved_docs_metadata'] = [
|
| 244 |
+
# {
|
| 245 |
+
# 'faq_id': doc['faq_id'],
|
| 246 |
+
# 'score': doc['score'],
|
| 247 |
+
# 'category': doc['category'],
|
| 248 |
+
# 'rank': doc['rank']
|
| 249 |
+
# }
|
| 250 |
+
# for doc in retrieved_docs
|
| 251 |
+
# ]
|
| 252 |
+
|
| 253 |
+
# print(f"\n{'='*80}")
|
| 254 |
+
# print(f"✅ Query processed successfully")
|
| 255 |
+
# print(f" Total time: {total_time:.2f}ms")
|
| 256 |
+
# print(f"{'='*80}\n")
|
| 257 |
+
|
| 258 |
+
# return result
|
| 259 |
+
|
| 260 |
+
# async def health_check(self) -> Dict[str, Any]:
|
| 261 |
+
# """
|
| 262 |
+
# Check health of all service components.
|
| 263 |
+
|
| 264 |
+
# Returns:
|
| 265 |
+
# dict: Health status
|
| 266 |
+
# """
|
| 267 |
+
# health = {
|
| 268 |
+
# 'service': 'chat_service',
|
| 269 |
+
# 'status': 'healthy',
|
| 270 |
+
# 'components': {}
|
| 271 |
+
# }
|
| 272 |
+
|
| 273 |
+
# # Check policy network
|
| 274 |
+
# try:
|
| 275 |
+
# from app.ml.policy_network import POLICY_MODEL
|
| 276 |
+
# health['components']['policy_network'] = 'loaded' if POLICY_MODEL else 'not_loaded'
|
| 277 |
+
# except Exception as e:
|
| 278 |
+
# health['components']['policy_network'] = f'error: {str(e)}'
|
| 279 |
+
|
| 280 |
+
# # Check retriever
|
| 281 |
+
# try:
|
| 282 |
+
# from app.ml.retriever import RETRIEVER_MODEL, FAISS_INDEX
|
| 283 |
+
# health['components']['retriever'] = 'loaded' if RETRIEVER_MODEL else 'not_loaded'
|
| 284 |
+
# health['components']['faiss_index'] = 'loaded' if FAISS_INDEX else 'not_loaded'
|
| 285 |
+
# except Exception as e:
|
| 286 |
+
# health['components']['retriever'] = f'error: {str(e)}'
|
| 287 |
+
|
| 288 |
+
# # Check LLM manager
|
| 289 |
+
# try:
|
| 290 |
+
# from app.core.llm_manager import llm_manager as llm
|
| 291 |
+
# health['components']['gemini'] = 'enabled' if llm.gemini else 'disabled'
|
| 292 |
+
# health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
|
| 293 |
+
# except Exception as e:
|
| 294 |
+
# health['components']['llm_manager'] = f'error: {str(e)}'
|
| 295 |
+
|
| 296 |
+
# # Overall status
|
| 297 |
+
# failed_components = [k for k, v in health['components'].items() if 'error' in str(v)]
|
| 298 |
+
# if failed_components:
|
| 299 |
+
# health['status'] = 'degraded'
|
| 300 |
+
# health['failed_components'] = failed_components
|
| 301 |
+
|
| 302 |
+
# return health
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
# # ============================================================================
|
| 306 |
+
# # GLOBAL CHAT SERVICE INSTANCE
|
| 307 |
+
# # ============================================================================
|
| 308 |
+
# chat_service = ChatService()
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# # ============================================================================
|
| 312 |
+
# # USAGE EXAMPLE (for reference)
|
| 313 |
+
# # ============================================================================
|
| 314 |
+
# """
|
| 315 |
+
# # In your API endpoint (chat.py):
|
| 316 |
+
|
| 317 |
+
# from app.services.chat_service import chat_service
|
| 318 |
+
|
| 319 |
+
# # Process user query
|
| 320 |
+
# result = await chat_service.process_query(
|
| 321 |
+
# query="What is my account balance?",
|
| 322 |
+
# conversation_history=[
|
| 323 |
+
# {'role': 'user', 'content': 'Hello'},
|
| 324 |
+
# {'role': 'assistant', 'content': 'Hi! How can I help?', 'metadata': {'policy_action': 'NO_FETCH'}}
|
| 325 |
+
# ],
|
| 326 |
+
# user_id="user_123"
|
| 327 |
+
# )
|
| 328 |
+
|
| 329 |
+
# # Result contains:
|
| 330 |
+
# # - response: "Your account balance is $1,234.56"
|
| 331 |
+
# # - policy_action: "FETCH"
|
| 332 |
+
# # - documents_retrieved: 3
|
| 333 |
+
# # - total_time_ms: 450.23
|
| 334 |
+
# # etc.
|
| 335 |
+
|
| 336 |
+
# # Get service health
|
| 337 |
+
# health = await chat_service.health_check()
|
| 338 |
+
# """
|
| 339 |
+
|
| 340 |
+
|
backups/backup_config.py
ADDED
|
@@ -0,0 +1,640 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NOTE: important change — the LLM_MAX_TOKENS default was raised from 512 to 1024 (see the LLM PARAMETERS section below).
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Application Configuration
|
| 6 |
+
Settings for Banking RAG Chatbot with JWT Authentication
|
| 7 |
+
Includes all settings needed by existing llm_manager.py
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
from typing import List
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Settings:
    """Application settings loaded from environment variables.

    Every attribute is resolved once, at class-definition time, via
    ``os.getenv`` (the enclosing module calls ``load_dotenv()`` first, so a
    local ``.env`` file is honoured). Missing variables fall back to the
    defaults shown inline, so the app can boot in development with no
    environment configured at all.
    """

    # ========================================================================
    # ENVIRONMENT
    # ========================================================================
    ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
    # DEBUG arrives as a string; only the literal "true" (case-insensitive)
    # enables debug mode.
    DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"

    # ========================================================================
    # MONGODB
    # ========================================================================
    MONGODB_URI: str = os.getenv("MONGODB_URI", "")
    DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")

    # ========================================================================
    # JWT AUTHENTICATION
    # ========================================================================
    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
    ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
    # 1440 minutes = 24 hours.
    ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))

    # ========================================================================
    # CORS (for frontend)
    # ========================================================================
    # Either "*" or a comma-separated list; parsed by get_allowed_origins().
    ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")

    # ========================================================================
    # GOOGLE GEMINI API
    # ========================================================================
    GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
    GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
    GEMINI_REQUESTS_PER_MINUTE: int = int(os.getenv("GEMINI_REQUESTS_PER_MINUTE", "60"))

    # ========================================================================
    # GROQ API (Optional - for evaluation)
    # ========================================================================
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
    GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))

    # ========================================================================
    # HUGGING FACE (Optional - for model downloads)
    # ========================================================================
    HF_TOKEN: str = os.getenv("HF_TOKEN", "")

    # ========================================================================
    # MODEL PATHS (for RL Policy Network and RAG models)
    # ========================================================================
    POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "app/models/best_policy_model.pth")
    RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "app/models/best_retriever_model.pth")
    FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "app/models/faiss_index.pkl")
    KB_PATH: str = os.getenv("KB_PATH", "app/data/final_knowledge_base.jsonl")

    # ========================================================================
    # DEVICE SETTINGS (for PyTorch/TensorFlow models)
    # ========================================================================
    DEVICE: str = os.getenv("DEVICE", "cpu")

    # ========================================================================
    # LLM PARAMETERS
    # ========================================================================
    LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
    # NOTE: default deliberately raised from 512 to 1024 (see file header
    # note) — do not lower it without re-checking for truncated responses.
    LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024"))

    # ========================================================================
    # RAG PARAMETERS
    # ========================================================================
    TOP_K: int = int(os.getenv("TOP_K", "5"))
    SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
    MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))

    # ========================================================================
    # POLICY NETWORK PARAMETERS
    # ========================================================================
    POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
    CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))

    # ========================================================================
    # HELPER METHODS (Required by llm_manager.py)
    # ========================================================================

    def is_gemini_enabled(self) -> bool:
        """Return True if a Google Gemini API key is configured."""
        # bool("") is False, so plain truthiness covers both the "unset"
        # and "empty string" cases (the old `x and x != ""` was redundant).
        return bool(self.GOOGLE_API_KEY)

    def is_groq_enabled(self) -> bool:
        """Return True if a Groq API key is configured."""
        return bool(self.GROQ_API_KEY)

    def is_hf_enabled(self) -> bool:
        """Return True if a HuggingFace token is configured."""
        return bool(self.HF_TOKEN)

    def get_allowed_origins(self) -> List[str]:
        """Parse ALLOWED_ORIGINS (comma-separated) into a list for CORS.

        Returns:
            List[str]: ``["*"]`` for the wildcard setting, otherwise the
            individual origins with surrounding whitespace stripped.
        """
        if self.ALLOWED_ORIGINS == "*":
            return ["*"]
        return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]

    def get_llm_for_task(self, task: str = "qa") -> str:
        """Get LLM name for a specific task.

        Args:
            task: Task type ('chat', 'evaluation', etc.)

        Returns:
            str: LLM name ('gemini' or 'groq')
        """
        # Groq (when configured) is reserved for evaluation; Gemini handles
        # everything else, including the default 'qa' and 'chat' paths.
        if task == "evaluation":
            return "groq" if self.is_groq_enabled() else "gemini"
        return "gemini"  # Default to Gemini for all tasks
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# ============================================================================
|
| 136 |
+
# CREATE GLOBAL SETTINGS INSTANCE
|
| 137 |
+
# ============================================================================
|
| 138 |
+
# Module-level singleton; other modules are expected to import this instance
# rather than constructing Settings themselves.
settings = Settings()


# ============================================================================
# PRINT CONFIGURATION ON LOAD
# ============================================================================
# Startup banner emitted once at import time so deploy logs show which
# integrations are configured. Only presence/absence of each credential is
# printed — never the secret values themselves.
print("=" * 80)
print("✅ Configuration Loaded")
print("=" * 80)
print(f"Environment: {settings.ENVIRONMENT}")
print(f"Debug Mode: {settings.DEBUG}")
print(f"Database: {settings.DATABASE_NAME}")
print(f"Device: {settings.DEVICE}")
print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
print()
print("🔑 API Keys:")
print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
# Warns loudly when the JWT secret is still the shipped placeholder.
print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
print()
print("🤖 Model Paths:")
print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
print(f" Knowledge Base: {settings.KB_PATH}")
print("=" * 80)
|
| 166 |
+
# ============================================================================
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# """
|
| 185 |
+
# Application Configuration
|
| 186 |
+
# Settings for Banking RAG Chatbot with JWT Authentication
|
| 187 |
+
# Includes all settings needed by existing llm_manager.py
|
| 188 |
+
# """
|
| 189 |
+
|
| 190 |
+
# import os
|
| 191 |
+
# from typing import List
|
| 192 |
+
# from dotenv import load_dotenv
|
| 193 |
+
|
| 194 |
+
# load_dotenv()
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
# class Settings:
|
| 198 |
+
# """Application settings loaded from environment variables"""
|
| 199 |
+
|
| 200 |
+
# # ========================================================================
|
| 201 |
+
# # ENVIRONMENT
|
| 202 |
+
# # ========================================================================
|
| 203 |
+
# ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
|
| 204 |
+
# DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 205 |
+
|
| 206 |
+
# # ========================================================================
|
| 207 |
+
# # MONGODB
|
| 208 |
+
# # ========================================================================
|
| 209 |
+
# MONGODB_URI: str = os.getenv("MONGODB_URI", "")
|
| 210 |
+
# DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
|
| 211 |
+
|
| 212 |
+
# # ========================================================================
|
| 213 |
+
# # JWT AUTHENTICATION
|
| 214 |
+
# # ========================================================================
|
| 215 |
+
# SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
|
| 216 |
+
# ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
|
| 217 |
+
# ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
|
| 218 |
+
|
| 219 |
+
# # ========================================================================
|
| 220 |
+
# # CORS (for frontend)
|
| 221 |
+
# # ========================================================================
|
| 222 |
+
# ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 223 |
+
|
| 224 |
+
# # ========================================================================
|
| 225 |
+
# # GOOGLE GEMINI API
|
| 226 |
+
# # ========================================================================
|
| 227 |
+
# GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 228 |
+
# GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 229 |
+
|
| 230 |
+
# # ========================================================================
|
| 231 |
+
# # GROQ API (Optional - for your llm_manager)
|
| 232 |
+
# # ========================================================================
|
| 233 |
+
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 234 |
+
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 235 |
+
|
| 236 |
+
# # ========================================================================
|
| 237 |
+
# # HUGGING FACE (Optional - for model downloads)
|
| 238 |
+
# # ========================================================================
|
| 239 |
+
# HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 240 |
+
|
| 241 |
+
# # ========================================================================
|
| 242 |
+
# # MODEL PATHS (for RL Policy Network and RAG models)
|
| 243 |
+
# # ========================================================================
|
| 244 |
+
# POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
|
| 245 |
+
# RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
|
| 246 |
+
# FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
|
| 247 |
+
# KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
|
| 248 |
+
|
| 249 |
+
# # ========================================================================
|
| 250 |
+
# # DEVICE SETTINGS (for PyTorch/TensorFlow models)
|
| 251 |
+
# # ========================================================================
|
| 252 |
+
# DEVICE: str = os.getenv("DEVICE", "cpu")
|
| 253 |
+
|
| 254 |
+
# # ========================================================================
|
| 255 |
+
# # LLM PARAMETERS
|
| 256 |
+
# # ========================================================================
|
| 257 |
+
# LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 258 |
+
# LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
|
| 259 |
+
|
| 260 |
+
# # ========================================================================
|
| 261 |
+
# # RAG PARAMETERS
|
| 262 |
+
# # ========================================================================
|
| 263 |
+
# TOP_K: int = int(os.getenv("TOP_K", "5"))
|
| 264 |
+
# SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
|
| 265 |
+
# MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
|
| 266 |
+
|
| 267 |
+
# # ========================================================================
|
| 268 |
+
# # POLICY NETWORK PARAMETERS
|
| 269 |
+
# # ========================================================================
|
| 270 |
+
# POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
|
| 271 |
+
# CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
# # ========================================================================
|
| 275 |
+
# # HELPER METHODS (Required by llm_manager.py)
|
| 276 |
+
# # ========================================================================
|
| 277 |
+
|
| 278 |
+
# def is_gemini_enabled(self) -> bool:
|
| 279 |
+
# """Check if Google Gemini API is configured"""
|
| 280 |
+
# return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 281 |
+
|
| 282 |
+
# def is_groq_enabled(self) -> bool:
|
| 283 |
+
# """Check if Groq API is configured"""
|
| 284 |
+
# return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 285 |
+
|
| 286 |
+
# def is_hf_enabled(self) -> bool:
|
| 287 |
+
# """Check if HuggingFace token is configured"""
|
| 288 |
+
# return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 289 |
+
|
| 290 |
+
# def get_allowed_origins(self) -> List[str]:
|
| 291 |
+
# """Parse allowed origins from comma-separated string"""
|
| 292 |
+
# if self.ALLOWED_ORIGINS == "*":
|
| 293 |
+
# return ["*"]
|
| 294 |
+
# return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 295 |
+
|
| 296 |
+
# # def get_llm_for_task(self, task: str = "qa"):
|
| 297 |
+
# # """
|
| 298 |
+
# # Get LLM configuration for a specific task.
|
| 299 |
+
# # Returns a dict with model settings.
|
| 300 |
+
|
| 301 |
+
# # Args:
|
| 302 |
+
# # task: Task type ('qa', 'retrieval', 'summary', etc.)
|
| 303 |
+
|
| 304 |
+
# # Returns:
|
| 305 |
+
# # dict: LLM configuration
|
| 306 |
+
# # """
|
| 307 |
+
# # return {
|
| 308 |
+
# # 'api_key': self.GOOGLE_API_KEY,
|
| 309 |
+
# # 'model': self.GEMINI_MODEL,
|
| 310 |
+
# # 'temperature': self.LLM_TEMPERATURE,
|
| 311 |
+
# # 'max_tokens': self.LLM_MAX_TOKENS,
|
| 312 |
+
# # 'task': task
|
| 313 |
+
# # }
|
| 314 |
+
# def get_llm_for_task(self, task: str = "qa") -> str:
|
| 315 |
+
# """
|
| 316 |
+
# Get LLM name for a specific task.
|
| 317 |
+
|
| 318 |
+
# Args:
|
| 319 |
+
# task: Task type ('chat', 'evaluation', etc.)
|
| 320 |
+
|
| 321 |
+
# Returns:
|
| 322 |
+
# str: LLM name ('gemini' or 'groq')
|
| 323 |
+
# """
|
| 324 |
+
# # Use Gemini for chat, Groq for evaluation
|
| 325 |
+
# if task == "evaluation":
|
| 326 |
+
# return "groq" if self.is_groq_enabled() else "gemini"
|
| 327 |
+
# else:
|
| 328 |
+
# return "gemini" # Default to Gemini for all other tasks
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# # ============================================================================
|
| 334 |
+
# # CREATE GLOBAL SETTINGS INSTANCE
|
| 335 |
+
# # ============================================================================
|
| 336 |
+
# settings = Settings()
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
# # ============================================================================
|
| 340 |
+
# # PRINT CONFIGURATION ON LOAD
|
| 341 |
+
# # ============================================================================
|
| 342 |
+
# print("=" * 80)
|
| 343 |
+
# print("✅ Configuration Loaded")
|
| 344 |
+
# print("=" * 80)
|
| 345 |
+
# print(f"Environment: {settings.ENVIRONMENT}")
|
| 346 |
+
# print(f"Debug Mode: {settings.DEBUG}")
|
| 347 |
+
# print(f"Database: {settings.DATABASE_NAME}")
|
| 348 |
+
# print(f"Device: {settings.DEVICE}")
|
| 349 |
+
# print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 350 |
+
# print()
|
| 351 |
+
# print("🔑 API Keys:")
|
| 352 |
+
# print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 353 |
+
# print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 354 |
+
# print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 355 |
+
# print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 356 |
+
# print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 357 |
+
# print()
|
| 358 |
+
# print("🤖 Model Paths:")
|
| 359 |
+
# print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
|
| 360 |
+
# print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
|
| 361 |
+
# print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
|
| 362 |
+
# print(f" Knowledge Base: {settings.KB_PATH}")
|
| 363 |
+
# print("=" * 80)
|
| 364 |
+
# # # ============================================================================
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
# # """
|
| 387 |
+
# # Application Configuration
|
| 388 |
+
# # Settings for Banking RAG Chatbot with JWT Authentication
|
| 389 |
+
# # Includes all settings needed by existing llm_manager.py
|
| 390 |
+
# # """
|
| 391 |
+
|
| 392 |
+
# # import os
|
| 393 |
+
# # from typing import List
|
| 394 |
+
# # from dotenv import load_dotenv
|
| 395 |
+
|
| 396 |
+
# # load_dotenv()
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
# # class Settings:
|
| 400 |
+
# # """Application settings loaded from environment variables"""
|
| 401 |
+
|
| 402 |
+
# # # ========================================================================
|
| 403 |
+
# # # ENVIRONMENT
|
| 404 |
+
# # # ========================================================================
|
| 405 |
+
# # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
|
| 406 |
+
# # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 407 |
+
|
| 408 |
+
# # # ========================================================================
|
| 409 |
+
# # # MONGODB
|
| 410 |
+
# # # ========================================================================
|
| 411 |
+
# # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
|
| 412 |
+
# # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
|
| 413 |
+
|
| 414 |
+
# # # ========================================================================
|
| 415 |
+
# # # JWT AUTHENTICATION
|
| 416 |
+
# # # ========================================================================
|
| 417 |
+
# # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
|
| 418 |
+
# # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
|
| 419 |
+
# # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
|
| 420 |
+
|
| 421 |
+
# # # ========================================================================
|
| 422 |
+
# # # CORS (for frontend)
|
| 423 |
+
# # # ========================================================================
|
| 424 |
+
# # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 425 |
+
|
| 426 |
+
# # # ========================================================================
|
| 427 |
+
# # # GOOGLE GEMINI API
|
| 428 |
+
# # # ========================================================================
|
| 429 |
+
# # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 430 |
+
# # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 431 |
+
|
| 432 |
+
# # # ========================================================================
|
| 433 |
+
# # # GROQ API (Optional - for your llm_manager)
|
| 434 |
+
# # # ========================================================================
|
| 435 |
+
# # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 436 |
+
# # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 437 |
+
|
| 438 |
+
# # # ========================================================================
|
| 439 |
+
# # # HUGGING FACE (Optional - for model downloads)
|
| 440 |
+
# # # ========================================================================
|
| 441 |
+
# # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 442 |
+
|
| 443 |
+
# # # ========================================================================
|
| 444 |
+
# # # HELPER METHODS (Required by llm_manager.py)
|
| 445 |
+
# # # ========================================================================
|
| 446 |
+
|
| 447 |
+
# # def is_gemini_enabled(self) -> bool:
|
| 448 |
+
# # """Check if Google Gemini API is configured"""
|
| 449 |
+
# # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 450 |
+
|
| 451 |
+
# # def is_groq_enabled(self) -> bool:
|
| 452 |
+
# # """Check if Groq API is configured"""
|
| 453 |
+
# # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 454 |
+
|
| 455 |
+
# # def is_hf_enabled(self) -> bool:
|
| 456 |
+
# # """Check if HuggingFace token is configured"""
|
| 457 |
+
# # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 458 |
+
|
| 459 |
+
# # def get_allowed_origins(self) -> List[str]:
|
| 460 |
+
# # """Parse allowed origins from comma-separated string"""
|
| 461 |
+
# # if self.ALLOWED_ORIGINS == "*":
|
| 462 |
+
# # return ["*"]
|
| 463 |
+
# # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
# # # ============================================================================
|
| 467 |
+
# # # CREATE GLOBAL SETTINGS INSTANCE
|
| 468 |
+
# # # ============================================================================
|
| 469 |
+
# # settings = Settings()
|
| 470 |
+
|
| 471 |
+
# # # ============================================================================
|
| 472 |
+
# # # PRINT CONFIGURATION ON LOAD
|
| 473 |
+
# # # ============================================================================
|
| 474 |
+
# # print("=" * 80)
|
| 475 |
+
# # print("✅ Configuration Loaded")
|
| 476 |
+
# # print("=" * 80)
|
| 477 |
+
# # print(f"Environment: {settings.ENVIRONMENT}")
|
| 478 |
+
# # print(f"Debug Mode: {settings.DEBUG}")
|
| 479 |
+
# # print(f"Database: {settings.DATABASE_NAME}")
|
| 480 |
+
# # # print(f"JWT Algorithm: {settings.ALGORITHM}")
|
| 481 |
+
# # # print(f"Token Expiry: {settings.ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
|
| 482 |
+
# # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 483 |
+
# # print()
|
| 484 |
+
# # print("🔑 API Keys:")
|
| 485 |
+
# # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 486 |
+
# # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 487 |
+
# # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 488 |
+
# # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 489 |
+
# # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 490 |
+
# # print("=" * 80)
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
|
| 516 |
+
# """
|
| 517 |
+
# Application Configuration
|
| 518 |
+
# Settings for Banking RAG Chatbot with JWT Authentication
|
| 519 |
+
# Includes all settings needed by existing llm_manager.py
|
| 520 |
+
# """
|
| 521 |
+
|
| 522 |
+
# import os
|
| 523 |
+
# from typing import List
|
| 524 |
+
# from dotenv import load_dotenv
|
| 525 |
+
|
| 526 |
+
# load_dotenv()
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
# class Settings:
|
| 530 |
+
# """Application settings loaded from environment variables"""
|
| 531 |
+
|
| 532 |
+
# # ========================================================================
|
| 533 |
+
# # ENVIRONMENT
|
| 534 |
+
# # ========================================================================
|
| 535 |
+
# ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
|
| 536 |
+
# DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 537 |
+
|
| 538 |
+
# # ========================================================================
|
| 539 |
+
# # MONGODB
|
| 540 |
+
# # ========================================================================
|
| 541 |
+
# MONGODB_URI: str = os.getenv("MONGODB_URI", "")
|
| 542 |
+
# DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
|
| 543 |
+
|
| 544 |
+
# # ========================================================================
|
| 545 |
+
# # JWT AUTHENTICATION
|
| 546 |
+
# # ========================================================================
|
| 547 |
+
# SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
|
| 548 |
+
# ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
|
| 549 |
+
# ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
|
| 550 |
+
|
| 551 |
+
# # ========================================================================
|
| 552 |
+
# # CORS (for frontend)
|
| 553 |
+
# # ========================================================================
|
| 554 |
+
# ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
|
| 555 |
+
|
| 556 |
+
# # ========================================================================
|
| 557 |
+
# # GOOGLE GEMINI API
|
| 558 |
+
# # ========================================================================
|
| 559 |
+
# GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
|
| 560 |
+
# GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
| 561 |
+
|
| 562 |
+
# # ========================================================================
|
| 563 |
+
# # GROQ API (Optional - for your llm_manager)
|
| 564 |
+
# # ========================================================================
|
| 565 |
+
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 566 |
+
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
|
| 567 |
+
|
| 568 |
+
# # ========================================================================
|
| 569 |
+
# # HUGGING FACE (Optional - for model downloads)
|
| 570 |
+
# # ========================================================================
|
| 571 |
+
# HF_TOKEN: str = os.getenv("HF_TOKEN", "")
|
| 572 |
+
|
| 573 |
+
# # ========================================================================
|
| 574 |
+
# # MODEL PATHS (for RL Policy Network and RAG models)
|
| 575 |
+
# # ========================================================================
|
| 576 |
+
# POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
|
| 577 |
+
# RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
|
| 578 |
+
# FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
|
| 579 |
+
# KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
|
| 580 |
+
|
| 581 |
+
# # ========================================================================
|
| 582 |
+
# # LLM PARAMETERS
|
| 583 |
+
# # ========================================================================
|
| 584 |
+
# LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
|
| 585 |
+
# LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
|
| 586 |
+
|
| 587 |
+
# # ========================================================================
|
| 588 |
+
# # RAG PARAMETERS
|
| 589 |
+
# # ========================================================================
|
| 590 |
+
# TOP_K: int = int(os.getenv("TOP_K", "5"))
|
| 591 |
+
# SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
|
| 592 |
+
# MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
|
| 593 |
+
|
| 594 |
+
# # ========================================================================
|
| 595 |
+
# # HELPER METHODS (Required by llm_manager.py)
|
| 596 |
+
# # ========================================================================
|
| 597 |
+
|
| 598 |
+
# def is_gemini_enabled(self) -> bool:
|
| 599 |
+
# """Check if Google Gemini API is configured"""
|
| 600 |
+
# return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
|
| 601 |
+
|
| 602 |
+
# def is_groq_enabled(self) -> bool:
|
| 603 |
+
# """Check if Groq API is configured"""
|
| 604 |
+
# return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
|
| 605 |
+
|
| 606 |
+
# def is_hf_enabled(self) -> bool:
|
| 607 |
+
# """Check if HuggingFace token is configured"""
|
| 608 |
+
# return bool(self.HF_TOKEN and self.HF_TOKEN != "")
|
| 609 |
+
|
| 610 |
+
# def get_allowed_origins(self) -> List[str]:
|
| 611 |
+
# """Parse allowed origins from comma-separated string"""
|
| 612 |
+
# if self.ALLOWED_ORIGINS == "*":
|
| 613 |
+
# return ["*"]
|
| 614 |
+
# return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
# # ============================================================================
|
| 618 |
+
# # CREATE GLOBAL SETTINGS INSTANCE
|
| 619 |
+
# # ============================================================================
|
| 620 |
+
# settings = Settings()
|
| 621 |
+
|
| 622 |
+
|
| 623 |
+
# # ============================================================================
|
| 624 |
+
# # PRINT CONFIGURATION ON LOAD
|
| 625 |
+
# # ============================================================================
|
| 626 |
+
# print("=" * 80)
|
| 627 |
+
# print("✅ Configuration Loaded")
|
| 628 |
+
# print("=" * 80)
|
| 629 |
+
# print(f"Environment: {settings.ENVIRONMENT}")
|
| 630 |
+
# print(f"Debug Mode: {settings.DEBUG}")
|
| 631 |
+
# print(f"Database: {settings.DATABASE_NAME}")
|
| 632 |
+
# print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
|
| 633 |
+
# print()
|
| 634 |
+
# print("🔑 API Keys:")
|
| 635 |
+
# print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
|
| 636 |
+
# print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
|
| 637 |
+
# print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
|
| 638 |
+
# print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
|
| 639 |
+
# print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
|
| 640 |
+
# print("=" * 80)
|
backups/backup_llm_manager.py
ADDED
|
@@ -0,0 +1,430 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# """
|
| 2 |
+
# Multi-LLM Manager for Google Gemini, Groq, and HuggingFace
|
| 3 |
+
# All three APIs co-exist for different purposes (no fallback logic)
|
| 4 |
+
|
| 5 |
+
# Architecture:
|
| 6 |
+
# - Google Gemini (Primary): User-facing chat responses (best quality)
|
| 7 |
+
# - Groq (Secondary): Fast inference for evaluation and specific tasks
|
| 8 |
+
# - HuggingFace: Model downloads and embeddings (always required)
|
| 9 |
+
|
| 10 |
+
# Each API has its designated purpose based on config settings.
|
| 11 |
+
# """
|
| 12 |
+
|
| 13 |
+
# import time
|
| 14 |
+
# import google.generativeai as genai
|
| 15 |
+
# from typing import List, Dict, Optional, Literal
|
| 16 |
+
# from langchain_groq import ChatGroq
|
| 17 |
+
# from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
|
| 18 |
+
|
| 19 |
+
# from app.config import settings
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# # ============================================================================
|
| 23 |
+
# # GOOGLE GEMINI MANAGER
|
| 24 |
+
# # ============================================================================
|
| 25 |
+
|
| 26 |
+
# class GeminiManager:
|
| 27 |
+
# """
|
| 28 |
+
# Google Gemini API Manager (Primary LLM)
|
| 29 |
+
# Handles Google Pro account with gemini-2.0-flash-lite model
|
| 30 |
+
# """
|
| 31 |
+
|
| 32 |
+
# def __init__(self):
|
| 33 |
+
# """Initialize Gemini API with your Google API key"""
|
| 34 |
+
# self.api_key = settings.GOOGLE_API_KEY
|
| 35 |
+
# self.model_name = settings.GEMINI_MODEL
|
| 36 |
+
|
| 37 |
+
# # Configure Gemini
|
| 38 |
+
# genai.configure(api_key=self.api_key)
|
| 39 |
+
|
| 40 |
+
# # Create model instance with safety settings
|
| 41 |
+
# self.model = genai.GenerativeModel(
|
| 42 |
+
# model_name=self.model_name,
|
| 43 |
+
# generation_config={
|
| 44 |
+
# "temperature": settings.LLM_TEMPERATURE,
|
| 45 |
+
# "max_output_tokens": settings.LLM_MAX_TOKENS,
|
| 46 |
+
# }
|
| 47 |
+
# )
|
| 48 |
+
|
| 49 |
+
# # Rate limiting tracking
|
| 50 |
+
# self.requests_this_minute = 0
|
| 51 |
+
# self.tokens_this_minute = 0
|
| 52 |
+
# self.last_reset = time.time()
|
| 53 |
+
|
| 54 |
+
# print(f"✅ Gemini Manager initialized: {self.model_name}")
|
| 55 |
+
|
| 56 |
+
# def _check_rate_limits(self):
|
| 57 |
+
# """
|
| 58 |
+
# Check and reset rate limit counters.
|
| 59 |
+
# Gemini Pro: 60 requests/min, 60,000 tokens/min
|
| 60 |
+
# """
|
| 61 |
+
# current_time = time.time()
|
| 62 |
+
|
| 63 |
+
# # Reset counters every minute
|
| 64 |
+
# if current_time - self.last_reset > 60:
|
| 65 |
+
# self.requests_this_minute = 0
|
| 66 |
+
# self.tokens_this_minute = 0
|
| 67 |
+
# self.last_reset = current_time
|
| 68 |
+
|
| 69 |
+
# # Check if limits exceeded
|
| 70 |
+
# if self.requests_this_minute >= settings.GEMINI_REQUESTS_PER_MINUTE:
|
| 71 |
+
# wait_time = 60 - (current_time - self.last_reset)
|
| 72 |
+
# print(f"⚠️ Gemini rate limit hit. Waiting {wait_time:.1f}s...")
|
| 73 |
+
# time.sleep(wait_time)
|
| 74 |
+
# self._check_rate_limits() # Recursive check after waiting
|
| 75 |
+
|
| 76 |
+
# async def generate(
|
| 77 |
+
# self,
|
| 78 |
+
# messages: List[Dict[str, str]],
|
| 79 |
+
# system_prompt: Optional[str] = None
|
| 80 |
+
# ) -> str:
|
| 81 |
+
# """
|
| 82 |
+
# Generate response using Gemini.
|
| 83 |
+
|
| 84 |
+
# Args:
|
| 85 |
+
# messages: List of conversation messages
|
| 86 |
+
# Format: [{'role': 'user'/'assistant', 'content': '...'}]
|
| 87 |
+
# system_prompt: Optional system prompt (prepended to first message)
|
| 88 |
+
|
| 89 |
+
# Returns:
|
| 90 |
+
# str: Generated response text
|
| 91 |
+
# """
|
| 92 |
+
# self._check_rate_limits()
|
| 93 |
+
|
| 94 |
+
# try:
|
| 95 |
+
# # Format messages for Gemini
|
| 96 |
+
# # Gemini uses 'user' and 'model' roles
|
| 97 |
+
# formatted_messages = []
|
| 98 |
+
|
| 99 |
+
# # Add system prompt as first user message if provided
|
| 100 |
+
# if system_prompt:
|
| 101 |
+
# formatted_messages.append({
|
| 102 |
+
# 'role': 'user',
|
| 103 |
+
# 'parts': [system_prompt]
|
| 104 |
+
# })
|
| 105 |
+
|
| 106 |
+
# # Convert messages
|
| 107 |
+
# for msg in messages:
|
| 108 |
+
# role = 'model' if msg['role'] == 'assistant' else 'user'
|
| 109 |
+
# formatted_messages.append({
|
| 110 |
+
# 'role': role,
|
| 111 |
+
# 'parts': [msg['content']]
|
| 112 |
+
# })
|
| 113 |
+
|
| 114 |
+
# # Generate response
|
| 115 |
+
# chat = self.model.start_chat(history=formatted_messages[:-1])
|
| 116 |
+
# response = chat.send_message(formatted_messages[-1]['parts'][0])
|
| 117 |
+
|
| 118 |
+
# # Track rate limits
|
| 119 |
+
# self.requests_this_minute += 1
|
| 120 |
+
# # Note: Token counting would require additional API call
|
| 121 |
+
# # For now, estimate ~4 chars per token
|
| 122 |
+
# estimated_tokens = len(response.text) // 4
|
| 123 |
+
# self.tokens_this_minute += estimated_tokens
|
| 124 |
+
|
| 125 |
+
# return response.text
|
| 126 |
+
|
| 127 |
+
# except Exception as e:
|
| 128 |
+
# print(f"❌ Gemini API error: {e}")
|
| 129 |
+
# raise
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# # ============================================================================
|
| 133 |
+
# # GROQ MANAGER
|
| 134 |
+
# # ============================================================================
|
| 135 |
+
|
| 136 |
+
# class GroqManager:
|
| 137 |
+
# """
|
| 138 |
+
# Groq API Manager (Secondary LLM)
|
| 139 |
+
# Handles fast inference with Llama-3-70B
|
| 140 |
+
# """
|
| 141 |
+
|
| 142 |
+
# def __init__(self):
|
| 143 |
+
# """Initialize Groq API with single API key"""
|
| 144 |
+
# self.api_key = settings.GROQ_API_KEY
|
| 145 |
+
# self.model_name = settings.GROQ_MODEL
|
| 146 |
+
|
| 147 |
+
# # Create ChatGroq instance
|
| 148 |
+
# self.llm = ChatGroq(
|
| 149 |
+
# api_key=self.api_key,
|
| 150 |
+
# model_name=self.model_name,
|
| 151 |
+
# temperature=settings.LLM_TEMPERATURE,
|
| 152 |
+
# max_tokens=settings.LLM_MAX_TOKENS
|
| 153 |
+
# )
|
| 154 |
+
|
| 155 |
+
# # Rate limiting tracking
|
| 156 |
+
# self.requests_this_minute = 0
|
| 157 |
+
# self.tokens_this_minute = 0
|
| 158 |
+
# self.last_reset = time.time()
|
| 159 |
+
|
| 160 |
+
# print(f"✅ Groq Manager initialized: {self.model_name}")
|
| 161 |
+
|
| 162 |
+
# def _check_rate_limits(self):
|
| 163 |
+
# """
|
| 164 |
+
# Check and reset rate limit counters.
|
| 165 |
+
# Groq Free: 30 requests/min, 30,000 tokens/min
|
| 166 |
+
# """
|
| 167 |
+
# current_time = time.time()
|
| 168 |
+
|
| 169 |
+
# # Reset counters every minute
|
| 170 |
+
# if current_time - self.last_reset > 60:
|
| 171 |
+
# self.requests_this_minute = 0
|
| 172 |
+
# self.tokens_this_minute = 0
|
| 173 |
+
# self.last_reset = current_time
|
| 174 |
+
|
| 175 |
+
# # Check if limits exceeded
|
| 176 |
+
# if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
|
| 177 |
+
# wait_time = 60 - (current_time - self.last_reset)
|
| 178 |
+
# print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
|
| 179 |
+
# time.sleep(wait_time)
|
| 180 |
+
# self._check_rate_limits()
|
| 181 |
+
|
| 182 |
+
# async def generate(
|
| 183 |
+
# self,
|
| 184 |
+
# messages: List[Dict[str, str]],
|
| 185 |
+
# system_prompt: Optional[str] = None
|
| 186 |
+
# ) -> str:
|
| 187 |
+
# """
|
| 188 |
+
# Generate response using Groq.
|
| 189 |
+
|
| 190 |
+
# Args:
|
| 191 |
+
# messages: List of conversation messages
|
| 192 |
+
# Format: [{'role': 'user'/'assistant', 'content': '...'}]
|
| 193 |
+
# system_prompt: Optional system prompt
|
| 194 |
+
|
| 195 |
+
# Returns:
|
| 196 |
+
# str: Generated response text
|
| 197 |
+
# """
|
| 198 |
+
# self._check_rate_limits()
|
| 199 |
+
|
| 200 |
+
# try:
|
| 201 |
+
# # Format messages for LangChain
|
| 202 |
+
# formatted_messages = []
|
| 203 |
+
|
| 204 |
+
# # Add system message if provided
|
| 205 |
+
# if system_prompt:
|
| 206 |
+
# formatted_messages.append(SystemMessage(content=system_prompt))
|
| 207 |
+
|
| 208 |
+
# # Convert conversation messages
|
| 209 |
+
# for msg in messages:
|
| 210 |
+
# if msg['role'] == 'user':
|
| 211 |
+
# formatted_messages.append(HumanMessage(content=msg['content']))
|
| 212 |
+
# elif msg['role'] == 'assistant':
|
| 213 |
+
# formatted_messages.append(AIMessage(content=msg['content']))
|
| 214 |
+
|
| 215 |
+
# # Generate response
|
| 216 |
+
# response = await self.llm.ainvoke(formatted_messages)
|
| 217 |
+
|
| 218 |
+
# # Track rate limits
|
| 219 |
+
# self.requests_this_minute += 1
|
| 220 |
+
# # Estimate tokens (rough approximation)
|
| 221 |
+
# estimated_tokens = len(response.content) // 4
|
| 222 |
+
# self.tokens_this_minute += estimated_tokens
|
| 223 |
+
|
| 224 |
+
# return response.content
|
| 225 |
+
|
| 226 |
+
# except Exception as e:
|
| 227 |
+
# print(f"❌ Groq API error: {e}")
|
| 228 |
+
# raise
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# # ============================================================================
|
| 232 |
+
# # UNIFIED LLM MANAGER (Routes to appropriate LLM)
|
| 233 |
+
# # ============================================================================
|
| 234 |
+
|
| 235 |
+
# class LLMManager:
|
| 236 |
+
# """
|
| 237 |
+
# Unified LLM Manager that routes requests to appropriate LLM.
|
| 238 |
+
|
| 239 |
+
# Routing strategy (from config):
|
| 240 |
+
# - Chat responses → Gemini (best quality for users)
|
| 241 |
+
# - Evaluation → Groq (fast, good enough for RL)
|
| 242 |
+
# - Policy → Local BERT (no API call)
|
| 243 |
+
# """
|
| 244 |
+
|
| 245 |
+
# def __init__(self):
|
| 246 |
+
# """Initialize all LLM managers"""
|
| 247 |
+
# self.gemini = None
|
| 248 |
+
# self.groq = None
|
| 249 |
+
|
| 250 |
+
# # Initialize Gemini if configured
|
| 251 |
+
# if settings.is_gemini_enabled():
|
| 252 |
+
# try:
|
| 253 |
+
# self.gemini = GeminiManager()
|
| 254 |
+
# except Exception as e:
|
| 255 |
+
# print(f"⚠️ Failed to initialize Gemini: {e}")
|
| 256 |
+
|
| 257 |
+
# # Initialize Groq if configured
|
| 258 |
+
# if settings.is_groq_enabled():
|
| 259 |
+
# try:
|
| 260 |
+
# self.groq = GroqManager()
|
| 261 |
+
# except Exception as e:
|
| 262 |
+
# print(f"⚠️ Failed to initialize Groq: {e}")
|
| 263 |
+
|
| 264 |
+
# print("✅ LLM Manager initialized")
|
| 265 |
+
|
| 266 |
+
# async def generate(
|
| 267 |
+
# self,
|
| 268 |
+
# messages: List[Dict[str, str]],
|
| 269 |
+
# system_prompt: Optional[str] = None,
|
| 270 |
+
# task: Literal["chat", "evaluation"] = "chat"
|
| 271 |
+
# ) -> str:
|
| 272 |
+
# """
|
| 273 |
+
# Generate response using appropriate LLM based on task.
|
| 274 |
+
|
| 275 |
+
# Args:
|
| 276 |
+
# messages: Conversation messages
|
| 277 |
+
# system_prompt: Optional system prompt
|
| 278 |
+
# task: Task type - "chat" (user-facing) or "evaluation" (RL training)
|
| 279 |
+
|
| 280 |
+
# Returns:
|
| 281 |
+
# str: Generated response
|
| 282 |
+
|
| 283 |
+
# Raises:
|
| 284 |
+
# ValueError: If appropriate LLM is not configured
|
| 285 |
+
# """
|
| 286 |
+
# # Determine which LLM to use based on task
|
| 287 |
+
# llm_choice = settings.get_llm_for_task(task)
|
| 288 |
+
|
| 289 |
+
# if llm_choice == "gemini":
|
| 290 |
+
# if self.gemini is None:
|
| 291 |
+
# raise ValueError("Gemini API not configured. Set GOOGLE_API_KEY in .env")
|
| 292 |
+
# return await self.gemini.generate(messages, system_prompt)
|
| 293 |
+
|
| 294 |
+
# elif llm_choice == "groq":
|
| 295 |
+
# if self.groq is None:
|
| 296 |
+
# raise ValueError("Groq API not configured. Set GROQ_API_KEY in .env")
|
| 297 |
+
# return await self.groq.generate(messages, system_prompt)
|
| 298 |
+
|
| 299 |
+
# else:
|
| 300 |
+
# raise ValueError(f"Unknown LLM choice: {llm_choice}")
|
| 301 |
+
|
| 302 |
+
# # async def generate_chat_response(
|
| 303 |
+
# # self,
|
| 304 |
+
# # query: str,
|
| 305 |
+
# # context: str,
|
| 306 |
+
# # history: List[Dict[str, str]]
|
| 307 |
+
# # ) -> str:
|
| 308 |
+
# # """
|
| 309 |
+
# # Generate chat response (uses Gemini by default).
|
| 310 |
+
|
| 311 |
+
# # Args:
|
| 312 |
+
# # query: User query
|
| 313 |
+
# # context: Retrieved context (from FAISS)
|
| 314 |
+
# # history: Conversation history
|
| 315 |
+
|
| 316 |
+
# # Returns:
|
| 317 |
+
# # str: Chat response
|
| 318 |
+
# # """
|
| 319 |
+
# # # Build system prompt
|
| 320 |
+
# # system_prompt = settings.SYSTEM_PROMPT
|
| 321 |
+
# # if context:
|
| 322 |
+
# # system_prompt += f"\n\nRelevant Information:\n{context}"
|
| 323 |
+
|
| 324 |
+
# # # Build messages
|
| 325 |
+
# # messages = history + [{'role': 'user', 'content': query}]
|
| 326 |
+
|
| 327 |
+
# # # Generate using chat LLM (Gemini)
|
| 328 |
+
# # return await self.generate(messages, system_prompt, task="chat")
|
| 329 |
+
|
| 330 |
+
# async def generate_chat_response(
|
| 331 |
+
# self,
|
| 332 |
+
# query: str,
|
| 333 |
+
# context: str,
|
| 334 |
+
# history: List[Dict[str, str]]
|
| 335 |
+
# ) -> str:
|
| 336 |
+
# """Generate chat response (uses Gemini by default)."""
|
| 337 |
+
|
| 338 |
+
# # Import the detailed prompt
|
| 339 |
+
# from app.services.chat_service import BANKING_SYSTEM_PROMPT
|
| 340 |
+
|
| 341 |
+
# # Build enhanced system prompt with context
|
| 342 |
+
# system_prompt = BANKING_SYSTEM_PROMPT
|
| 343 |
+
|
| 344 |
+
# if context:
|
| 345 |
+
# system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
|
| 346 |
+
# else:
|
| 347 |
+
# system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."
|
| 348 |
+
|
| 349 |
+
# # Build messages
|
| 350 |
+
# messages = history + [{'role': 'user', 'content': query}]
|
| 351 |
+
|
| 352 |
+
# # Generate using chat LLM (Gemini)
|
| 353 |
+
# return await self.generate(messages, system_prompt, task="chat")
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
|
| 359 |
+
# async def evaluate_response(
|
| 360 |
+
# self,
|
| 361 |
+
# query: str,
|
| 362 |
+
# response: str,
|
| 363 |
+
# context: str = ""
|
| 364 |
+
# ) -> Dict:
|
| 365 |
+
# """
|
| 366 |
+
# Evaluate response quality (uses Groq for speed).
|
| 367 |
+
# Used during RL training.
|
| 368 |
+
|
| 369 |
+
# Args:
|
| 370 |
+
# query: User query
|
| 371 |
+
# response: Generated response
|
| 372 |
+
# context: Retrieved context (if any)
|
| 373 |
+
|
| 374 |
+
# Returns:
|
| 375 |
+
# dict: Evaluation results
|
| 376 |
+
# {'quality': 'Good'/'Bad', 'explanation': '...'}
|
| 377 |
+
# """
|
| 378 |
+
# eval_prompt = f"""Evaluate this response:
|
| 379 |
+
# Query: {query}
|
| 380 |
+
# Response: {response}
|
| 381 |
+
# Context used: {context if context else 'None'}
|
| 382 |
+
|
| 383 |
+
# Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explanation."""
|
| 384 |
+
|
| 385 |
+
# messages = [{'role': 'user', 'content': eval_prompt}]
|
| 386 |
+
|
| 387 |
+
# # Generate using evaluation LLM (Groq)
|
| 388 |
+
# result = await self.generate(messages, task="evaluation")
|
| 389 |
+
|
| 390 |
+
# # Parse result
|
| 391 |
+
# quality = "Good" if "Good" in result else "Bad"
|
| 392 |
+
|
| 393 |
+
# return {
|
| 394 |
+
# 'quality': quality,
|
| 395 |
+
# 'explanation': result
|
| 396 |
+
# }
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
# # ============================================================================
|
| 400 |
+
# # GLOBAL LLM MANAGER INSTANCE
|
| 401 |
+
# # ============================================================================
|
| 402 |
+
# llm_manager = LLMManager()
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
# # ============================================================================
|
| 406 |
+
# # USAGE EXAMPLE (for reference)
|
| 407 |
+
# # ============================================================================
|
| 408 |
+
# """
|
| 409 |
+
# # In your service file:
|
| 410 |
+
|
| 411 |
+
# from app.core.llm_manager import llm_manager
|
| 412 |
+
|
| 413 |
+
# # Generate chat response (uses Gemini)
|
| 414 |
+
# response = await llm_manager.generate_chat_response(
|
| 415 |
+
# query="What is my account balance?",
|
| 416 |
+
# context="Your balance is $1000",
|
| 417 |
+
# history=[]
|
| 418 |
+
# )
|
| 419 |
+
|
| 420 |
+
# # Evaluate response (uses Groq)
|
| 421 |
+
# evaluation = await llm_manager.evaluate_response(
|
| 422 |
+
# query="What is my balance?",
|
| 423 |
+
# response="Your balance is $1000",
|
| 424 |
+
# context="Balance: $1000"
|
| 425 |
+
# )
|
| 426 |
+
# """
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
|
backups/backup_main.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI Main Application Entry Point
|
| 3 |
+
Banking RAG Chatbot API with JWT Authentication
|
| 4 |
+
|
| 5 |
+
This file:
|
| 6 |
+
1. Creates the FastAPI app
|
| 7 |
+
2. Configures CORS middleware
|
| 8 |
+
3. Connects to MongoDB on startup/shutdown
|
| 9 |
+
4. Includes API routers (auth + chat)
|
| 10 |
+
5. Provides health check endpoints
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from fastapi import FastAPI, Request
|
| 14 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 15 |
+
from fastapi.responses import JSONResponse
|
| 16 |
+
from contextlib import asynccontextmanager
|
| 17 |
+
|
| 18 |
+
from app.config import settings
|
| 19 |
+
from app.db.mongodb import connect_to_mongo, close_mongo_connection
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# ============================================================================
|
| 23 |
+
# LIFESPAN MANAGER (Startup & Shutdown)
|
| 24 |
+
# ============================================================================
|
| 25 |
+
|
| 26 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage application lifespan events.

    Startup:
        - Connect to MongoDB Atlas
        - ML models load lazily on first use

    Shutdown:
        - Close MongoDB connection
        - Cleanup resources
    """
    # ========================================================================
    # STARTUP
    # ========================================================================
    print("\n" + "=" * 80)
    print("🚀 STARTING BANKING RAG CHATBOT API")
    print("=" * 80)
    print(f"Environment: {settings.ENVIRONMENT}")
    print(f"Debug Mode: {settings.DEBUG}")
    print("=" * 80)

    # Establish the MongoDB connection before any request is served.
    await connect_to_mongo()

    print("\n💡 ML Models Info:")
    print(" Policy Network: Loads on first chat request (lazy loading)")
    print(" Retriever Model: Loads on first retrieval (lazy loading)")
    print(" LLM (Gemini): Connects on first generation")

    print("\n✅ Backend startup complete!")
    print("=" * 80)
    # FIX: these four lines carried an f-prefix with no placeholders;
    # plain string literals produce identical output.
    print("📖 API Docs: http://localhost:8000/docs")
    print("🏥 Health Check: http://localhost:8000/health")
    print("🔐 Register: POST http://localhost:8000/api/v1/auth/register")
    print("🔑 Login: POST http://localhost:8000/api/v1/auth/login")
    print("=" * 80 + "\n")

    yield  # Application serves requests while suspended here

    # ========================================================================
    # SHUTDOWN
    # ========================================================================
    print("\n" + "=" * 80)
    print("🛑 SHUTTING DOWN API")
    print("=" * 80)

    # Release the MongoDB client cleanly.
    await close_mongo_connection()

    print("✅ Shutdown complete")
    print("=" * 80 + "\n")
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ============================================================================
|
| 82 |
+
# CREATE FASTAPI APPLICATION
|
| 83 |
+
# ============================================================================
|
| 84 |
+
|
| 85 |
+
# Markdown description rendered on the Swagger / ReDoc landing pages.
_APP_DESCRIPTION = """
    🤖 AI-powered Banking Assistant with:

    **Features:**
    - 🔐 JWT Authentication (Sign up, Login, Protected routes)
    - 💬 RAG (Retrieval-Augmented Generation)
    - 🧠 RL-based Policy Network (BERT)
    - 🔍 Custom E5 Retriever
    - ✨ Google Gemini LLM

    **Capabilities:**
    - Intelligent document retrieval
    - Context-aware responses
    - Conversation history
    - Real-time chat
    - User authentication & authorization
    """

# Application instance; `lifespan` wires the startup/shutdown hooks above.
app = FastAPI(
    title="Banking RAG Chatbot API",
    description=_APP_DESCRIPTION,
    version="1.0.0",
    docs_url="/docs",    # Swagger UI
    redoc_url="/redoc",  # ReDoc
    lifespan=lifespan,
)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# ============================================================================
|
| 112 |
+
# CORS MIDDLEWARE
|
| 113 |
+
# ============================================================================
|
| 114 |
+
|
| 115 |
+
# Resolve the configured origin whitelist once and log it for visibility.
allowed_origins = settings.get_allowed_origins()

print("\n🌐 CORS Configuration:")
print(f" Allowed Origins: {allowed_origins}")

# Permissive CORS: any method/header from the configured origins,
# with credentials (cookies / Authorization headers) allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
# ============================================================================
|
| 130 |
+
# INCLUDE API ROUTERS
|
| 131 |
+
# ============================================================================
|
| 132 |
+
|
| 133 |
+
from app.api.v1 import chat, auth

# Public authentication endpoints (register, login).
app.include_router(auth.router, prefix="/api/v1/auth", tags=["🔐 Authentication"])

# Chat endpoints — every route requires a valid JWT bearer token.
app.include_router(chat.router, prefix="/api/v1/chat", tags=["💬 Chat"])
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ============================================================================
|
| 151 |
+
# ROOT ENDPOINTS
|
| 152 |
+
# ============================================================================
|
| 153 |
+
|
| 154 |
+
@app.get("/", tags=["📍 Root"])
|
| 155 |
+
async def root():
|
| 156 |
+
"""
|
| 157 |
+
Root endpoint - API information and available endpoints
|
| 158 |
+
"""
|
| 159 |
+
return {
|
| 160 |
+
"message": "Banking RAG Chatbot API with Authentication",
|
| 161 |
+
"version": "1.0.0",
|
| 162 |
+
"status": "online",
|
| 163 |
+
"authentication": "JWT Bearer Token Required for chat endpoints",
|
| 164 |
+
"documentation": {
|
| 165 |
+
"swagger_ui": "/docs",
|
| 166 |
+
"redoc": "/redoc"
|
| 167 |
+
},
|
| 168 |
+
"endpoints": {
|
| 169 |
+
"auth": {
|
| 170 |
+
"register": "POST /api/v1/auth/register",
|
| 171 |
+
"login": "POST /api/v1/auth/login",
|
| 172 |
+
"me": "GET /api/v1/auth/me (requires token)",
|
| 173 |
+
"logout": "POST /api/v1/auth/logout (requires token)"
|
| 174 |
+
},
|
| 175 |
+
"chat": {
|
| 176 |
+
"send_message": "POST /api/v1/chat/ (requires token)",
|
| 177 |
+
"get_history": "GET /api/v1/chat/history/{conversation_id} (requires token)",
|
| 178 |
+
"list_conversations": "GET /api/v1/chat/conversations (requires token)",
|
| 179 |
+
"delete_conversation": "DELETE /api/v1/chat/conversation/{conversation_id} (requires token)"
|
| 180 |
+
},
|
| 181 |
+
"health": "GET /health"
|
| 182 |
+
}
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
@app.get("/health", tags=["🏥 Health"])
|
| 187 |
+
async def health_check():
|
| 188 |
+
"""
|
| 189 |
+
Comprehensive health check endpoint
|
| 190 |
+
|
| 191 |
+
Checks status of:
|
| 192 |
+
- API service
|
| 193 |
+
- MongoDB connection
|
| 194 |
+
- ML models (lazy loaded)
|
| 195 |
+
- Authentication system
|
| 196 |
+
|
| 197 |
+
Returns:
|
| 198 |
+
dict: Health status of all components
|
| 199 |
+
"""
|
| 200 |
+
from app.db.mongodb import get_database
|
| 201 |
+
|
| 202 |
+
# Check MongoDB
|
| 203 |
+
mongodb_status = "connected" if get_database() is not None else "disconnected"
|
| 204 |
+
|
| 205 |
+
# Check ML models (don't load them, just check readiness)
|
| 206 |
+
ml_models_status = {
|
| 207 |
+
"policy_network": "ready (lazy load)",
|
| 208 |
+
"retriever": "ready (lazy load)",
|
| 209 |
+
"llm": "ready (API-based)"
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
# Check authentication
|
| 213 |
+
auth_status = {
|
| 214 |
+
"jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
|
| 215 |
+
"algorithm": settings.ALGORITHM,
|
| 216 |
+
"token_expiry_minutes": settings.ACCESS_TOKEN_EXPIRE_MINUTES
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
# Overall health
|
| 220 |
+
is_healthy = mongodb_status == "connected" and auth_status["jwt_enabled"]
|
| 221 |
+
|
| 222 |
+
return {
|
| 223 |
+
"status": "healthy" if is_healthy else "degraded",
|
| 224 |
+
"api": "online",
|
| 225 |
+
"mongodb": mongodb_status,
|
| 226 |
+
"authentication": auth_status,
|
| 227 |
+
"ml_models": ml_models_status,
|
| 228 |
+
"environment": settings.ENVIRONMENT,
|
| 229 |
+
"debug_mode": settings.DEBUG
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# ============================================================================
|
| 234 |
+
# GLOBAL EXCEPTION HANDLER
|
| 235 |
+
# ============================================================================
|
| 236 |
+
|
| 237 |
+
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Global exception handler for unhandled errors.

    Logs the failing path and error, prints a full traceback in debug
    mode, and returns a generic 500 JSON response (the real error detail
    is exposed only when DEBUG is on).
    """
    print(f"\n❌ Unhandled Exception:")
    print(f" Path: {request.url.path}")
    print(f" Error: {str(exc)}")

    if settings.DEBUG:
        import traceback
        traceback.print_exc()

    detail = str(exc) if settings.DEBUG else "An unexpected error occurred"
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal Server Error",
            "detail": detail,
            "path": str(request.url.path),
        },
    )
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
# ============================================================================
|
| 261 |
+
# MAIN ENTRY POINT (for direct execution)
|
| 262 |
+
# ============================================================================
|
| 263 |
+
|
| 264 |
+
if __name__ == "__main__":
|
| 265 |
+
import uvicorn
|
| 266 |
+
|
| 267 |
+
print("\n🚀 Starting server directly...")
|
| 268 |
+
print(" Note: For production, use: uvicorn app.main:app --host 0.0.0.0 --port 8000")
|
| 269 |
+
|
| 270 |
+
uvicorn.run(
|
| 271 |
+
"app.main:app",
|
| 272 |
+
host="0.0.0.0",
|
| 273 |
+
port=8000,
|
| 274 |
+
reload=settings.DEBUG # Auto-reload only in debug mode
|
| 275 |
+
)
|
backups/backup_requirements.txt
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# # ================================================================================
|
| 2 |
+
# # BANKING RAG CHATBOT API - DEPENDENCIES
|
| 3 |
+
# # Python 3.10+ required
|
| 4 |
+
# # ================================================================================
|
| 5 |
+
|
| 6 |
+
# # ============================================================================
|
| 7 |
+
# # CORE WEB FRAMEWORK
|
| 8 |
+
# # ============================================================================
|
| 9 |
+
# # FastAPI - Modern async web framework
|
| 10 |
+
# fastapi==0.104.1
|
| 11 |
+
|
| 12 |
+
# # Uvicorn - ASGI server for FastAPI
|
| 13 |
+
# uvicorn[standard]==0.24.0
|
| 14 |
+
|
| 15 |
+
# # Python multipart for file uploads (if needed later)
|
| 16 |
+
# python-multipart==0.0.6
|
| 17 |
+
|
| 18 |
+
# # ============================================================================
|
| 19 |
+
# # CONFIGURATION & ENVIRONMENT
|
| 20 |
+
# # ============================================================================
|
| 21 |
+
# # Pydantic - Data validation and settings management
|
| 22 |
+
# pydantic==2.5.0
|
| 23 |
+
# pydantic-settings==2.1.0
|
| 24 |
+
|
| 25 |
+
# # Python-dotenv - Load environment variables from .env file
|
| 26 |
+
# python-dotenv==1.0.0
|
| 27 |
+
|
| 28 |
+
# # ============================================================================
|
| 29 |
+
# # DATABASE - MongoDB
|
| 30 |
+
# # ============================================================================
|
| 31 |
+
# # Motor - Async MongoDB driver for FastAPI
|
| 32 |
+
# motor==3.3.2
|
| 33 |
+
|
| 34 |
+
# # PyMongo - MongoDB Python driver (used by Motor)
|
| 35 |
+
# pymongo==4.6.0
|
| 36 |
+
|
| 37 |
+
# # ============================================================================
|
| 38 |
+
# # AUTHENTICATION & SECURITY
|
| 39 |
+
# # ============================================================================
|
| 40 |
+
# # Python-jose - JWT token handling
|
| 41 |
+
# python-jose[cryptography]==3.3.0
|
| 42 |
+
|
| 43 |
+
# # Passlib - Password hashing
|
| 44 |
+
# passlib[bcrypt]==1.7.4
|
| 45 |
+
|
| 46 |
+
# # ============================================================================
|
| 47 |
+
# # MACHINE LEARNING - PYTORCH & TRANSFORMERS
|
| 48 |
+
# # ============================================================================
|
| 49 |
+
# # PyTorch - Deep learning framework
|
| 50 |
+
# torch==2.1.0
|
| 51 |
+
|
| 52 |
+
# # Transformers - HuggingFace transformers library (BERT, e5-base-v2)
|
| 53 |
+
# transformers==4.35.0
|
| 54 |
+
|
| 55 |
+
# # Sentence-Transformers - Sentence embeddings
|
| 56 |
+
# sentence-transformers==2.2.2
|
| 57 |
+
|
| 58 |
+
# # ============================================================================
|
| 59 |
+
# # VECTOR SEARCH
|
| 60 |
+
# # ============================================================================
|
| 61 |
+
# # FAISS - Facebook AI Similarity Search (CPU version)
|
| 62 |
+
# faiss-cpu==1.7.4
|
| 63 |
+
|
| 64 |
+
# # ============================================================================
|
| 65 |
+
# # LLM INTEGRATIONS
|
| 66 |
+
# # ============================================================================
|
| 67 |
+
# # LangChain - LLM orchestration framework
|
| 68 |
+
# langchain==0.1.0
|
| 69 |
+
|
| 70 |
+
# # LangChain Groq integration
|
| 71 |
+
# langchain-groq==0.0.1
|
| 72 |
+
|
| 73 |
+
# # LangChain Google GenAI (for Gemini)
|
| 74 |
+
# langchain-google-genai==1.0.0
|
| 75 |
+
|
| 76 |
+
# # Google Generative AI - Direct Gemini API
|
| 77 |
+
# google-generativeai==0.3.2
|
| 78 |
+
|
| 79 |
+
# # ============================================================================
|
| 80 |
+
# # UTILITIES
|
| 81 |
+
# # ============================================================================
|
| 82 |
+
# # NumPy - Numerical computing
|
| 83 |
+
# numpy==1.24.3
|
| 84 |
+
|
| 85 |
+
# # Tiktoken - OpenAI tokenizer (for token counting)
|
| 86 |
+
# tiktoken==0.5.1
|
| 87 |
+
|
| 88 |
+
# # Rich - Beautiful terminal output (for logging)
|
| 89 |
+
# rich==13.7.0
|
| 90 |
+
|
| 91 |
+
# # Requests - HTTP library
|
| 92 |
+
# requests==2.31.0
|
| 93 |
+
|
| 94 |
+
# # ============================================================================
|
| 95 |
+
# # OPTIONAL: DEVELOPMENT TOOLS (comment out for production)
|
| 96 |
+
# # ============================================================================
|
| 97 |
+
# # Pytest - Testing framework
|
| 98 |
+
# # pytest==7.4.3
|
| 99 |
+
|
| 100 |
+
# # Black - Code formatter
|
| 101 |
+
# # black==23.12.0
|
| 102 |
+
|
| 103 |
+
# # Flake8 - Linter
|
| 104 |
+
# # flake8==6.1.0
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
# fastapi==0.104.1
|
| 122 |
+
# uvicorn[standard]==0.24.0
|
| 123 |
+
# pydantic==2.5.0
|
| 124 |
+
# pydantic-settings==2.1.0
|
| 125 |
+
# python-dotenv==1.0.0
|
| 126 |
+
# motor==3.3.2
|
| 127 |
+
# pymongo==4.6.0
|
| 128 |
+
# google-generativeai==0.3.1
|
| 129 |
+
# sentence-transformers==2.2.2
|
| 130 |
+
# faiss-cpu==1.7.4
|
| 131 |
+
# numpy==1.24.3
|
| 132 |
+
# torch==2.1.0
|
| 133 |
+
# transformers==4.35.2
|
| 134 |
+
|
| 135 |
+
# # AUTH DEPENDENCIES (NEW!)
|
| 136 |
+
# python-jose[cryptography]==3.3.0
|
| 137 |
+
# passlib[bcrypt]==1.7.4
|
| 138 |
+
# python-multipart==0.0.6
|
| 139 |
+
# bcrypt==4.1.1
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# FastAPI & Server
|
| 152 |
+
fastapi==0.104.1
|
| 153 |
+
uvicorn[standard]==0.24.0
|
| 154 |
+
|
| 155 |
+
# Data Validation
|
| 156 |
+
pydantic==2.5.0
|
| 157 |
+
pydantic-settings==2.1.0
|
| 158 |
+
python-dotenv==1.0.0
|
| 159 |
+
|
| 160 |
+
# Database
|
| 161 |
+
motor==3.3.2
|
| 162 |
+
pymongo==4.6.0
|
| 163 |
+
|
| 164 |
+
# LLM & AI Libraries
|
| 165 |
+
langchain-groq==0.1.0
|
| 166 |
+
langchain-core==0.1.0
|
| 167 |
+
huggingface-hub==0.20.0
|
| 168 |
+
|
| 169 |
+
# Embeddings & Vector Search
|
| 170 |
+
sentence-transformers==2.2.2
|
| 171 |
+
faiss-cpu==1.7.4
|
| 172 |
+
numpy==1.24.3
|
| 173 |
+
|
| 174 |
+
# ML/Deep Learning
|
| 175 |
+
torch==2.1.0
|
| 176 |
+
transformers==4.35.2
|
| 177 |
+
|
| 178 |
+
# Authentication
|
| 179 |
+
python-jose[cryptography]==3.3.0
|
| 180 |
+
passlib[bcrypt]==1.7.4
|
| 181 |
+
python-multipart==0.0.6
|
| 182 |
+
bcrypt==4.1.1
|
requirements.txt
CHANGED
|
@@ -1,138 +1,37 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
# # ================================================================================
|
| 5 |
-
|
| 6 |
-
# # ============================================================================
|
| 7 |
-
# # CORE WEB FRAMEWORK
|
| 8 |
-
# # ============================================================================
|
| 9 |
-
# # FastAPI - Modern async web framework
|
| 10 |
-
# fastapi==0.104.1
|
| 11 |
-
|
| 12 |
-
# # Uvicorn - ASGI server for FastAPI
|
| 13 |
-
# uvicorn[standard]==0.24.0
|
| 14 |
-
|
| 15 |
-
# # Python multipart for file uploads (if needed later)
|
| 16 |
-
# python-multipart==0.0.6
|
| 17 |
-
|
| 18 |
-
# # ============================================================================
|
| 19 |
-
# # CONFIGURATION & ENVIRONMENT
|
| 20 |
-
# # ============================================================================
|
| 21 |
-
# # Pydantic - Data validation and settings management
|
| 22 |
-
# pydantic==2.5.0
|
| 23 |
-
# pydantic-settings==2.1.0
|
| 24 |
-
|
| 25 |
-
# # Python-dotenv - Load environment variables from .env file
|
| 26 |
-
# python-dotenv==1.0.0
|
| 27 |
-
|
| 28 |
-
# # ============================================================================
|
| 29 |
-
# # DATABASE - MongoDB
|
| 30 |
-
# # ============================================================================
|
| 31 |
-
# # Motor - Async MongoDB driver for FastAPI
|
| 32 |
-
# motor==3.3.2
|
| 33 |
-
|
| 34 |
-
# # PyMongo - MongoDB Python driver (used by Motor)
|
| 35 |
-
# pymongo==4.6.0
|
| 36 |
-
|
| 37 |
-
# # ============================================================================
|
| 38 |
-
# # AUTHENTICATION & SECURITY
|
| 39 |
-
# # ============================================================================
|
| 40 |
-
# # Python-jose - JWT token handling
|
| 41 |
-
# python-jose[cryptography]==3.3.0
|
| 42 |
-
|
| 43 |
-
# # Passlib - Password hashing
|
| 44 |
-
# passlib[bcrypt]==1.7.4
|
| 45 |
-
|
| 46 |
-
# # ============================================================================
|
| 47 |
-
# # MACHINE LEARNING - PYTORCH & TRANSFORMERS
|
| 48 |
-
# # ============================================================================
|
| 49 |
-
# # PyTorch - Deep learning framework
|
| 50 |
-
# torch==2.1.0
|
| 51 |
-
|
| 52 |
-
# # Transformers - HuggingFace transformers library (BERT, e5-base-v2)
|
| 53 |
-
# transformers==4.35.0
|
| 54 |
-
|
| 55 |
-
# # Sentence-Transformers - Sentence embeddings
|
| 56 |
-
# sentence-transformers==2.2.2
|
| 57 |
-
|
| 58 |
-
# # ============================================================================
|
| 59 |
-
# # VECTOR SEARCH
|
| 60 |
-
# # ============================================================================
|
| 61 |
-
# # FAISS - Facebook AI Similarity Search (CPU version)
|
| 62 |
-
# faiss-cpu==1.7.4
|
| 63 |
-
|
| 64 |
-
# # ============================================================================
|
| 65 |
-
# # LLM INTEGRATIONS
|
| 66 |
-
# # ============================================================================
|
| 67 |
-
# # LangChain - LLM orchestration framework
|
| 68 |
-
# langchain==0.1.0
|
| 69 |
-
|
| 70 |
-
# # LangChain Groq integration
|
| 71 |
-
# langchain-groq==0.0.1
|
| 72 |
-
|
| 73 |
-
# # LangChain Google GenAI (for Gemini)
|
| 74 |
-
# langchain-google-genai==1.0.0
|
| 75 |
-
|
| 76 |
-
# # Google Generative AI - Direct Gemini API
|
| 77 |
-
# google-generativeai==0.3.2
|
| 78 |
-
|
| 79 |
-
# # ============================================================================
|
| 80 |
-
# # UTILITIES
|
| 81 |
-
# # ============================================================================
|
| 82 |
-
# # NumPy - Numerical computing
|
| 83 |
-
# numpy==1.24.3
|
| 84 |
-
|
| 85 |
-
# # Tiktoken - OpenAI tokenizer (for token counting)
|
| 86 |
-
# tiktoken==0.5.1
|
| 87 |
-
|
| 88 |
-
# # Rich - Beautiful terminal output (for logging)
|
| 89 |
-
# rich==13.7.0
|
| 90 |
-
|
| 91 |
-
# # Requests - HTTP library
|
| 92 |
-
# requests==2.31.0
|
| 93 |
-
|
| 94 |
-
# # ============================================================================
|
| 95 |
-
# # OPTIONAL: DEVELOPMENT TOOLS (comment out for production)
|
| 96 |
-
# # ============================================================================
|
| 97 |
-
# # Pytest - Testing framework
|
| 98 |
-
# # pytest==7.4.3
|
| 99 |
-
|
| 100 |
-
# # Black - Code formatter
|
| 101 |
-
# # black==23.12.0
|
| 102 |
-
|
| 103 |
-
# # Flake8 - Linter
|
| 104 |
-
# # flake8==6.1.0
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
-
|
| 122 |
-
uvicorn[standard]==0.24.0
|
| 123 |
-
pydantic==2.5.0
|
| 124 |
-
pydantic-settings==2.1.0
|
| 125 |
-
python-dotenv==1.0.0
|
| 126 |
-
motor==3.3.2
|
| 127 |
-
pymongo==4.6.0
|
| 128 |
-
google-generativeai==0.3.1
|
| 129 |
sentence-transformers==2.2.2
|
| 130 |
faiss-cpu==1.7.4
|
| 131 |
numpy==1.24.3
|
|
|
|
|
|
|
|
|
|
| 132 |
torch==2.1.0
|
| 133 |
transformers==4.35.2
|
| 134 |
|
| 135 |
-
|
|
|
|
| 136 |
python-jose[cryptography]==3.3.0
|
| 137 |
passlib[bcrypt]==1.7.4
|
| 138 |
python-multipart==0.0.6
|
|
|
|
| 1 |
+
# FastAPI & Server
|
| 2 |
+
fastapi==0.104.1
|
| 3 |
+
uvicorn[standard]==0.24.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
+
# Data Validation
|
| 7 |
+
pydantic==2.5.0
|
| 8 |
+
pydantic-settings==2.1.0
|
| 9 |
+
python-dotenv==1.0.0
|
| 10 |
|
| 11 |
|
| 12 |
+
# Database
|
| 13 |
+
motor==3.3.2
|
| 14 |
+
pymongo==4.6.0
|
| 15 |
|
| 16 |
|
| 17 |
+
# LLM & AI Libraries
|
| 18 |
+
langchain-groq==0.1.9
|
| 19 |
+
langchain-core==0.2.38
|
| 20 |
+
huggingface-hub==0.24.6
|
| 21 |
|
| 22 |
|
| 23 |
+
# Embeddings & Vector Search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
sentence-transformers==2.2.2
|
| 25 |
faiss-cpu==1.7.4
|
| 26 |
numpy==1.24.3
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ML/Deep Learning
|
| 30 |
torch==2.1.0
|
| 31 |
transformers==4.35.2
|
| 32 |
|
| 33 |
+
|
| 34 |
+
# Authentication
|
| 35 |
python-jose[cryptography]==3.3.0
|
| 36 |
passlib[bcrypt]==1.7.4
|
| 37 |
python-multipart==0.0.6
|