eeshanyaj committed on
Commit
e448800
·
1 Parent(s): d1b6d07

major changes

app/config.py CHANGED
@@ -1,10 +1,7 @@
1
- # LINE 80 VERY IMP CHANGE OF LLM MAX TOKENS FROM 512 TO 1024
2
-
3
-
4
  """
5
  Application Configuration
6
  Settings for Banking RAG Chatbot with JWT Authentication
7
- Includes all settings needed by existing llm_manager.py
8
  """
9
 
10
  import os
@@ -13,7 +10,6 @@ from dotenv import load_dotenv
13
 
14
  load_dotenv()
15
 
16
-
17
  class Settings:
18
  """Application settings loaded from environment variables"""
19
 
@@ -42,23 +38,32 @@ class Settings:
42
  ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
43
 
44
  # ========================================================================
45
- # GOOGLE GEMINI API
46
  # ========================================================================
47
- GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
48
- GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
49
- GEMINI_REQUESTS_PER_MINUTE: int = int(os.getenv("GEMINI_REQUESTS_PER_MINUTE", "60"))
50
 
51
  # ========================================================================
52
- # GROQ API (Optional - for evaluation)
53
  # ========================================================================
54
- GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
55
- GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
56
- GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
57
 
58
  # ========================================================================
59
- # HUGGING FACE (Optional - for model downloads)
60
  # ========================================================================
61
- HF_TOKEN: str = os.getenv("HF_TOKEN", "")
62
 
63
  # ========================================================================
64
  # MODEL PATHS (for RL Policy Network and RAG models)
@@ -77,8 +82,7 @@ class Settings:
77
  # LLM PARAMETERS
78
  # ========================================================================
79
  LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
80
- LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024")) # VERY IMPORTANT CHANGE =============================================================================================
81
- # ============================================================================
82
 
83
  # ========================================================================
84
  # RAG PARAMETERS
@@ -94,20 +98,37 @@ class Settings:
94
  CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
95
 
96
  # ========================================================================
97
- # HELPER METHODS (Required by llm_manager.py)
98
- # ========================================================================
99
-
100
- def is_gemini_enabled(self) -> bool:
101
- """Check if Google Gemini API is configured"""
102
- return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
103
 
104
  def is_groq_enabled(self) -> bool:
105
- """Check if Groq API is configured"""
106
- return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
107
 
108
  def is_hf_enabled(self) -> bool:
109
- """Check if HuggingFace token is configured"""
110
- return bool(self.HF_TOKEN and self.HF_TOKEN != "")
111
 
112
  def get_allowed_origins(self) -> List[str]:
113
  """Parse allowed origins from comma-separated string"""
@@ -115,29 +136,26 @@ class Settings:
115
  return ["*"]
116
  return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
117
 
118
- def get_llm_for_task(self, task: str = "qa") -> str:
119
  """
120
- Get LLM name for a specific task.
121
 
122
  Args:
123
- task: Task type ('chat', 'evaluation', etc.)
124
 
125
  Returns:
126
- str: LLM name ('gemini' or 'groq')
127
  """
128
- # Use Gemini for chat, Groq for evaluation
129
  if task == "evaluation":
130
- return "groq" if self.is_groq_enabled() else "gemini"
131
  else:
132
- return "gemini" # Default to Gemini for all tasks
133
-
134
 
135
  # ============================================================================
136
  # CREATE GLOBAL SETTINGS INSTANCE
137
  # ============================================================================
138
  settings = Settings()
139
 
140
-
141
  # ============================================================================
142
  # PRINT CONFIGURATION ON LOAD
143
  # ============================================================================
@@ -151,11 +169,20 @@ print(f"Device: {settings.DEVICE}")
151
  print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
152
  print()
153
  print("🔑 API Keys:")
154
- print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
155
- print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
156
- print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
157
  print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
158
- print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
159
  print()
160
  print("🤖 Model Paths:")
161
  print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
@@ -163,478 +190,3 @@ print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
163
  print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
164
  print(f" Knowledge Base: {settings.KB_PATH}")
165
  print("=" * 80)
166
- # ============================================================================
167
-
168
-
169
-
170
-
171
-
172
-
173
-
174
-
175
-
176
-
177
-
178
-
179
-
180
-
181
-
182
-
183
-
184
- # """
185
- # Application Configuration
186
- # Settings for Banking RAG Chatbot with JWT Authentication
187
- # Includes all settings needed by existing llm_manager.py
188
- # """
189
-
190
- # import os
191
- # from typing import List
192
- # from dotenv import load_dotenv
193
-
194
- # load_dotenv()
195
-
196
-
197
- # class Settings:
198
- # """Application settings loaded from environment variables"""
199
-
200
- # # ========================================================================
201
- # # ENVIRONMENT
202
- # # ========================================================================
203
- # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
204
- # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
205
-
206
- # # ========================================================================
207
- # # MONGODB
208
- # # ========================================================================
209
- # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
210
- # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
211
-
212
- # # ========================================================================
213
- # # JWT AUTHENTICATION
214
- # # ========================================================================
215
- # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
216
- # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
217
- # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
218
-
219
- # # ========================================================================
220
- # # CORS (for frontend)
221
- # # ========================================================================
222
- # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
223
-
224
- # # ========================================================================
225
- # # GOOGLE GEMINI API
226
- # # ========================================================================
227
- # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
228
- # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
229
-
230
- # # ========================================================================
231
- # # GROQ API (Optional - for your llm_manager)
232
- # # ========================================================================
233
- # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
234
- # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
235
-
236
- # # ========================================================================
237
- # # HUGGING FACE (Optional - for model downloads)
238
- # # ========================================================================
239
- # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
240
-
241
- # # ========================================================================
242
- # # MODEL PATHS (for RL Policy Network and RAG models)
243
- # # ========================================================================
244
- # POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
245
- # RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
246
- # FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
247
- # KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
248
-
249
- # # ========================================================================
250
- # # DEVICE SETTINGS (for PyTorch/TensorFlow models)
251
- # # ========================================================================
252
- # DEVICE: str = os.getenv("DEVICE", "cpu")
253
-
254
- # # ========================================================================
255
- # # LLM PARAMETERS
256
- # # ========================================================================
257
- # LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
258
- # LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
259
-
260
- # # ========================================================================
261
- # # RAG PARAMETERS
262
- # # ========================================================================
263
- # TOP_K: int = int(os.getenv("TOP_K", "5"))
264
- # SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
265
- # MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
266
-
267
- # # ========================================================================
268
- # # POLICY NETWORK PARAMETERS
269
- # # ========================================================================
270
- # POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
271
- # CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
272
-
273
-
274
- # # ========================================================================
275
- # # HELPER METHODS (Required by llm_manager.py)
276
- # # ========================================================================
277
-
278
- # def is_gemini_enabled(self) -> bool:
279
- # """Check if Google Gemini API is configured"""
280
- # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
281
-
282
- # def is_groq_enabled(self) -> bool:
283
- # """Check if Groq API is configured"""
284
- # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
285
-
286
- # def is_hf_enabled(self) -> bool:
287
- # """Check if HuggingFace token is configured"""
288
- # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
289
-
290
- # def get_allowed_origins(self) -> List[str]:
291
- # """Parse allowed origins from comma-separated string"""
292
- # if self.ALLOWED_ORIGINS == "*":
293
- # return ["*"]
294
- # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
295
-
296
- # # def get_llm_for_task(self, task: str = "qa"):
297
- # # """
298
- # # Get LLM configuration for a specific task.
299
- # # Returns a dict with model settings.
300
-
301
- # # Args:
302
- # # task: Task type ('qa', 'retrieval', 'summary', etc.)
303
-
304
- # # Returns:
305
- # # dict: LLM configuration
306
- # # """
307
- # # return {
308
- # # 'api_key': self.GOOGLE_API_KEY,
309
- # # 'model': self.GEMINI_MODEL,
310
- # # 'temperature': self.LLM_TEMPERATURE,
311
- # # 'max_tokens': self.LLM_MAX_TOKENS,
312
- # # 'task': task
313
- # # }
314
- # def get_llm_for_task(self, task: str = "qa") -> str:
315
- # """
316
- # Get LLM name for a specific task.
317
-
318
- # Args:
319
- # task: Task type ('chat', 'evaluation', etc.)
320
-
321
- # Returns:
322
- # str: LLM name ('gemini' or 'groq')
323
- # """
324
- # # Use Gemini for chat, Groq for evaluation
325
- # if task == "evaluation":
326
- # return "groq" if self.is_groq_enabled() else "gemini"
327
- # else:
328
- # return "gemini" # Default to Gemini for all other tasks
329
-
330
-
331
-
332
-
333
- # # ============================================================================
334
- # # CREATE GLOBAL SETTINGS INSTANCE
335
- # # ============================================================================
336
- # settings = Settings()
337
-
338
-
339
- # # ============================================================================
340
- # # PRINT CONFIGURATION ON LOAD
341
- # # ============================================================================
342
- # print("=" * 80)
343
- # print("✅ Configuration Loaded")
344
- # print("=" * 80)
345
- # print(f"Environment: {settings.ENVIRONMENT}")
346
- # print(f"Debug Mode: {settings.DEBUG}")
347
- # print(f"Database: {settings.DATABASE_NAME}")
348
- # print(f"Device: {settings.DEVICE}")
349
- # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
350
- # print()
351
- # print("🔑 API Keys:")
352
- # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
353
- # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
354
- # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
355
- # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
356
- # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
357
- # print()
358
- # print("🤖 Model Paths:")
359
- # print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
360
- # print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
361
- # print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
362
- # print(f" Knowledge Base: {settings.KB_PATH}")
363
- # print("=" * 80)
364
- # # # ============================================================================
365
-
366
-
367
-
368
-
369
-
370
-
371
-
372
-
373
-
374
-
375
-
376
-
377
-
378
-
379
-
380
-
381
-
382
-
383
-
384
-
385
-
386
- # # """
387
- # # Application Configuration
388
- # # Settings for Banking RAG Chatbot with JWT Authentication
389
- # # Includes all settings needed by existing llm_manager.py
390
- # # """
391
-
392
- # # import os
393
- # # from typing import List
394
- # # from dotenv import load_dotenv
395
-
396
- # # load_dotenv()
397
-
398
-
399
- # # class Settings:
400
- # # """Application settings loaded from environment variables"""
401
-
402
- # # # ========================================================================
403
- # # # ENVIRONMENT
404
- # # # ========================================================================
405
- # # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
406
- # # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
407
-
408
- # # # ========================================================================
409
- # # # MONGODB
410
- # # # ========================================================================
411
- # # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
412
- # # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
413
-
414
- # # # ========================================================================
415
- # # # JWT AUTHENTICATION
416
- # # # ========================================================================
417
- # # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
418
- # # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
419
- # # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
420
-
421
- # # # ========================================================================
422
- # # # CORS (for frontend)
423
- # # # ========================================================================
424
- # # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
425
-
426
- # # # ========================================================================
427
- # # # GOOGLE GEMINI API
428
- # # # ========================================================================
429
- # # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
430
- # # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
431
-
432
- # # # ========================================================================
433
- # # # GROQ API (Optional - for your llm_manager)
434
- # # # ========================================================================
435
- # # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
436
- # # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
437
-
438
- # # # ========================================================================
439
- # # # HUGGING FACE (Optional - for model downloads)
440
- # # # ========================================================================
441
- # # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
442
-
443
- # # # ========================================================================
444
- # # # HELPER METHODS (Required by llm_manager.py)
445
- # # # ========================================================================
446
-
447
- # # def is_gemini_enabled(self) -> bool:
448
- # # """Check if Google Gemini API is configured"""
449
- # # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
450
-
451
- # # def is_groq_enabled(self) -> bool:
452
- # # """Check if Groq API is configured"""
453
- # # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
454
-
455
- # # def is_hf_enabled(self) -> bool:
456
- # # """Check if HuggingFace token is configured"""
457
- # # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
458
-
459
- # # def get_allowed_origins(self) -> List[str]:
460
- # # """Parse allowed origins from comma-separated string"""
461
- # # if self.ALLOWED_ORIGINS == "*":
462
- # # return ["*"]
463
- # # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
464
-
465
-
466
- # # # ============================================================================
467
- # # # CREATE GLOBAL SETTINGS INSTANCE
468
- # # # ============================================================================
469
- # # settings = Settings()
470
-
471
- # # # ============================================================================
472
- # # # PRINT CONFIGURATION ON LOAD
473
- # # # ============================================================================
474
- # # print("=" * 80)
475
- # # print("✅ Configuration Loaded")
476
- # # print("=" * 80)
477
- # # print(f"Environment: {settings.ENVIRONMENT}")
478
- # # print(f"Debug Mode: {settings.DEBUG}")
479
- # # print(f"Database: {settings.DATABASE_NAME}")
480
- # # # print(f"JWT Algorithm: {settings.ALGORITHM}")
481
- # # # print(f"Token Expiry: {settings.ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
482
- # # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
483
- # # print()
484
- # # print("🔑 API Keys:")
485
- # # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
486
- # # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
487
- # # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
488
- # # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
489
- # # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
490
- # # print("=" * 80)
491
-
492
-
493
-
494
-
495
-
496
-
497
-
498
-
499
-
500
-
501
-
502
-
503
-
504
-
505
-
506
-
507
-
508
-
509
-
510
-
511
-
512
-
513
-
514
-
515
-
516
- # """
517
- # Application Configuration
518
- # Settings for Banking RAG Chatbot with JWT Authentication
519
- # Includes all settings needed by existing llm_manager.py
520
- # """
521
-
522
- # import os
523
- # from typing import List
524
- # from dotenv import load_dotenv
525
-
526
- # load_dotenv()
527
-
528
-
529
- # class Settings:
530
- # """Application settings loaded from environment variables"""
531
-
532
- # # ========================================================================
533
- # # ENVIRONMENT
534
- # # ========================================================================
535
- # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
536
- # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
537
-
538
- # # ========================================================================
539
- # # MONGODB
540
- # # ========================================================================
541
- # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
542
- # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
543
-
544
- # # ========================================================================
545
- # # JWT AUTHENTICATION
546
- # # ========================================================================
547
- # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
548
- # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
549
- # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
550
-
551
- # # ========================================================================
552
- # # CORS (for frontend)
553
- # # ========================================================================
554
- # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
555
-
556
- # # ========================================================================
557
- # # GOOGLE GEMINI API
558
- # # ========================================================================
559
- # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
560
- # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
561
-
562
- # # ========================================================================
563
- # # GROQ API (Optional - for your llm_manager)
564
- # # ========================================================================
565
- # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
566
- # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
567
-
568
- # # ========================================================================
569
- # # HUGGING FACE (Optional - for model downloads)
570
- # # ========================================================================
571
- # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
572
-
573
- # # ========================================================================
574
- # # MODEL PATHS (for RL Policy Network and RAG models)
575
- # # ========================================================================
576
- # POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
577
- # RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
578
- # FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
579
- # KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
580
-
581
- # # ========================================================================
582
- # # LLM PARAMETERS
583
- # # ========================================================================
584
- # LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
585
- # LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
586
-
587
- # # ========================================================================
588
- # # RAG PARAMETERS
589
- # # ========================================================================
590
- # TOP_K: int = int(os.getenv("TOP_K", "5"))
591
- # SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
592
- # MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
593
-
594
- # # ========================================================================
595
- # # HELPER METHODS (Required by llm_manager.py)
596
- # # ========================================================================
597
-
598
- # def is_gemini_enabled(self) -> bool:
599
- # """Check if Google Gemini API is configured"""
600
- # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
601
-
602
- # def is_groq_enabled(self) -> bool:
603
- # """Check if Groq API is configured"""
604
- # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
605
-
606
- # def is_hf_enabled(self) -> bool:
607
- # """Check if HuggingFace token is configured"""
608
- # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
609
-
610
- # def get_allowed_origins(self) -> List[str]:
611
- # """Parse allowed origins from comma-separated string"""
612
- # if self.ALLOWED_ORIGINS == "*":
613
- # return ["*"]
614
- # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
615
-
616
-
617
- # # ============================================================================
618
- # # CREATE GLOBAL SETTINGS INSTANCE
619
- # # ============================================================================
620
- # settings = Settings()
621
-
622
-
623
- # # ============================================================================
624
- # # PRINT CONFIGURATION ON LOAD
625
- # # ============================================================================
626
- # print("=" * 80)
627
- # print("✅ Configuration Loaded")
628
- # print("=" * 80)
629
- # print(f"Environment: {settings.ENVIRONMENT}")
630
- # print(f"Debug Mode: {settings.DEBUG}")
631
- # print(f"Database: {settings.DATABASE_NAME}")
632
- # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
633
- # print()
634
- # print("🔑 API Keys:")
635
- # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
636
- # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
637
- # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
638
- # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
639
- # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
640
- # print("=" * 80)
1
  """
2
  Application Configuration
3
  Settings for Banking RAG Chatbot with JWT Authentication
4
+ Updated to support multiple Groq API keys and HuggingFace tokens with fallback logic
5
  """
6
 
7
  import os
 
10
 
11
  load_dotenv()
12
 
 
13
  class Settings:
14
  """Application settings loaded from environment variables"""
15
 
 
38
  ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
39
 
40
  # ========================================================================
41
+ # GROQ API KEYS (Multiple for fallback)
42
  # ========================================================================
43
+ GROQ_API_KEY_1: str = os.getenv("GROQ_API_KEY_1", "") # Primary
44
+ GROQ_API_KEY_2: str = os.getenv("GROQ_API_KEY_2", "") # Fallback 1
45
+ GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "") # Fallback 2
46
+
47
+ # Model names for Groq (using correct GroqCloud naming)
48
+ GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama3-8b-8192") # For chat interface
49
+ GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama3-70b-8192") # For evaluation
50
 
51
  # ========================================================================
52
+ # RATE LIMITING (commented out for now; re-enable if needed)
53
  # ========================================================================
54
+
55
+ # GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
 
56
 
57
  # ========================================================================
58
+ # HUGGING FACE TOKENS (Multiple for fallback)
59
  # ========================================================================
60
+ HF_TOKEN_1: str = os.getenv("HF_TOKEN_1", "") # Primary
61
+ HF_TOKEN_2: str = os.getenv("HF_TOKEN_2", "") # Fallback 1
62
+ HF_TOKEN_3: str = os.getenv("HF_TOKEN_3", "") # Fallback 2
63
+
64
+ # HuggingFace model for inference (fallback from Groq)
65
+ HF_CHAT_MODEL: str = os.getenv("HF_CHAT_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
66
+ HF_EVAL_MODEL: str = os.getenv("HF_EVAL_MODEL", "meta-llama/Meta-Llama-3-70B-Instruct")
67
 
68
  # ========================================================================
69
  # MODEL PATHS (for RL Policy Network and RAG models)
 
82
  # LLM PARAMETERS
83
  # ========================================================================
84
  LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
85
+ LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024"))
 
86
 
87
  # ========================================================================
88
  # RAG PARAMETERS
 
98
  CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
99
 
100
  # ========================================================================
101
+ # HELPER METHODS
102
+ # ========================================================================
103
+ def get_groq_api_keys(self) -> List[str]:
104
+ """Get all configured Groq API keys in priority order"""
105
+ keys = []
106
+ if self.GROQ_API_KEY_1:
107
+ keys.append(self.GROQ_API_KEY_1)
108
+ if self.GROQ_API_KEY_2:
109
+ keys.append(self.GROQ_API_KEY_2)
110
+ if self.GROQ_API_KEY_3:
111
+ keys.append(self.GROQ_API_KEY_3)
112
+ return keys
113
+
114
+ def get_hf_tokens(self) -> List[str]:
115
+ """Get all configured HuggingFace tokens in priority order"""
116
+ tokens = []
117
+ if self.HF_TOKEN_1:
118
+ tokens.append(self.HF_TOKEN_1)
119
+ if self.HF_TOKEN_2:
120
+ tokens.append(self.HF_TOKEN_2)
121
+ if self.HF_TOKEN_3:
122
+ tokens.append(self.HF_TOKEN_3)
123
+ return tokens
124
 
125
  def is_groq_enabled(self) -> bool:
126
+ """Check if at least one Groq API key is configured"""
127
+ return bool(self.get_groq_api_keys())
128
 
129
  def is_hf_enabled(self) -> bool:
130
+ """Check if at least one HuggingFace token is configured"""
131
+ return bool(self.get_hf_tokens())
132
 
133
  def get_allowed_origins(self) -> List[str]:
134
  """Parse allowed origins from comma-separated string"""
 
136
  return ["*"]
137
  return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
138
 
139
+ def get_llm_for_task(self, task: str = "chat") -> str:
140
  """
141
+ Get LLM model name for a specific task.
142
 
143
  Args:
144
+ task: Task type ('chat' or 'evaluation')
145
 
146
  Returns:
147
+ str: Model name for the task
148
  """
 
149
  if task == "evaluation":
150
+ return self.GROQ_EVAL_MODEL # llama3-70b-8192
151
  else:
152
+ return self.GROQ_CHAT_MODEL # llama3-8b-8192
 
153
 
154
  # ============================================================================
155
  # CREATE GLOBAL SETTINGS INSTANCE
156
  # ============================================================================
157
  settings = Settings()
158
 
 
159
  # ============================================================================
160
  # PRINT CONFIGURATION ON LOAD
161
  # ============================================================================
 
169
  print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
170
  print()
171
  print("🔑 API Keys:")
172
+ groq_keys = settings.get_groq_api_keys()
173
+ print(f" Groq Keys: {len(groq_keys)} configured")
174
+ for i, key in enumerate(groq_keys, 1):
175
+ print(f" - Key {i}: {'✅ Set' if key else '❌ Missing'}")
176
+ hf_tokens = settings.get_hf_tokens()
177
+ print(f" HuggingFace Tokens: {len(hf_tokens)} configured")
178
+ for i, token in enumerate(hf_tokens, 1):
179
+ print(f" - Token {i}: {'✅ Set' if token else '❌ Missing'}")
180
  print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
181
+ print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
182
+ print()
183
+ print("🤖 LLM Models:")
184
+ print(f" Chat Model: {settings.GROQ_CHAT_MODEL} (Llama 3 8B)")
185
+ print(f" Eval Model: {settings.GROQ_EVAL_MODEL} (Llama 3 70B)")
186
  print()
187
  print("🤖 Model Paths:")
188
  print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
 
190
  print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
191
  print(f" Knowledge Base: {settings.KB_PATH}")
192
  print("=" * 80)
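For reference, a minimal sketch of how the new multi-key settings above are meant to be consumed, assuming GROQ_API_KEY_1..3 and HF_TOKEN_1..3 are set in .env (any unset slot is simply skipped):

    from app.config import settings

    # Keys and tokens come back in priority order (primary first, fallbacks after)
    groq_keys = settings.get_groq_api_keys()
    hf_tokens = settings.get_hf_tokens()

    print(settings.is_groq_enabled())               # True if at least one Groq key is set
    print(settings.is_hf_enabled())                 # True if at least one HF token is set
    print(settings.get_llm_for_task("chat"))        # GROQ_CHAT_MODEL, e.g. llama3-8b-8192
    print(settings.get_llm_for_task("evaluation"))  # GROQ_EVAL_MODEL, e.g. llama3-70b-8192

The updated llm_manager.py below consumes these same helpers to cycle through keys and tokens.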
app/core/llm_manager.py CHANGED
@@ -1,258 +1,278 @@
1
  """
2
- Multi-LLM Manager for Google Gemini, Groq, and HuggingFace
3
- All three APIs co-exist for different purposes (no fallback logic)
4
 
5
  Architecture:
6
- - Google Gemini (Primary): User-facing chat responses (best quality)
7
- - Groq (Secondary): Fast inference for evaluation and specific tasks
8
- - HuggingFace: Model downloads and embeddings (always required)
 
9
 
10
- Each API has its designated purpose based on config settings.
 
11
  """
12
 
13
  import time
14
- import google.generativeai as genai
15
  from typing import List, Dict, Optional, Literal
16
  from langchain_groq import ChatGroq
17
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
18
-
19
  from app.config import settings
20
 
21
-
22
  # ============================================================================
23
- # GOOGLE GEMINI MANAGER
24
  # ============================================================================
25
-
26
- class GeminiManager:
27
  """
28
- Google Gemini API Manager (Primary LLM)
29
- Handles Google Pro account with gemini-2.0-flash-lite model
30
  """
31
 
32
  def __init__(self):
33
- """Initialize Gemini API with your Google API key"""
34
- self.api_key = settings.GOOGLE_API_KEY
35
- self.model_name = settings.GEMINI_MODEL
36
-
37
- # Configure Gemini
38
- genai.configure(api_key=self.api_key)
39
-
40
- # Create model instance with safety settings
41
- self.model = genai.GenerativeModel(
42
- model_name=self.model_name,
43
- generation_config={
44
- "temperature": settings.LLM_TEMPERATURE,
45
- "max_output_tokens": settings.LLM_MAX_TOKENS,
46
- }
47
- )
48
 
49
  # Rate limiting tracking
50
  self.requests_this_minute = 0
51
- self.tokens_this_minute = 0
52
  self.last_reset = time.time()
53
 
54
- print(f"✅ Gemini Manager initialized: {self.model_name}")
55
 
56
  def _check_rate_limits(self):
57
  """
58
  Check and reset rate limit counters.
59
- Gemini Pro: 60 requests/min, 60,000 tokens/min
60
  """
61
  current_time = time.time()
62
 
63
  # Reset counters every minute
64
  if current_time - self.last_reset > 60:
65
  self.requests_this_minute = 0
66
- self.tokens_this_minute = 0
67
  self.last_reset = current_time
68
 
69
  # Check if limits exceeded
70
- if self.requests_this_minute >= settings.GEMINI_REQUESTS_PER_MINUTE:
71
- wait_time = 60 - (current_time - self.last_reset)
72
- print(f"⚠️ Gemini rate limit hit. Waiting {wait_time:.1f}s...")
73
- time.sleep(wait_time)
74
- self._check_rate_limits() # Recursive check after waiting
75
 
76
  async def generate(
77
  self,
78
  messages: List[Dict[str, str]],
79
- system_prompt: Optional[str] = None
 
80
  ) -> str:
81
  """
82
- Generate response using Gemini.
83
 
84
  Args:
85
  messages: List of conversation messages
86
- Format: [{'role': 'user'/'assistant', 'content': '...'}]
87
- system_prompt: Optional system prompt (prepended to first message)
88
 
89
  Returns:
90
  str: Generated response text
 
 
 
91
  """
92
  self._check_rate_limits()
93
 
94
- try:
95
- # Format messages for Gemini
96
- # Gemini uses 'user' and 'model' roles
97
- formatted_messages = []
98
-
99
- # Add system prompt as first user message if provided
100
- if system_prompt:
101
- formatted_messages.append({
102
- 'role': 'user',
103
- 'parts': [system_prompt]
104
- })
105
-
106
- # Convert messages
107
- for msg in messages:
108
- role = 'model' if msg['role'] == 'assistant' else 'user'
109
- formatted_messages.append({
110
- 'role': role,
111
- 'parts': [msg['content']]
112
- })
113
-
114
- # Generate response
115
- chat = self.model.start_chat(history=formatted_messages[:-1])
116
- response = chat.send_message(formatted_messages[-1]['parts'][0])
117
-
118
- # Track rate limits
119
- self.requests_this_minute += 1
120
- # Note: Token counting would require additional API call
121
- # For now, estimate ~4 chars per token
122
- estimated_tokens = len(response.text) // 4
123
- self.tokens_this_minute += estimated_tokens
124
-
125
- return response.text
126
-
127
- except Exception as e:
128
- print(f"❌ Gemini API error: {e}")
129
- raise
130
-
131
 
132
  # ============================================================================
133
- # GROQ MANAGER
134
  # ============================================================================
135
-
136
- class GroqManager:
137
  """
138
- Groq API Manager (Secondary LLM)
139
- Handles fast inference with Llama-3-70B
140
  """
141
 
142
  def __init__(self):
143
- """Initialize Groq API with single API key"""
144
- self.api_key = settings.GROQ_API_KEY
145
- self.model_name = settings.GROQ_MODEL
146
-
147
- # Create ChatGroq instance
148
- self.llm = ChatGroq(
149
- api_key=self.api_key,
150
- model_name=self.model_name,
151
- temperature=settings.LLM_TEMPERATURE,
152
- max_tokens=settings.LLM_MAX_TOKENS
153
- )
154
 
155
- # Rate limiting tracking
156
- self.requests_this_minute = 0
157
- self.tokens_this_minute = 0
158
- self.last_reset = time.time()
159
 
160
- print(f"✅ Groq Manager initialized: {self.model_name}")
 
 
161
 
162
- def _check_rate_limits(self):
163
- """
164
- Check and reset rate limit counters.
165
- Groq Free: 30 requests/min, 30,000 tokens/min
166
- """
167
- current_time = time.time()
168
-
169
- # Reset counters every minute
170
- if current_time - self.last_reset > 60:
171
- self.requests_this_minute = 0
172
- self.tokens_this_minute = 0
173
- self.last_reset = current_time
174
-
175
- # Check if limits exceeded
176
- if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
177
- wait_time = 60 - (current_time - self.last_reset)
178
- print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
179
- time.sleep(wait_time)
180
- self._check_rate_limits()
181
 
182
  async def generate(
183
  self,
184
  messages: List[Dict[str, str]],
185
- system_prompt: Optional[str] = None
 
186
  ) -> str:
187
  """
188
- Generate response using Groq.
189
 
190
  Args:
191
  messages: List of conversation messages
192
- Format: [{'role': 'user'/'assistant', 'content': '...'}]
193
  system_prompt: Optional system prompt
 
194
 
195
  Returns:
196
  str: Generated response text
197
- """
198
- self._check_rate_limits()
199
 
200
- try:
201
- # Format messages for LangChain
202
- formatted_messages = []
203
-
204
- # Add system message if provided
205
- if system_prompt:
206
- formatted_messages.append(SystemMessage(content=system_prompt))
207
-
208
- # Convert conversation messages
209
- for msg in messages:
210
- if msg['role'] == 'user':
211
- formatted_messages.append(HumanMessage(content=msg['content']))
212
- elif msg['role'] == 'assistant':
213
- formatted_messages.append(AIMessage(content=msg['content']))
214
-
215
- # Generate response
216
- response = await self.llm.ainvoke(formatted_messages)
217
-
218
- # Track rate limits
219
- self.requests_this_minute += 1
220
- # Estimate tokens (rough approximation)
221
- estimated_tokens = len(response.content) // 4
222
- self.tokens_this_minute += estimated_tokens
223
-
224
- return response.content
225
-
226
- except Exception as e:
227
- print(f"❌ Groq API error: {e}")
228
- raise
229
-
230
 
231
  # ============================================================================
232
- # UNIFIED LLM MANAGER (Routes to appropriate LLM)
233
  # ============================================================================
234
-
235
  class LLMManager:
236
  """
237
- Unified LLM Manager that routes requests to appropriate LLM.
 
 
238
 
239
- Routing strategy (from config):
240
- Chat responses → Gemini (best quality for users)
241
- Evaluation → Groq (fast, good enough for RL)
242
- - Policy → Local BERT (no API call)
243
  """
244
 
245
  def __init__(self):
246
  """Initialize all LLM managers"""
247
- self.gemini = None
248
  self.groq = None
249
-
250
- # Initialize Gemini if configured
251
- if settings.is_gemini_enabled():
252
- try:
253
- self.gemini = GeminiManager()
254
- except Exception as e:
255
- print(f"⚠️ Failed to initialize Gemini: {e}")
256
 
257
  # Initialize Groq if configured
258
  if settings.is_groq_enabled():
@@ -261,7 +281,18 @@ class LLMManager:
261
  except Exception as e:
262
  print(f"⚠️ Failed to initialize Groq: {e}")
263
 
264
- print("✅ LLM Manager initialized")
 
265
 
266
  async def generate(
267
  self,
@@ -270,62 +301,48 @@ class LLMManager:
270
  task: Literal["chat", "evaluation"] = "chat"
271
  ) -> str:
272
  """
273
- Generate response using appropriate LLM based on task.
274
 
275
  Args:
276
  messages: Conversation messages
277
  system_prompt: Optional system prompt
278
- task: Task type - "chat" (user-facing) or "evaluation" (RL training)
279
 
280
  Returns:
281
  str: Generated response
282
 
283
  Raises:
284
- ValueError: If appropriate LLM is not configured
285
  """
286
- # Determine which LLM to use based on task
287
- llm_choice = settings.get_llm_for_task(task)
288
-
289
- if llm_choice == "gemini":
290
- if self.gemini is None:
291
- raise ValueError("Gemini API not configured. Set GOOGLE_API_KEY in .env")
292
- return await self.gemini.generate(messages, system_prompt)
293
-
294
- elif llm_choice == "groq":
295
- if self.groq is None:
296
- raise ValueError("Groq API not configured. Set GROQ_API_KEY in .env")
297
- return await self.groq.generate(messages, system_prompt)
298
 
299
- else:
300
- raise ValueError(f"Unknown LLM choice: {llm_choice}")
301
-
302
- # async def generate_chat_response(
303
- # self,
304
- # query: str,
305
- # context: str,
306
- # history: List[Dict[str, str]]
307
- # ) -> str:
308
- # """
309
- # Generate chat response (uses Gemini by default).
310
-
311
- # Args:
312
- # query: User query
313
- # context: Retrieved context (from FAISS)
314
- # history: Conversation history
315
-
316
- # Returns:
317
- # str: Chat response
318
- # """
319
- # # Build system prompt
320
- # system_prompt = settings.SYSTEM_PROMPT
321
- # if context:
322
- # system_prompt += f"\n\nRelevant Information:\n{context}"
323
-
324
- # # Build messages
325
- # messages = history + [{'role': 'user', 'content': query}]
326
-
327
- # # Generate using chat LLM (Gemini)
328
- # return await self.generate(messages, system_prompt, task="chat")
329
 
330
  async def generate_chat_response(
331
  self,
@@ -333,28 +350,32 @@ class LLMManager:
333
  context: str,
334
  history: List[Dict[str, str]]
335
  ) -> str:
336
- """Generate chat response (uses Gemini by default)."""
337
-
338
  # Import the detailed prompt
339
  from app.services.chat_service import BANKING_SYSTEM_PROMPT
340
-
341
  # Build enhanced system prompt with context
342
  system_prompt = BANKING_SYSTEM_PROMPT
343
-
344
  if context:
345
  system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
346
  else:
347
  system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."
348
-
349
  # Build messages
350
  messages = history + [{'role': 'user', 'content': query}]
351
-
352
- # Generate using chat LLM (Gemini)
353
  return await self.generate(messages, system_prompt, task="chat")
354
-
355
-
356
-
357
-
358
 
359
  async def evaluate_response(
360
  self,
@@ -363,7 +384,7 @@ class LLMManager:
363
  context: str = ""
364
  ) -> Dict:
365
  """
366
- Evaluate response quality (uses Groq for speed).
367
  Used during RL training.
368
 
369
  Args:
@@ -373,9 +394,10 @@ class LLMManager:
373
 
374
  Returns:
375
  dict: Evaluation results
376
- {'quality': 'Good'/'Bad', 'explanation': '...'}
377
  """
378
  eval_prompt = f"""Evaluate this response:
 
379
  Query: {query}
380
  Response: {response}
381
  Context used: {context if context else 'None'}
@@ -384,7 +406,7 @@ Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explan
384
 
385
  messages = [{'role': 'user', 'content': eval_prompt}]
386
 
387
- # Generate using evaluation LLM (Groq)
388
  result = await self.generate(messages, task="evaluation")
389
 
390
  # Parse result
@@ -395,32 +417,29 @@ Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explan
395
  'explanation': result
396
  }
397
 
398
-
399
  # ============================================================================
400
  # GLOBAL LLM MANAGER INSTANCE
401
  # ============================================================================
402
  llm_manager = LLMManager()
403
 
404
-
405
  # ============================================================================
406
  # USAGE EXAMPLE (for reference)
407
  # ============================================================================
408
  """
409
  # In your service file:
410
-
411
  from app.core.llm_manager import llm_manager
412
 
413
- # Generate chat response (uses Gemini)
414
  response = await llm_manager.generate_chat_response(
415
  query="What is my account balance?",
416
  context="Your balance is $1000",
417
  history=[]
418
  )
419
 
420
- # Evaluate response (uses Groq)
421
  evaluation = await llm_manager.evaluate_response(
422
  query="What is my balance?",
423
  response="Your balance is $1000",
424
  context="Balance: $1000"
425
  )
426
- """
 
1
  """
2
+ Multi-LLM Manager with Groq (ChatGroq) and HuggingFace Fallback Logic
 
3
 
4
  Architecture:
5
+ - Primary: Groq API with 3 keys (sequential fallback)
6
+ - Fallback: HuggingFace Inference API with 3 tokens (sequential fallback)
7
+ - Llama 3 8B for chat interface
8
+ - Llama 3 70B for evaluation
9
 
10
+ Fallback Logic:
11
+ 1. Try GROQ_API_KEY_1
12
+ 2. If fails, try GROQ_API_KEY_2
13
+ 3. If fails, try GROQ_API_KEY_3
14
+ 4. If all Groq keys fail, try HF_TOKEN_1
15
+ 5. If fails, try HF_TOKEN_2
16
+ 6. If fails, try HF_TOKEN_3
17
  """
18
 
19
  import time
 
20
  from typing import List, Dict, Optional, Literal
21
  from langchain_groq import ChatGroq
22
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
23
+ from huggingface_hub import InferenceClient
24
  from app.config import settings
25
 
 
26
  # ============================================================================
27
+ # GROQ MANAGER WITH FALLBACK
28
  # ============================================================================
29
+ class GroqManager:
 
30
  """
31
+ Groq API Manager with multiple API key fallback support
32
+ Uses ChatGroq from langchain_groq
33
  """
34
 
35
  def __init__(self):
36
+ """Initialize Groq manager with all available API keys"""
37
+ self.api_keys = settings.get_groq_api_keys()
38
+ self.chat_model_name = settings.GROQ_CHAT_MODEL # llama3-8b-8192
39
+ self.eval_model_name = settings.GROQ_EVAL_MODEL # llama3-70b-8192
40
+
41
+ # Track current key index
42
+ self.current_key_index = 0
43
 
44
  # Rate limiting tracking
45
  self.requests_this_minute = 0
 
46
  self.last_reset = time.time()
47
 
48
+ if not self.api_keys:
49
+ raise ValueError("No Groq API keys configured. Set GROQ_API_KEY_1 in .env")
50
+
51
+ print(f"✅ Groq Manager initialized with {len(self.api_keys)} API key(s)")
52
+ print(f" Chat Model: {self.chat_model_name}")
53
+ print(f" Eval Model: {self.eval_model_name}")
54
 
55
  def _check_rate_limits(self):
56
  """
57
  Check and reset rate limit counters.
58
+ Groq Free: 30 requests/min
59
  """
60
  current_time = time.time()
61
 
62
  # Reset counters every minute
63
  if current_time - self.last_reset > 60:
64
  self.requests_this_minute = 0
 
65
  self.last_reset = current_time
66
 
67
  # Check if limits exceeded
68
+ # =================================================================
69
+ # Uncomment below if rate limiting enforcement is needed
70
+ # =================================================================
71
+
72
+ # if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
73
+ # wait_time = 60 - (current_time - self.last_reset)
74
+ # print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
75
+ # time.sleep(wait_time)
76
+ # self._check_rate_limits()
77
+
78
+ def _create_llm(self, api_key: str, model_name: str) -> ChatGroq:
79
+ """Create ChatGroq instance with given API key and model"""
80
+ return ChatGroq(
81
+ api_key=api_key,
82
+ model_name=model_name,
83
+ temperature=settings.LLM_TEMPERATURE,
84
+ max_tokens=settings.LLM_MAX_TOKENS,
85
+ max_retries=0 # Disable automatic retries, we handle fallback manually
86
+ )
87
 
88
  async def generate(
89
  self,
90
  messages: List[Dict[str, str]],
91
+ system_prompt: Optional[str] = None,
92
+ task: Literal["chat", "evaluation"] = "chat"
93
  ) -> str:
94
  """
95
+ Generate response using Groq with fallback logic.
96
 
97
  Args:
98
  messages: List of conversation messages
99
+ system_prompt: Optional system prompt
100
+ task: Task type to determine model (chat uses 8B, evaluation uses 70B)
101
 
102
  Returns:
103
  str: Generated response text
104
+
105
+ Raises:
106
+ Exception: If all Groq API keys fail
107
  """
108
  self._check_rate_limits()
109
 
110
+ # Select model based on task
111
+ model_name = self.eval_model_name if task == "evaluation" else self.chat_model_name
112
+
113
+ # Format messages for LangChain
114
+ formatted_messages = []
115
+
116
+ # Add system message if provided
117
+ if system_prompt:
118
+ formatted_messages.append(SystemMessage(content=system_prompt))
119
+
120
+ # Convert conversation messages
121
+ for msg in messages:
122
+ if msg['role'] == 'user':
123
+ formatted_messages.append(HumanMessage(content=msg['content']))
124
+ elif msg['role'] == 'assistant':
125
+ formatted_messages.append(AIMessage(content=msg['content']))
126
+
127
+ # Try each Groq API key sequentially
128
+ for key_index, api_key in enumerate(self.api_keys, 1):
129
+ try:
130
+ print(f"🔑 Trying Groq API Key {key_index}/{len(self.api_keys)} with {model_name}...")
131
+
132
+ # Create LLM instance with current key
133
+ llm = self._create_llm(api_key, model_name)
134
+
135
+ # Generate response
136
+ response = await llm.ainvoke(formatted_messages)
137
+
138
+ # Track rate limits
139
+ self.requests_this_minute += 1
140
+
141
+ print(f"✅ Groq API Key {key_index} succeeded")
142
+ return response.content
143
+
144
+ except Exception as e:
145
+ print(f"❌ Groq API Key {key_index} failed: {e}")
146
+
147
+ # If this was the last key, raise exception
148
+ if key_index == len(self.api_keys):
149
+ print(f"❌ All {len(self.api_keys)} Groq API keys exhausted")
150
+ raise Exception(f"All Groq API keys failed. Last error: {e}")
151
+
152
+ # Otherwise, continue to next key
153
+ print(f"⏭️ Falling back to next Groq API key...")
154
+ continue
155
 
156
  # ============================================================================
157
+ # HUGGINGFACE MANAGER WITH FALLBACK
158
  # ============================================================================
159
+ class HuggingFaceManager:
 
160
  """
161
+ HuggingFace Inference API Manager with multiple token fallback support
162
+ Uses InferenceClient from huggingface_hub
163
  """
164
 
165
  def __init__(self):
166
+ """Initialize HuggingFace manager with all available tokens"""
167
+ self.tokens = settings.get_hf_tokens()
168
+ self.chat_model_name = settings.HF_CHAT_MODEL
169
+ self.eval_model_name = settings.HF_EVAL_MODEL
170
 
171
+ if not self.tokens:
172
+ raise ValueError("No HuggingFace tokens configured. Set HF_TOKEN_1 in .env")
 
 
173
 
174
+ print(f"✅ HuggingFace Manager initialized with {len(self.tokens)} token(s)")
175
+ print(f" Chat Model: {self.chat_model_name}")
176
+ print(f" Eval Model: {self.eval_model_name}")
177
 
178
+ def _create_client(self, token: str, model_name: str) -> InferenceClient:
179
+ """Create InferenceClient instance with given token and model"""
180
+ return InferenceClient(
181
+ model=model_name,
182
+ token=token
183
+ )
184
 
185
  async def generate(
186
  self,
187
  messages: List[Dict[str, str]],
188
+ system_prompt: Optional[str] = None,
189
+ task: Literal["chat", "evaluation"] = "chat"
190
  ) -> str:
191
  """
192
+ Generate response using HuggingFace Inference API with fallback logic.
193
 
194
  Args:
195
  messages: List of conversation messages
 
196
  system_prompt: Optional system prompt
197
+ task: Task type to determine model
198
 
199
  Returns:
200
  str: Generated response text
 
 
201
 
202
+ Raises:
203
+ Exception: If all HuggingFace tokens fail
204
+ """
205
+ # Select model based on task
206
+ model_name = self.eval_model_name if task == "evaluation" else self.chat_model_name
207
+
208
+ # Format messages for HuggingFace chat API
209
+ formatted_messages = []
210
+
211
+ # Add system message if provided
212
+ if system_prompt:
213
+ formatted_messages.append({
214
+ "role": "system",
215
+ "content": system_prompt
216
+ })
217
+
218
+ # Convert conversation messages
219
+ for msg in messages:
220
+ formatted_messages.append({
221
+ "role": msg['role'],
222
+ "content": msg['content']
223
+ })
224
+
225
+ # Try each HuggingFace token sequentially
226
+ for token_index, token in enumerate(self.tokens, 1):
227
+ try:
228
+ print(f"🔑 Trying HuggingFace Token {token_index}/{len(self.tokens)} with {model_name}...")
229
+
230
+ # Create client with current token
231
+ client = self._create_client(token, model_name)
232
+
233
+ # Generate response using chat completion
234
+ response = client.chat_completion(
235
+ messages=formatted_messages,
236
+ max_tokens=settings.LLM_MAX_TOKENS,
237
+ temperature=settings.LLM_TEMPERATURE
238
+ )
239
+
240
+ # Extract content from response
241
+ content = response.choices[0].message.content
242
+
243
+ print(f"✅ HuggingFace Token {token_index} succeeded")
244
+ return content
245
+
246
+ except Exception as e:
247
+ print(f"❌ HuggingFace Token {token_index} failed: {e}")
248
+
249
+ # If this was the last token, raise exception
250
+ if token_index == len(self.tokens):
251
+ print(f"❌ All {len(self.tokens)} HuggingFace tokens exhausted")
252
+ raise Exception(f"All HuggingFace tokens failed. Last error: {e}")
253
+
254
+ # Otherwise, continue to next token
255
+ print(f"⏭️ Falling back to next HuggingFace token...")
256
+ continue
257
 
258
  # ============================================================================
259
+ # UNIFIED LLM MANAGER (Groq Primary, HuggingFace Fallback)
260
  # ============================================================================
 
261
  class LLMManager:
262
  """
263
+ Unified LLM Manager with cascading fallback logic:
264
+ 1. Try all Groq API keys (primary)
265
+ 2. If all fail, try all HuggingFace tokens (fallback)
266
 
267
+ Models:
268
+ - Chat: Llama 3 8B (for user-facing chat responses)
269
+ - Evaluation: Llama 3 70B (for response evaluation)
 
270
  """
271
 
272
  def __init__(self):
273
  """Initialize all LLM managers"""
 
274
  self.groq = None
275
+ self.huggingface = None
276
 
277
  # Initialize Groq if configured
278
  if settings.is_groq_enabled():
 
281
  except Exception as e:
282
  print(f"⚠️ Failed to initialize Groq: {e}")
283
 
284
+ # Initialize HuggingFace if configured
285
+ if settings.is_hf_enabled():
286
+ try:
287
+ self.huggingface = HuggingFaceManager()
288
+ except Exception as e:
289
+ print(f"⚠️ Failed to initialize HuggingFace: {e}")
290
+
291
+ # Check if at least one is available
292
+ if not self.groq and not self.huggingface:
293
+ raise ValueError("No LLM provider configured. Set either Groq or HuggingFace credentials in .env")
294
+
295
+ print("✅ LLM Manager initialized with fallback logic")
296
 
297
  async def generate(
298
  self,
 
301
  task: Literal["chat", "evaluation"] = "chat"
302
  ) -> str:
303
  """
304
+ Generate response with cascading fallback logic.
305
+
306
+ Fallback order:
307
+ 1. Try every configured Groq API key
308
+ 2. If all Groq keys fail, try every configured HuggingFace token
309
 
310
  Args:
311
  messages: Conversation messages
312
  system_prompt: Optional system prompt
313
+ task: Task type - "chat" (8B) or "evaluation" (70B)
314
 
315
  Returns:
316
  str: Generated response
317
 
318
  Raises:
319
+ ValueError: If all providers fail
320
  """
321
+ # Try Groq first (if available)
322
+ if self.groq:
323
+ try:
324
+ print("🚀 Attempting Groq API (Primary)...")
325
+ response = await self.groq.generate(messages, system_prompt, task)
326
+ return response
327
+ except Exception as groq_error:
328
+ print(f"❌ All Groq API keys failed: {groq_error}")
329
+
330
+ # Fall back to HuggingFace if available
331
+ if self.huggingface:
332
+ print("🔄 Falling back to HuggingFace Inference API...")
333
+ else:
334
+ raise ValueError(f"Groq failed and no HuggingFace fallback configured: {groq_error}")
335
+
336
+ # Try HuggingFace (if Groq failed or not available)
337
+ if self.huggingface:
338
+ try:
339
+ print("🚀 Attempting HuggingFace API (Fallback)...")
340
+ response = await self.huggingface.generate(messages, system_prompt, task)
341
+ return response
342
+ except Exception as hf_error:
343
+ raise ValueError(f"All LLM providers exhausted. HuggingFace error: {hf_error}")
344
 
345
+ raise ValueError("No LLM provider available")
346
 
347
  async def generate_chat_response(
348
  self,
 
350
  context: str,
351
  history: List[Dict[str, str]]
352
  ) -> str:
353
+ """
354
+ Generate chat response (uses Llama 3 8B).
355
+
356
+ Args:
357
+ query: User query
358
+ context: Retrieved context (from FAISS)
359
+ history: Conversation history
360
+
361
+ Returns:
362
+ str: Chat response
363
+ """
364
  # Import the detailed prompt
365
  from app.services.chat_service import BANKING_SYSTEM_PROMPT
366
+
367
  # Build enhanced system prompt with context
368
  system_prompt = BANKING_SYSTEM_PROMPT
 
369
  if context:
370
  system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
371
  else:
372
  system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."
373
+
374
  # Build messages
375
  messages = history + [{'role': 'user', 'content': query}]
376
+
377
+ # Generate using chat task (Llama 3 8B)
378
  return await self.generate(messages, system_prompt, task="chat")
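The assembly above produces an OpenAI-style message list; the system prompt (base banking instructions plus any retrieved context) travels separately and is prepended by the provider managers. An illustrative shape, with made-up values:

history = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi! How can I help?"},
]
messages = history + [{"role": "user", "content": "What documents do I need for KYC?"}]
# generate() then receives (messages, system_prompt, task="chat")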
379
 
380
  async def evaluate_response(
381
  self,
 
384
  context: str = ""
385
  ) -> Dict:
386
  """
387
+ Evaluate response quality (uses Llama 3 70B for better evaluation).
388
  Used during RL training.
389
 
390
  Args:
 
394
 
395
  Returns:
396
  dict: Evaluation results
397
+ {'quality': 'Good'/'Bad', 'explanation': '...'}
398
  """
399
  eval_prompt = f"""Evaluate this response:
400
+
401
  Query: {query}
402
  Response: {response}
403
  Context used: {context if context else 'None'}
 
406
 
407
  messages = [{'role': 'user', 'content': eval_prompt}]
408
 
409
+ # Generate using evaluation task (Llama 3 70B)
410
  result = await self.generate(messages, task="evaluation")
411
 
412
  # Parse result
 
417
  'explanation': result
418
  }
419
 
 
420
  # ============================================================================
421
  # GLOBAL LLM MANAGER INSTANCE
422
  # ============================================================================
423
  llm_manager = LLMManager()
424
 
 
425
  # ============================================================================
426
  # USAGE EXAMPLE (for reference)
427
  # ============================================================================
428
  """
429
  # In your service file:
 
430
  from app.core.llm_manager import llm_manager
431
 
432
+ # Generate chat response (uses Llama 3 8B with Groq → HF fallback)
433
  response = await llm_manager.generate_chat_response(
434
  query="What is my account balance?",
435
  context="Your balance is $1000",
436
  history=[]
437
  )
438
 
439
+ # Evaluate response (uses Llama 3 70B with Groq → HF fallback)
440
  evaluation = await llm_manager.evaluate_response(
441
  query="What is my balance?",
442
  response="Your balance is $1000",
443
  context="Balance: $1000"
444
  )
445
+ """
app/main.py CHANGED
@@ -1,10 +1,11 @@
1
  """
2
  FastAPI Main Application Entry Point
 
3
  Banking RAG Chatbot API with JWT Authentication
4
 
5
  This file:
6
  1. Creates the FastAPI app
7
- 2. Configures CORS middleware
8
  3. Connects to MongoDB on startup/shutdown
9
  4. Includes API routers (auth + chat)
10
  5. Provides health check endpoints
@@ -18,7 +19,6 @@ from contextlib import asynccontextmanager
18
  from app.config import settings
19
  from app.db.mongodb import connect_to_mongo, close_mongo_connection
20
 
21
-
22
  # ============================================================================
23
  # LIFESPAN MANAGER (Startup & Shutdown)
24
  # ============================================================================
@@ -52,7 +52,13 @@ async def lifespan(app: FastAPI):
52
  print("\n💡 ML Models Info:")
53
  print(" Policy Network: Loads on first chat request (lazy loading)")
54
  print(" Retriever Model: Loads on first retrieval (lazy loading)")
55
- print(" LLM (Gemini): Connects on first generation")
 
 
 
 
 
 
56
 
57
  print("\n✅ Backend startup complete!")
58
  print("=" * 80)
@@ -77,7 +83,6 @@ async def lifespan(app: FastAPI):
77
  print("✅ Shutdown complete")
78
  print("=" * 80 + "\n")
79
 
80
-
81
  # ============================================================================
82
  # CREATE FASTAPI APPLICATION
83
  # ============================================================================
@@ -85,21 +90,22 @@ async def lifespan(app: FastAPI):
85
  app = FastAPI(
86
  title="Banking RAG Chatbot API",
87
  description="""
88
- 🤖 AI-powered Banking Assistant with:
89
-
90
- **Features:**
91
- - 🔐 JWT Authentication (Sign up, Login, Protected routes)
92
- - 💬 RAG (Retrieval-Augmented Generation)
93
- - 🧠 RL-based Policy Network (BERT)
94
- - 🔍 Custom E5 Retriever
95
- - Google Gemini LLM
96
-
97
- **Capabilities:**
98
- - Intelligent document retrieval
99
- - Context-aware responses
100
- - Conversation history
101
- - Real-time chat
102
- - User authentication & authorization
 
103
  """,
104
  version="1.0.0",
105
  docs_url="/docs",
@@ -107,13 +113,11 @@ app = FastAPI(
107
  lifespan=lifespan
108
  )
109
 
110
-
111
  # ============================================================================
112
  # CORS MIDDLEWARE
113
  # ============================================================================
114
 
115
  allowed_origins = settings.get_allowed_origins()
116
-
117
  print("\n🌐 CORS Configuration:")
118
  print(f" Allowed Origins: {allowed_origins}")
119
 
@@ -125,7 +129,6 @@ app.add_middleware(
125
  allow_headers=["*"],
126
  )
127
 
128
-
129
  # ============================================================================
130
  # INCLUDE API ROUTERS
131
  # ============================================================================
@@ -146,7 +149,6 @@ app.include_router(
146
  tags=["💬 Chat"]
147
  )
148
 
149
-
150
  # ============================================================================
151
  # ROOT ENDPOINTS
152
  # ============================================================================
@@ -161,6 +163,11 @@ async def root():
161
  "version": "1.0.0",
162
  "status": "online",
163
  "authentication": "JWT Bearer Token Required for chat endpoints",
 
 
 
 
 
164
  "documentation": {
165
  "swagger_ui": "/docs",
166
  "redoc": "/redoc"
@@ -182,7 +189,6 @@ async def root():
182
  }
183
  }
184
 
185
-
186
  @app.get("/health", tags=["🏥 Health"])
187
  async def health_check():
188
  """
@@ -193,6 +199,7 @@ async def health_check():
193
  - MongoDB connection
194
  - ML models (lazy loaded)
195
  - Authentication system
 
196
 
197
  Returns:
198
  dict: Health status of all components
@@ -209,6 +216,22 @@ async def health_check():
209
  "llm": "ready (API-based)"
210
  }
211
212
  # Check authentication
213
  auth_status = {
214
  "jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
@@ -217,19 +240,23 @@ async def health_check():
217
  }
218
 
219
  # Overall health
220
- is_healthy = mongodb_status == "connected" and auth_status["jwt_enabled"]
221
 
222
  return {
223
  "status": "healthy" if is_healthy else "degraded",
224
  "api": "online",
225
  "mongodb": mongodb_status,
226
  "authentication": auth_status,
 
227
  "ml_models": ml_models_status,
228
  "environment": settings.ENVIRONMENT,
229
  "debug_mode": settings.DEBUG
230
  }
231
 
232
-
233
  # ============================================================================
234
  # GLOBAL EXCEPTION HANDLER
235
  # ============================================================================
@@ -256,7 +283,6 @@ async def global_exception_handler(request: Request, exc: Exception):
256
  }
257
  )
258
 
259
-
260
  # ============================================================================
261
  # MAIN ENTRY POINT (for direct execution)
262
  # ============================================================================
 
1
  """
2
  FastAPI Main Application Entry Point
3
+
4
  Banking RAG Chatbot API with JWT Authentication
5
 
6
  This file:
7
  1. Creates the FastAPI app
8
+ 2. Configures CORS middleware
9
  3. Connects to MongoDB on startup/shutdown
10
  4. Includes API routers (auth + chat)
11
  5. Provides health check endpoints
 
19
  from app.config import settings
20
  from app.db.mongodb import connect_to_mongo, close_mongo_connection
21
 
 
22
  # ============================================================================
23
  # LIFESPAN MANAGER (Startup & Shutdown)
24
  # ============================================================================
 
52
  print("\n💡 ML Models Info:")
53
  print(" Policy Network: Loads on first chat request (lazy loading)")
54
  print(" Retriever Model: Loads on first retrieval (lazy loading)")
55
+ print(" LLM: Groq (ChatGroq) with HuggingFace fallback")
56
+ print("\n🤖 LLM Configuration:")
57
+ print(f" Chat Model: {settings.GROQ_CHAT_MODEL} (Llama 3 8B)")
58
+ print(f" Eval Model: {settings.GROQ_EVAL_MODEL} (Llama 3 70B)")
59
+ print(f" Groq API Keys: {len(settings.get_groq_api_keys())} configured")
60
+ print(f" HuggingFace Tokens: {len(settings.get_hf_tokens())} configured")
61
+ print(f" Fallback: Groq → HuggingFace")
62
 
63
  print("\n✅ Backend startup complete!")
64
  print("=" * 80)
 
83
  print("✅ Shutdown complete")
84
  print("=" * 80 + "\n")
85
 
 
86
  # ============================================================================
87
  # CREATE FASTAPI APPLICATION
88
  # ============================================================================
 
90
  app = FastAPI(
91
  title="Banking RAG Chatbot API",
92
  description="""
93
+ 🤖 AI-powered Banking Assistant with:
94
+
95
+ **Features:**
96
+ - 🔐 JWT Authentication (Sign up, Login, Protected routes)
97
+ - 💬 RAG (Retrieval-Augmented Generation)
98
+ - 🧠 RL-based Policy Network (BERT)
99
+ - 🔍 Custom E5 Retriever
100
+ - Groq LLM with HuggingFace Fallback (Llama 3 models)
101
+
102
+ **Capabilities:**
103
+ - Intelligent document retrieval
104
+ - Context-aware responses
105
+ - Conversation history
106
+ - Real-time chat
107
+ - User authentication & authorization
108
+ - Multi-provider LLM with automatic fallback
109
  """,
110
  version="1.0.0",
111
  docs_url="/docs",
 
113
  lifespan=lifespan
114
  )
115
 
 
116
  # ============================================================================
117
  # CORS MIDDLEWARE
118
  # ============================================================================
119
 
120
  allowed_origins = settings.get_allowed_origins()
 
121
  print("\n🌐 CORS Configuration:")
122
  print(f" Allowed Origins: {allowed_origins}")
123
 
 
129
  allow_headers=["*"],
130
  )
131
 
 
132
  # ============================================================================
133
  # INCLUDE API ROUTERS
134
  # ============================================================================
 
149
  tags=["💬 Chat"]
150
  )
151
 
 
152
  # ============================================================================
153
  # ROOT ENDPOINTS
154
  # ============================================================================
 
163
  "version": "1.0.0",
164
  "status": "online",
165
  "authentication": "JWT Bearer Token Required for chat endpoints",
166
+ "llm_provider": "Groq (ChatGroq) with HuggingFace fallback",
167
+ "models": {
168
+ "chat": settings.GROQ_CHAT_MODEL,
169
+ "evaluation": settings.GROQ_EVAL_MODEL
170
+ },
171
  "documentation": {
172
  "swagger_ui": "/docs",
173
  "redoc": "/redoc"
 
189
  }
190
  }
191
 
 
192
  @app.get("/health", tags=["🏥 Health"])
193
  async def health_check():
194
  """
 
199
  - MongoDB connection
200
  - ML models (lazy loaded)
201
  - Authentication system
202
+ - LLM providers (Groq & HuggingFace)
203
 
204
  Returns:
205
  dict: Health status of all components
 
216
  "llm": "ready (API-based)"
217
  }
218
 
219
+ # Check LLM providers
220
+ llm_providers = {
221
+ "groq": {
222
+ "enabled": settings.is_groq_enabled(),
223
+ "api_keys_configured": len(settings.get_groq_api_keys()),
224
+ "chat_model": settings.GROQ_CHAT_MODEL,
225
+ "eval_model": settings.GROQ_EVAL_MODEL
226
+ },
227
+ "huggingface": {
228
+ "enabled": settings.is_hf_enabled(),
229
+ "tokens_configured": len(settings.get_hf_tokens()),
230
+ "chat_model": settings.HF_CHAT_MODEL,
231
+ "eval_model": settings.HF_EVAL_MODEL
232
+ }
233
+ }
234
+
235
  # Check authentication
236
  auth_status = {
237
  "jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
 
240
  }
241
 
242
  # Overall health
243
+ is_healthy = (
244
+ mongodb_status == "connected" and
245
+ auth_status["jwt_enabled"] and
246
+ (llm_providers["groq"]["enabled"] or llm_providers["huggingface"]["enabled"])
247
+ )
248
 
249
  return {
250
  "status": "healthy" if is_healthy else "degraded",
251
  "api": "online",
252
  "mongodb": mongodb_status,
253
  "authentication": auth_status,
254
+ "llm_providers": llm_providers,
255
  "ml_models": ml_models_status,
256
  "environment": settings.ENVIRONMENT,
257
  "debug_mode": settings.DEBUG
258
  }
259
 
 
260
  # ============================================================================
261
  # GLOBAL EXCEPTION HANDLER
262
  # ============================================================================
 
283
  }
284
  )
285
 
 
286
  # ============================================================================
287
  # MAIN ENTRY POINT (for direct execution)
288
  # ============================================================================
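With the extra llm_providers block, the /health payload can be probed from a client. A short check, assuming httpx is installed and the API runs locally on port 8000 (field access follows the dict built in health_check above):

import asyncio
import httpx

async def check_llm_health(base_url: str = "http://localhost:8000"):
    async with httpx.AsyncClient() as client:
        data = (await client.get(f"{base_url}/health")).json()
    providers = data["llm_providers"]
    print("Groq enabled:", providers["groq"]["enabled"])
    print("HF tokens configured:", providers["huggingface"]["tokens_configured"])
    return data["status"]

asyncio.run(check_llm_health())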
app/services/chat_service.py CHANGED
@@ -1,11 +1,12 @@
1
  """
2
  Chat Service - Main RAG Pipeline
 
3
  Combines: Policy Network → Retriever → LLM Generator
4
 
5
  This is the core service that orchestrates:
6
  1. Policy decision (FETCH vs NO_FETCH)
7
  2. Document retrieval (if FETCH)
8
- 3. Response generation (Gemini)
9
  4. Logging to MongoDB
10
 
11
  Adapted from your RAG.py workflow
@@ -53,8 +54,6 @@ Rate the response as:
53
 
54
  Provide your rating and brief explanation."""
55
 
56
-
57
-
58
  # ============================================================================
59
  # CHAT SERVICE
60
  # ============================================================================
@@ -67,7 +66,7 @@ class ChatService:
67
  1. User query comes in
68
  2. Policy network decides: FETCH or NO_FETCH
69
  3. If FETCH: Retrieve documents from FAISS
70
- 4. Generate response using Gemini (with or without context)
71
  5. Return response + metadata
72
  """
73
 
@@ -97,18 +96,18 @@ class ChatService:
97
 
98
  Returns:
99
  dict: Complete response with metadata
100
- {
101
- 'response': str, # Generated response
102
- 'policy_action': str, # FETCH or NO_FETCH
103
- 'policy_confidence': float, # Confidence score
104
- 'should_retrieve': bool, # Whether retrieval was done
105
- 'documents_retrieved': int, # Number of docs retrieved
106
- 'top_doc_score': float or None, # Best similarity score
107
- 'retrieval_time_ms': float, # Time spent on retrieval
108
- 'generation_time_ms': float, # Time spent on generation
109
- 'total_time_ms': float, # Total processing time
110
- 'timestamp': str # ISO timestamp
111
- }
112
  """
113
  start_time = time.time()
114
 
@@ -196,13 +195,13 @@ class ChatService:
196
  print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")
197
 
198
  # ====================================================================
199
- # STEP 3: GENERATE RESPONSE (Gemini)
200
  # ====================================================================
201
  print(f"\n💬 Generating response...")
202
  generation_start = time.time()
203
 
204
  try:
205
- # Generate response using LLM manager (Gemini)
206
  response = await llm_manager.generate_chat_response(
207
  query=query,
208
  context=context,
@@ -288,8 +287,8 @@ class ChatService:
288
  # Check LLM manager
289
  try:
290
  from app.core.llm_manager import llm_manager as llm
291
- health['components']['gemini'] = 'enabled' if llm.gemini else 'disabled'
292
  health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
 
293
  except Exception as e:
294
  health['components']['llm_manager'] = f'error: {str(e)}'
295
 
@@ -301,19 +300,17 @@ class ChatService:
301
 
302
  return health
303
 
304
-
305
  # ============================================================================
306
  # GLOBAL CHAT SERVICE INSTANCE
307
  # ============================================================================
308
- chat_service = ChatService()
309
 
 
310
 
311
  # ============================================================================
312
  # USAGE EXAMPLE (for reference)
313
  # ============================================================================
314
  """
315
  # In your API endpoint (chat.py):
316
-
317
  from app.services.chat_service import chat_service
318
 
319
  # Process user query
@@ -335,4 +332,4 @@ result = await chat_service.process_query(
335
 
336
  # Get service health
337
  health = await chat_service.health_check()
338
- """
 
1
  """
2
  Chat Service - Main RAG Pipeline
3
+
4
  Combines: Policy Network → Retriever → LLM Generator
5
 
6
  This is the core service that orchestrates:
7
  1. Policy decision (FETCH vs NO_FETCH)
8
  2. Document retrieval (if FETCH)
9
+ 3. Response generation (Groq/HuggingFace with Llama 3)
10
  4. Logging to MongoDB
11
 
12
  Adapted from your RAG.py workflow
 
54
 
55
  Provide your rating and brief explanation."""
56
 
 
 
57
  # ============================================================================
58
  # CHAT SERVICE
59
  # ============================================================================
 
66
  1. User query comes in
67
  2. Policy network decides: FETCH or NO_FETCH
68
  3. If FETCH: Retrieve documents from FAISS
69
+ 4. Generate response using Groq/HuggingFace (with or without context)
70
  5. Return response + metadata
71
  """
72
 
 
96
 
97
  Returns:
98
  dict: Complete response with metadata
99
+ {
100
+ 'response': str, # Generated response
101
+ 'policy_action': str, # FETCH or NO_FETCH
102
+ 'policy_confidence': float, # Confidence score
103
+ 'should_retrieve': bool, # Whether retrieval was done
104
+ 'documents_retrieved': int, # Number of docs retrieved
105
+ 'top_doc_score': float or None, # Best similarity score
106
+ 'retrieval_time_ms': float, # Time spent on retrieval
107
+ 'generation_time_ms': float, # Time spent on generation
108
+ 'total_time_ms': float, # Total processing time
109
+ 'timestamp': str # ISO timestamp
110
+ }
111
  """
112
  start_time = time.time()
113
 
 
195
  print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")
196
 
197
  # ====================================================================
198
+ # STEP 3: GENERATE RESPONSE (Groq/HuggingFace with fallback)
199
  # ====================================================================
200
  print(f"\n💬 Generating response...")
201
  generation_start = time.time()
202
 
203
  try:
204
+ # Generate response using LLM manager (Groq → HuggingFace fallback)
205
  response = await llm_manager.generate_chat_response(
206
  query=query,
207
  context=context,
 
287
  # Check LLM manager
288
  try:
289
  from app.core.llm_manager import llm_manager as llm
 
290
  health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
291
+ health['components']['huggingface'] = 'enabled' if llm.huggingface else 'disabled'
292
  except Exception as e:
293
  health['components']['llm_manager'] = f'error: {str(e)}'
294
 
 
300
 
301
  return health
302
 
 
303
  # ============================================================================
304
  # GLOBAL CHAT SERVICE INSTANCE
305
  # ============================================================================
 
306
 
307
+ chat_service = ChatService()
308
 
309
  # ============================================================================
310
  # USAGE EXAMPLE (for reference)
311
  # ============================================================================
312
  """
313
  # In your API endpoint (chat.py):
 
314
  from app.services.chat_service import chat_service
315
 
316
  # Process user query
 
332
 
333
  # Get service health
334
  health = await chat_service.health_check()
335
+ """
backups/backup_chat_service.py ADDED
@@ -0,0 +1,340 @@
1
+ # """
2
+ # Chat Service - Main RAG Pipeline
3
+ # Combines: Policy Network → Retriever → LLM Generator
4
+
5
+ # This is the core service that orchestrates:
6
+ # 1. Policy decision (FETCH vs NO_FETCH)
7
+ # 2. Document retrieval (if FETCH)
8
+ # 3. Response generation (Gemini)
9
+ # 4. Logging to MongoDB
10
+
11
+ # Adapted from your RAG.py workflow
12
+ # """
13
+
14
+ # import time
15
+ # from datetime import datetime
16
+ # from typing import List, Dict, Any, Optional
17
+
18
+ # from app.config import settings
19
+ # from app.ml.policy_network import predict_policy_action
20
+ # from app.ml.retriever import retrieve_documents, format_context
21
+ # from app.core.llm_manager import llm_manager
22
+
23
+ # # ============================================================================
24
+ # # SYSTEM PROMPTS
25
+ # # ============================================================================
26
+
27
+ # BANKING_SYSTEM_PROMPT = """You are an expert banking assistant specialized in Indian financial regulations and banking practices. You have access to a comprehensive knowledge base of banking policies, procedures, and RBI regulations.
28
+
29
+ # Instructions:
30
+ # - Answer the user query accurately using the provided context when available
31
+ # - If context is insufficient or query is outside banking domain, still respond helpfully but mention your banking specialization
32
+ # - If no banking context is available, provide a general helpful response but acknowledge your expertise is in banking
33
+ # - Never refuse to answer - always be helpful while being transparent about your specialization
34
+ # - Cite relevant policy numbers or document references when available in context
35
+ # - Never fabricate specific policies, rates, or eligibility criteria
36
+ # - If uncertain about current rates or policies, acknowledge the limitation
37
+ # - Maintain a helpful and professional tone
38
+ # - Keep responses concise, clear, and actionable
39
+ # """
40
+
41
+ # EVALUATION_PROMPT = """You are evaluating a banking assistant's response for quality and accuracy.
42
+
43
+ # Criteria:
44
+ # 1. Accuracy: Is the response factually correct?
45
+ # 2. Relevance: Does it address the user's question?
46
+ # 3. Completeness: Are all aspects of the question covered?
47
+ # 4. Clarity: Is the response easy to understand?
48
+ # 5. Context Usage: Does it properly use the retrieved context?
49
+
50
+ # Rate the response as:
51
+ # - "Good": Accurate, relevant, complete, and clear
52
+ # - "Bad": Inaccurate, irrelevant, incomplete, or unclear
53
+
54
+ # Provide your rating and brief explanation."""
55
+
56
+
57
+
58
+ # # ============================================================================
59
+ # # CHAT SERVICE
60
+ # # ============================================================================
61
+
62
+ # class ChatService:
63
+ # """
64
+ # Main chat service that handles the complete RAG pipeline.
65
+
66
+ # Pipeline:
67
+ # 1. User query comes in
68
+ # 2. Policy network decides: FETCH or NO_FETCH
69
+ # 3. If FETCH: Retrieve documents from FAISS
70
+ # 4. Generate response using Gemini (with or without context)
71
+ # 5. Return response + metadata
72
+ # """
73
+
74
+ # def __init__(self):
75
+ # """Initialize chat service"""
76
+ # print("🤖 ChatService initialized")
77
+
78
+ # async def process_query(
79
+ # self,
80
+ # query: str,
81
+ # conversation_history: List[Dict[str, str]] = None,
82
+ # user_id: Optional[str] = None
83
+ # ) -> Dict[str, Any]:
84
+ # """
85
+ # Process a user query through the complete RAG pipeline.
86
+
87
+ # This is the MAIN function that combines everything:
88
+ # - Policy decision
89
+ # - Retrieval
90
+ # - Generation
91
+
92
+ # Args:
93
+ # query: User query text
94
+ # conversation_history: Previous conversation turns
95
+ # Format: [{'role': 'user'/'assistant', 'content': '...', 'metadata': {...}}]
96
+ # user_id: Optional user ID for logging
97
+
98
+ # Returns:
99
+ # dict: Complete response with metadata
100
+ # {
101
+ # 'response': str, # Generated response
102
+ # 'policy_action': str, # FETCH or NO_FETCH
103
+ # 'policy_confidence': float, # Confidence score
104
+ # 'should_retrieve': bool, # Whether retrieval was done
105
+ # 'documents_retrieved': int, # Number of docs retrieved
106
+ # 'top_doc_score': float or None, # Best similarity score
107
+ # 'retrieval_time_ms': float, # Time spent on retrieval
108
+ # 'generation_time_ms': float, # Time spent on generation
109
+ # 'total_time_ms': float, # Total processing time
110
+ # 'timestamp': str # ISO timestamp
111
+ # }
112
+ # """
113
+ # start_time = time.time()
114
+
115
+ # # Initialize history if None
116
+ # if conversation_history is None:
117
+ # conversation_history = []
118
+
119
+ # # Validate query
120
+ # if not query or query.strip() == "":
121
+ # return {
122
+ # 'response': "I didn't receive a valid question. Could you please try again?",
123
+ # 'policy_action': 'NO_FETCH',
124
+ # 'policy_confidence': 1.0,
125
+ # 'should_retrieve': False,
126
+ # 'documents_retrieved': 0,
127
+ # 'top_doc_score': None,
128
+ # 'retrieval_time_ms': 0,
129
+ # 'generation_time_ms': 0,
130
+ # 'total_time_ms': 0,
131
+ # 'timestamp': datetime.now().isoformat()
132
+ # }
133
+
134
+ # # ====================================================================
135
+ # # STEP 1: POLICY DECISION (Local BERT model)
136
+ # # ====================================================================
137
+ # print(f"\n{'='*80}")
138
+ # print(f"🔍 Processing Query: {query[:50]}...")
139
+ # print(f"{'='*80}")
140
+
141
+ # policy_start = time.time()
142
+
143
+ # # Predict action using policy network
144
+ # policy_result = predict_policy_action(
145
+ # query=query,
146
+ # history=conversation_history,
147
+ # return_probs=True
148
+ # )
149
+
150
+ # policy_time = (time.time() - policy_start) * 1000
151
+
152
+ # print(f"\n📊 Policy Decision:")
153
+ # print(f" Action: {policy_result['action']}")
154
+ # print(f" Confidence: {policy_result['confidence']:.3f}")
155
+ # print(f" Should Retrieve: {policy_result['should_retrieve']}")
156
+ # print(f" Time: {policy_time:.2f}ms")
157
+
158
+ # # ====================================================================
159
+ # # STEP 2: RETRIEVAL (if FETCH or low confidence NO_FETCH)
160
+ # # ====================================================================
161
+ # retrieved_docs = []
162
+ # context = ""
163
+ # retrieval_time = 0
164
+
165
+ # if policy_result['should_retrieve']:
166
+ # print(f"\n🔎 Retrieving documents...")
167
+ # retrieval_start = time.time()
168
+
169
+ # try:
170
+ # # Retrieve documents using custom retriever + FAISS
171
+ # retrieved_docs = retrieve_documents(
172
+ # query=query,
173
+ # top_k=settings.TOP_K,
174
+ # min_similarity=settings.SIMILARITY_THRESHOLD
175
+ # )
176
+
177
+ # retrieval_time = (time.time() - retrieval_start) * 1000
178
+
179
+ # if retrieved_docs:
180
+ # print(f" ✅ Retrieved {len(retrieved_docs)} documents")
181
+ # print(f" Top score: {retrieved_docs[0]['score']:.3f}")
182
+
183
+ # # Format context for LLM
184
+ # context = format_context(
185
+ # retrieved_docs,
186
+ # max_context_length=settings.MAX_CONTEXT_LENGTH
187
+ # )
188
+ # else:
189
+ # print(f" ⚠️ No documents above threshold")
190
+
191
+ # except Exception as e:
192
+ # print(f" ❌ Retrieval error: {e}")
193
+ # # Continue without retrieval
194
+
195
+ # else:
196
+ # print(f"\n🚫 Skipping retrieval (Policy: {policy_result['action']})")
197
+
198
+ # # ====================================================================
199
+ # # STEP 3: GENERATE RESPONSE (Gemini)
200
+ # # ====================================================================
201
+ # print(f"\n💬 Generating response...")
202
+ # generation_start = time.time()
203
+
204
+ # try:
205
+ # # Generate response using LLM manager (Gemini)
206
+ # response = await llm_manager.generate_chat_response(
207
+ # query=query,
208
+ # context=context,
209
+ # history=conversation_history
210
+ # )
211
+
212
+ # generation_time = (time.time() - generation_start) * 1000
213
+
214
+ # print(f" ✅ Response generated")
215
+ # print(f" Length: {len(response)} chars")
216
+ # print(f" Time: {generation_time:.2f}ms")
217
+
218
+ # except Exception as e:
219
+ # print(f" ❌ Generation error: {e}")
220
+ # response = "I apologize, but I encountered an error generating a response. Please try again."
221
+ # generation_time = (time.time() - generation_start) * 1000
222
+
223
+ # # ====================================================================
224
+ # # STEP 4: COMPILE RESULTS
225
+ # # ====================================================================
226
+ # total_time = (time.time() - start_time) * 1000
227
+
228
+ # result = {
229
+ # 'response': response,
230
+ # 'policy_action': policy_result['action'],
231
+ # 'policy_confidence': policy_result['confidence'],
232
+ # 'should_retrieve': policy_result['should_retrieve'],
233
+ # 'documents_retrieved': len(retrieved_docs),
234
+ # 'top_doc_score': retrieved_docs[0]['score'] if retrieved_docs else None,
235
+ # 'retrieval_time_ms': round(retrieval_time, 2),
236
+ # 'generation_time_ms': round(generation_time, 2),
237
+ # 'total_time_ms': round(total_time, 2),
238
+ # 'timestamp': datetime.now().isoformat()
239
+ # }
240
+
241
+ # # Add retrieved docs metadata (for logging, not sent to user)
242
+ # if retrieved_docs:
243
+ # result['retrieved_docs_metadata'] = [
244
+ # {
245
+ # 'faq_id': doc['faq_id'],
246
+ # 'score': doc['score'],
247
+ # 'category': doc['category'],
248
+ # 'rank': doc['rank']
249
+ # }
250
+ # for doc in retrieved_docs
251
+ # ]
252
+
253
+ # print(f"\n{'='*80}")
254
+ # print(f"✅ Query processed successfully")
255
+ # print(f" Total time: {total_time:.2f}ms")
256
+ # print(f"{'='*80}\n")
257
+
258
+ # return result
259
+
260
+ # async def health_check(self) -> Dict[str, Any]:
261
+ # """
262
+ # Check health of all service components.
263
+
264
+ # Returns:
265
+ # dict: Health status
266
+ # """
267
+ # health = {
268
+ # 'service': 'chat_service',
269
+ # 'status': 'healthy',
270
+ # 'components': {}
271
+ # }
272
+
273
+ # # Check policy network
274
+ # try:
275
+ # from app.ml.policy_network import POLICY_MODEL
276
+ # health['components']['policy_network'] = 'loaded' if POLICY_MODEL else 'not_loaded'
277
+ # except Exception as e:
278
+ # health['components']['policy_network'] = f'error: {str(e)}'
279
+
280
+ # # Check retriever
281
+ # try:
282
+ # from app.ml.retriever import RETRIEVER_MODEL, FAISS_INDEX
283
+ # health['components']['retriever'] = 'loaded' if RETRIEVER_MODEL else 'not_loaded'
284
+ # health['components']['faiss_index'] = 'loaded' if FAISS_INDEX else 'not_loaded'
285
+ # except Exception as e:
286
+ # health['components']['retriever'] = f'error: {str(e)}'
287
+
288
+ # # Check LLM manager
289
+ # try:
290
+ # from app.core.llm_manager import llm_manager as llm
291
+ # health['components']['gemini'] = 'enabled' if llm.gemini else 'disabled'
292
+ # health['components']['groq'] = 'enabled' if llm.groq else 'disabled'
293
+ # except Exception as e:
294
+ # health['components']['llm_manager'] = f'error: {str(e)}'
295
+
296
+ # # Overall status
297
+ # failed_components = [k for k, v in health['components'].items() if 'error' in str(v)]
298
+ # if failed_components:
299
+ # health['status'] = 'degraded'
300
+ # health['failed_components'] = failed_components
301
+
302
+ # return health
303
+
304
+
305
+ # # ============================================================================
306
+ # # GLOBAL CHAT SERVICE INSTANCE
307
+ # # ============================================================================
308
+ # chat_service = ChatService()
309
+
310
+
311
+ # # ============================================================================
312
+ # # USAGE EXAMPLE (for reference)
313
+ # # ============================================================================
314
+ # """
315
+ # # In your API endpoint (chat.py):
316
+
317
+ # from app.services.chat_service import chat_service
318
+
319
+ # # Process user query
320
+ # result = await chat_service.process_query(
321
+ # query="What is my account balance?",
322
+ # conversation_history=[
323
+ # {'role': 'user', 'content': 'Hello'},
324
+ # {'role': 'assistant', 'content': 'Hi! How can I help?', 'metadata': {'policy_action': 'NO_FETCH'}}
325
+ # ],
326
+ # user_id="user_123"
327
+ # )
328
+
329
+ # # Result contains:
330
+ # # - response: "Your account balance is $1,234.56"
331
+ # # - policy_action: "FETCH"
332
+ # # - documents_retrieved: 3
333
+ # # - total_time_ms: 450.23
334
+ # # etc.
335
+
336
+ # # Get service health
337
+ # health = await chat_service.health_check()
338
+ # """
339
+
340
+
backups/backup_config.py ADDED
@@ -0,0 +1,640 @@
1
+ # LINE 80 VERY IMP CHANGE OF LLM MAX TOKENS FROM 512 TO 1024
2
+
3
+
4
+ """
5
+ Application Configuration
6
+ Settings for Banking RAG Chatbot with JWT Authentication
7
+ Includes all settings needed by existing llm_manager.py
8
+ """
9
+
10
+ import os
11
+ from typing import List
12
+ from dotenv import load_dotenv
13
+
14
+ load_dotenv()
15
+
16
+
17
+ class Settings:
18
+ """Application settings loaded from environment variables"""
19
+
20
+ # ========================================================================
21
+ # ENVIRONMENT
22
+ # ========================================================================
23
+ ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
24
+ DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
25
+
26
+ # ========================================================================
27
+ # MONGODB
28
+ # ========================================================================
29
+ MONGODB_URI: str = os.getenv("MONGODB_URI", "")
30
+ DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
31
+
32
+ # ========================================================================
33
+ # JWT AUTHENTICATION
34
+ # ========================================================================
35
+ SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
36
+ ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
37
+ ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
38
+
39
+ # ========================================================================
40
+ # CORS (for frontend)
41
+ # ========================================================================
42
+ ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
43
+
44
+ # ========================================================================
45
+ # GOOGLE GEMINI API
46
+ # ========================================================================
47
+ GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
48
+ GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
49
+ GEMINI_REQUESTS_PER_MINUTE: int = int(os.getenv("GEMINI_REQUESTS_PER_MINUTE", "60"))
50
+
51
+ # ========================================================================
52
+ # GROQ API (Optional - for evaluation)
53
+ # ========================================================================
54
+ GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
55
+ GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
56
+ GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
57
+
58
+ # ========================================================================
59
+ # HUGGING FACE (Optional - for model downloads)
60
+ # ========================================================================
61
+ HF_TOKEN: str = os.getenv("HF_TOKEN", "")
62
+
63
+ # ========================================================================
64
+ # MODEL PATHS (for RL Policy Network and RAG models)
65
+ # ========================================================================
66
+ POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "app/models/best_policy_model.pth")
67
+ RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "app/models/best_retriever_model.pth")
68
+ FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "app/models/faiss_index.pkl")
69
+ KB_PATH: str = os.getenv("KB_PATH", "app/data/final_knowledge_base.jsonl")
70
+
71
+ # ========================================================================
72
+ # DEVICE SETTINGS (for PyTorch/TensorFlow models)
73
+ # ========================================================================
74
+ DEVICE: str = os.getenv("DEVICE", "cpu")
75
+
76
+ # ========================================================================
77
+ # LLM PARAMETERS
78
+ # ========================================================================
79
+ LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
80
+ LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "1024")) # VERY IMPORTANT CHANGE =============================================================================================
81
+ # ============================================================================
82
+
83
+ # ========================================================================
84
+ # RAG PARAMETERS
85
+ # ========================================================================
86
+ TOP_K: int = int(os.getenv("TOP_K", "5"))
87
+ SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
88
+ MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
89
+
90
+ # ========================================================================
91
+ # POLICY NETWORK PARAMETERS
92
+ # ========================================================================
93
+ POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
94
+ CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
95
+
96
+ # ========================================================================
97
+ # HELPER METHODS (Required by llm_manager.py)
98
+ # ========================================================================
99
+
100
+ def is_gemini_enabled(self) -> bool:
101
+ """Check if Google Gemini API is configured"""
102
+ return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
103
+
104
+ def is_groq_enabled(self) -> bool:
105
+ """Check if Groq API is configured"""
106
+ return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
107
+
108
+ def is_hf_enabled(self) -> bool:
109
+ """Check if HuggingFace token is configured"""
110
+ return bool(self.HF_TOKEN and self.HF_TOKEN != "")
111
+
112
+ def get_allowed_origins(self) -> List[str]:
113
+ """Parse allowed origins from comma-separated string"""
114
+ if self.ALLOWED_ORIGINS == "*":
115
+ return ["*"]
116
+ return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
117
+
118
+ def get_llm_for_task(self, task: str = "qa") -> str:
119
+ """
120
+ Get LLM name for a specific task.
121
+
122
+ Args:
123
+ task: Task type ('chat', 'evaluation', etc.)
124
+
125
+ Returns:
126
+ str: LLM name ('gemini' or 'groq')
127
+ """
128
+ # Use Gemini for chat, Groq for evaluation
129
+ if task == "evaluation":
130
+ return "groq" if self.is_groq_enabled() else "gemini"
131
+ else:
132
+ return "gemini" # Default to Gemini for all tasks
133
+
134
+
135
+ # ============================================================================
136
+ # CREATE GLOBAL SETTINGS INSTANCE
137
+ # ============================================================================
138
+ settings = Settings()
139
+
140
+
141
+ # ============================================================================
142
+ # PRINT CONFIGURATION ON LOAD
143
+ # ============================================================================
144
+ print("=" * 80)
145
+ print("✅ Configuration Loaded")
146
+ print("=" * 80)
147
+ print(f"Environment: {settings.ENVIRONMENT}")
148
+ print(f"Debug Mode: {settings.DEBUG}")
149
+ print(f"Database: {settings.DATABASE_NAME}")
150
+ print(f"Device: {settings.DEVICE}")
151
+ print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
152
+ print()
153
+ print("🔑 API Keys:")
154
+ print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
155
+ print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
156
+ print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
157
+ print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
158
+ print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
159
+ print()
160
+ print("🤖 Model Paths:")
161
+ print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
162
+ print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
163
+ print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
164
+ print(f" Knowledge Base: {settings.KB_PATH}")
165
+ print("=" * 80)
166
+ # ============================================================================
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+ # """
185
+ # Application Configuration
186
+ # Settings for Banking RAG Chatbot with JWT Authentication
187
+ # Includes all settings needed by existing llm_manager.py
188
+ # """
189
+
190
+ # import os
191
+ # from typing import List
192
+ # from dotenv import load_dotenv
193
+
194
+ # load_dotenv()
195
+
196
+
197
+ # class Settings:
198
+ # """Application settings loaded from environment variables"""
199
+
200
+ # # ========================================================================
201
+ # # ENVIRONMENT
202
+ # # ========================================================================
203
+ # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
204
+ # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
205
+
206
+ # # ========================================================================
207
+ # # MONGODB
208
+ # # ========================================================================
209
+ # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
210
+ # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
211
+
212
+ # # ========================================================================
213
+ # # JWT AUTHENTICATION
214
+ # # ========================================================================
215
+ # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
216
+ # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
217
+ # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
218
+
219
+ # # ========================================================================
220
+ # # CORS (for frontend)
221
+ # # ========================================================================
222
+ # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
223
+
224
+ # # ========================================================================
225
+ # # GOOGLE GEMINI API
226
+ # # ========================================================================
227
+ # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
228
+ # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
229
+
230
+ # # ========================================================================
231
+ # # GROQ API (Optional - for your llm_manager)
232
+ # # ========================================================================
233
+ # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
234
+ # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
235
+
236
+ # # ========================================================================
237
+ # # HUGGING FACE (Optional - for model downloads)
238
+ # # ========================================================================
239
+ # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
240
+
241
+ # # ========================================================================
242
+ # # MODEL PATHS (for RL Policy Network and RAG models)
243
+ # # ========================================================================
244
+ # POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
245
+ # RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
246
+ # FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
247
+ # KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
248
+
249
+ # # ========================================================================
250
+ # # DEVICE SETTINGS (for PyTorch/TensorFlow models)
251
+ # # ========================================================================
252
+ # DEVICE: str = os.getenv("DEVICE", "cpu")
253
+
254
+ # # ========================================================================
255
+ # # LLM PARAMETERS
256
+ # # ========================================================================
257
+ # LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
258
+ # LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
259
+
260
+ # # ========================================================================
261
+ # # RAG PARAMETERS
262
+ # # ========================================================================
263
+ # TOP_K: int = int(os.getenv("TOP_K", "5"))
264
+ # SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
265
+ # MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
266
+
267
+ # # ========================================================================
268
+ # # POLICY NETWORK PARAMETERS
269
+ # # ========================================================================
270
+ # POLICY_MAX_LEN: int = int(os.getenv("POLICY_MAX_LEN", "256"))
271
+ # CONFIDENCE_THRESHOLD: float = float(os.getenv("CONFIDENCE_THRESHOLD", "0.7"))
272
+
273
+
274
+ # # ========================================================================
275
+ # # HELPER METHODS (Required by llm_manager.py)
276
+ # # ========================================================================
277
+
278
+ # def is_gemini_enabled(self) -> bool:
279
+ # """Check if Google Gemini API is configured"""
280
+ # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
281
+
282
+ # def is_groq_enabled(self) -> bool:
283
+ # """Check if Groq API is configured"""
284
+ # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
285
+
286
+ # def is_hf_enabled(self) -> bool:
287
+ # """Check if HuggingFace token is configured"""
288
+ # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
289
+
290
+ # def get_allowed_origins(self) -> List[str]:
291
+ # """Parse allowed origins from comma-separated string"""
292
+ # if self.ALLOWED_ORIGINS == "*":
293
+ # return ["*"]
294
+ # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
295
+
296
+ # # def get_llm_for_task(self, task: str = "qa"):
297
+ # # """
298
+ # # Get LLM configuration for a specific task.
299
+ # # Returns a dict with model settings.
300
+
301
+ # # Args:
302
+ # # task: Task type ('qa', 'retrieval', 'summary', etc.)
303
+
304
+ # # Returns:
305
+ # # dict: LLM configuration
306
+ # # """
307
+ # # return {
308
+ # # 'api_key': self.GOOGLE_API_KEY,
309
+ # # 'model': self.GEMINI_MODEL,
310
+ # # 'temperature': self.LLM_TEMPERATURE,
311
+ # # 'max_tokens': self.LLM_MAX_TOKENS,
312
+ # # 'task': task
313
+ # # }
314
+ # def get_llm_for_task(self, task: str = "qa") -> str:
315
+ # """
316
+ # Get LLM name for a specific task.
317
+
318
+ # Args:
319
+ # task: Task type ('chat', 'evaluation', etc.)
320
+
321
+ # Returns:
322
+ # str: LLM name ('gemini' or 'groq')
323
+ # """
324
+ # # Use Gemini for chat, Groq for evaluation
325
+ # if task == "evaluation":
326
+ # return "groq" if self.is_groq_enabled() else "gemini"
327
+ # else:
328
+ # return "gemini" # Default to Gemini for all other tasks
329
+
330
+
331
+
332
+
333
+ # # ============================================================================
334
+ # # CREATE GLOBAL SETTINGS INSTANCE
335
+ # # ============================================================================
336
+ # settings = Settings()
337
+
338
+
339
+ # # ============================================================================
340
+ # # PRINT CONFIGURATION ON LOAD
341
+ # # ============================================================================
342
+ # print("=" * 80)
343
+ # print("✅ Configuration Loaded")
344
+ # print("=" * 80)
345
+ # print(f"Environment: {settings.ENVIRONMENT}")
346
+ # print(f"Debug Mode: {settings.DEBUG}")
347
+ # print(f"Database: {settings.DATABASE_NAME}")
348
+ # print(f"Device: {settings.DEVICE}")
349
+ # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
350
+ # print()
351
+ # print("🔑 API Keys:")
352
+ # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
353
+ # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
354
+ # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
355
+ # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
356
+ # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
357
+ # print()
358
+ # print("🤖 Model Paths:")
359
+ # print(f" Policy Model: {settings.POLICY_MODEL_PATH}")
360
+ # print(f" Retriever Model: {settings.RETRIEVER_MODEL_PATH}")
361
+ # print(f" FAISS Index: {settings.FAISS_INDEX_PATH}")
362
+ # print(f" Knowledge Base: {settings.KB_PATH}")
363
+ # print("=" * 80)
364
+ # # # ============================================================================
365
+
366
+
367
+
368
+
369
+
370
+
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+
380
+
381
+
382
+
383
+
384
+
385
+
386
+ # # """
387
+ # # Application Configuration
388
+ # # Settings for Banking RAG Chatbot with JWT Authentication
389
+ # # Includes all settings needed by existing llm_manager.py
390
+ # # """
391
+
392
+ # # import os
393
+ # # from typing import List
394
+ # # from dotenv import load_dotenv
395
+
396
+ # # load_dotenv()
397
+
398
+
399
+ # # class Settings:
400
+ # # """Application settings loaded from environment variables"""
401
+
402
+ # # # ========================================================================
403
+ # # # ENVIRONMENT
404
+ # # # ========================================================================
405
+ # # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
406
+ # # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
407
+
408
+ # # # ========================================================================
409
+ # # # MONGODB
410
+ # # # ========================================================================
411
+ # # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
412
+ # # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
413
+
414
+ # # # ========================================================================
415
+ # # # JWT AUTHENTICATION
416
+ # # # ========================================================================
417
+ # # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
418
+ # # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
419
+ # # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
420
+
421
+ # # # ========================================================================
422
+ # # # CORS (for frontend)
423
+ # # # ========================================================================
424
+ # # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
425
+
426
+ # # # ========================================================================
427
+ # # # GOOGLE GEMINI API
428
+ # # # ========================================================================
429
+ # # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
430
+ # # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
431
+
432
+ # # # ========================================================================
433
+ # # # GROQ API (Optional - for your llm_manager)
434
+ # # # ========================================================================
435
+ # # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
436
+ # # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
437
+
438
+ # # # ========================================================================
439
+ # # # HUGGING FACE (Optional - for model downloads)
440
+ # # # ========================================================================
441
+ # # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
442
+
443
+ # # # ========================================================================
444
+ # # # HELPER METHODS (Required by llm_manager.py)
445
+ # # # ========================================================================
446
+
447
+ # # def is_gemini_enabled(self) -> bool:
448
+ # # """Check if Google Gemini API is configured"""
449
+ # # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
450
+
451
+ # # def is_groq_enabled(self) -> bool:
452
+ # # """Check if Groq API is configured"""
453
+ # # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
454
+
455
+ # # def is_hf_enabled(self) -> bool:
456
+ # # """Check if HuggingFace token is configured"""
457
+ # # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
458
+
459
+ # # def get_allowed_origins(self) -> List[str]:
460
+ # # """Parse allowed origins from comma-separated string"""
461
+ # # if self.ALLOWED_ORIGINS == "*":
462
+ # # return ["*"]
463
+ # # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
464
+
465
+
466
+ # # # ============================================================================
467
+ # # # CREATE GLOBAL SETTINGS INSTANCE
468
+ # # # ============================================================================
469
+ # # settings = Settings()
470
+
471
+ # # # ============================================================================
472
+ # # # PRINT CONFIGURATION ON LOAD
473
+ # # # ============================================================================
474
+ # # print("=" * 80)
475
+ # # print("✅ Configuration Loaded")
476
+ # # print("=" * 80)
477
+ # # print(f"Environment: {settings.ENVIRONMENT}")
478
+ # # print(f"Debug Mode: {settings.DEBUG}")
479
+ # # print(f"Database: {settings.DATABASE_NAME}")
480
+ # # # print(f"JWT Algorithm: {settings.ALGORITHM}")
481
+ # # # print(f"Token Expiry: {settings.ACCESS_TOKEN_EXPIRE_MINUTES} minutes")
482
+ # # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
483
+ # # print()
484
+ # # print("🔑 API Keys:")
485
+ # # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
486
+ # # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
487
+ # # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
488
+ # # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
489
+ # # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
490
+ # # print("=" * 80)
491
+
516
+ # """
517
+ # Application Configuration
518
+ # Settings for Banking RAG Chatbot with JWT Authentication
519
+ # Includes all settings needed by existing llm_manager.py
520
+ # """
521
+
522
+ # import os
523
+ # from typing import List
524
+ # from dotenv import load_dotenv
525
+
526
+ # load_dotenv()
527
+
528
+
529
+ # class Settings:
530
+ # """Application settings loaded from environment variables"""
531
+
532
+ # # ========================================================================
533
+ # # ENVIRONMENT
534
+ # # ========================================================================
535
+ # ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
536
+ # DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
537
+
538
+ # # ========================================================================
539
+ # # MONGODB
540
+ # # ========================================================================
541
+ # MONGODB_URI: str = os.getenv("MONGODB_URI", "")
542
+ # DATABASE_NAME: str = os.getenv("DATABASE_NAME", "aml_ia_db")
543
+
544
+ # # ========================================================================
545
+ # # JWT AUTHENTICATION
546
+ # # ========================================================================
547
+ # SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
548
+ # ALGORITHM: str = os.getenv("ALGORITHM", "HS256")
549
+ # ACCESS_TOKEN_EXPIRE_MINUTES: int = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
550
+
551
+ # # ========================================================================
552
+ # # CORS (for frontend)
553
+ # # ========================================================================
554
+ # ALLOWED_ORIGINS: str = os.getenv("ALLOWED_ORIGINS", "*")
555
+
556
+ # # ========================================================================
557
+ # # GOOGLE GEMINI API
558
+ # # ========================================================================
559
+ # GOOGLE_API_KEY: str = os.getenv("GOOGLE_API_KEY", "")
560
+ # GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
561
+
562
+ # # ========================================================================
563
+ # # GROQ API (Optional - for your llm_manager)
564
+ # # ========================================================================
565
+ # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
566
+ # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
567
+
568
+ # # ========================================================================
569
+ # # HUGGING FACE (Optional - for model downloads)
570
+ # # ========================================================================
571
+ # HF_TOKEN: str = os.getenv("HF_TOKEN", "")
572
+
573
+ # # ========================================================================
574
+ # # MODEL PATHS (for RL Policy Network and RAG models)
575
+ # # ========================================================================
576
+ # POLICY_MODEL_PATH: str = os.getenv("POLICY_MODEL_PATH", "models/best_policy_model.pth")
577
+ # RETRIEVER_MODEL_PATH: str = os.getenv("RETRIEVER_MODEL_PATH", "models/best_retriever_model.pth")
578
+ # FAISS_INDEX_PATH: str = os.getenv("FAISS_INDEX_PATH", "models/faiss_index.pkl")
579
+ # KB_PATH: str = os.getenv("KB_PATH", "data/final_knowledge_base.jsonl")
580
+
581
+ # # ========================================================================
582
+ # # LLM PARAMETERS
583
+ # # ========================================================================
584
+ # LLM_TEMPERATURE: float = float(os.getenv("LLM_TEMPERATURE", "0.7"))
585
+ # LLM_MAX_TOKENS: int = int(os.getenv("LLM_MAX_TOKENS", "512"))
586
+
587
+ # # ========================================================================
588
+ # # RAG PARAMETERS
589
+ # # ========================================================================
590
+ # TOP_K: int = int(os.getenv("TOP_K", "5"))
591
+ # SIMILARITY_THRESHOLD: float = float(os.getenv("SIMILARITY_THRESHOLD", "0.5"))
592
+ # MAX_CONTEXT_LENGTH: int = int(os.getenv("MAX_CONTEXT_LENGTH", "2000"))
593
+
594
+ # # ========================================================================
595
+ # # HELPER METHODS (Required by llm_manager.py)
596
+ # # ========================================================================
597
+
598
+ # def is_gemini_enabled(self) -> bool:
599
+ # """Check if Google Gemini API is configured"""
600
+ # return bool(self.GOOGLE_API_KEY and self.GOOGLE_API_KEY != "")
601
+
602
+ # def is_groq_enabled(self) -> bool:
603
+ # """Check if Groq API is configured"""
604
+ # return bool(self.GROQ_API_KEY and self.GROQ_API_KEY != "")
605
+
606
+ # def is_hf_enabled(self) -> bool:
607
+ # """Check if HuggingFace token is configured"""
608
+ # return bool(self.HF_TOKEN and self.HF_TOKEN != "")
609
+
610
+ # def get_allowed_origins(self) -> List[str]:
611
+ # """Parse allowed origins from comma-separated string"""
612
+ # if self.ALLOWED_ORIGINS == "*":
613
+ # return ["*"]
614
+ # return [origin.strip() for origin in self.ALLOWED_ORIGINS.split(",")]
615
+
616
+
617
+ # # ============================================================================
618
+ # # CREATE GLOBAL SETTINGS INSTANCE
619
+ # # ============================================================================
620
+ # settings = Settings()
621
+
622
+
623
+ # # ============================================================================
624
+ # # PRINT CONFIGURATION ON LOAD
625
+ # # ============================================================================
626
+ # print("=" * 80)
627
+ # print("✅ Configuration Loaded")
628
+ # print("=" * 80)
629
+ # print(f"Environment: {settings.ENVIRONMENT}")
630
+ # print(f"Debug Mode: {settings.DEBUG}")
631
+ # print(f"Database: {settings.DATABASE_NAME}")
632
+ # print(f"CORS Origins: {settings.ALLOWED_ORIGINS}")
633
+ # print()
634
+ # print("🔑 API Keys:")
635
+ # print(f" Google Gemini: {'✅ Configured' if settings.is_gemini_enabled() else '❌ Missing'}")
636
+ # print(f" Groq API: {'✅ Configured' if settings.is_groq_enabled() else '⚠️ Optional (not set)'}")
637
+ # print(f" HuggingFace: {'✅ Configured' if settings.is_hf_enabled() else '⚠️ Optional (not set)'}")
638
+ # print(f" MongoDB: {'✅ Configured' if settings.MONGODB_URI else '❌ Missing'}")
639
+ # print(f" JWT Secret: {'✅ Configured' if settings.SECRET_KEY != 'your-secret-key-change-in-production' else '⚠️ Using default (CHANGE THIS!)'}")
640
+ # print("=" * 80)
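
Not part of the committed config — a minimal sketch, assuming the Settings helpers defined above and the global `settings` instance, of how a service might summarize the loaded configuration without printing secrets:

# Minimal sketch (assumption, not in the commit): exercising the Settings
# helpers shown above via the global `settings` instance from app.config.
from app.config import settings

def startup_summary() -> dict:
    """Return a small, non-sensitive snapshot of the loaded configuration."""
    return {
        "environment": settings.ENVIRONMENT,
        "gemini_enabled": settings.is_gemini_enabled(),
        "groq_enabled": settings.is_groq_enabled(),
        "hf_enabled": settings.is_hf_enabled(),
        "cors_origins": settings.get_allowed_origins(),
    }

if __name__ == "__main__":
    print(startup_summary())
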
backups/backup_llm_manager.py ADDED
@@ -0,0 +1,430 @@
1
+ # """
2
+ # Multi-LLM Manager for Google Gemini, Groq, and HuggingFace
3
+ # All three APIs co-exist for different purposes (no fallback logic)
4
+
5
+ # Architecture:
6
+ # - Google Gemini (Primary): User-facing chat responses (best quality)
7
+ # - Groq (Secondary): Fast inference for evaluation and specific tasks
8
+ # - HuggingFace: Model downloads and embeddings (always required)
9
+
10
+ # Each API has its designated purpose based on config settings.
11
+ # """
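
The routing described in this docstring can be pictured as a fixed task-to-provider table; the sketch below is illustrative only and is not part of the backup file:

# Illustrative only: the task-to-provider split described in the docstring above.
TASK_TO_PROVIDER = {
    "chat": "gemini",              # user-facing responses (primary, best quality)
    "evaluation": "groq",          # fast scoring during RL training
    "embeddings": "huggingface",   # model downloads and embeddings
}
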
12
+
13
+ # import time
14
+ # import google.generativeai as genai
15
+ # from typing import List, Dict, Optional, Literal
16
+ # from langchain_groq import ChatGroq
17
+ # from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
18
+
19
+ # from app.config import settings
20
+
21
+
22
+ # # ============================================================================
23
+ # # GOOGLE GEMINI MANAGER
24
+ # # ============================================================================
25
+
26
+ # class GeminiManager:
27
+ # """
28
+ # Google Gemini API Manager (Primary LLM)
29
+ # Handles Google Pro account with gemini-2.0-flash-lite model
30
+ # """
31
+
32
+ # def __init__(self):
33
+ # """Initialize Gemini API with your Google API key"""
34
+ # self.api_key = settings.GOOGLE_API_KEY
35
+ # self.model_name = settings.GEMINI_MODEL
36
+
37
+ # # Configure Gemini
38
+ # genai.configure(api_key=self.api_key)
39
+
40
+ # # Create model instance with safety settings
41
+ # self.model = genai.GenerativeModel(
42
+ # model_name=self.model_name,
43
+ # generation_config={
44
+ # "temperature": settings.LLM_TEMPERATURE,
45
+ # "max_output_tokens": settings.LLM_MAX_TOKENS,
46
+ # }
47
+ # )
48
+
49
+ # # Rate limiting tracking
50
+ # self.requests_this_minute = 0
51
+ # self.tokens_this_minute = 0
52
+ # self.last_reset = time.time()
53
+
54
+ # print(f"✅ Gemini Manager initialized: {self.model_name}")
55
+
56
+ # def _check_rate_limits(self):
57
+ # """
58
+ # Check and reset rate limit counters.
59
+ # Gemini Pro: 60 requests/min, 60,000 tokens/min
60
+ # """
61
+ # current_time = time.time()
62
+
63
+ # # Reset counters every minute
64
+ # if current_time - self.last_reset > 60:
65
+ # self.requests_this_minute = 0
66
+ # self.tokens_this_minute = 0
67
+ # self.last_reset = current_time
68
+
69
+ # # Check if limits exceeded
70
+ # if self.requests_this_minute >= settings.GEMINI_REQUESTS_PER_MINUTE:
71
+ # wait_time = 60 - (current_time - self.last_reset)
72
+ # print(f"⚠️ Gemini rate limit hit. Waiting {wait_time:.1f}s...")
73
+ # time.sleep(wait_time)
74
+ # self._check_rate_limits() # Recursive check after waiting
75
+
76
+ # async def generate(
77
+ # self,
78
+ # messages: List[Dict[str, str]],
79
+ # system_prompt: Optional[str] = None
80
+ # ) -> str:
81
+ # """
82
+ # Generate response using Gemini.
83
+
84
+ # Args:
85
+ # messages: List of conversation messages
86
+ # Format: [{'role': 'user'/'assistant', 'content': '...'}]
87
+ # system_prompt: Optional system prompt (prepended to first message)
88
+
89
+ # Returns:
90
+ # str: Generated response text
91
+ # """
92
+ # self._check_rate_limits()
93
+
94
+ # try:
95
+ # # Format messages for Gemini
96
+ # # Gemini uses 'user' and 'model' roles
97
+ # formatted_messages = []
98
+
99
+ # # Add system prompt as first user message if provided
100
+ # if system_prompt:
101
+ # formatted_messages.append({
102
+ # 'role': 'user',
103
+ # 'parts': [system_prompt]
104
+ # })
105
+
106
+ # # Convert messages
107
+ # for msg in messages:
108
+ # role = 'model' if msg['role'] == 'assistant' else 'user'
109
+ # formatted_messages.append({
110
+ # 'role': role,
111
+ # 'parts': [msg['content']]
112
+ # })
113
+
114
+ # # Generate response
115
+ # chat = self.model.start_chat(history=formatted_messages[:-1])
116
+ # response = chat.send_message(formatted_messages[-1]['parts'][0])
117
+
118
+ # # Track rate limits
119
+ # self.requests_this_minute += 1
120
+ # # Note: Token counting would require additional API call
121
+ # # For now, estimate ~4 chars per token
122
+ # estimated_tokens = len(response.text) // 4
123
+ # self.tokens_this_minute += estimated_tokens
124
+
125
+ # return response.text
126
+
127
+ # except Exception as e:
128
+ # print(f"❌ Gemini API error: {e}")
129
+ # raise
130
+
131
+
132
+ # # ============================================================================
133
+ # # GROQ MANAGER
134
+ # # ============================================================================
135
+
136
+ # class GroqManager:
137
+ # """
138
+ # Groq API Manager (Secondary LLM)
139
+ # Handles fast inference with Llama-3-70B
140
+ # """
141
+
142
+ # def __init__(self):
143
+ # """Initialize Groq API with single API key"""
144
+ # self.api_key = settings.GROQ_API_KEY
145
+ # self.model_name = settings.GROQ_MODEL
146
+
147
+ # # Create ChatGroq instance
148
+ # self.llm = ChatGroq(
149
+ # api_key=self.api_key,
150
+ # model_name=self.model_name,
151
+ # temperature=settings.LLM_TEMPERATURE,
152
+ # max_tokens=settings.LLM_MAX_TOKENS
153
+ # )
154
+
155
+ # # Rate limiting tracking
156
+ # self.requests_this_minute = 0
157
+ # self.tokens_this_minute = 0
158
+ # self.last_reset = time.time()
159
+
160
+ # print(f"✅ Groq Manager initialized: {self.model_name}")
161
+
162
+ # def _check_rate_limits(self):
163
+ # """
164
+ # Check and reset rate limit counters.
165
+ # Groq Free: 30 requests/min, 30,000 tokens/min
166
+ # """
167
+ # current_time = time.time()
168
+
169
+ # # Reset counters every minute
170
+ # if current_time - self.last_reset > 60:
171
+ # self.requests_this_minute = 0
172
+ # self.tokens_this_minute = 0
173
+ # self.last_reset = current_time
174
+
175
+ # # Check if limits exceeded
176
+ # if self.requests_this_minute >= settings.GROQ_REQUESTS_PER_MINUTE:
177
+ # wait_time = 60 - (current_time - self.last_reset)
178
+ # print(f"⚠️ Groq rate limit hit. Waiting {wait_time:.1f}s...")
179
+ # time.sleep(wait_time)
180
+ # self._check_rate_limits()
181
+
182
+ # async def generate(
183
+ # self,
184
+ # messages: List[Dict[str, str]],
185
+ # system_prompt: Optional[str] = None
186
+ # ) -> str:
187
+ # """
188
+ # Generate response using Groq.
189
+
190
+ # Args:
191
+ # messages: List of conversation messages
192
+ # Format: [{'role': 'user'/'assistant', 'content': '...'}]
193
+ # system_prompt: Optional system prompt
194
+
195
+ # Returns:
196
+ # str: Generated response text
197
+ # """
198
+ # self._check_rate_limits()
199
+
200
+ # try:
201
+ # # Format messages for LangChain
202
+ # formatted_messages = []
203
+
204
+ # # Add system message if provided
205
+ # if system_prompt:
206
+ # formatted_messages.append(SystemMessage(content=system_prompt))
207
+
208
+ # # Convert conversation messages
209
+ # for msg in messages:
210
+ # if msg['role'] == 'user':
211
+ # formatted_messages.append(HumanMessage(content=msg['content']))
212
+ # elif msg['role'] == 'assistant':
213
+ # formatted_messages.append(AIMessage(content=msg['content']))
214
+
215
+ # # Generate response
216
+ # response = await self.llm.ainvoke(formatted_messages)
217
+
218
+ # # Track rate limits
219
+ # self.requests_this_minute += 1
220
+ # # Estimate tokens (rough approximation)
221
+ # estimated_tokens = len(response.content) // 4
222
+ # self.tokens_this_minute += estimated_tokens
223
+
224
+ # return response.content
225
+
226
+ # except Exception as e:
227
+ # print(f"❌ Groq API error: {e}")
228
+ # raise
229
+
230
+
231
+ # # ============================================================================
232
+ # # UNIFIED LLM MANAGER (Routes to appropriate LLM)
233
+ # # ============================================================================
234
+
235
+ # class LLMManager:
236
+ # """
237
+ # Unified LLM Manager that routes requests to appropriate LLM.
238
+
239
+ # Routing strategy (from config):
240
+ # - Chat responses → Gemini (best quality for users)
241
+ # - Evaluation → Groq (fast, good enough for RL)
242
+ # - Policy → Local BERT (no API call)
243
+ # """
244
+
245
+ # def __init__(self):
246
+ # """Initialize all LLM managers"""
247
+ # self.gemini = None
248
+ # self.groq = None
249
+
250
+ # # Initialize Gemini if configured
251
+ # if settings.is_gemini_enabled():
252
+ # try:
253
+ # self.gemini = GeminiManager()
254
+ # except Exception as e:
255
+ # print(f"⚠️ Failed to initialize Gemini: {e}")
256
+
257
+ # # Initialize Groq if configured
258
+ # if settings.is_groq_enabled():
259
+ # try:
260
+ # self.groq = GroqManager()
261
+ # except Exception as e:
262
+ # print(f"⚠️ Failed to initialize Groq: {e}")
263
+
264
+ # print("✅ LLM Manager initialized")
265
+
266
+ # async def generate(
267
+ # self,
268
+ # messages: List[Dict[str, str]],
269
+ # system_prompt: Optional[str] = None,
270
+ # task: Literal["chat", "evaluation"] = "chat"
271
+ # ) -> str:
272
+ # """
273
+ # Generate response using appropriate LLM based on task.
274
+
275
+ # Args:
276
+ # messages: Conversation messages
277
+ # system_prompt: Optional system prompt
278
+ # task: Task type - "chat" (user-facing) or "evaluation" (RL training)
279
+
280
+ # Returns:
281
+ # str: Generated response
282
+
283
+ # Raises:
284
+ # ValueError: If appropriate LLM is not configured
285
+ # """
286
+ # # Determine which LLM to use based on task
287
+ # llm_choice = settings.get_llm_for_task(task)
288
+
289
+ # if llm_choice == "gemini":
290
+ # if self.gemini is None:
291
+ # raise ValueError("Gemini API not configured. Set GOOGLE_API_KEY in .env")
292
+ # return await self.gemini.generate(messages, system_prompt)
293
+
294
+ # elif llm_choice == "groq":
295
+ # if self.groq is None:
296
+ # raise ValueError("Groq API not configured. Set GROQ_API_KEY in .env")
297
+ # return await self.groq.generate(messages, system_prompt)
298
+
299
+ # else:
300
+ # raise ValueError(f"Unknown LLM choice: {llm_choice}")
301
+
302
+ # # async def generate_chat_response(
303
+ # # self,
304
+ # # query: str,
305
+ # # context: str,
306
+ # # history: List[Dict[str, str]]
307
+ # # ) -> str:
308
+ # # """
309
+ # # Generate chat response (uses Gemini by default).
310
+
311
+ # # Args:
312
+ # # query: User query
313
+ # # context: Retrieved context (from FAISS)
314
+ # # history: Conversation history
315
+
316
+ # # Returns:
317
+ # # str: Chat response
318
+ # # """
319
+ # # # Build system prompt
320
+ # # system_prompt = settings.SYSTEM_PROMPT
321
+ # # if context:
322
+ # # system_prompt += f"\n\nRelevant Information:\n{context}"
323
+
324
+ # # # Build messages
325
+ # # messages = history + [{'role': 'user', 'content': query}]
326
+
327
+ # # # Generate using chat LLM (Gemini)
328
+ # # return await self.generate(messages, system_prompt, task="chat")
329
+
330
+ # async def generate_chat_response(
331
+ # self,
332
+ # query: str,
333
+ # context: str,
334
+ # history: List[Dict[str, str]]
335
+ # ) -> str:
336
+ # """Generate chat response (uses Gemini by default)."""
337
+
338
+ # # Import the detailed prompt
339
+ # from app.services.chat_service import BANKING_SYSTEM_PROMPT
340
+
341
+ # # Build enhanced system prompt with context
342
+ # system_prompt = BANKING_SYSTEM_PROMPT
343
+
344
+ # if context:
345
+ # system_prompt += f"\n\nRelevant Knowledge Base Context:\n{context}"
346
+ # else:
347
+ # system_prompt += "\n\nNo specific banking documents were retrieved for this query. Provide a helpful general response while acknowledging your banking specialization."
348
+
349
+ # # Build messages
350
+ # messages = history + [{'role': 'user', 'content': query}]
351
+
352
+ # # Generate using chat LLM (Gemini)
353
+ # return await self.generate(messages, system_prompt, task="chat")
354
+
355
+
356
+
357
+
358
+
359
+ # async def evaluate_response(
360
+ # self,
361
+ # query: str,
362
+ # response: str,
363
+ # context: str = ""
364
+ # ) -> Dict:
365
+ # """
366
+ # Evaluate response quality (uses Groq for speed).
367
+ # Used during RL training.
368
+
369
+ # Args:
370
+ # query: User query
371
+ # response: Generated response
372
+ # context: Retrieved context (if any)
373
+
374
+ # Returns:
375
+ # dict: Evaluation results
376
+ # {'quality': 'Good'/'Bad', 'explanation': '...'}
377
+ # """
378
+ # eval_prompt = f"""Evaluate this response:
379
+ # Query: {query}
380
+ # Response: {response}
381
+ # Context used: {context if context else 'None'}
382
+
383
+ # Is this response Good or Bad? Respond with just "Good" or "Bad" and brief explanation."""
384
+
385
+ # messages = [{'role': 'user', 'content': eval_prompt}]
386
+
387
+ # # Generate using evaluation LLM (Groq)
388
+ # result = await self.generate(messages, task="evaluation")
389
+
390
+ # # Parse result
391
+ # quality = "Good" if "Good" in result else "Bad"
392
+
393
+ # return {
394
+ # 'quality': quality,
395
+ # 'explanation': result
396
+ # }
397
+
398
+
399
+ # # ============================================================================
400
+ # # GLOBAL LLM MANAGER INSTANCE
401
+ # # ============================================================================
402
+ # llm_manager = LLMManager()
403
+
404
+
405
+ # # ============================================================================
406
+ # # USAGE EXAMPLE (for reference)
407
+ # # ============================================================================
408
+ # """
409
+ # # In your service file:
410
+
411
+ # from app.core.llm_manager import llm_manager
412
+
413
+ # # Generate chat response (uses Gemini)
414
+ # response = await llm_manager.generate_chat_response(
415
+ # query="What is my account balance?",
416
+ # context="Your balance is $1000",
417
+ # history=[]
418
+ # )
419
+
420
+ # # Evaluate response (uses Groq)
421
+ # evaluation = await llm_manager.evaluate_response(
422
+ # query="What is my balance?",
423
+ # response="Your balance is $1000",
424
+ # context="Balance: $1000"
425
+ # )
426
+ # """
427
+
428
+
429
+
430
+
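
Note that `LLMManager.generate` above calls `settings.get_llm_for_task(task)`, a helper that does not appear in any of the config versions shown in this commit. A minimal sketch of what such a helper might look like on the Settings class (an assumption about the expected contract, not the repo's actual implementation):

# Hypothetical helper, sketched only to show the contract that
# LLMManager.generate expects from settings.get_llm_for_task();
# the real implementation is not part of this diff.
def get_llm_for_task(self, task: str) -> str:
    """Map a task name to the LLM provider the manager should use."""
    routing = {"chat": "gemini", "evaluation": "groq"}
    return routing.get(task, "gemini")
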
backups/backup_main.py ADDED
@@ -0,0 +1,275 @@
1
+ """
2
+ FastAPI Main Application Entry Point
3
+ Banking RAG Chatbot API with JWT Authentication
4
+
5
+ This file:
6
+ 1. Creates the FastAPI app
7
+ 2. Configures CORS middleware
8
+ 3. Connects to MongoDB on startup/shutdown
9
+ 4. Includes API routers (auth + chat)
10
+ 5. Provides health check endpoints
11
+ """
12
+
13
+ from fastapi import FastAPI, Request
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from fastapi.responses import JSONResponse
16
+ from contextlib import asynccontextmanager
17
+
18
+ from app.config import settings
19
+ from app.db.mongodb import connect_to_mongo, close_mongo_connection
20
+
21
+
22
+ # ============================================================================
23
+ # LIFESPAN MANAGER (Startup & Shutdown)
24
+ # ============================================================================
25
+
26
+ @asynccontextmanager
27
+ async def lifespan(app: FastAPI):
28
+ """
29
+ Manage application lifespan events.
30
+
31
+ Startup:
32
+ - Connect to MongoDB Atlas
33
+ - ML models load lazily on first use
34
+
35
+ Shutdown:
36
+ - Close MongoDB connection
37
+ - Cleanup resources
38
+ """
39
+ # ========================================================================
40
+ # STARTUP
41
+ # ========================================================================
42
+ print("\n" + "=" * 80)
43
+ print("🚀 STARTING BANKING RAG CHATBOT API")
44
+ print("=" * 80)
45
+ print(f"Environment: {settings.ENVIRONMENT}")
46
+ print(f"Debug Mode: {settings.DEBUG}")
47
+ print("=" * 80)
48
+
49
+ # Connect to MongoDB
50
+ await connect_to_mongo()
51
+
52
+ print("\n💡 ML Models Info:")
53
+ print(" Policy Network: Loads on first chat request (lazy loading)")
54
+ print(" Retriever Model: Loads on first retrieval (lazy loading)")
55
+ print(" LLM (Gemini): Connects on first generation")
56
+
57
+ print("\n✅ Backend startup complete!")
58
+ print("=" * 80)
59
+ print(f"📖 API Docs: http://localhost:8000/docs")
60
+ print(f"🏥 Health Check: http://localhost:8000/health")
61
+ print(f"🔐 Register: POST http://localhost:8000/api/v1/auth/register")
62
+ print(f"🔑 Login: POST http://localhost:8000/api/v1/auth/login")
63
+ print("=" * 80 + "\n")
64
+
65
+ yield # Application runs here
66
+
67
+ # ========================================================================
68
+ # SHUTDOWN
69
+ # ========================================================================
70
+ print("\n" + "=" * 80)
71
+ print("🛑 SHUTTING DOWN API")
72
+ print("=" * 80)
73
+
74
+ # Close MongoDB connection
75
+ await close_mongo_connection()
76
+
77
+ print("✅ Shutdown complete")
78
+ print("=" * 80 + "\n")
79
+
80
+
81
+ # ============================================================================
82
+ # CREATE FASTAPI APPLICATION
83
+ # ============================================================================
84
+
85
+ app = FastAPI(
86
+ title="Banking RAG Chatbot API",
87
+ description="""
88
+ 🤖 AI-powered Banking Assistant with:
89
+
90
+ **Features:**
91
+ - 🔐 JWT Authentication (Sign up, Login, Protected routes)
92
+ - 💬 RAG (Retrieval-Augmented Generation)
93
+ - 🧠 RL-based Policy Network (BERT)
94
+ - 🔍 Custom E5 Retriever
95
+ - ✨ Google Gemini LLM
96
+
97
+ **Capabilities:**
98
+ - Intelligent document retrieval
99
+ - Context-aware responses
100
+ - Conversation history
101
+ - Real-time chat
102
+ - User authentication & authorization
103
+ """,
104
+ version="1.0.0",
105
+ docs_url="/docs",
106
+ redoc_url="/redoc",
107
+ lifespan=lifespan
108
+ )
109
+
110
+
111
+ # ============================================================================
112
+ # CORS MIDDLEWARE
113
+ # ============================================================================
114
+
115
+ allowed_origins = settings.get_allowed_origins()
116
+
117
+ print("\n🌐 CORS Configuration:")
118
+ print(f" Allowed Origins: {allowed_origins}")
119
+
120
+ app.add_middleware(
121
+ CORSMiddleware,
122
+ allow_origins=allowed_origins,
123
+ allow_credentials=True,
124
+ allow_methods=["*"],
125
+ allow_headers=["*"],
126
+ )
127
+
128
+
129
+ # ============================================================================
130
+ # INCLUDE API ROUTERS
131
+ # ============================================================================
132
+
133
+ from app.api.v1 import chat, auth
134
+
135
+ # Auth router (public endpoints - register, login)
136
+ app.include_router(
137
+ auth.router,
138
+ prefix="/api/v1/auth",
139
+ tags=["🔐 Authentication"]
140
+ )
141
+
142
+ # Chat router (protected endpoints - requires JWT token)
143
+ app.include_router(
144
+ chat.router,
145
+ prefix="/api/v1/chat",
146
+ tags=["💬 Chat"]
147
+ )
148
+
149
+
150
+ # ============================================================================
151
+ # ROOT ENDPOINTS
152
+ # ============================================================================
153
+
154
+ @app.get("/", tags=["📍 Root"])
155
+ async def root():
156
+ """
157
+ Root endpoint - API information and available endpoints
158
+ """
159
+ return {
160
+ "message": "Banking RAG Chatbot API with Authentication",
161
+ "version": "1.0.0",
162
+ "status": "online",
163
+ "authentication": "JWT Bearer Token Required for chat endpoints",
164
+ "documentation": {
165
+ "swagger_ui": "/docs",
166
+ "redoc": "/redoc"
167
+ },
168
+ "endpoints": {
169
+ "auth": {
170
+ "register": "POST /api/v1/auth/register",
171
+ "login": "POST /api/v1/auth/login",
172
+ "me": "GET /api/v1/auth/me (requires token)",
173
+ "logout": "POST /api/v1/auth/logout (requires token)"
174
+ },
175
+ "chat": {
176
+ "send_message": "POST /api/v1/chat/ (requires token)",
177
+ "get_history": "GET /api/v1/chat/history/{conversation_id} (requires token)",
178
+ "list_conversations": "GET /api/v1/chat/conversations (requires token)",
179
+ "delete_conversation": "DELETE /api/v1/chat/conversation/{conversation_id} (requires token)"
180
+ },
181
+ "health": "GET /health"
182
+ }
183
+ }
184
+
185
+
186
+ @app.get("/health", tags=["🏥 Health"])
187
+ async def health_check():
188
+ """
189
+ Comprehensive health check endpoint
190
+
191
+ Checks status of:
192
+ - API service
193
+ - MongoDB connection
194
+ - ML models (lazy loaded)
195
+ - Authentication system
196
+
197
+ Returns:
198
+ dict: Health status of all components
199
+ """
200
+ from app.db.mongodb import get_database
201
+
202
+ # Check MongoDB
203
+ mongodb_status = "connected" if get_database() is not None else "disconnected"
204
+
205
+ # Check ML models (don't load them, just check readiness)
206
+ ml_models_status = {
207
+ "policy_network": "ready (lazy load)",
208
+ "retriever": "ready (lazy load)",
209
+ "llm": "ready (API-based)"
210
+ }
211
+
212
+ # Check authentication
213
+ auth_status = {
214
+ "jwt_enabled": bool(settings.SECRET_KEY and settings.SECRET_KEY != "your-secret-key-change-in-production"),
215
+ "algorithm": settings.ALGORITHM,
216
+ "token_expiry_minutes": settings.ACCESS_TOKEN_EXPIRE_MINUTES
217
+ }
218
+
219
+ # Overall health
220
+ is_healthy = mongodb_status == "connected" and auth_status["jwt_enabled"]
221
+
222
+ return {
223
+ "status": "healthy" if is_healthy else "degraded",
224
+ "api": "online",
225
+ "mongodb": mongodb_status,
226
+ "authentication": auth_status,
227
+ "ml_models": ml_models_status,
228
+ "environment": settings.ENVIRONMENT,
229
+ "debug_mode": settings.DEBUG
230
+ }
231
+
232
+
233
+ # ============================================================================
234
+ # GLOBAL EXCEPTION HANDLER
235
+ # ============================================================================
236
+
237
+ @app.exception_handler(Exception)
238
+ async def global_exception_handler(request: Request, exc: Exception):
239
+ """
240
+ Global exception handler for unhandled errors
241
+ """
242
+ print(f"\n❌ Unhandled Exception:")
243
+ print(f" Path: {request.url.path}")
244
+ print(f" Error: {str(exc)}")
245
+
246
+ if settings.DEBUG:
247
+ import traceback
248
+ traceback.print_exc()
249
+
250
+ return JSONResponse(
251
+ status_code=500,
252
+ content={
253
+ "error": "Internal Server Error",
254
+ "detail": str(exc) if settings.DEBUG else "An unexpected error occurred",
255
+ "path": str(request.url.path)
256
+ }
257
+ )
258
+
259
+
260
+ # ============================================================================
261
+ # MAIN ENTRY POINT (for direct execution)
262
+ # ============================================================================
263
+
264
+ if __name__ == "__main__":
265
+ import uvicorn
266
+
267
+ print("\n🚀 Starting server directly...")
268
+ print(" Note: For production, use: uvicorn app.main:app --host 0.0.0.0 --port 8000")
269
+
270
+ uvicorn.run(
271
+ "app.main:app",
272
+ host="0.0.0.0",
273
+ port=8000,
274
+ reload=settings.DEBUG # Auto-reload only in debug mode
275
+ )
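
A small client-side sketch of the endpoints listed above, assuming the API is running locally on port 8000 and that the `requests` package is available (it is pinned only in the older requirements listing); the payload shape and the "access_token" field name are assumptions, not taken from this commit:

# Hedged example: register, log in, and call a protected endpoint.
import requests

BASE = "http://localhost:8000"

# Health check (no token required)
print(requests.get(f"{BASE}/health").json())

# Register, then log in to obtain a JWT for the protected routes.
creds = {"email": "user@example.com", "password": "change-me"}  # hypothetical payload shape
requests.post(f"{BASE}/api/v1/auth/register", json=creds)
login = requests.post(f"{BASE}/api/v1/auth/login", json=creds).json()
token = login.get("access_token")  # field name is an assumption

# Protected call with the bearer token.
headers = {"Authorization": f"Bearer {token}"}
print(requests.get(f"{BASE}/api/v1/auth/me", headers=headers).json())
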
backups/backup_requirements.txt ADDED
@@ -0,0 +1,182 @@
1
+ # # ================================================================================
2
+ # # BANKING RAG CHATBOT API - DEPENDENCIES
3
+ # # Python 3.10+ required
4
+ # # ================================================================================
5
+
6
+ # # ============================================================================
7
+ # # CORE WEB FRAMEWORK
8
+ # # ============================================================================
9
+ # # FastAPI - Modern async web framework
10
+ # fastapi==0.104.1
11
+
12
+ # # Uvicorn - ASGI server for FastAPI
13
+ # uvicorn[standard]==0.24.0
14
+
15
+ # # Python multipart for file uploads (if needed later)
16
+ # python-multipart==0.0.6
17
+
18
+ # # ============================================================================
19
+ # # CONFIGURATION & ENVIRONMENT
20
+ # # ============================================================================
21
+ # # Pydantic - Data validation and settings management
22
+ # pydantic==2.5.0
23
+ # pydantic-settings==2.1.0
24
+
25
+ # # Python-dotenv - Load environment variables from .env file
26
+ # python-dotenv==1.0.0
27
+
28
+ # # ============================================================================
29
+ # # DATABASE - MongoDB
30
+ # # ============================================================================
31
+ # # Motor - Async MongoDB driver for FastAPI
32
+ # motor==3.3.2
33
+
34
+ # # PyMongo - MongoDB Python driver (used by Motor)
35
+ # pymongo==4.6.0
36
+
37
+ # # ============================================================================
38
+ # # AUTHENTICATION & SECURITY
39
+ # # ============================================================================
40
+ # # Python-jose - JWT token handling
41
+ # python-jose[cryptography]==3.3.0
42
+
43
+ # # Passlib - Password hashing
44
+ # passlib[bcrypt]==1.7.4
45
+
46
+ # # ============================================================================
47
+ # # MACHINE LEARNING - PYTORCH & TRANSFORMERS
48
+ # # ============================================================================
49
+ # # PyTorch - Deep learning framework
50
+ # torch==2.1.0
51
+
52
+ # # Transformers - HuggingFace transformers library (BERT, e5-base-v2)
53
+ # transformers==4.35.0
54
+
55
+ # # Sentence-Transformers - Sentence embeddings
56
+ # sentence-transformers==2.2.2
57
+
58
+ # # ============================================================================
59
+ # # VECTOR SEARCH
60
+ # # ============================================================================
61
+ # # FAISS - Facebook AI Similarity Search (CPU version)
62
+ # faiss-cpu==1.7.4
63
+
64
+ # # ============================================================================
65
+ # # LLM INTEGRATIONS
66
+ # # ============================================================================
67
+ # # LangChain - LLM orchestration framework
68
+ # langchain==0.1.0
69
+
70
+ # # LangChain Groq integration
71
+ # langchain-groq==0.0.1
72
+
73
+ # # LangChain Google GenAI (for Gemini)
74
+ # langchain-google-genai==1.0.0
75
+
76
+ # # Google Generative AI - Direct Gemini API
77
+ # google-generativeai==0.3.2
78
+
79
+ # # ============================================================================
80
+ # # UTILITIES
81
+ # # ============================================================================
82
+ # # NumPy - Numerical computing
83
+ # numpy==1.24.3
84
+
85
+ # # Tiktoken - OpenAI tokenizer (for token counting)
86
+ # tiktoken==0.5.1
87
+
88
+ # # Rich - Beautiful terminal output (for logging)
89
+ # rich==13.7.0
90
+
91
+ # # Requests - HTTP library
92
+ # requests==2.31.0
93
+
94
+ # # ============================================================================
95
+ # # OPTIONAL: DEVELOPMENT TOOLS (comment out for production)
96
+ # # ============================================================================
97
+ # # Pytest - Testing framework
98
+ # # pytest==7.4.3
99
+
100
+ # # Black - Code formatter
101
+ # # black==23.12.0
102
+
103
+ # # Flake8 - Linter
104
+ # # flake8==6.1.0
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+ # fastapi==0.104.1
122
+ # uvicorn[standard]==0.24.0
123
+ # pydantic==2.5.0
124
+ # pydantic-settings==2.1.0
125
+ # python-dotenv==1.0.0
126
+ # motor==3.3.2
127
+ # pymongo==4.6.0
128
+ # google-generativeai==0.3.1
129
+ # sentence-transformers==2.2.2
130
+ # faiss-cpu==1.7.4
131
+ # numpy==1.24.3
132
+ # torch==2.1.0
133
+ # transformers==4.35.2
134
+
135
+ # # AUTH DEPENDENCIES (NEW!)
136
+ # python-jose[cryptography]==3.3.0
137
+ # passlib[bcrypt]==1.7.4
138
+ # python-multipart==0.0.6
139
+ # bcrypt==4.1.1
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+ # FastAPI & Server
152
+ fastapi==0.104.1
153
+ uvicorn[standard]==0.24.0
154
+
155
+ # Data Validation
156
+ pydantic==2.5.0
157
+ pydantic-settings==2.1.0
158
+ python-dotenv==1.0.0
159
+
160
+ # Database
161
+ motor==3.3.2
162
+ pymongo==4.6.0
163
+
164
+ # LLM & AI Libraries
165
+ langchain-groq==0.1.0
166
+ langchain-core==0.1.0
167
+ huggingface-hub==0.20.0
168
+
169
+ # Embeddings & Vector Search
170
+ sentence-transformers==2.2.2
171
+ faiss-cpu==1.7.4
172
+ numpy==1.24.3
173
+
174
+ # ML/Deep Learning
175
+ torch==2.1.0
176
+ transformers==4.35.2
177
+
178
+ # Authentication
179
+ python-jose[cryptography]==3.3.0
180
+ passlib[bcrypt]==1.7.4
181
+ python-multipart==0.0.6
182
+ bcrypt==4.1.1
requirements.txt CHANGED
@@ -1,138 +1,37 @@
1
- # # ================================================================================
2
- # # BANKING RAG CHATBOT API - DEPENDENCIES
3
- # # Python 3.10+ required
4
- # # ================================================================================
5
-
6
- # # ============================================================================
7
- # # CORE WEB FRAMEWORK
8
- # # ============================================================================
9
- # # FastAPI - Modern async web framework
10
- # fastapi==0.104.1
11
-
12
- # # Uvicorn - ASGI server for FastAPI
13
- # uvicorn[standard]==0.24.0
14
-
15
- # # Python multipart for file uploads (if needed later)
16
- # python-multipart==0.0.6
17
-
18
- # # ============================================================================
19
- # # CONFIGURATION & ENVIRONMENT
20
- # # ============================================================================
21
- # # Pydantic - Data validation and settings management
22
- # pydantic==2.5.0
23
- # pydantic-settings==2.1.0
24
-
25
- # # Python-dotenv - Load environment variables from .env file
26
- # python-dotenv==1.0.0
27
-
28
- # # ============================================================================
29
- # # DATABASE - MongoDB
30
- # # ============================================================================
31
- # # Motor - Async MongoDB driver for FastAPI
32
- # motor==3.3.2
33
-
34
- # # PyMongo - MongoDB Python driver (used by Motor)
35
- # pymongo==4.6.0
36
-
37
- # # ============================================================================
38
- # # AUTHENTICATION & SECURITY
39
- # # ============================================================================
40
- # # Python-jose - JWT token handling
41
- # python-jose[cryptography]==3.3.0
42
-
43
- # # Passlib - Password hashing
44
- # passlib[bcrypt]==1.7.4
45
-
46
- # # ============================================================================
47
- # # MACHINE LEARNING - PYTORCH & TRANSFORMERS
48
- # # ============================================================================
49
- # # PyTorch - Deep learning framework
50
- # torch==2.1.0
51
-
52
- # # Transformers - HuggingFace transformers library (BERT, e5-base-v2)
53
- # transformers==4.35.0
54
-
55
- # # Sentence-Transformers - Sentence embeddings
56
- # sentence-transformers==2.2.2
57
-
58
- # # ============================================================================
59
- # # VECTOR SEARCH
60
- # # ============================================================================
61
- # # FAISS - Facebook AI Similarity Search (CPU version)
62
- # faiss-cpu==1.7.4
63
-
64
- # # ============================================================================
65
- # # LLM INTEGRATIONS
66
- # # ============================================================================
67
- # # LangChain - LLM orchestration framework
68
- # langchain==0.1.0
69
-
70
- # # LangChain Groq integration
71
- # langchain-groq==0.0.1
72
-
73
- # # LangChain Google GenAI (for Gemini)
74
- # langchain-google-genai==1.0.0
75
-
76
- # # Google Generative AI - Direct Gemini API
77
- # google-generativeai==0.3.2
78
-
79
- # # ============================================================================
80
- # # UTILITIES
81
- # # ============================================================================
82
- # # NumPy - Numerical computing
83
- # numpy==1.24.3
84
-
85
- # # Tiktoken - OpenAI tokenizer (for token counting)
86
- # tiktoken==0.5.1
87
-
88
- # # Rich - Beautiful terminal output (for logging)
89
- # rich==13.7.0
90
-
91
- # # Requests - HTTP library
92
- # requests==2.31.0
93
-
94
- # # ============================================================================
95
- # # OPTIONAL: DEVELOPMENT TOOLS (comment out for production)
96
- # # ============================================================================
97
- # # Pytest - Testing framework
98
- # # pytest==7.4.3
99
-
100
- # # Black - Code formatter
101
- # # black==23.12.0
102
-
103
- # # Flake8 - Linter
104
- # # flake8==6.1.0
105
-
106
-
107
-
108
-
109
-
110
-
111
-
112
-
113
 
114

115
 
116

117
 
118

119
 
120
 
121
- fastapi==0.104.1
122
- uvicorn[standard]==0.24.0
123
- pydantic==2.5.0
124
- pydantic-settings==2.1.0
125
- python-dotenv==1.0.0
126
- motor==3.3.2
127
- pymongo==4.6.0
128
- google-generativeai==0.3.1
129
  sentence-transformers==2.2.2
130
  faiss-cpu==1.7.4
131
  numpy==1.24.3
 
 
 
132
  torch==2.1.0
133
  transformers==4.35.2
134
 
135
- # AUTH DEPENDENCIES (NEW!)
 
136
  python-jose[cryptography]==3.3.0
137
  passlib[bcrypt]==1.7.4
138
  python-multipart==0.0.6
 
1
+ # FastAPI & Server
2
+ fastapi==0.104.1
3
+ uvicorn[standard]==0.24.0

4
 
5
 
6
+ # Data Validation
7
+ pydantic==2.5.0
8
+ pydantic-settings==2.1.0
9
+ python-dotenv==1.0.0
10
 
11
 
12
+ # Database
13
+ motor==3.3.2
14
+ pymongo==4.6.0
15
 
16
 
17
+ # LLM & AI Libraries
18
+ langchain-groq==0.1.9
19
+ langchain-core==0.2.38
20
+ huggingface-hub==0.24.6
21
 
22
 
23
+ # Embeddings & Vector Search
24
  sentence-transformers==2.2.2
25
  faiss-cpu==1.7.4
26
  numpy==1.24.3
27
+
28
+
29
+ # ML/Deep Learning
30
  torch==2.1.0
31
  transformers==4.35.2
32
 
33
+
34
+ # Authentication
35
  python-jose[cryptography]==3.3.0
36
  passlib[bcrypt]==1.7.4
37
  python-multipart==0.0.6
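
A small sketch (not part of the repo) for checking that an installed environment matches a few of the pins above, using only the Python standard library:

# Assumption: packages were installed from the requirements.txt above.
from importlib.metadata import PackageNotFoundError, version

pinned = {
    "fastapi": "0.104.1",
    "motor": "3.3.2",
    "faiss-cpu": "1.7.4",
    "torch": "2.1.0",
}

for pkg, expected in pinned.items():
    try:
        installed = version(pkg)
        status = "OK" if installed == expected else f"mismatch (installed {installed})"
    except PackageNotFoundError:
        status = "not installed"
    print(f"{pkg}=={expected}: {status}")
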