eeshanyaj committed on
Commit
a2b2f2d
·
1 Parent(s): 10562b2

Fix: update model names - auto-resolve

Browse files
.env.example CHANGED
@@ -33,7 +33,7 @@ ACCESS_TOKEN_EXPIRE_MINUTES=1440
33
  # --- GOOGLE GEMINI API (PRIMARY) ---
34
  # Get from: https://aistudio.google.com/app/apikey
35
  # You have Google Pro - this is your main LLM for response generation
36
- GOOGLE_API_KEY=AIzaSyXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
37
 
38
  # Which Gemini model to use
39
  # Options: gemini-2.0-flash-lite, gemini-1.5-flash
@@ -47,10 +47,10 @@ GEMINI_TOKENS_PER_MINUTE=60000
47
  # --- GROQ API (SECONDARY) ---
48
  # Get from: https://console.groq.com/keys
49
  # Single key for specific fast inference tasks (llama models)
50
- GROQ_API_KEY=gsk_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
51
 
52
  # Groq model (fast inference for policy evaluations)
53
- GROQ_MODEL=llama3-70b-8192
54
 
55
  # Groq rate limits (Free tier)
56
  GROQ_REQUESTS_PER_MINUTE=30
@@ -60,7 +60,7 @@ GROQ_TOKENS_PER_MINUTE=30000
60
  # --- HUGGING FACE TOKEN (REQUIRED) ---
61
  # Get from: https://huggingface.co/settings/tokens
62
  # Required for: Model downloads (e5-base-v2, BERT), embeddings
63
- HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
64
 
65
  # ============================================================================
66
  # MODEL PATHS (Local storage)
 
33
  # --- GOOGLE GEMINI API (PRIMARY) ---
34
  # Get from: https://aistudio.google.com/app/apikey
35
  # You have Google Pro - this is your main LLM for response generation
36
+ GOOGLE_API_KEY=your_google_api_key_here
37
 
38
  # Which Gemini model to use
39
  # Options: gemini-2.0-flash-lite, gemini-1.5-flash
 
47
  # --- GROQ API (SECONDARY) ---
48
  # Get from: https://console.groq.com/keys
49
  # Single key for specific fast inference tasks (llama models)
50
+ GROQ_API_KEY=your_groq_api_key_here
51
 
52
  # Groq model (fast inference for policy evaluations)
53
+ GROQ_MODEL=llama-3.3-70b-versatile
54
 
55
  # Groq rate limits (Free tier)
56
  GROQ_REQUESTS_PER_MINUTE=30
 
60
  # --- HUGGING FACE TOKEN (REQUIRED) ---
61
  # Get from: https://huggingface.co/settings/tokens
62
  # Required for: Model downloads (e5-base-v2, BERT), embeddings
63
+ HF_TOKEN=your_hf_token_here
64
 
65
  # ============================================================================
66
  # MODEL PATHS (Local storage)
app/config.py CHANGED
@@ -47,8 +47,8 @@ class Settings:
47
  GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "") # Fallback 2
48
 
49
  # Model names for Groq (using correct GroqCloud naming)
50
- GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama3-8b-8192") # For chat interface
51
- GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama3-70b-8192") # For evaluation
52
 
53
  # ========================================================================
54
  # Commented as of now, can be re-enabled if rate limiting is needed
@@ -191,9 +191,9 @@ class Settings:
191
  str: Model name for the task
192
  """
193
  if task == "evaluation":
194
- return self.GROQ_EVAL_MODEL # llama3-70b-8192
195
  else:
196
- return self.GROQ_CHAT_MODEL # llama3-8b-8192
197
 
198
  # ============================================================================
199
  # CREATE GLOBAL SETTINGS INSTANCE
 
47
  GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "") # Fallback 2
48
 
49
  # Model names for Groq (using correct GroqCloud naming)
50
+ GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama-3.1-8b-instant") # For chat interface
51
+ GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama-3.3-70b-versatile") # For evaluation
52
 
53
  # ========================================================================
54
  # Commented as of now, can be re-enabled if rate limiting is needed
 
191
  str: Model name for the task
192
  """
193
  if task == "evaluation":
194
+ return self.GROQ_EVAL_MODEL # llama-3.3-70b-versatile
195
  else:
196
+ return self.GROQ_CHAT_MODEL # llama-3.1-8b-instant
197
 
198
  # ============================================================================
199
  # CREATE GLOBAL SETTINGS INSTANCE
app/core/llm_manager.py CHANGED
@@ -35,8 +35,8 @@ class GroqManager:
35
  def __init__(self):
36
  """Initialize Groq manager with all available API keys"""
37
  self.api_keys = settings.get_groq_api_keys()
38
- self.chat_model_name = settings.GROQ_CHAT_MODEL # llama3-8b-8192
39
- self.eval_model_name = settings.GROQ_EVAL_MODEL # llama3-70b-8192
40
 
41
  # Track current key index
42
  self.current_key_index = 0
 
35
  def __init__(self):
36
  """Initialize Groq manager with all available API keys"""
37
  self.api_keys = settings.get_groq_api_keys()
38
+ self.chat_model_name = settings.GROQ_CHAT_MODEL # llama-3.1-8b-instant
39
+ self.eval_model_name = settings.GROQ_EVAL_MODEL # llama-3.3-70b-versatile
40
 
41
  # Track current key index
42
  self.current_key_index = 0
backups/backup_config.py CHANGED
@@ -52,7 +52,7 @@ class Settings:
52
  # GROQ API (Optional - for evaluation)
53
  # ========================================================================
54
  GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
55
- GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
56
  GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
57
 
58
  # ========================================================================
@@ -231,7 +231,7 @@ print("=" * 80)
231
  # # GROQ API (Optional - for your llm_manager)
232
  # # ========================================================================
233
  # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
234
- # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
235
 
236
  # # ========================================================================
237
  # # HUGGING FACE (Optional - for model downloads)
@@ -433,7 +433,7 @@ print("=" * 80)
433
  # # # GROQ API (Optional - for your llm_manager)
434
  # # # ========================================================================
435
  # # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
436
- # # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
437
 
438
  # # # ========================================================================
439
  # # # HUGGING FACE (Optional - for model downloads)
@@ -563,7 +563,7 @@ print("=" * 80)
563
  # # GROQ API (Optional - for your llm_manager)
564
  # # ========================================================================
565
  # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
566
- # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama3-70b-8192")
567
 
568
  # # ========================================================================
569
  # # HUGGING FACE (Optional - for model downloads)
 
52
  # GROQ API (Optional - for evaluation)
53
  # ========================================================================
54
  GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
55
+ GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
56
  GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
57
 
58
  # ========================================================================
 
231
  # # GROQ API (Optional - for your llm_manager)
232
  # # ========================================================================
233
  # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
234
+ # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
235
 
236
  # # ========================================================================
237
  # # HUGGING FACE (Optional - for model downloads)
 
433
  # # # GROQ API (Optional - for your llm_manager)
434
  # # # ========================================================================
435
  # # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
436
+ # # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
437
 
438
  # # # ========================================================================
439
  # # # HUGGING FACE (Optional - for model downloads)
 
563
  # # GROQ API (Optional - for your llm_manager)
564
  # # ========================================================================
565
  # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
566
+ # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
567
 
568
  # # ========================================================================
569
  # # HUGGING FACE (Optional - for model downloads)