Spaces:
Sleeping
Sleeping
Fix: update model names - auto-resolve
Browse files
- .env.example +4 -4
- app/config.py +4 -4
- app/core/llm_manager.py +2 -2
- backups/backup_config.py +4 -4
.env.example
CHANGED
|
@@ -33,7 +33,7 @@ ACCESS_TOKEN_EXPIRE_MINUTES=1440
|
|
| 33 |
# --- GOOGLE GEMINI API (PRIMARY) ---
|
| 34 |
# Get from: https://aistudio.google.com/app/apikey
|
| 35 |
# You have Google Pro - this is your main LLM for response generation
|
| 36 |
-
GOOGLE_API_KEY=
|
| 37 |
|
| 38 |
# Which Gemini model to use
|
| 39 |
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
|
|
@@ -47,10 +47,10 @@ GEMINI_TOKENS_PER_MINUTE=60000
|
|
| 47 |
# --- GROQ API (SECONDARY) ---
|
| 48 |
# Get from: https://console.groq.com/keys
|
| 49 |
# Single key for specific fast inference tasks (llama models)
|
| 50 |
-
GROQ_API_KEY=
|
| 51 |
|
| 52 |
# Groq model (fast inference for policy evaluations)
|
| 53 |
-
GROQ_MODEL=
|
| 54 |
|
| 55 |
# Groq rate limits (Free tier)
|
| 56 |
GROQ_REQUESTS_PER_MINUTE=30
|
|
@@ -60,7 +60,7 @@ GROQ_TOKENS_PER_MINUTE=30000
|
|
| 60 |
# --- HUGGING FACE TOKEN (REQUIRED) ---
|
| 61 |
# Get from: https://huggingface.co/settings/tokens
|
| 62 |
# Required for: Model downloads (e5-base-v2, BERT), embeddings
|
| 63 |
-
HF_TOKEN=
|
| 64 |
|
| 65 |
# ============================================================================
|
| 66 |
# MODEL PATHS (Local storage)
|
|
|
|
| 33 |
# --- GOOGLE GEMINI API (PRIMARY) ---
|
| 34 |
# Get from: https://aistudio.google.com/app/apikey
|
| 35 |
# You have Google Pro - this is your main LLM for response generation
|
| 36 |
+
GOOGLE_API_KEY=your_google_api_key_here
|
| 37 |
|
| 38 |
# Which Gemini model to use
|
| 39 |
# Options: gemini-2.0-flash-lite, gemini-1.5-flash
|
|
|
|
| 47 |
# --- GROQ API (SECONDARY) ---
|
| 48 |
# Get from: https://console.groq.com/keys
|
| 49 |
# Single key for specific fast inference tasks (llama models)
|
| 50 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 51 |
|
| 52 |
# Groq model (fast inference for policy evaluations)
|
| 53 |
+
GROQ_MODEL=llama-3.3-70b-versatile
|
| 54 |
|
| 55 |
# Groq rate limits (Free tier)
|
| 56 |
GROQ_REQUESTS_PER_MINUTE=30
|
|
|
|
| 60 |
# --- HUGGING FACE TOKEN (REQUIRED) ---
|
| 61 |
# Get from: https://huggingface.co/settings/tokens
|
| 62 |
# Required for: Model downloads (e5-base-v2, BERT), embeddings
|
| 63 |
+
HF_TOKEN=your_hf_token_here
|
| 64 |
|
| 65 |
# ============================================================================
|
| 66 |
# MODEL PATHS (Local storage)
|
app/config.py
CHANGED
|
@@ -47,8 +47,8 @@ class Settings:
|
|
| 47 |
GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "") # Fallback 2
|
| 48 |
|
| 49 |
# Model names for Groq (using correct GroqCloud naming)
|
| 50 |
-
GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "
|
| 51 |
-
GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "
|
| 52 |
|
| 53 |
# ========================================================================
|
| 54 |
# Commented as of now, can be re-enabled if rate limiting is needed
|
|
@@ -191,9 +191,9 @@ class Settings:
|
|
| 191 |
str: Model name for the task
|
| 192 |
"""
|
| 193 |
if task == "evaluation":
|
| 194 |
-
return self.GROQ_EVAL_MODEL #
|
| 195 |
else:
|
| 196 |
-
return self.GROQ_CHAT_MODEL #
|
| 197 |
|
| 198 |
# ============================================================================
|
| 199 |
# CREATE GLOBAL SETTINGS INSTANCE
|
|
|
|
| 47 |
GROQ_API_KEY_3: str = os.getenv("GROQ_API_KEY_3", "") # Fallback 2
|
| 48 |
|
| 49 |
# Model names for Groq (using correct GroqCloud naming)
|
| 50 |
+
GROQ_CHAT_MODEL: str = os.getenv("GROQ_CHAT_MODEL", "llama-3.1-8b-instant") # For chat interface
|
| 51 |
+
GROQ_EVAL_MODEL: str = os.getenv("GROQ_EVAL_MODEL", "llama-3.3-70b-versatile") # For evaluation
|
| 52 |
|
| 53 |
# ========================================================================
|
| 54 |
# Commented as of now, can be re-enabled if rate limiting is needed
|
|
|
|
| 191 |
str: Model name for the task
|
| 192 |
"""
|
| 193 |
if task == "evaluation":
|
| 194 |
+
return self.GROQ_EVAL_MODEL # llama-3.3-70b-versatile
|
| 195 |
else:
|
| 196 |
+
return self.GROQ_CHAT_MODEL # llama-3.1-8b-instant
|
| 197 |
|
| 198 |
# ============================================================================
|
| 199 |
# CREATE GLOBAL SETTINGS INSTANCE
|
app/core/llm_manager.py
CHANGED
|
@@ -35,8 +35,8 @@ class GroqManager:
|
|
| 35 |
def __init__(self):
|
| 36 |
"""Initialize Groq manager with all available API keys"""
|
| 37 |
self.api_keys = settings.get_groq_api_keys()
|
| 38 |
-
self.chat_model_name = settings.GROQ_CHAT_MODEL #
|
| 39 |
-
self.eval_model_name = settings.GROQ_EVAL_MODEL #
|
| 40 |
|
| 41 |
# Track current key index
|
| 42 |
self.current_key_index = 0
|
|
|
|
| 35 |
def __init__(self):
|
| 36 |
"""Initialize Groq manager with all available API keys"""
|
| 37 |
self.api_keys = settings.get_groq_api_keys()
|
| 38 |
+
self.chat_model_name = settings.GROQ_CHAT_MODEL # llama-3.1-8b-instant
|
| 39 |
+
self.eval_model_name = settings.GROQ_EVAL_MODEL # llama-3.3-70b-versatile
|
| 40 |
|
| 41 |
# Track current key index
|
| 42 |
self.current_key_index = 0
|
backups/backup_config.py
CHANGED
|
@@ -52,7 +52,7 @@ class Settings:
|
|
| 52 |
# GROQ API (Optional - for evaluation)
|
| 53 |
# ========================================================================
|
| 54 |
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 55 |
-
GROQ_MODEL: str = os.getenv("GROQ_MODEL", "
|
| 56 |
GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
|
| 57 |
|
| 58 |
# ========================================================================
|
|
@@ -231,7 +231,7 @@ print("=" * 80)
|
|
| 231 |
# # GROQ API (Optional - for your llm_manager)
|
| 232 |
# # ========================================================================
|
| 233 |
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 234 |
-
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "
|
| 235 |
|
| 236 |
# # ========================================================================
|
| 237 |
# # HUGGING FACE (Optional - for model downloads)
|
|
@@ -433,7 +433,7 @@ print("=" * 80)
|
|
| 433 |
# # # GROQ API (Optional - for your llm_manager)
|
| 434 |
# # # ========================================================================
|
| 435 |
# # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 436 |
-
# # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "
|
| 437 |
|
| 438 |
# # # ========================================================================
|
| 439 |
# # # HUGGING FACE (Optional - for model downloads)
|
|
@@ -563,7 +563,7 @@ print("=" * 80)
|
|
| 563 |
# # GROQ API (Optional - for your llm_manager)
|
| 564 |
# # ========================================================================
|
| 565 |
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 566 |
-
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "
|
| 567 |
|
| 568 |
# # ========================================================================
|
| 569 |
# # HUGGING FACE (Optional - for model downloads)
|
|
|
|
| 52 |
# GROQ API (Optional - for evaluation)
|
| 53 |
# ========================================================================
|
| 54 |
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 55 |
+
GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 56 |
GROQ_REQUESTS_PER_MINUTE: int = int(os.getenv("GROQ_REQUESTS_PER_MINUTE", "30"))
|
| 57 |
|
| 58 |
# ========================================================================
|
|
|
|
| 231 |
# # GROQ API (Optional - for your llm_manager)
|
| 232 |
# # ========================================================================
|
| 233 |
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 234 |
+
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 235 |
|
| 236 |
# # ========================================================================
|
| 237 |
# # HUGGING FACE (Optional - for model downloads)
|
|
|
|
| 433 |
# # # GROQ API (Optional - for your llm_manager)
|
| 434 |
# # # ========================================================================
|
| 435 |
# # GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 436 |
+
# # GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 437 |
|
| 438 |
# # # ========================================================================
|
| 439 |
# # # HUGGING FACE (Optional - for model downloads)
|
|
|
|
| 563 |
# # GROQ API (Optional - for your llm_manager)
|
| 564 |
# # ========================================================================
|
| 565 |
# GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
|
| 566 |
+
# GROQ_MODEL: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 567 |
|
| 568 |
# # ========================================================================
|
| 569 |
# # HUGGING FACE (Optional - for model downloads)
|