Spaces:
Sleeping
Sleeping
Mahault committed on
Commit ·
30d0b74
1
Parent(s): 22521f9
Make LLM provider configurable via env vars (switch to Groq)
Browse files
Support any OpenAI-compatible API (Groq, Mistral, Together, etc.) via
LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL environment variables.
Fail fast on 429 rate limits so callers use their fallbacks instantly.
- .env.example +11 -1
- src/mindsphere/core/agent.py +5 -5
- src/mindsphere/llm/client.py +39 -21
- src/mindsphere/llm/generator.py +10 -8
.env.example
CHANGED
|
@@ -1 +1,11 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM Provider Configuration
|
| 2 |
+
# Supports any OpenAI-compatible API (Mistral, Groq, Together, Gemini, etc.)
|
| 3 |
+
LLM_API_KEY=your-api-key-here
|
| 4 |
+
LLM_BASE_URL=https://api.groq.com/openai/v1
|
| 5 |
+
LLM_MODEL=llama-3.3-70b-versatile
|
| 6 |
+
|
| 7 |
+
# Optional: separate model for conversation (leave empty to use LLM_MODEL)
|
| 8 |
+
# LLM_CONVERSATION_MODEL=
|
| 9 |
+
|
| 10 |
+
# Legacy Mistral config (still supported if LLM_API_KEY is not set):
|
| 11 |
+
# MISTRAL_API_KEY=your-key-here
|
src/mindsphere/core/agent.py
CHANGED
|
@@ -164,7 +164,7 @@ class CoachingAgent:
|
|
| 164 |
client = MistralClient()
|
| 165 |
self._generator = CoachGenerator(client=client)
|
| 166 |
if self._generator.is_available:
|
| 167 |
-
logger.info("LLM generator available — using
|
| 168 |
else:
|
| 169 |
logger.warning("LLM generator created but not available (no API key?) — using template responses")
|
| 170 |
self._generator = None
|
|
@@ -1074,12 +1074,12 @@ class CoachingAgent:
|
|
| 1074 |
messages=messages,
|
| 1075 |
temperature=0.7,
|
| 1076 |
max_tokens=80, # Keep acks short
|
| 1077 |
-
model_override=
|
| 1078 |
)
|
| 1079 |
if response and response.strip():
|
| 1080 |
logger.info(f"[LLM] Calibration ack: '{response.strip()[:60]}...'")
|
| 1081 |
return response.strip()
|
| 1082 |
-
logger.warning("[LLM] Empty calibration ack from
|
| 1083 |
except Exception as e:
|
| 1084 |
logger.warning(f"[LLM] Calibration ack failed: {e}")
|
| 1085 |
else:
|
|
@@ -2093,12 +2093,12 @@ class CoachingAgent:
|
|
| 2093 |
messages=messages,
|
| 2094 |
temperature=0.7,
|
| 2095 |
max_tokens=300,
|
| 2096 |
-
model_override=
|
| 2097 |
)
|
| 2098 |
if response and response.strip():
|
| 2099 |
logger.info(f"[LLM] Sphere commentary generated ({len(response)} chars)")
|
| 2100 |
return response.strip()
|
| 2101 |
-
logger.warning("[LLM] Empty sphere commentary from
|
| 2102 |
except Exception as e:
|
| 2103 |
logger.warning(f"[LLM] Sphere commentary failed: {e}")
|
| 2104 |
|
|
|
|
| 164 |
client = MistralClient()
|
| 165 |
self._generator = CoachGenerator(client=client)
|
| 166 |
if self._generator.is_available:
|
| 167 |
+
logger.info(f"LLM generator available — using {client.base_url} ({client.model})")
|
| 168 |
else:
|
| 169 |
logger.warning("LLM generator created but not available (no API key?) — using template responses")
|
| 170 |
self._generator = None
|
|
|
|
| 1074 |
messages=messages,
|
| 1075 |
temperature=0.7,
|
| 1076 |
max_tokens=80, # Keep acks short
|
| 1077 |
+
model_override=None, # uses LLM_MODEL default
|
| 1078 |
)
|
| 1079 |
if response and response.strip():
|
| 1080 |
logger.info(f"[LLM] Calibration ack: '{response.strip()[:60]}...'")
|
| 1081 |
return response.strip()
|
| 1082 |
+
logger.warning("[LLM] Empty calibration ack from LLM")
|
| 1083 |
except Exception as e:
|
| 1084 |
logger.warning(f"[LLM] Calibration ack failed: {e}")
|
| 1085 |
else:
|
|
|
|
| 2093 |
messages=messages,
|
| 2094 |
temperature=0.7,
|
| 2095 |
max_tokens=300,
|
| 2096 |
+
model_override=None, # uses LLM_MODEL default
|
| 2097 |
)
|
| 2098 |
if response and response.strip():
|
| 2099 |
logger.info(f"[LLM] Sphere commentary generated ({len(response)} chars)")
|
| 2100 |
return response.strip()
|
| 2101 |
+
logger.warning("[LLM] Empty sphere commentary from LLM")
|
| 2102 |
except Exception as e:
|
| 2103 |
logger.warning(f"[LLM] Sphere commentary failed: {e}")
|
| 2104 |
|
src/mindsphere/llm/client.py
CHANGED
|
@@ -31,37 +31,58 @@ class MistralAPIError(Exception):
|
|
| 31 |
@dataclass
|
| 32 |
class MistralClient:
|
| 33 |
"""
|
| 34 |
-
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
| 38 |
"""
|
| 39 |
|
| 40 |
api_key: str = ""
|
| 41 |
-
model: str = "
|
| 42 |
-
base_url: str = "
|
| 43 |
timeout: float = 30.0
|
| 44 |
max_retries: int = 2
|
| 45 |
|
| 46 |
def __post_init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
if not self.api_key:
|
| 48 |
self.api_key = self._load_api_key()
|
| 49 |
|
| 50 |
-
def
|
| 51 |
-
"""Load
|
| 52 |
-
key = os.environ.get("MISTRAL_API_KEY", "")
|
| 53 |
-
if key:
|
| 54 |
-
return key
|
| 55 |
-
|
| 56 |
for parent in [Path.cwd()] + list(Path(__file__).resolve().parents):
|
| 57 |
env_path = parent / ".env"
|
| 58 |
if env_path.exists():
|
| 59 |
for line in env_path.read_text().splitlines():
|
| 60 |
line = line.strip()
|
| 61 |
-
if line
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def _headers(self) -> Dict[str, str]:
|
| 67 |
return {
|
|
@@ -123,14 +144,11 @@ class MistralClient:
|
|
| 123 |
if resp.status_code in (400, 401, 403, 404):
|
| 124 |
raise MistralAPIError(resp.status_code, resp.text)
|
| 125 |
|
| 126 |
-
# Rate limit:
|
| 127 |
if resp.status_code == 429:
|
| 128 |
-
retry_after = resp.headers.get("Retry-After")
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
if attempt < self.max_retries:
|
| 132 |
-
time.sleep(wait)
|
| 133 |
-
continue
|
| 134 |
|
| 135 |
last_error = MistralAPIError(resp.status_code, resp.text)
|
| 136 |
|
|
|
|
| 31 |
@dataclass
|
| 32 |
class MistralClient:
|
| 33 |
"""
|
| 34 |
+
HTTP wrapper for OpenAI-compatible /v1/chat/completions endpoints.
|
| 35 |
|
| 36 |
+
Works with any provider: Mistral, Groq, Together, Gemini, etc.
|
| 37 |
+
Configure via environment variables:
|
| 38 |
+
LLM_API_KEY / MISTRAL_API_KEY — API key
|
| 39 |
+
LLM_BASE_URL — API base URL (default: Mistral)
|
| 40 |
+
LLM_MODEL — Default model name
|
| 41 |
"""
|
| 42 |
|
| 43 |
api_key: str = ""
|
| 44 |
+
model: str = ""
|
| 45 |
+
base_url: str = ""
|
| 46 |
timeout: float = 30.0
|
| 47 |
max_retries: int = 2
|
| 48 |
|
| 49 |
def __post_init__(self):
|
| 50 |
+
# Load .env file into os.environ so all config is accessible
|
| 51 |
+
self._load_dotenv()
|
| 52 |
+
if not self.base_url:
|
| 53 |
+
self.base_url = os.environ.get(
|
| 54 |
+
"LLM_BASE_URL", "https://api.mistral.ai/v1"
|
| 55 |
+
)
|
| 56 |
+
if not self.model:
|
| 57 |
+
self.model = os.environ.get("LLM_MODEL", "mistral-small-latest")
|
| 58 |
if not self.api_key:
|
| 59 |
self.api_key = self._load_api_key()
|
| 60 |
|
| 61 |
+
def _load_dotenv(self) -> None:
|
| 62 |
+
"""Load .env file into os.environ (only vars not already set)."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
for parent in [Path.cwd()] + list(Path(__file__).resolve().parents):
|
| 64 |
env_path = parent / ".env"
|
| 65 |
if env_path.exists():
|
| 66 |
for line in env_path.read_text().splitlines():
|
| 67 |
line = line.strip()
|
| 68 |
+
if not line or line.startswith("#"):
|
| 69 |
+
continue
|
| 70 |
+
if "=" in line:
|
| 71 |
+
key, value = line.split("=", 1)
|
| 72 |
+
key, value = key.strip(), value.strip()
|
| 73 |
+
if key and key not in os.environ:
|
| 74 |
+
os.environ[key] = value
|
| 75 |
+
break # only load the first .env found
|
| 76 |
|
| 77 |
+
def _load_api_key(self) -> str:
|
| 78 |
+
"""Load API key from environment variable."""
|
| 79 |
+
# Check generic LLM_API_KEY first, then legacy MISTRAL_API_KEY
|
| 80 |
+
for env_var in ("LLM_API_KEY", "MISTRAL_API_KEY"):
|
| 81 |
+
key = os.environ.get(env_var, "")
|
| 82 |
+
if key:
|
| 83 |
+
return key
|
| 84 |
+
|
| 85 |
+
raise MistralAPIError(401, "No LLM_API_KEY or MISTRAL_API_KEY found in env or .env file")
|
| 86 |
|
| 87 |
def _headers(self) -> Dict[str, str]:
|
| 88 |
return {
|
|
|
|
| 144 |
if resp.status_code in (400, 401, 403, 404):
|
| 145 |
raise MistralAPIError(resp.status_code, resp.text)
|
| 146 |
|
| 147 |
+
# Rate limit: fail fast so callers can use their fallback
|
| 148 |
if resp.status_code == 429:
|
| 149 |
+
retry_after = resp.headers.get("Retry-After", "?")
|
| 150 |
+
logger.warning(f"[LLMClient] Rate limited (429), Retry-After={retry_after}s — failing fast")
|
| 151 |
+
raise MistralAPIError(429, f"Rate limited (Retry-After: {retry_after}s)")
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
last_error = MistralAPIError(resp.status_code, resp.text)
|
| 154 |
|
src/mindsphere/llm/generator.py
CHANGED
|
@@ -21,8 +21,10 @@ from .client import MistralClient, MistralAPIError
|
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# Web search tool definition for Mistral
|
| 28 |
# NOTE: web_search format varies by Mistral API version.
|
|
@@ -352,7 +354,7 @@ class CoachGenerator:
|
|
| 352 |
|
| 353 |
# Log message structure for debugging
|
| 354 |
roles = [m["role"] for m in messages]
|
| 355 |
-
logger.info(f"[CoachGenerator] Sending {len(messages)} messages to
|
| 356 |
|
| 357 |
# Detect duplicate user messages (bug diagnostic)
|
| 358 |
user_msgs = [m["content"][:80] for m in messages if m["role"] == "user"]
|
|
@@ -373,12 +375,12 @@ class CoachGenerator:
|
|
| 373 |
temperature=0.7,
|
| 374 |
max_tokens=300,
|
| 375 |
tools=tools,
|
| 376 |
-
model_override=CONVERSATION_MODEL,
|
| 377 |
)
|
| 378 |
if response and response.strip():
|
| 379 |
-
logger.info(f"[CoachGenerator]
|
| 380 |
return response.strip()
|
| 381 |
-
logger.warning("[CoachGenerator] Empty response from
|
| 382 |
return ""
|
| 383 |
except Exception as e:
|
| 384 |
logger.warning(f"[CoachGenerator] Exception: {e}")
|
|
@@ -422,7 +424,7 @@ class CoachGenerator:
|
|
| 422 |
messages.append({"role": msg["role"], "content": msg["content"]})
|
| 423 |
messages.append({"role": "user", "content": user_message})
|
| 424 |
|
| 425 |
-
logger.info(f"[CoachGenerator] Streaming {len(messages)} messages to
|
| 426 |
|
| 427 |
try:
|
| 428 |
collected = []
|
|
@@ -430,7 +432,7 @@ class CoachGenerator:
|
|
| 430 |
messages=messages,
|
| 431 |
temperature=0.7,
|
| 432 |
max_tokens=300,
|
| 433 |
-
model_override=CONVERSATION_MODEL,
|
| 434 |
):
|
| 435 |
collected.append(chunk)
|
| 436 |
yield chunk
|
|
|
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
|
| 24 |
+
# Optional: separate model for conversation (e.g. a stronger model for responses).
|
| 25 |
+
# Set via LLM_CONVERSATION_MODEL env var. Empty = use the default LLM_MODEL.
|
| 26 |
+
import os
|
| 27 |
+
CONVERSATION_MODEL = os.environ.get("LLM_CONVERSATION_MODEL", "")
|
| 28 |
|
| 29 |
# Web search tool definition for Mistral
|
| 30 |
# NOTE: web_search format varies by Mistral API version.
|
|
|
|
| 354 |
|
| 355 |
# Log message structure for debugging
|
| 356 |
roles = [m["role"] for m in messages]
|
| 357 |
+
logger.info(f"[CoachGenerator] Sending {len(messages)} messages to LLM (roles: {roles[-5:]})")
|
| 358 |
|
| 359 |
# Detect duplicate user messages (bug diagnostic)
|
| 360 |
user_msgs = [m["content"][:80] for m in messages if m["role"] == "user"]
|
|
|
|
| 375 |
temperature=0.7,
|
| 376 |
max_tokens=300,
|
| 377 |
tools=tools,
|
| 378 |
+
model_override=CONVERSATION_MODEL or None,
|
| 379 |
)
|
| 380 |
if response and response.strip():
|
| 381 |
+
logger.info(f"[CoachGenerator] LLM responded ({len(response)} chars)")
|
| 382 |
return response.strip()
|
| 383 |
+
logger.warning("[CoachGenerator] Empty response from LLM")
|
| 384 |
return ""
|
| 385 |
except Exception as e:
|
| 386 |
logger.warning(f"[CoachGenerator] Exception: {e}")
|
|
|
|
| 424 |
messages.append({"role": msg["role"], "content": msg["content"]})
|
| 425 |
messages.append({"role": "user", "content": user_message})
|
| 426 |
|
| 427 |
+
logger.info(f"[CoachGenerator] Streaming {len(messages)} messages to LLM")
|
| 428 |
|
| 429 |
try:
|
| 430 |
collected = []
|
|
|
|
| 432 |
messages=messages,
|
| 433 |
temperature=0.7,
|
| 434 |
max_tokens=300,
|
| 435 |
+
model_override=CONVERSATION_MODEL or None,
|
| 436 |
):
|
| 437 |
collected.append(chunk)
|
| 438 |
yield chunk
|