Mahault committed on
Commit
30d0b74
·
1 Parent(s): 22521f9

Make LLM provider configurable via env vars (switch to Groq)

Browse files

Support any OpenAI-compatible API (Groq, Mistral, Together, etc.) via
LLM_API_KEY, LLM_BASE_URL, and LLM_MODEL environment variables.
Fail fast on 429 rate limits so callers use their fallbacks instantly.

.env.example CHANGED
@@ -1 +1,11 @@
1
- MISTRAL_API_KEY=your_mistral_api_key_here
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Provider Configuration
2
+ # Supports any OpenAI-compatible API (Mistral, Groq, Together, Gemini, etc.)
3
+ LLM_API_KEY=your-api-key-here
4
+ LLM_BASE_URL=https://api.groq.com/openai/v1
5
+ LLM_MODEL=llama-3.3-70b-versatile
6
+
7
+ # Optional: separate model for conversation (leave empty to use LLM_MODEL)
8
+ # LLM_CONVERSATION_MODEL=
9
+
10
+ # Legacy Mistral config (still supported if LLM_API_KEY is not set):
11
+ # MISTRAL_API_KEY=your-key-here
src/mindsphere/core/agent.py CHANGED
@@ -164,7 +164,7 @@ class CoachingAgent:
164
  client = MistralClient()
165
  self._generator = CoachGenerator(client=client)
166
  if self._generator.is_available:
167
- logger.info("LLM generator available — using Mistral for conversation")
168
  else:
169
  logger.warning("LLM generator created but not available (no API key?) — using template responses")
170
  self._generator = None
@@ -1074,12 +1074,12 @@ class CoachingAgent:
1074
  messages=messages,
1075
  temperature=0.7,
1076
  max_tokens=80, # Keep acks short
1077
- model_override="mistral-medium-latest",
1078
  )
1079
  if response and response.strip():
1080
  logger.info(f"[LLM] Calibration ack: '{response.strip()[:60]}...'")
1081
  return response.strip()
1082
- logger.warning("[LLM] Empty calibration ack from Mistral")
1083
  except Exception as e:
1084
  logger.warning(f"[LLM] Calibration ack failed: {e}")
1085
  else:
@@ -2093,12 +2093,12 @@ class CoachingAgent:
2093
  messages=messages,
2094
  temperature=0.7,
2095
  max_tokens=300,
2096
- model_override="mistral-medium-latest",
2097
  )
2098
  if response and response.strip():
2099
  logger.info(f"[LLM] Sphere commentary generated ({len(response)} chars)")
2100
  return response.strip()
2101
- logger.warning("[LLM] Empty sphere commentary from Mistral")
2102
  except Exception as e:
2103
  logger.warning(f"[LLM] Sphere commentary failed: {e}")
2104
 
 
164
  client = MistralClient()
165
  self._generator = CoachGenerator(client=client)
166
  if self._generator.is_available:
167
+ logger.info(f"LLM generator available — using {client.base_url} ({client.model})")
168
  else:
169
  logger.warning("LLM generator created but not available (no API key?) — using template responses")
170
  self._generator = None
 
1074
  messages=messages,
1075
  temperature=0.7,
1076
  max_tokens=80, # Keep acks short
1077
+ model_override=None, # uses LLM_MODEL default
1078
  )
1079
  if response and response.strip():
1080
  logger.info(f"[LLM] Calibration ack: '{response.strip()[:60]}...'")
1081
  return response.strip()
1082
+ logger.warning("[LLM] Empty calibration ack from LLM")
1083
  except Exception as e:
1084
  logger.warning(f"[LLM] Calibration ack failed: {e}")
1085
  else:
 
2093
  messages=messages,
2094
  temperature=0.7,
2095
  max_tokens=300,
2096
+ model_override=None, # uses LLM_MODEL default
2097
  )
2098
  if response and response.strip():
2099
  logger.info(f"[LLM] Sphere commentary generated ({len(response)} chars)")
2100
  return response.strip()
2101
+ logger.warning("[LLM] Empty sphere commentary from LLM")
2102
  except Exception as e:
2103
  logger.warning(f"[LLM] Sphere commentary failed: {e}")
2104
 
src/mindsphere/llm/client.py CHANGED
@@ -31,37 +31,58 @@ class MistralAPIError(Exception):
31
  @dataclass
32
  class MistralClient:
33
  """
34
- Thin HTTP wrapper for Mistral's /v1/chat/completions endpoint.
35
 
36
- Handles authentication, retries, and response parsing.
37
- The classifier and generator use this as their backend.
 
 
 
38
  """
39
 
40
  api_key: str = ""
41
- model: str = "mistral-small-latest"
42
- base_url: str = "https://api.mistral.ai/v1"
43
  timeout: float = 30.0
44
  max_retries: int = 2
45
 
46
  def __post_init__(self):
 
 
 
 
 
 
 
 
47
  if not self.api_key:
48
  self.api_key = self._load_api_key()
49
 
50
- def _load_api_key(self) -> str:
51
- """Load API key from environment variable or .env file."""
52
- key = os.environ.get("MISTRAL_API_KEY", "")
53
- if key:
54
- return key
55
-
56
  for parent in [Path.cwd()] + list(Path(__file__).resolve().parents):
57
  env_path = parent / ".env"
58
  if env_path.exists():
59
  for line in env_path.read_text().splitlines():
60
  line = line.strip()
61
- if line.startswith("MISTRAL_API_KEY=") and not line.startswith("#"):
62
- return line.split("=", 1)[1].strip()
 
 
 
 
 
 
63
 
64
- raise MistralAPIError(401, "No MISTRAL_API_KEY found in env or .env file")
 
 
 
 
 
 
 
 
65
 
66
  def _headers(self) -> Dict[str, str]:
67
  return {
@@ -123,14 +144,11 @@ class MistralClient:
123
  if resp.status_code in (400, 401, 403, 404):
124
  raise MistralAPIError(resp.status_code, resp.text)
125
 
126
- # Rate limit: respect Retry-After header if present
127
  if resp.status_code == 429:
128
- retry_after = resp.headers.get("Retry-After")
129
- wait = float(retry_after) if retry_after else (2 ** attempt + 1)
130
- logger.warning(f"[MistralClient] Rate limited (429), waiting {wait:.0f}s")
131
- if attempt < self.max_retries:
132
- time.sleep(wait)
133
- continue
134
 
135
  last_error = MistralAPIError(resp.status_code, resp.text)
136
 
 
31
  @dataclass
32
  class MistralClient:
33
  """
34
+ HTTP wrapper for OpenAI-compatible /v1/chat/completions endpoints.
35
 
36
+ Works with any provider: Mistral, Groq, Together, Gemini, etc.
37
+ Configure via environment variables:
38
+ LLM_API_KEY / MISTRAL_API_KEY — API key
39
+ LLM_BASE_URL — API base URL (default: Mistral)
40
+ LLM_MODEL — Default model name
41
  """
42
 
43
  api_key: str = ""
44
+ model: str = ""
45
+ base_url: str = ""
46
  timeout: float = 30.0
47
  max_retries: int = 2
48
 
49
  def __post_init__(self):
50
+ # Load .env file into os.environ so all config is accessible
51
+ self._load_dotenv()
52
+ if not self.base_url:
53
+ self.base_url = os.environ.get(
54
+ "LLM_BASE_URL", "https://api.mistral.ai/v1"
55
+ )
56
+ if not self.model:
57
+ self.model = os.environ.get("LLM_MODEL", "mistral-small-latest")
58
  if not self.api_key:
59
  self.api_key = self._load_api_key()
60
 
61
+ def _load_dotenv(self) -> None:
62
+ """Load .env file into os.environ (only vars not already set)."""
 
 
 
 
63
  for parent in [Path.cwd()] + list(Path(__file__).resolve().parents):
64
  env_path = parent / ".env"
65
  if env_path.exists():
66
  for line in env_path.read_text().splitlines():
67
  line = line.strip()
68
+ if not line or line.startswith("#"):
69
+ continue
70
+ if "=" in line:
71
+ key, value = line.split("=", 1)
72
+ key, value = key.strip(), value.strip()
73
+ if key and key not in os.environ:
74
+ os.environ[key] = value
75
+ break # only load the first .env found
76
 
77
+ def _load_api_key(self) -> str:
78
+ """Load API key from environment variable."""
79
+ # Check generic LLM_API_KEY first, then legacy MISTRAL_API_KEY
80
+ for env_var in ("LLM_API_KEY", "MISTRAL_API_KEY"):
81
+ key = os.environ.get(env_var, "")
82
+ if key:
83
+ return key
84
+
85
+ raise MistralAPIError(401, "No LLM_API_KEY or MISTRAL_API_KEY found in env or .env file")
86
 
87
  def _headers(self) -> Dict[str, str]:
88
  return {
 
144
  if resp.status_code in (400, 401, 403, 404):
145
  raise MistralAPIError(resp.status_code, resp.text)
146
 
147
+ # Rate limit: fail fast so callers can use their fallback
148
  if resp.status_code == 429:
149
+ retry_after = resp.headers.get("Retry-After", "?")
150
+ logger.warning(f"[LLMClient] Rate limited (429), Retry-After={retry_after}s failing fast")
151
+ raise MistralAPIError(429, f"Rate limited (Retry-After: {retry_after}s)")
 
 
 
152
 
153
  last_error = MistralAPIError(resp.status_code, resp.text)
154
 
src/mindsphere/llm/generator.py CHANGED
@@ -21,8 +21,10 @@ from .client import MistralClient, MistralAPIError
21
  logger = logging.getLogger(__name__)
22
 
23
 
24
- # The model to use for conversation (needs web_search support)
25
- CONVERSATION_MODEL = "mistral-medium-latest"
 
 
26
 
27
  # Web search tool definition for Mistral
28
  # NOTE: web_search format varies by Mistral API version.
@@ -352,7 +354,7 @@ class CoachGenerator:
352
 
353
  # Log message structure for debugging
354
  roles = [m["role"] for m in messages]
355
- logger.info(f"[CoachGenerator] Sending {len(messages)} messages to Mistral (roles: {roles[-5:]})")
356
 
357
  # Detect duplicate user messages (bug diagnostic)
358
  user_msgs = [m["content"][:80] for m in messages if m["role"] == "user"]
@@ -373,12 +375,12 @@ class CoachGenerator:
373
  temperature=0.7,
374
  max_tokens=300,
375
  tools=tools,
376
- model_override=CONVERSATION_MODEL,
377
  )
378
  if response and response.strip():
379
- logger.info(f"[CoachGenerator] Mistral responded ({len(response)} chars)")
380
  return response.strip()
381
- logger.warning("[CoachGenerator] Empty response from Mistral")
382
  return ""
383
  except Exception as e:
384
  logger.warning(f"[CoachGenerator] Exception: {e}")
@@ -422,7 +424,7 @@ class CoachGenerator:
422
  messages.append({"role": msg["role"], "content": msg["content"]})
423
  messages.append({"role": "user", "content": user_message})
424
 
425
- logger.info(f"[CoachGenerator] Streaming {len(messages)} messages to Mistral")
426
 
427
  try:
428
  collected = []
@@ -430,7 +432,7 @@ class CoachGenerator:
430
  messages=messages,
431
  temperature=0.7,
432
  max_tokens=300,
433
- model_override=CONVERSATION_MODEL,
434
  ):
435
  collected.append(chunk)
436
  yield chunk
 
21
  logger = logging.getLogger(__name__)
22
 
23
 
24
+ # Optional: separate model for conversation (e.g. a stronger model for responses).
25
+ # Set via LLM_CONVERSATION_MODEL env var. Empty = use the default LLM_MODEL.
26
+ import os
27
+ CONVERSATION_MODEL = os.environ.get("LLM_CONVERSATION_MODEL", "")
28
 
29
  # Web search tool definition for Mistral
30
  # NOTE: web_search format varies by Mistral API version.
 
354
 
355
  # Log message structure for debugging
356
  roles = [m["role"] for m in messages]
357
+ logger.info(f"[CoachGenerator] Sending {len(messages)} messages to LLM (roles: {roles[-5:]})")
358
 
359
  # Detect duplicate user messages (bug diagnostic)
360
  user_msgs = [m["content"][:80] for m in messages if m["role"] == "user"]
 
375
  temperature=0.7,
376
  max_tokens=300,
377
  tools=tools,
378
+ model_override=CONVERSATION_MODEL or None,
379
  )
380
  if response and response.strip():
381
+ logger.info(f"[CoachGenerator] LLM responded ({len(response)} chars)")
382
  return response.strip()
383
+ logger.warning("[CoachGenerator] Empty response from LLM")
384
  return ""
385
  except Exception as e:
386
  logger.warning(f"[CoachGenerator] Exception: {e}")
 
424
  messages.append({"role": msg["role"], "content": msg["content"]})
425
  messages.append({"role": "user", "content": user_message})
426
 
427
+ logger.info(f"[CoachGenerator] Streaming {len(messages)} messages to LLM")
428
 
429
  try:
430
  collected = []
 
432
  messages=messages,
433
  temperature=0.7,
434
  max_tokens=300,
435
+ model_override=CONVERSATION_MODEL or None,
436
  ):
437
  collected.append(chunk)
438
  yield chunk