Add _last_error tracking to GeminiClient and GroqClient for diagnosis
Both clients previously returned empty string silently on all error paths,
making the /llm/test endpoint show error:"" even when calls were failing.
Now every silent-return path sets _last_error so the test endpoint can
surface the real reason.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/engine/llm.py +16 -2
src/soci/engine/llm.py
CHANGED
|
@@ -432,6 +432,7 @@ class GroqClient:
|
|
| 432 |
self.max_retries = max_retries
|
| 433 |
self.usage = LLMUsage()
|
| 434 |
self.provider = PROVIDER_GROQ
|
|
|
|
| 435 |
self._http = httpx.AsyncClient(
|
| 436 |
base_url="https://api.groq.com/openai/v1",
|
| 437 |
headers={
|
|
@@ -510,6 +511,7 @@ class GroqClient:
|
|
| 510 |
|
| 511 |
if self._is_quota_exhausted():
|
| 512 |
logger.debug("Groq quota circuit breaker active — skipping complete()")
|
|
|
|
| 513 |
return ""
|
| 514 |
|
| 515 |
for attempt in range(self.max_retries):
|
|
@@ -526,6 +528,7 @@ class GroqClient:
|
|
| 526 |
usage.get("completion_tokens", 0),
|
| 527 |
)
|
| 528 |
|
|
|
|
| 529 |
return data["choices"][0]["message"]["content"]
|
| 530 |
|
| 531 |
except httpx.HTTPStatusError as e:
|
|
@@ -536,19 +539,22 @@ class GroqClient:
|
|
| 536 |
)
|
| 537 |
logger.warning(f"Groq 429: {body[:120]}")
|
| 538 |
if sleep_for == 0:
|
|
|
|
| 539 |
return "" # quota exhausted — skip immediately
|
| 540 |
await asyncio.sleep(sleep_for)
|
| 541 |
elif e.response.status_code == 401:
|
| 542 |
raise ValueError("Invalid GROQ_API_KEY")
|
| 543 |
else:
|
|
|
|
| 544 |
logger.error(f"Groq API error: {e.response.status_code} {e.response.text[:200]}")
|
| 545 |
if attempt == self.max_retries - 1:
|
| 546 |
-
|
| 547 |
await asyncio.sleep(1)
|
| 548 |
except Exception as e:
|
|
|
|
| 549 |
logger.error(f"Groq error: {e}")
|
| 550 |
if attempt == self.max_retries - 1:
|
| 551 |
-
|
| 552 |
await asyncio.sleep(1)
|
| 553 |
return ""
|
| 554 |
|
|
@@ -675,6 +681,7 @@ class GeminiClient:
|
|
| 675 |
self.max_retries = max_retries
|
| 676 |
self.usage = LLMUsage()
|
| 677 |
self.provider = PROVIDER_GEMINI
|
|
|
|
| 678 |
self._http = httpx.AsyncClient(
|
| 679 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
|
| 680 |
headers={
|
|
@@ -790,6 +797,7 @@ class GeminiClient:
|
|
| 790 |
) -> str:
|
| 791 |
"""Send a chat completion request to Gemini."""
|
| 792 |
if self._is_quota_exhausted():
|
|
|
|
| 793 |
logger.debug("Gemini quota circuit breaker active — skipping complete()")
|
| 794 |
return ""
|
| 795 |
|
|
@@ -813,6 +821,7 @@ class GeminiClient:
|
|
| 813 |
usage = data.get("usage", {})
|
| 814 |
self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
|
| 815 |
self._track_daily_request()
|
|
|
|
| 816 |
return data["choices"][0]["message"]["content"]
|
| 817 |
except httpx.HTTPStatusError as e:
|
| 818 |
status = e.response.status_code
|
|
@@ -829,12 +838,14 @@ class GeminiClient:
|
|
| 829 |
if "quota" in body_raw.lower() or wait > 30:
|
| 830 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 831 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
|
|
|
| 832 |
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 833 |
return ""
|
| 834 |
logger.warning(f"Gemini 429: {body} — waiting {wait}s")
|
| 835 |
await asyncio.sleep(wait)
|
| 836 |
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 837 |
# Model not available on this endpoint (any status code) — try fallback
|
|
|
|
| 838 |
fallback = self._handle_model_not_found(model)
|
| 839 |
if fallback:
|
| 840 |
model = fallback
|
|
@@ -843,11 +854,13 @@ class GeminiClient:
|
|
| 843 |
logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
|
| 844 |
return ""
|
| 845 |
else:
|
|
|
|
| 846 |
logger.error(f"Gemini HTTP error: {status} {body}")
|
| 847 |
if attempt == self.max_retries - 1:
|
| 848 |
return ""
|
| 849 |
await asyncio.sleep(1)
|
| 850 |
except Exception as e:
|
|
|
|
| 851 |
logger.error(f"Gemini error: {e}")
|
| 852 |
if attempt == self.max_retries - 1:
|
| 853 |
return ""
|
|
@@ -864,6 +877,7 @@ class GeminiClient:
|
|
| 864 |
) -> dict:
|
| 865 |
"""Send a JSON-mode request to Gemini."""
|
| 866 |
if self._is_quota_exhausted():
|
|
|
|
| 867 |
logger.debug("Gemini quota circuit breaker active — skipping complete_json()")
|
| 868 |
return {}
|
| 869 |
|
|
|
|
| 432 |
self.max_retries = max_retries
|
| 433 |
self.usage = LLMUsage()
|
| 434 |
self.provider = PROVIDER_GROQ
|
| 435 |
+
self._last_error: str = ""
|
| 436 |
self._http = httpx.AsyncClient(
|
| 437 |
base_url="https://api.groq.com/openai/v1",
|
| 438 |
headers={
|
|
|
|
| 511 |
|
| 512 |
if self._is_quota_exhausted():
|
| 513 |
logger.debug("Groq quota circuit breaker active — skipping complete()")
|
| 514 |
+
self._last_error = f"quota exhausted (resets in {(self._rate_limited_until - time.monotonic())/3600:.1f}h)"
|
| 515 |
return ""
|
| 516 |
|
| 517 |
for attempt in range(self.max_retries):
|
|
|
|
| 528 |
usage.get("completion_tokens", 0),
|
| 529 |
)
|
| 530 |
|
| 531 |
+
self._last_error = ""
|
| 532 |
return data["choices"][0]["message"]["content"]
|
| 533 |
|
| 534 |
except httpx.HTTPStatusError as e:
|
|
|
|
| 539 |
)
|
| 540 |
logger.warning(f"Groq 429: {body[:120]}")
|
| 541 |
if sleep_for == 0:
|
| 542 |
+
self._last_error = f"429 quota exhausted: {body[:120]}"
|
| 543 |
return "" # quota exhausted — skip immediately
|
| 544 |
await asyncio.sleep(sleep_for)
|
| 545 |
elif e.response.status_code == 401:
|
| 546 |
raise ValueError("Invalid GROQ_API_KEY")
|
| 547 |
else:
|
| 548 |
+
self._last_error = f"HTTP {e.response.status_code}: {e.response.text[:120]}"
|
| 549 |
logger.error(f"Groq API error: {e.response.status_code} {e.response.text[:200]}")
|
| 550 |
if attempt == self.max_retries - 1:
|
| 551 |
+
return ""
|
| 552 |
await asyncio.sleep(1)
|
| 553 |
except Exception as e:
|
| 554 |
+
self._last_error = str(e)[:120]
|
| 555 |
logger.error(f"Groq error: {e}")
|
| 556 |
if attempt == self.max_retries - 1:
|
| 557 |
+
return ""
|
| 558 |
await asyncio.sleep(1)
|
| 559 |
return ""
|
| 560 |
|
|
|
|
| 681 |
self.max_retries = max_retries
|
| 682 |
self.usage = LLMUsage()
|
| 683 |
self.provider = PROVIDER_GEMINI
|
| 684 |
+
self._last_error: str = ""
|
| 685 |
self._http = httpx.AsyncClient(
|
| 686 |
base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
|
| 687 |
headers={
|
|
|
|
| 797 |
) -> str:
|
| 798 |
"""Send a chat completion request to Gemini."""
|
| 799 |
if self._is_quota_exhausted():
|
| 800 |
+
self._last_error = f"quota exhausted (resets in {self._secs_until_pacific_midnight()/3600:.1f}h)"
|
| 801 |
logger.debug("Gemini quota circuit breaker active — skipping complete()")
|
| 802 |
return ""
|
| 803 |
|
|
|
|
| 821 |
usage = data.get("usage", {})
|
| 822 |
self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
|
| 823 |
self._track_daily_request()
|
| 824 |
+
self._last_error = ""
|
| 825 |
return data["choices"][0]["message"]["content"]
|
| 826 |
except httpx.HTTPStatusError as e:
|
| 827 |
status = e.response.status_code
|
|
|
|
| 838 |
if "quota" in body_raw.lower() or wait > 30:
|
| 839 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 840 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 841 |
+
self._last_error = f"daily quota exhausted — resets in {circuit_wait/3600:.1f}h"
|
| 842 |
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 843 |
return ""
|
| 844 |
logger.warning(f"Gemini 429: {body} — waiting {wait}s")
|
| 845 |
await asyncio.sleep(wait)
|
| 846 |
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 847 |
# Model not available on this endpoint (any status code) — try fallback
|
| 848 |
+
self._last_error = f"model unavailable ({status}): {body[:100]}"
|
| 849 |
fallback = self._handle_model_not_found(model)
|
| 850 |
if fallback:
|
| 851 |
model = fallback
|
|
|
|
| 854 |
logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
|
| 855 |
return ""
|
| 856 |
else:
|
| 857 |
+
self._last_error = f"HTTP {status}: {body[:120]}"
|
| 858 |
logger.error(f"Gemini HTTP error: {status} {body}")
|
| 859 |
if attempt == self.max_retries - 1:
|
| 860 |
return ""
|
| 861 |
await asyncio.sleep(1)
|
| 862 |
except Exception as e:
|
| 863 |
+
self._last_error = str(e)[:120]
|
| 864 |
logger.error(f"Gemini error: {e}")
|
| 865 |
if attempt == self.max_retries - 1:
|
| 866 |
return ""
|
|
|
|
| 877 |
) -> dict:
|
| 878 |
"""Send a JSON-mode request to Gemini."""
|
| 879 |
if self._is_quota_exhausted():
|
| 880 |
+
self._last_error = f"quota exhausted (resets in {self._secs_until_pacific_midnight()/3600:.1f}h)"
|
| 881 |
logger.debug("Gemini quota circuit breaker active — skipping complete_json()")
|
| 882 |
return {}
|
| 883 |
|