RayMelius Claude Sonnet 4.6 committed on
Commit
45734ba
·
1 Parent(s): 24aef56

Add _last_error tracking to GeminiClient and GroqClient for diagnosis

Browse files

Both clients previously returned empty string silently on all error paths,
making the /llm/test endpoint show error:"" even when calls were failing.
Now every silent-return path sets _last_error so the test endpoint can
surface the real reason.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/soci/engine/llm.py +16 -2
src/soci/engine/llm.py CHANGED
@@ -432,6 +432,7 @@ class GroqClient:
432
  self.max_retries = max_retries
433
  self.usage = LLMUsage()
434
  self.provider = PROVIDER_GROQ
 
435
  self._http = httpx.AsyncClient(
436
  base_url="https://api.groq.com/openai/v1",
437
  headers={
@@ -510,6 +511,7 @@ class GroqClient:
510
 
511
  if self._is_quota_exhausted():
512
  logger.debug("Groq quota circuit breaker active — skipping complete()")
 
513
  return ""
514
 
515
  for attempt in range(self.max_retries):
@@ -526,6 +528,7 @@ class GroqClient:
526
  usage.get("completion_tokens", 0),
527
  )
528
 
 
529
  return data["choices"][0]["message"]["content"]
530
 
531
  except httpx.HTTPStatusError as e:
@@ -536,19 +539,22 @@ class GroqClient:
536
  )
537
  logger.warning(f"Groq 429: {body[:120]}")
538
  if sleep_for == 0:
 
539
  return "" # quota exhausted — skip immediately
540
  await asyncio.sleep(sleep_for)
541
  elif e.response.status_code == 401:
542
  raise ValueError("Invalid GROQ_API_KEY")
543
  else:
 
544
  logger.error(f"Groq API error: {e.response.status_code} {e.response.text[:200]}")
545
  if attempt == self.max_retries - 1:
546
- raise
547
  await asyncio.sleep(1)
548
  except Exception as e:
 
549
  logger.error(f"Groq error: {e}")
550
  if attempt == self.max_retries - 1:
551
- raise
552
  await asyncio.sleep(1)
553
  return ""
554
 
@@ -675,6 +681,7 @@ class GeminiClient:
675
  self.max_retries = max_retries
676
  self.usage = LLMUsage()
677
  self.provider = PROVIDER_GEMINI
 
678
  self._http = httpx.AsyncClient(
679
  base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
680
  headers={
@@ -790,6 +797,7 @@ class GeminiClient:
790
  ) -> str:
791
  """Send a chat completion request to Gemini."""
792
  if self._is_quota_exhausted():
 
793
  logger.debug("Gemini quota circuit breaker active — skipping complete()")
794
  return ""
795
 
@@ -813,6 +821,7 @@ class GeminiClient:
813
  usage = data.get("usage", {})
814
  self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
815
  self._track_daily_request()
 
816
  return data["choices"][0]["message"]["content"]
817
  except httpx.HTTPStatusError as e:
818
  status = e.response.status_code
@@ -829,12 +838,14 @@ class GeminiClient:
829
  if "quota" in body_raw.lower() or wait > 30:
830
  circuit_wait = self._secs_until_pacific_midnight()
831
  self._rate_limited_until = time.monotonic() + circuit_wait
 
832
  logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
833
  return ""
834
  logger.warning(f"Gemini 429: {body} — waiting {wait}s")
835
  await asyncio.sleep(wait)
836
  elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
837
  # Model not available on this endpoint (any status code) — try fallback
 
838
  fallback = self._handle_model_not_found(model)
839
  if fallback:
840
  model = fallback
@@ -843,11 +854,13 @@ class GeminiClient:
843
  logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
844
  return ""
845
  else:
 
846
  logger.error(f"Gemini HTTP error: {status} {body}")
847
  if attempt == self.max_retries - 1:
848
  return ""
849
  await asyncio.sleep(1)
850
  except Exception as e:
 
851
  logger.error(f"Gemini error: {e}")
852
  if attempt == self.max_retries - 1:
853
  return ""
@@ -864,6 +877,7 @@ class GeminiClient:
864
  ) -> dict:
865
  """Send a JSON-mode request to Gemini."""
866
  if self._is_quota_exhausted():
 
867
  logger.debug("Gemini quota circuit breaker active — skipping complete_json()")
868
  return {}
869
 
 
432
  self.max_retries = max_retries
433
  self.usage = LLMUsage()
434
  self.provider = PROVIDER_GROQ
435
+ self._last_error: str = ""
436
  self._http = httpx.AsyncClient(
437
  base_url="https://api.groq.com/openai/v1",
438
  headers={
 
511
 
512
  if self._is_quota_exhausted():
513
  logger.debug("Groq quota circuit breaker active — skipping complete()")
514
+ self._last_error = f"quota exhausted (resets in {(self._rate_limited_until - time.monotonic())/3600:.1f}h)"
515
  return ""
516
 
517
  for attempt in range(self.max_retries):
 
528
  usage.get("completion_tokens", 0),
529
  )
530
 
531
+ self._last_error = ""
532
  return data["choices"][0]["message"]["content"]
533
 
534
  except httpx.HTTPStatusError as e:
 
539
  )
540
  logger.warning(f"Groq 429: {body[:120]}")
541
  if sleep_for == 0:
542
+ self._last_error = f"429 quota exhausted: {body[:120]}"
543
  return "" # quota exhausted — skip immediately
544
  await asyncio.sleep(sleep_for)
545
  elif e.response.status_code == 401:
546
  raise ValueError("Invalid GROQ_API_KEY")
547
  else:
548
+ self._last_error = f"HTTP {e.response.status_code}: {e.response.text[:120]}"
549
  logger.error(f"Groq API error: {e.response.status_code} {e.response.text[:200]}")
550
  if attempt == self.max_retries - 1:
551
+ return ""
552
  await asyncio.sleep(1)
553
  except Exception as e:
554
+ self._last_error = str(e)[:120]
555
  logger.error(f"Groq error: {e}")
556
  if attempt == self.max_retries - 1:
557
+ return ""
558
  await asyncio.sleep(1)
559
  return ""
560
 
 
681
  self.max_retries = max_retries
682
  self.usage = LLMUsage()
683
  self.provider = PROVIDER_GEMINI
684
+ self._last_error: str = ""
685
  self._http = httpx.AsyncClient(
686
  base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
687
  headers={
 
797
  ) -> str:
798
  """Send a chat completion request to Gemini."""
799
  if self._is_quota_exhausted():
800
+ self._last_error = f"quota exhausted (resets in {self._secs_until_pacific_midnight()/3600:.1f}h)"
801
  logger.debug("Gemini quota circuit breaker active — skipping complete()")
802
  return ""
803
 
 
821
  usage = data.get("usage", {})
822
  self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
823
  self._track_daily_request()
824
+ self._last_error = ""
825
  return data["choices"][0]["message"]["content"]
826
  except httpx.HTTPStatusError as e:
827
  status = e.response.status_code
 
838
  if "quota" in body_raw.lower() or wait > 30:
839
  circuit_wait = self._secs_until_pacific_midnight()
840
  self._rate_limited_until = time.monotonic() + circuit_wait
841
+ self._last_error = f"daily quota exhausted — resets in {circuit_wait/3600:.1f}h"
842
  logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
843
  return ""
844
  logger.warning(f"Gemini 429: {body} — waiting {wait}s")
845
  await asyncio.sleep(wait)
846
  elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
847
  # Model not available on this endpoint (any status code) — try fallback
848
+ self._last_error = f"model unavailable ({status}): {body[:100]}"
849
  fallback = self._handle_model_not_found(model)
850
  if fallback:
851
  model = fallback
 
854
  logger.error(f"Gemini model '{model}' not found and no fallback: {body}")
855
  return ""
856
  else:
857
+ self._last_error = f"HTTP {status}: {body[:120]}"
858
  logger.error(f"Gemini HTTP error: {status} {body}")
859
  if attempt == self.max_retries - 1:
860
  return ""
861
  await asyncio.sleep(1)
862
  except Exception as e:
863
+ self._last_error = str(e)[:120]
864
  logger.error(f"Gemini error: {e}")
865
  if attempt == self.max_retries - 1:
866
  return ""
 
877
  ) -> dict:
878
  """Send a JSON-mode request to Gemini."""
879
  if self._is_quota_exhausted():
880
+ self._last_error = f"quota exhausted (resets in {self._secs_until_pacific_midnight()/3600:.1f}h)"
881
  logger.debug("Gemini quota circuit breaker active — skipping complete_json()")
882
  return {}
883