RayMelius Claude Sonnet 4.6 committed on
Commit
02ea96a
·
1 Parent(s): 6f4ff21

Show actual HTTP error in LLM test toast instead of 'empty response'

Browse files

Track _last_error on every HF failure path (non-auth errors).
Test endpoint returns error field combining auth_error and last_error.
Toast now shows model name + real error e.g. 'HTTP 404: Model not found'

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

src/soci/api/routes.py CHANGED
@@ -299,9 +299,11 @@ async def test_llm():
299
  user_message='Reply with exactly: {"ok": true}',
300
  max_tokens=32,
301
  )
302
- return {"ok": bool(raw), "raw": raw, "provider": getattr(sim.llm, "provider", "?"),
 
 
303
  "model": getattr(sim.llm, "default_model", "?"),
304
- "auth_error": getattr(sim.llm, "_auth_error", "")}
305
  except Exception as e:
306
  return {"ok": False, "raw": "", "error": str(e)}
307
 
 
299
  user_message='Reply with exactly: {"ok": true}',
300
  max_tokens=32,
301
  )
302
+ error_detail = getattr(sim.llm, "_auth_error", "") or getattr(sim.llm, "_last_error", "")
303
+ return {"ok": bool(raw), "raw": raw,
304
+ "provider": getattr(sim.llm, "provider", "?"),
305
  "model": getattr(sim.llm, "default_model", "?"),
306
+ "error": error_detail}
307
  except Exception as e:
308
  return {"ok": False, "raw": "", "error": str(e)}
309
 
src/soci/engine/llm.py CHANGED
@@ -845,6 +845,7 @@ class HFInferenceClient:
845
  )
846
  self._rate_limited_until: float = 0.0
847
  self._auth_error: str = ""
 
848
 
849
  def _is_quota_exhausted(self) -> bool:
850
  return time.monotonic() < self._rate_limited_until
@@ -935,11 +936,13 @@ class HFInferenceClient:
935
  logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
936
  await asyncio.sleep(wait)
937
  else:
 
938
  logger.error(f"HF HTTP error: {status} {body}")
939
  if attempt == self.max_retries - 1:
940
  return ""
941
  await asyncio.sleep(2)
942
  except Exception as e:
 
943
  logger.error(f"HF error: {e}")
944
  if attempt == self.max_retries - 1:
945
  return ""
 
845
  )
846
  self._rate_limited_until: float = 0.0
847
  self._auth_error: str = ""
848
+ self._last_error: str = "" # last non-auth error for diagnostics
849
 
850
  def _is_quota_exhausted(self) -> bool:
851
  return time.monotonic() < self._rate_limited_until
 
936
  logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
937
  await asyncio.sleep(wait)
938
  else:
939
+ self._last_error = f"HTTP {status}: {body}"
940
  logger.error(f"HF HTTP error: {status} {body}")
941
  if attempt == self.max_retries - 1:
942
  return ""
943
  await asyncio.sleep(2)
944
  except Exception as e:
945
+ self._last_error = str(e)
946
  logger.error(f"HF error: {e}")
947
  if attempt == self.max_retries - 1:
948
  return ""
web/index.html CHANGED
@@ -3413,8 +3413,8 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
3413
  if (d.ok) {
3414
  showToast(`✔ LLM OK — "${d.raw.slice(0,60)}"`, 'conv');
3415
  } else {
3416
- const msg = d.auth_error ? `Auth error: ${d.auth_error.slice(0,80)}` : (d.error || d.raw || 'empty response');
3417
- showToast(`✘ LLM failed: ${msg}`, 'event');
3418
  }
3419
  } catch (err) { showToast('Test request failed', 'event'); }
3420
  });
 
3413
  if (d.ok) {
3414
  showToast(`✔ LLM OK — "${d.raw.slice(0,60)}"`, 'conv');
3415
  } else {
3416
+ const msg = (d.error || d.raw || 'empty response — model may not be on HF serverless API').slice(0, 120);
3417
+ showToast(`✘ ${d.model}: ${msg}`, 'event');
3418
  }
3419
  } catch (err) { showToast('Test request failed', 'event'); }
3420
  });