Show actual HTTP error in LLM test toast instead of 'empty response'
Track _last_error on every HF failure path (non-auth errors).
Test endpoint returns error field combining auth_error and last_error.
Toast now shows model name + real error e.g. 'HTTP 404: Model not found'
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/api/routes.py +4 -2
- src/soci/engine/llm.py +3 -0
- web/index.html +2 -2
src/soci/api/routes.py
CHANGED
|
@@ -299,9 +299,11 @@ async def test_llm():
|
|
| 299 |
user_message='Reply with exactly: {"ok": true}',
|
| 300 |
max_tokens=32,
|
| 301 |
)
|
| 302 |
-
return {"ok": bool(raw), "raw": raw, "provider": getattr(sim.llm, "provider", "?"),
|
|
|
|
|
|
|
| 303 |
"model": getattr(sim.llm, "default_model", "?"),
|
| 304 |
-
"error": getattr(sim.llm, "_auth_error", "")}
|
| 305 |
except Exception as e:
|
| 306 |
return {"ok": False, "raw": "", "error": str(e)}
|
| 307 |
|
|
|
|
| 299 |
user_message='Reply with exactly: {"ok": true}',
|
| 300 |
max_tokens=32,
|
| 301 |
)
|
| 302 |
+
error_detail = getattr(sim.llm, "_auth_error", "") or getattr(sim.llm, "_last_error", "")
|
| 303 |
+
return {"ok": bool(raw), "raw": raw,
|
| 304 |
+
"provider": getattr(sim.llm, "provider", "?"),
|
| 305 |
"model": getattr(sim.llm, "default_model", "?"),
|
| 306 |
+
"error": error_detail}
|
| 307 |
except Exception as e:
|
| 308 |
return {"ok": False, "raw": "", "error": str(e)}
|
| 309 |
|
src/soci/engine/llm.py
CHANGED
|
@@ -845,6 +845,7 @@ class HFInferenceClient:
|
|
| 845 |
)
|
| 846 |
self._rate_limited_until: float = 0.0
|
| 847 |
self._auth_error: str = ""
|
|
|
|
| 848 |
|
| 849 |
def _is_quota_exhausted(self) -> bool:
|
| 850 |
return time.monotonic() < self._rate_limited_until
|
|
@@ -935,11 +936,13 @@ class HFInferenceClient:
|
|
| 935 |
logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
|
| 936 |
await asyncio.sleep(wait)
|
| 937 |
else:
|
|
|
|
| 938 |
logger.error(f"HF HTTP error: {status} {body}")
|
| 939 |
if attempt == self.max_retries - 1:
|
| 940 |
return ""
|
| 941 |
await asyncio.sleep(2)
|
| 942 |
except Exception as e:
|
|
|
|
| 943 |
logger.error(f"HF error: {e}")
|
| 944 |
if attempt == self.max_retries - 1:
|
| 945 |
return ""
|
|
|
|
| 845 |
)
|
| 846 |
self._rate_limited_until: float = 0.0
|
| 847 |
self._auth_error: str = ""
|
| 848 |
+
self._last_error: str = "" # last non-auth error for diagnostics
|
| 849 |
|
| 850 |
def _is_quota_exhausted(self) -> bool:
|
| 851 |
return time.monotonic() < self._rate_limited_until
|
|
|
|
| 936 |
logger.warning(f"HF model loading ({status}), waiting {wait:.0f}s")
|
| 937 |
await asyncio.sleep(wait)
|
| 938 |
else:
|
| 939 |
+
self._last_error = f"HTTP {status}: {body}"
|
| 940 |
logger.error(f"HF HTTP error: {status} {body}")
|
| 941 |
if attempt == self.max_retries - 1:
|
| 942 |
return ""
|
| 943 |
await asyncio.sleep(2)
|
| 944 |
except Exception as e:
|
| 945 |
+
self._last_error = str(e)
|
| 946 |
logger.error(f"HF error: {e}")
|
| 947 |
if attempt == self.max_retries - 1:
|
| 948 |
return ""
|
web/index.html
CHANGED
|
@@ -3413,8 +3413,8 @@ document.getElementById('llm-model').addEventListener('click', async (e) => {
|
|
| 3413 |
if (d.ok) {
|
| 3414 |
showToast(`✔ LLM OK — "${d.raw.slice(0,60)}"`, 'conv');
|
| 3415 |
} else {
|
| 3416 |
-
const msg = d.raw || 'empty response';
|
| 3417 |
-
showToast(`✘ LLM test failed: ${msg}`, 'event');
|
| 3418 |
}
|
| 3419 |
} catch (err) { showToast('Test request failed', 'event'); }
|
| 3420 |
});
|
|
|
|
| 3413 |
if (d.ok) {
|
| 3414 |
showToast(`✔ LLM OK — "${d.raw.slice(0,60)}"`, 'conv');
|
| 3415 |
} else {
|
| 3416 |
+
const msg = (d.error || d.raw || 'empty response — model may not be on HF serverless API').slice(0, 120);
|
| 3417 |
+
showToast(`✘ ${d.model}: ${msg}`, 'event');
|
| 3418 |
}
|
| 3419 |
} catch (err) { showToast('Test request failed', 'event'); }
|
| 3420 |
});
|