Spaces:
Sleeping
Sleeping
Add LLM diagnostics: /api/test-llm endpoint + startup/error logging
Browse files
- GET /api/test-llm: shows token status, model, api_base, and tests a real call
- Startup prints LLM config (token set/not-set, model, base_url) to logs
- LLM exceptions in execute-query now print to stdout (visible in container logs)
- backend/api/demo.py +48 -0
- backend/main.py +10 -0
backend/api/demo.py
CHANGED
|
@@ -14,9 +14,13 @@ from __future__ import annotations
|
|
| 14 |
|
| 15 |
import asyncio
|
| 16 |
import json
|
|
|
|
|
|
|
| 17 |
import time
|
| 18 |
from typing import AsyncIterator, Optional
|
| 19 |
|
|
|
|
|
|
|
| 20 |
from fastapi import APIRouter
|
| 21 |
from pydantic import BaseModel
|
| 22 |
from sse_starlette.sse import EventSourceResponse
|
|
@@ -49,6 +53,48 @@ from gepa.optimizer import get_gepa, QueryResult
|
|
| 49 |
router = APIRouter()
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# ─── /api/init ────────────────────────────────────────────────────
|
| 53 |
|
| 54 |
@router.get("/init")
|
|
@@ -172,6 +218,8 @@ async def execute_query_stream(req: ExecuteQueryRequest):
|
|
| 172 |
except Exception as e:
|
| 173 |
# Format LLM exception concisely (avoid dumping full HTML 401 pages)
|
| 174 |
err_str = str(e)
|
|
|
|
|
|
|
| 175 |
if len(err_str) > 300 or '<html' in err_str.lower():
|
| 176 |
err_str = f"LLM API error: {type(e).__name__} (check HF_TOKEN / model availability)"
|
| 177 |
yield {"data": json.dumps({"type": "error", "message": err_str, "error_class": "other"})}
|
|
|
|
| 14 |
|
| 15 |
import asyncio
|
| 16 |
import json
|
| 17 |
+
import logging
|
| 18 |
+
import os
|
| 19 |
import time
|
| 20 |
from typing import AsyncIterator, Optional
|
| 21 |
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
from fastapi import APIRouter
|
| 25 |
from pydantic import BaseModel
|
| 26 |
from sse_starlette.sse import EventSourceResponse
|
|
|
|
| 53 |
router = APIRouter()
|
| 54 |
|
| 55 |
|
| 56 |
+
# ─── /api/test-llm ────────────────────────────────────────────────
|
| 57 |
+
|
| 58 |
+
@router.get("/test-llm")
|
| 59 |
+
async def test_llm():
|
| 60 |
+
"""Diagnostic: test LLM connectivity and return result."""
|
| 61 |
+
from env.sql_env import _make_client, _MODEL
|
| 62 |
+
token = os.environ.get("HF_TOKEN", "")
|
| 63 |
+
api_base = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 64 |
+
token_preview = f"{token[:8]}..." if len(token) > 8 else ("(empty)" if not token else token)
|
| 65 |
+
|
| 66 |
+
try:
|
| 67 |
+
client = _make_client()
|
| 68 |
+
resp = await client.chat.completions.create(
|
| 69 |
+
model=_MODEL,
|
| 70 |
+
messages=[{"role": "user", "content": "Reply with just: OK"}],
|
| 71 |
+
temperature=0,
|
| 72 |
+
max_tokens=5,
|
| 73 |
+
)
|
| 74 |
+
result = resp.choices[0].message.content
|
| 75 |
+
return {
|
| 76 |
+
"ok": True,
|
| 77 |
+
"model": _MODEL,
|
| 78 |
+
"api_base": api_base,
|
| 79 |
+
"token_set": bool(token),
|
| 80 |
+
"token_preview": token_preview,
|
| 81 |
+
"response": result,
|
| 82 |
+
}
|
| 83 |
+
except Exception as e:
|
| 84 |
+
err = str(e)
|
| 85 |
+
if len(err) > 400 or '<html' in err.lower():
|
| 86 |
+
err = f"{type(e).__name__}: (response body too long, likely HTML error page)"
|
| 87 |
+
logger.error("test-llm failed: %s", err)
|
| 88 |
+
return {
|
| 89 |
+
"ok": False,
|
| 90 |
+
"model": _MODEL,
|
| 91 |
+
"api_base": api_base,
|
| 92 |
+
"token_set": bool(token),
|
| 93 |
+
"token_preview": token_preview,
|
| 94 |
+
"error": err,
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
|
| 98 |
# ─── /api/init ────────────────────────────────────────────────────
|
| 99 |
|
| 100 |
@router.get("/init")
|
|
|
|
| 218 |
except Exception as e:
|
| 219 |
# Format LLM exception concisely (avoid dumping full HTML 401 pages)
|
| 220 |
err_str = str(e)
|
| 221 |
+
logger.error("LLM call failed attempt=%d: %s: %s", attempt, type(e).__name__, err_str[:200])
|
| 222 |
+
print(f"[execute-query] LLM error attempt={attempt}: {type(e).__name__}: {err_str[:200]}", flush=True)
|
| 223 |
if len(err_str) > 300 or '<html' in err_str.lower():
|
| 224 |
err_str = f"LLM API error: {type(e).__name__} (check HF_TOKEN / model availability)"
|
| 225 |
yield {"data": json.dumps({"type": "error", "message": err_str, "error_class": "other"})}
|
backend/main.py
CHANGED
|
@@ -13,9 +13,12 @@ Environment variables:
|
|
| 13 |
|
| 14 |
from __future__ import annotations
|
| 15 |
|
|
|
|
| 16 |
import os
|
| 17 |
from pathlib import Path
|
| 18 |
|
|
|
|
|
|
|
| 19 |
from fastapi import FastAPI
|
| 20 |
from fastapi.middleware.cors import CORSMiddleware
|
| 21 |
from fastapi.staticfiles import StaticFiles
|
|
@@ -83,6 +86,13 @@ async def startup_event():
|
|
| 83 |
except Exception as e:
|
| 84 |
print(f"Warning: database seed failed: {e}")
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# ─── Static files (frontend) — mount last ─────────────────────────
|
| 88 |
|
|
|
|
| 13 |
|
| 14 |
from __future__ import annotations
|
| 15 |
|
| 16 |
+
import logging
|
| 17 |
import os
|
| 18 |
from pathlib import Path
|
| 19 |
|
| 20 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
|
| 21 |
+
|
| 22 |
from fastapi import FastAPI
|
| 23 |
from fastapi.middleware.cors import CORSMiddleware
|
| 24 |
from fastapi.staticfiles import StaticFiles
|
|
|
|
| 86 |
except Exception as e:
|
| 87 |
print(f"Warning: database seed failed: {e}")
|
| 88 |
|
| 89 |
+
# Log LLM config so it's visible in container logs
|
| 90 |
+
token = os.environ.get("HF_TOKEN", "")
|
| 91 |
+
api_base = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 92 |
+
model = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
|
| 93 |
+
token_status = f"set ({len(token)} chars)" if token else "NOT SET"
|
| 94 |
+
print(f"[startup] LLM config: base_url={api_base} model={model} HF_TOKEN={token_status}", flush=True)
|
| 95 |
+
|
| 96 |
|
| 97 |
# ─── Static files (frontend) — mount last ─────────────────────────
|
| 98 |
|