ar9avg committed on
Commit
63cbec3
·
1 Parent(s): e9bea1b

Add LLM diagnostics: /api/test-llm endpoint + startup/error logging

Browse files

- GET /api/test-llm: shows token status, model, api_base, and tests a real call
- Startup prints LLM config (token set/not-set, model, base_url) to logs
- LLM exceptions in execute-query now print to stdout (visible in container logs)

Files changed (2) hide show
  1. backend/api/demo.py +48 -0
  2. backend/main.py +10 -0
backend/api/demo.py CHANGED
@@ -14,9 +14,13 @@ from __future__ import annotations
14
 
15
  import asyncio
16
  import json
 
 
17
  import time
18
  from typing import AsyncIterator, Optional
19
 
 
 
20
  from fastapi import APIRouter
21
  from pydantic import BaseModel
22
  from sse_starlette.sse import EventSourceResponse
@@ -49,6 +53,48 @@ from gepa.optimizer import get_gepa, QueryResult
49
  router = APIRouter()
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # ─── /api/init ────────────────────────────────────────────────────
53
 
54
  @router.get("/init")
@@ -172,6 +218,8 @@ async def execute_query_stream(req: ExecuteQueryRequest):
172
  except Exception as e:
173
  # Format LLM exception concisely (avoid dumping full HTML 401 pages)
174
  err_str = str(e)
 
 
175
  if len(err_str) > 300 or '<html' in err_str.lower():
176
  err_str = f"LLM API error: {type(e).__name__} (check HF_TOKEN / model availability)"
177
  yield {"data": json.dumps({"type": "error", "message": err_str, "error_class": "other"})}
 
14
 
15
  import asyncio
16
  import json
17
+ import logging
18
+ import os
19
  import time
20
  from typing import AsyncIterator, Optional
21
 
22
+ logger = logging.getLogger(__name__)
23
+
24
  from fastapi import APIRouter
25
  from pydantic import BaseModel
26
  from sse_starlette.sse import EventSourceResponse
 
53
  router = APIRouter()
54
 
55
 
56
+ # ─── /api/test-llm ───────────────────────────────────────────────
57
+
58
@router.get("/test-llm")
async def test_llm():
    """Diagnostic endpoint: report the LLM configuration and try one real call.

    Reads ``HF_TOKEN`` and ``API_BASE_URL`` from the environment, obtains a
    client from ``_make_client()`` and sends a single minimal chat-completion
    request for ``_MODEL``.

    Returns:
        A dict with ``ok``, ``model``, ``api_base``, ``token_set`` and
        ``token_preview``; plus ``response`` (the model's reply text) when
        the call succeeds, or ``error`` (a shortened error string) when it
        fails. Failures of the LLM call are logged and reported in the
        payload rather than raised.
    """
    # Imported inside the handler, as in the original change.
    from env.sql_env import _make_client, _MODEL

    token = os.environ.get("HF_TOKEN", "")
    api_base = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")

    # Never echo a complete secret in an HTTP response. Only tokens longer
    # than the preview window expose a prefix; shorter ones are described by
    # length alone (same wording as the startup log line in main.py).
    # NOTE(review): an 8-character prefix is still fairly revealing for an
    # unauthenticated endpoint — consider shortening or gating this route.
    if not token:
        token_preview = "(empty)"
    elif len(token) > 8:
        token_preview = f"{token[:8]}..."
    else:
        token_preview = f"set ({len(token)} chars)"

    # Fields shared by the success and failure payloads.
    info = {
        "model": _MODEL,
        "api_base": api_base,
        "token_set": bool(token),
        "token_preview": token_preview,
    }

    try:
        client = _make_client()
        resp = await client.chat.completions.create(
            model=_MODEL,
            messages=[{"role": "user", "content": "Reply with just: OK"}],
            temperature=0,
            max_tokens=5,
        )
        result = resp.choices[0].message.content
    except Exception as e:
        err = str(e)
        # Some failures carry an entire HTML error page as the message;
        # collapse those so neither the log nor the response is flooded.
        if len(err) > 400 or '<html' in err.lower():
            err = f"{type(e).__name__}: (response body too long, likely HTML error page)"
        logger.error("test-llm failed: %s", err)
        return {"ok": False, **info, "error": err}

    return {"ok": True, **info, "response": result}
96
+
97
+
98
  # ─── /api/init ────────────────────────────────────────────────────
99
 
100
  @router.get("/init")
 
218
  except Exception as e:
219
  # Format LLM exception concisely (avoid dumping full HTML 401 pages)
220
  err_str = str(e)
221
+ logger.error("LLM call failed attempt=%d: %s: %s", attempt, type(e).__name__, err_str[:200])
222
+ print(f"[execute-query] LLM error attempt={attempt}: {type(e).__name__}: {err_str[:200]}", flush=True)
223
  if len(err_str) > 300 or '<html' in err_str.lower():
224
  err_str = f"LLM API error: {type(e).__name__} (check HF_TOKEN / model availability)"
225
  yield {"data": json.dumps({"type": "error", "message": err_str, "error_class": "other"})}
backend/main.py CHANGED
@@ -13,9 +13,12 @@ Environment variables:
13
 
14
  from __future__ import annotations
15
 
 
16
  import os
17
  from pathlib import Path
18
 
 
 
19
  from fastapi import FastAPI
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.staticfiles import StaticFiles
@@ -83,6 +86,13 @@ async def startup_event():
83
  except Exception as e:
84
  print(f"Warning: database seed failed: {e}")
85
 
 
 
 
 
 
 
 
86
 
87
  # ─── Static files (frontend) — mount last ─────────────────────────
88
 
 
13
 
14
  from __future__ import annotations
15
 
16
+ import logging
17
  import os
18
  from pathlib import Path
19
 
20
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
21
+
22
  from fastapi import FastAPI
23
  from fastapi.middleware.cors import CORSMiddleware
24
  from fastapi.staticfiles import StaticFiles
 
86
  except Exception as e:
87
  print(f"Warning: database seed failed: {e}")
88
 
89
+ # Log LLM config so it's visible in container logs
90
+ token = os.environ.get("HF_TOKEN", "")
91
+ api_base = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
92
+ model = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
93
+ token_status = f"set ({len(token)} chars)" if token else "NOT SET"
94
+ print(f"[startup] LLM config: base_url={api_base} model={model} HF_TOKEN={token_status}", flush=True)
95
+
96
 
97
  # ─── Static files (frontend) — mount last ─────────────────────────
98