Spaces:

msradam
/

riprap

Sleeping

App Files Files Community

msradam committed 6 days ago

Commit

52a5649

verified ·

1 Parent(s): 80deb38

debug: add /api/debug/vllm-direct endpoint

Browse files

Files changed (1) hide show

web/main.py +58 -0

web/main.py CHANGED Viewed

@@ -356,6 +356,64 @@ def api_debug_eo():
     return JSONResponse(out)
 @app.get("/api/backend")
 async def api_backend():
     """Live LLM-backend descriptor for the UI's hardware badge.

     return JSONResponse(out)
+@app.get("/api/debug/vllm-direct")
+def api_debug_vllm_direct():
+    """Direct diagnostic: calls vLLM with a reconciler-style request,
+    bypassing LiteLLM Router, to surface the raw HTTP status and error."""
+    import traceback
+    import httpx
+    vllm_base = os.environ.get("RIPRAP_LLM_BASE_URL", "").rstrip("/")
+    vllm_key = os.environ.get("RIPRAP_LLM_API_KEY", "") or "EMPTY"
+    if not vllm_base:
+        return JSONResponse({"error": "RIPRAP_LLM_BASE_URL not set"}, status_code=400)
+    # Minimal reconciler-style payload: 2 documents + system + user.
+    payload = {
+        "model": os.environ.get("RIPRAP_LLM_VLLM_8B_NAME", "granite4.1:3b"),
+        "messages": [
+            {"role": "system", "content": "You are a flood risk analyst."},
+            {"role": "user", "content": "Write the cited paragraph now."},
+        ],
+        "max_tokens": 64,
+        "temperature": 0,
+        "stream": False,
+        "chat_template_kwargs": {
+            "documents": [
+                {"doc_id": "noaa_tides", "text": "Current tide at Battery Park: 4.14 ft MLLW."},
+                {"doc_id": "microtopo", "text": "Elevation 1.37 m, 80th pct 200m radius."},
+            ]
+        },
+        "documents": [
+            {"doc_id": "noaa_tides", "text": "Current tide at Battery Park: 4.14 ft MLLW."},
+            {"doc_id": "microtopo", "text": "Elevation 1.37 m, 80th pct 200m radius."},
+        ],
+    }
+    try:
+        with httpx.Client(timeout=30.0) as c:
+            r = c.post(
+                f"{vllm_base}/chat/completions",
+                headers={"Authorization": f"Bearer {vllm_key}",
+                         "Content-Type": "application/json"},
+                json=payload,
+            )
+        try:
+            body = r.json()
+        except Exception:
+            body = r.text[:500]
+        return JSONResponse({
+            "status": r.status_code,
+            "body": body,
+            "model_used": payload["model"],
+            "vllm_base": vllm_base,
+        })
+    except Exception as e:
+        return JSONResponse({
+            "error": str(e),
+            "tb": traceback.format_exc().splitlines()[-5:],
+        }, status_code=500)
 @app.get("/api/backend")
 async def api_backend():
     """Live LLM-backend descriptor for the UI's hardware badge.