Spaces:
Sleeping
Sleeping
Rick commited on
Commit ·
2c7b558
1
Parent(s): b937983
Fix HF submit gating and add runtime debug log panel in Settings
Browse files- app.py +393 -11
- static/js/settings.js +95 -0
- templates/index.html +21 -1
app.py
CHANGED
|
@@ -50,6 +50,8 @@ import re
|
|
| 50 |
import mimetypes
|
| 51 |
import io
|
| 52 |
import logging
|
|
|
|
|
|
|
| 53 |
|
| 54 |
import medgemma4
|
| 55 |
import medgemma27b
|
|
@@ -232,6 +234,12 @@ HF_REMOTE_TOKEN = (
|
|
| 232 |
)
|
| 233 |
# Default remote text model; when MedGemma 4B/27B is selected we pass that through instead.
|
| 234 |
REMOTE_MODEL = os.environ.get("REMOTE_MODEL") or "google/medgemma-1.5-4b-it"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
_dbg(
|
| 237 |
"env flags: "
|
|
@@ -322,6 +330,74 @@ if LEGACY_CACHE.exists() and not (CACHE_DIR / ".migrated").exists() and CACHE_DI
|
|
| 322 |
|
| 323 |
BACKUP_ROOT = BASE_STORE / "backups"
|
| 324 |
BACKUP_ROOT.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
# Prefer keeping the DB alongside app.py; migrate from legacy data/ path if present
|
| 326 |
DB_PATH = APP_HOME / "app.db"
|
| 327 |
LEGACY_DB = APP_HOME / "data" / "app.db"
|
|
@@ -495,6 +571,15 @@ async def _log_db_path():
|
|
| 495 |
"""Print the fully-resolved database path at startup for operational visibility."""
|
| 496 |
try:
|
| 497 |
print(f"[startup] Database path: {DB_PATH.resolve()}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
except Exception as exc:
|
| 499 |
print(f"[startup] Database path unavailable: {exc}", flush=True)
|
| 500 |
|
|
@@ -2423,6 +2508,39 @@ def _local_model_availability_payload() -> dict:
|
|
| 2423 |
Local installs are always tracked; runnable models may include remote-mode
|
| 2424 |
execution when local inference is disabled and a remote token is configured.
|
| 2425 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2426 |
models = []
|
| 2427 |
available_models = []
|
| 2428 |
missing_models = []
|
|
@@ -2492,6 +2610,49 @@ async def chat_queue_status(request: Request, _=Depends(require_auth)):
|
|
| 2492 |
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 2493 |
|
| 2494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2495 |
@app.get("/api/triage/options")
|
| 2496 |
async def triage_options(_=Depends(require_auth)):
|
| 2497 |
"""Expose dropdown options for triage form (table-backed)."""
|
|
@@ -3283,7 +3444,7 @@ def _cap_new_tokens(max_new_tokens, input_len, model_max_len):
|
|
| 3283 |
return max_new_tokens
|
| 3284 |
|
| 3285 |
|
| 3286 |
-
def _generate_response_local(model_choice: str, force_cpu_slow: bool, prompt: str, cfg: dict):
|
| 3287 |
"""
|
| 3288 |
Generate Response Local helper.
|
| 3289 |
Detailed inline notes are included to support safe maintenance and future edits.
|
|
@@ -3305,6 +3466,15 @@ def _generate_response_local(model_choice: str, force_cpu_slow: bool, prompt: st
|
|
| 3305 |
raise RuntimeError(f"CUDA_NOT_AVAILABLE{detail}")
|
| 3306 |
if is_large_model and runtime_device != "cuda" and not force_cpu_slow:
|
| 3307 |
raise RuntimeError("SLOW_28B_CPU")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3308 |
try:
|
| 3309 |
if is_large_model:
|
| 3310 |
# Ensure only one model family occupies VRAM at a time.
|
|
@@ -3347,6 +3517,16 @@ def _generate_response_local(model_choice: str, force_cpu_slow: bool, prompt: st
|
|
| 3347 |
)
|
| 3348 |
)
|
| 3349 |
if not cuda_runtime_failure:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3350 |
raise
|
| 3351 |
|
| 3352 |
_dbg(f"generate_response: cuda runtime failure detected: {err_txt}")
|
|
@@ -3388,11 +3568,26 @@ def _generate_response_local(model_choice: str, force_cpu_slow: bool, prompt: st
|
|
| 3388 |
)
|
| 3389 |
models["active_name"] = model_name
|
| 3390 |
models["is_text"] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3391 |
_dbg(f"generate_response: local response_len={len(res)}")
|
| 3392 |
return res
|
| 3393 |
|
| 3394 |
|
| 3395 |
-
def _generate_response(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3396 |
"""
|
| 3397 |
Generate Response helper.
|
| 3398 |
Detailed inline notes are included to support safe maintenance and future edits.
|
|
@@ -3407,38 +3602,80 @@ def _generate_response(model_choice: str, force_cpu_slow: bool, prompt: str, cfg
|
|
| 3407 |
if DISABLE_LOCAL_INFERENCE:
|
| 3408 |
if not HF_REMOTE_TOKEN:
|
| 3409 |
_dbg("generate_response: remote path selected but HF_REMOTE_TOKEN missing")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3410 |
raise RuntimeError("REMOTE_TOKEN_MISSING")
|
| 3411 |
-
client = InferenceClient(token=HF_REMOTE_TOKEN)
|
| 3412 |
# Use requested model when provided (e.g., MedGemma) else default
|
| 3413 |
model_name = model_choice or REMOTE_MODEL
|
| 3414 |
_dbg(f"generate_response: remote inference model={model_name}")
|
| 3415 |
-
|
| 3416 |
-
|
|
|
|
|
|
|
| 3417 |
model=model_name,
|
| 3418 |
-
|
| 3419 |
-
|
| 3420 |
-
|
|
|
|
| 3421 |
top_k=cfg.get("k"),
|
|
|
|
| 3422 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3423 |
out = resp.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3424 |
_dbg(f"generate_response: remote response_len={len(out)}")
|
| 3425 |
return out
|
| 3426 |
|
| 3427 |
if prelocked:
|
| 3428 |
-
return _generate_response_local(model_choice, force_cpu_slow, prompt, cfg)
|
| 3429 |
with MODEL_MUTEX:
|
| 3430 |
-
return _generate_response_local(model_choice, force_cpu_slow, prompt, cfg)
|
| 3431 |
|
| 3432 |
|
| 3433 |
@app.post("/api/chat")
|
| 3434 |
async def chat(request: Request, _=Depends(require_auth)):
|
| 3435 |
"""Session-based chat endpoint (triage + inquiry)."""
|
|
|
|
| 3436 |
try:
|
| 3437 |
store = request.state.store
|
| 3438 |
start_time = datetime.now()
|
|
|
|
| 3439 |
form = await request.form()
|
| 3440 |
msg = (form.get("message") or "").strip()
|
| 3441 |
if not msg:
|
|
|
|
| 3442 |
return JSONResponse({"error": "Message is required."}, status_code=status.HTTP_400_BAD_REQUEST)
|
| 3443 |
user_msg_raw = msg
|
| 3444 |
p_name = form.get("patient") or ""
|
|
@@ -3459,10 +3696,32 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3459 |
is_start = session_action == "start" or not session_id
|
| 3460 |
if not session_id:
|
| 3461 |
session_id = f"session-{uuid.uuid4().hex}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3462 |
|
| 3463 |
if not DISABLE_LOCAL_INFERENCE:
|
| 3464 |
model_availability = _local_model_availability_payload()
|
| 3465 |
if not model_availability.get("has_any_local_model"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3466 |
return JSONResponse(
|
| 3467 |
{
|
| 3468 |
"error": (
|
|
@@ -3480,6 +3739,14 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3480 |
if not chosen_model:
|
| 3481 |
model_choice = available_models[0]
|
| 3482 |
elif chosen_model not in available_models:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3483 |
return JSONResponse(
|
| 3484 |
{
|
| 3485 |
"error": (
|
|
@@ -3534,11 +3801,33 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3534 |
if override_prompt.strip():
|
| 3535 |
prompt = override_prompt.strip()
|
| 3536 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3537 |
# Persist the exact prompt submitted for debug visibility in Settings.
|
| 3538 |
try:
|
| 3539 |
set_settings_meta(last_prompt_verbatim=prompt)
|
| 3540 |
except Exception:
|
| 3541 |
logger.exception("Unable to persist last_prompt_verbatim")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3542 |
|
| 3543 |
model_lock_acquired = False
|
| 3544 |
busy_meta_set = False
|
|
@@ -3547,6 +3836,15 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3547 |
queue_snapshot = {}
|
| 3548 |
queue_ticket, queue_snapshot = _chat_queue_enqueue(model_choice or "", mode, session_id, p_name)
|
| 3549 |
if queue_ticket is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3550 |
return JSONResponse(
|
| 3551 |
{
|
| 3552 |
"error": (
|
|
@@ -3559,10 +3857,26 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3559 |
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
| 3560 |
)
|
| 3561 |
queue_position = safe_int(queue_snapshot.get("position"), 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3562 |
if queue_position > 1 and not queue_wait_requested:
|
| 3563 |
_chat_queue_release(queue_ticket)
|
| 3564 |
active = queue_snapshot.get("active") or {}
|
| 3565 |
active_model = (active.get("model_choice") or "").strip() or "another consultation"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3566 |
return JSONResponse(
|
| 3567 |
{
|
| 3568 |
"error": (
|
|
@@ -3579,10 +3893,25 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3579 |
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
| 3580 |
)
|
| 3581 |
queue_wait_seconds = await asyncio.to_thread(_chat_queue_wait_turn, queue_ticket)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3582 |
_set_model_busy_meta(model_choice or "", mode, session_id, p_name)
|
| 3583 |
busy_meta_set = True
|
| 3584 |
|
| 3585 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3586 |
if not DISABLE_LOCAL_INFERENCE:
|
| 3587 |
MODEL_MUTEX.acquire()
|
| 3588 |
model_lock_acquired = True
|
|
@@ -3593,11 +3922,34 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3593 |
prompt,
|
| 3594 |
cfg,
|
| 3595 |
True,
|
|
|
|
| 3596 |
)
|
| 3597 |
else:
|
| 3598 |
-
res = await asyncio.to_thread(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3599 |
except RuntimeError as e:
|
| 3600 |
_dbg(f"chat runtime error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3601 |
if str(e) == "CHAT_QUEUE_TICKET_MISSING":
|
| 3602 |
return JSONResponse(
|
| 3603 |
{"error": "Queue ticket was lost. Please submit again."},
|
|
@@ -3624,6 +3976,18 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3624 |
},
|
| 3625 |
status_code=status.HTTP_400_BAD_REQUEST,
|
| 3626 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3627 |
if "Missing model cache" in str(e) or str(e) in {"REMOTE_TOKEN_MISSING", "LOCAL_INFERENCE_DISABLED"}:
|
| 3628 |
return JSONResponse(
|
| 3629 |
{"error": str(e), "offline_missing": True},
|
|
@@ -3712,6 +4076,16 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3712 |
upsert_history_entry(entry)
|
| 3713 |
|
| 3714 |
metrics = _update_chat_metrics(store, models["active_name"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3715 |
|
| 3716 |
return JSONResponse(
|
| 3717 |
{
|
|
@@ -3738,6 +4112,14 @@ async def chat(request: Request, _=Depends(require_auth)):
|
|
| 3738 |
}
|
| 3739 |
)
|
| 3740 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3741 |
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 3742 |
|
| 3743 |
|
|
|
|
| 50 |
import mimetypes
|
| 51 |
import io
|
| 52 |
import logging
|
| 53 |
+
import traceback
|
| 54 |
+
from logging.handlers import RotatingFileHandler
|
| 55 |
|
| 56 |
import medgemma4
|
| 57 |
import medgemma27b
|
|
|
|
| 234 |
)
|
| 235 |
# Default remote text model; when MedGemma 4B/27B is selected we pass that through instead.
|
| 236 |
REMOTE_MODEL = os.environ.get("REMOTE_MODEL") or "google/medgemma-1.5-4b-it"
|
| 237 |
+
try:
|
| 238 |
+
HF_REMOTE_TIMEOUT_SECONDS = float(os.environ.get("HF_REMOTE_TIMEOUT_SECONDS", "300").strip())
|
| 239 |
+
except Exception:
|
| 240 |
+
HF_REMOTE_TIMEOUT_SECONDS = 300.0
|
| 241 |
+
if HF_REMOTE_TIMEOUT_SECONDS < 10:
|
| 242 |
+
HF_REMOTE_TIMEOUT_SECONDS = 10.0
|
| 243 |
|
| 244 |
_dbg(
|
| 245 |
"env flags: "
|
|
|
|
| 330 |
|
| 331 |
BACKUP_ROOT = BASE_STORE / "backups"
|
| 332 |
BACKUP_ROOT.mkdir(parents=True, exist_ok=True)
|
| 333 |
+
|
| 334 |
+
# Runtime log file (used heavily for HF debugging and support snapshots).
|
| 335 |
+
LOG_ROOT = BASE_STORE / "logs"
|
| 336 |
+
LOG_ROOT.mkdir(parents=True, exist_ok=True)
|
| 337 |
+
RUNTIME_LOG_PATH = LOG_ROOT / "runtime.log"
|
| 338 |
+
try:
|
| 339 |
+
RUNTIME_LOG_MAX_BYTES = int((os.environ.get("RUNTIME_LOG_MAX_BYTES") or "5242880").strip())
|
| 340 |
+
except Exception:
|
| 341 |
+
RUNTIME_LOG_MAX_BYTES = 5242880
|
| 342 |
+
if RUNTIME_LOG_MAX_BYTES < 65536:
|
| 343 |
+
RUNTIME_LOG_MAX_BYTES = 65536
|
| 344 |
+
try:
|
| 345 |
+
RUNTIME_LOG_BACKUPS = int((os.environ.get("RUNTIME_LOG_BACKUPS") or "3").strip())
|
| 346 |
+
except Exception:
|
| 347 |
+
RUNTIME_LOG_BACKUPS = 3
|
| 348 |
+
if RUNTIME_LOG_BACKUPS < 1:
|
| 349 |
+
RUNTIME_LOG_BACKUPS = 1
|
| 350 |
+
RUNTIME_LOG_ENABLED = (os.environ.get("RUNTIME_LOG_ENABLED", "1").strip().lower() not in {"0", "false", "no", "off"})
|
| 351 |
+
|
| 352 |
+
runtime_logger = logging.getLogger("sma.runtime")
|
| 353 |
+
runtime_logger.setLevel(logging.INFO)
|
| 354 |
+
runtime_logger.propagate = False
|
| 355 |
+
if RUNTIME_LOG_ENABLED:
|
| 356 |
+
has_runtime_handler = any(getattr(h, "_sma_runtime_handler", False) for h in runtime_logger.handlers)
|
| 357 |
+
if not has_runtime_handler:
|
| 358 |
+
runtime_handler = RotatingFileHandler(
|
| 359 |
+
str(RUNTIME_LOG_PATH),
|
| 360 |
+
maxBytes=RUNTIME_LOG_MAX_BYTES,
|
| 361 |
+
backupCount=RUNTIME_LOG_BACKUPS,
|
| 362 |
+
encoding="utf-8",
|
| 363 |
+
)
|
| 364 |
+
runtime_handler._sma_runtime_handler = True # type: ignore[attr-defined]
|
| 365 |
+
runtime_handler.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(message)s"))
|
| 366 |
+
runtime_logger.addHandler(runtime_handler)
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def _trim_log_value(value, limit: int = 900) -> str:
|
| 370 |
+
"""Convert values to compact single-line strings for runtime logs."""
|
| 371 |
+
text = str(value)
|
| 372 |
+
if len(text) > limit:
|
| 373 |
+
text = text[: limit - 3] + "..."
|
| 374 |
+
return text.replace("\n", "\\n")
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
def _runtime_log(event: str, level: int = logging.INFO, **fields):
|
| 378 |
+
"""Write structured runtime log entries to rotating file."""
|
| 379 |
+
if not RUNTIME_LOG_ENABLED:
|
| 380 |
+
return
|
| 381 |
+
parts = [f"event={_trim_log_value(event, 120)}"]
|
| 382 |
+
for key, val in fields.items():
|
| 383 |
+
parts.append(f"{key}={_trim_log_value(val)}")
|
| 384 |
+
runtime_logger.log(level, " | ".join(parts))
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _runtime_log_tail(lines: int = 500) -> str:
|
| 388 |
+
"""Read the tail of the runtime log file as UTF-8 text."""
|
| 389 |
+
max_lines = max(50, min(int(lines or 500), 5000))
|
| 390 |
+
if not RUNTIME_LOG_PATH.exists():
|
| 391 |
+
return ""
|
| 392 |
+
try:
|
| 393 |
+
text = RUNTIME_LOG_PATH.read_text(encoding="utf-8", errors="replace")
|
| 394 |
+
except Exception:
|
| 395 |
+
return ""
|
| 396 |
+
log_lines = text.splitlines()
|
| 397 |
+
if len(log_lines) <= max_lines:
|
| 398 |
+
return text
|
| 399 |
+
return "\n".join(log_lines[-max_lines:])
|
| 400 |
+
|
| 401 |
# Prefer keeping the DB alongside app.py; migrate from legacy data/ path if present
|
| 402 |
DB_PATH = APP_HOME / "app.db"
|
| 403 |
LEGACY_DB = APP_HOME / "data" / "app.db"
|
|
|
|
| 571 |
"""Print the fully-resolved database path at startup for operational visibility."""
|
| 572 |
try:
|
| 573 |
print(f"[startup] Database path: {DB_PATH.resolve()}", flush=True)
|
| 574 |
+
_runtime_log(
|
| 575 |
+
"startup.ready",
|
| 576 |
+
db_path=DB_PATH.resolve(),
|
| 577 |
+
runtime_log_path=RUNTIME_LOG_PATH.resolve(),
|
| 578 |
+
is_hf_space=IS_HF_SPACE,
|
| 579 |
+
disable_local_inference=DISABLE_LOCAL_INFERENCE,
|
| 580 |
+
remote_timeout_s=HF_REMOTE_TIMEOUT_SECONDS,
|
| 581 |
+
remote_token_set=bool(HF_REMOTE_TOKEN),
|
| 582 |
+
)
|
| 583 |
except Exception as exc:
|
| 584 |
print(f"[startup] Database path unavailable: {exc}", flush=True)
|
| 585 |
|
|
|
|
| 2508 |
Local installs are always tracked; runnable models may include remote-mode
|
| 2509 |
execution when local inference is disabled and a remote token is configured.
|
| 2510 |
"""
|
| 2511 |
+
if DISABLE_LOCAL_INFERENCE:
|
| 2512 |
+
remote_models_env = (os.environ.get("REMOTE_AVAILABLE_MODELS") or "").strip()
|
| 2513 |
+
if remote_models_env:
|
| 2514 |
+
remote_models = [
|
| 2515 |
+
item.strip()
|
| 2516 |
+
for item in remote_models_env.split(",")
|
| 2517 |
+
if item.strip()
|
| 2518 |
+
]
|
| 2519 |
+
else:
|
| 2520 |
+
remote_models = [REMOTE_MODEL] if (REMOTE_MODEL or "").strip() else list(REQUIRED_MODELS)
|
| 2521 |
+
has_remote = bool(HF_REMOTE_TOKEN)
|
| 2522 |
+
models = [
|
| 2523 |
+
{
|
| 2524 |
+
"model": model_name,
|
| 2525 |
+
"installed": has_remote,
|
| 2526 |
+
"error": "" if has_remote else "remote inference token missing",
|
| 2527 |
+
}
|
| 2528 |
+
for model_name in remote_models
|
| 2529 |
+
]
|
| 2530 |
+
message = "" if has_remote else (
|
| 2531 |
+
"Remote MedGemma is not configured. Set HF_REMOTE_TOKEN in Hugging Face Space secrets."
|
| 2532 |
+
)
|
| 2533 |
+
return {
|
| 2534 |
+
"mode": "remote",
|
| 2535 |
+
"models": models,
|
| 2536 |
+
"required_models": remote_models,
|
| 2537 |
+
"available_models": remote_models if has_remote else [],
|
| 2538 |
+
"missing_models": [] if has_remote else remote_models,
|
| 2539 |
+
"has_any_local_model": has_remote,
|
| 2540 |
+
"disable_submit": not has_remote,
|
| 2541 |
+
"message": message,
|
| 2542 |
+
}
|
| 2543 |
+
|
| 2544 |
models = []
|
| 2545 |
available_models = []
|
| 2546 |
missing_models = []
|
|
|
|
| 2610 |
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 2611 |
|
| 2612 |
|
| 2613 |
+
@app.get("/api/runtime-log")
|
| 2614 |
+
async def runtime_log_view(request: Request, lines: int = 500, _=Depends(require_auth)):
|
| 2615 |
+
"""Return tail lines from runtime log for in-app debugging."""
|
| 2616 |
+
try:
|
| 2617 |
+
text = _runtime_log_tail(lines)
|
| 2618 |
+
exists = RUNTIME_LOG_PATH.exists()
|
| 2619 |
+
size = RUNTIME_LOG_PATH.stat().st_size if exists else 0
|
| 2620 |
+
return {
|
| 2621 |
+
"path": str(RUNTIME_LOG_PATH),
|
| 2622 |
+
"exists": bool(exists),
|
| 2623 |
+
"size": size,
|
| 2624 |
+
"lines_requested": lines,
|
| 2625 |
+
"text": text,
|
| 2626 |
+
}
|
| 2627 |
+
except Exception as e:
|
| 2628 |
+
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 2629 |
+
|
| 2630 |
+
|
| 2631 |
+
@app.get("/api/runtime-log/download")
|
| 2632 |
+
async def runtime_log_download(request: Request, _=Depends(require_auth)):
|
| 2633 |
+
"""Download complete runtime log file as text/plain."""
|
| 2634 |
+
try:
|
| 2635 |
+
if not RUNTIME_LOG_PATH.exists():
|
| 2636 |
+
return Response(content="", media_type="text/plain; charset=utf-8")
|
| 2637 |
+
content = RUNTIME_LOG_PATH.read_text(encoding="utf-8", errors="replace")
|
| 2638 |
+
headers = {"Content-Disposition": 'attachment; filename="runtime.log"'}
|
| 2639 |
+
return Response(content=content, media_type="text/plain; charset=utf-8", headers=headers)
|
| 2640 |
+
except Exception as e:
|
| 2641 |
+
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 2642 |
+
|
| 2643 |
+
|
| 2644 |
+
@app.post("/api/runtime-log/clear")
|
| 2645 |
+
async def runtime_log_clear(request: Request, _=Depends(require_auth)):
|
| 2646 |
+
"""Clear runtime log content while preserving file and handler."""
|
| 2647 |
+
try:
|
| 2648 |
+
RUNTIME_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
|
| 2649 |
+
RUNTIME_LOG_PATH.write_text("", encoding="utf-8")
|
| 2650 |
+
_runtime_log("runtime.log.cleared", actor=request.session.get("user") or "unknown")
|
| 2651 |
+
return {"status": "cleared", "path": str(RUNTIME_LOG_PATH)}
|
| 2652 |
+
except Exception as e:
|
| 2653 |
+
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 2654 |
+
|
| 2655 |
+
|
| 2656 |
@app.get("/api/triage/options")
|
| 2657 |
async def triage_options(_=Depends(require_auth)):
|
| 2658 |
"""Expose dropdown options for triage form (table-backed)."""
|
|
|
|
| 3444 |
return max_new_tokens
|
| 3445 |
|
| 3446 |
|
| 3447 |
+
def _generate_response_local(model_choice: str, force_cpu_slow: bool, prompt: str, cfg: dict, trace_id: str = ""):
|
| 3448 |
"""
|
| 3449 |
Generate Response Local helper.
|
| 3450 |
Detailed inline notes are included to support safe maintenance and future edits.
|
|
|
|
| 3466 |
raise RuntimeError(f"CUDA_NOT_AVAILABLE{detail}")
|
| 3467 |
if is_large_model and runtime_device != "cuda" and not force_cpu_slow:
|
| 3468 |
raise RuntimeError("SLOW_28B_CPU")
|
| 3469 |
+
local_started = time.perf_counter()
|
| 3470 |
+
_runtime_log(
|
| 3471 |
+
"inference.local.start",
|
| 3472 |
+
trace_id=trace_id,
|
| 3473 |
+
model=model_name,
|
| 3474 |
+
runtime_device=runtime_device,
|
| 3475 |
+
is_large_model=is_large_model,
|
| 3476 |
+
force_cpu_slow=force_cpu_slow,
|
| 3477 |
+
)
|
| 3478 |
try:
|
| 3479 |
if is_large_model:
|
| 3480 |
# Ensure only one model family occupies VRAM at a time.
|
|
|
|
| 3517 |
)
|
| 3518 |
)
|
| 3519 |
if not cuda_runtime_failure:
|
| 3520 |
+
_runtime_log(
|
| 3521 |
+
"inference.local.error",
|
| 3522 |
+
level=logging.ERROR,
|
| 3523 |
+
trace_id=trace_id,
|
| 3524 |
+
model=model_name,
|
| 3525 |
+
runtime_device=runtime_device,
|
| 3526 |
+
error_type=type(exc).__name__,
|
| 3527 |
+
error=err_txt,
|
| 3528 |
+
elapsed_ms=int((time.perf_counter() - local_started) * 1000),
|
| 3529 |
+
)
|
| 3530 |
raise
|
| 3531 |
|
| 3532 |
_dbg(f"generate_response: cuda runtime failure detected: {err_txt}")
|
|
|
|
| 3568 |
)
|
| 3569 |
models["active_name"] = model_name
|
| 3570 |
models["is_text"] = True
|
| 3571 |
+
_runtime_log(
|
| 3572 |
+
"inference.local.success",
|
| 3573 |
+
trace_id=trace_id,
|
| 3574 |
+
model=model_name,
|
| 3575 |
+
runtime_device=runtime_device,
|
| 3576 |
+
response_len=len(res),
|
| 3577 |
+
elapsed_ms=int((time.perf_counter() - local_started) * 1000),
|
| 3578 |
+
)
|
| 3579 |
_dbg(f"generate_response: local response_len={len(res)}")
|
| 3580 |
return res
|
| 3581 |
|
| 3582 |
|
| 3583 |
+
def _generate_response(
|
| 3584 |
+
model_choice: str,
|
| 3585 |
+
force_cpu_slow: bool,
|
| 3586 |
+
prompt: str,
|
| 3587 |
+
cfg: dict,
|
| 3588 |
+
prelocked: bool = False,
|
| 3589 |
+
trace_id: str = "",
|
| 3590 |
+
):
|
| 3591 |
"""
|
| 3592 |
Generate Response helper.
|
| 3593 |
Detailed inline notes are included to support safe maintenance and future edits.
|
|
|
|
| 3602 |
if DISABLE_LOCAL_INFERENCE:
|
| 3603 |
if not HF_REMOTE_TOKEN:
|
| 3604 |
_dbg("generate_response: remote path selected but HF_REMOTE_TOKEN missing")
|
| 3605 |
+
_runtime_log(
|
| 3606 |
+
"inference.remote.token_missing",
|
| 3607 |
+
level=logging.ERROR,
|
| 3608 |
+
trace_id=trace_id,
|
| 3609 |
+
model_choice=model_choice,
|
| 3610 |
+
)
|
| 3611 |
raise RuntimeError("REMOTE_TOKEN_MISSING")
|
| 3612 |
+
client = InferenceClient(token=HF_REMOTE_TOKEN, timeout=HF_REMOTE_TIMEOUT_SECONDS)
|
| 3613 |
# Use requested model when provided (e.g., MedGemma) else default
|
| 3614 |
model_name = model_choice or REMOTE_MODEL
|
| 3615 |
_dbg(f"generate_response: remote inference model={model_name}")
|
| 3616 |
+
call_started = time.perf_counter()
|
| 3617 |
+
_runtime_log(
|
| 3618 |
+
"inference.remote.start",
|
| 3619 |
+
trace_id=trace_id,
|
| 3620 |
model=model_name,
|
| 3621 |
+
prompt_len=len(prompt or ""),
|
| 3622 |
+
max_new_tokens=cfg.get("tk"),
|
| 3623 |
+
temperature=cfg.get("t"),
|
| 3624 |
+
top_p=cfg.get("p"),
|
| 3625 |
top_k=cfg.get("k"),
|
| 3626 |
+
timeout_s=HF_REMOTE_TIMEOUT_SECONDS,
|
| 3627 |
)
|
| 3628 |
+
try:
|
| 3629 |
+
resp = client.text_generation(
|
| 3630 |
+
prompt,
|
| 3631 |
+
model=model_name,
|
| 3632 |
+
max_new_tokens=cfg["tk"],
|
| 3633 |
+
temperature=cfg["t"],
|
| 3634 |
+
top_p=cfg["p"],
|
| 3635 |
+
top_k=cfg.get("k"),
|
| 3636 |
+
)
|
| 3637 |
+
except Exception as exc:
|
| 3638 |
+
elapsed_ms = int((time.perf_counter() - call_started) * 1000)
|
| 3639 |
+
_runtime_log(
|
| 3640 |
+
"inference.remote.error",
|
| 3641 |
+
level=logging.ERROR,
|
| 3642 |
+
trace_id=trace_id,
|
| 3643 |
+
model=model_name,
|
| 3644 |
+
elapsed_ms=elapsed_ms,
|
| 3645 |
+
error_type=type(exc).__name__,
|
| 3646 |
+
error=str(exc),
|
| 3647 |
+
)
|
| 3648 |
+
raise RuntimeError(f"REMOTE_INFERENCE_FAILED: {exc}") from exc
|
| 3649 |
out = resp.strip()
|
| 3650 |
+
elapsed_ms = int((time.perf_counter() - call_started) * 1000)
|
| 3651 |
+
_runtime_log(
|
| 3652 |
+
"inference.remote.success",
|
| 3653 |
+
trace_id=trace_id,
|
| 3654 |
+
model=model_name,
|
| 3655 |
+
elapsed_ms=elapsed_ms,
|
| 3656 |
+
response_len=len(out),
|
| 3657 |
+
)
|
| 3658 |
_dbg(f"generate_response: remote response_len={len(out)}")
|
| 3659 |
return out
|
| 3660 |
|
| 3661 |
if prelocked:
|
| 3662 |
+
return _generate_response_local(model_choice, force_cpu_slow, prompt, cfg, trace_id=trace_id)
|
| 3663 |
with MODEL_MUTEX:
|
| 3664 |
+
return _generate_response_local(model_choice, force_cpu_slow, prompt, cfg, trace_id=trace_id)
|
| 3665 |
|
| 3666 |
|
| 3667 |
@app.post("/api/chat")
|
| 3668 |
async def chat(request: Request, _=Depends(require_auth)):
|
| 3669 |
"""Session-based chat endpoint (triage + inquiry)."""
|
| 3670 |
+
trace_id = "unassigned"
|
| 3671 |
try:
|
| 3672 |
store = request.state.store
|
| 3673 |
start_time = datetime.now()
|
| 3674 |
+
trace_id = uuid.uuid4().hex[:12]
|
| 3675 |
form = await request.form()
|
| 3676 |
msg = (form.get("message") or "").strip()
|
| 3677 |
if not msg:
|
| 3678 |
+
_runtime_log("chat.request.invalid", level=logging.WARNING, trace_id=trace_id, reason="empty_message")
|
| 3679 |
return JSONResponse({"error": "Message is required."}, status_code=status.HTTP_400_BAD_REQUEST)
|
| 3680 |
user_msg_raw = msg
|
| 3681 |
p_name = form.get("patient") or ""
|
|
|
|
| 3696 |
is_start = session_action == "start" or not session_id
|
| 3697 |
if not session_id:
|
| 3698 |
session_id = f"session-{uuid.uuid4().hex}"
|
| 3699 |
+
_runtime_log(
|
| 3700 |
+
"chat.request.received",
|
| 3701 |
+
trace_id=trace_id,
|
| 3702 |
+
mode=mode,
|
| 3703 |
+
model_choice=model_choice or "",
|
| 3704 |
+
force_cpu_slow=force_cpu_slow,
|
| 3705 |
+
queue_wait_requested=queue_wait_requested,
|
| 3706 |
+
private=is_priv,
|
| 3707 |
+
session_action=session_action or ("start" if is_start else "message"),
|
| 3708 |
+
session_id=session_id,
|
| 3709 |
+
patient=p_name or "",
|
| 3710 |
+
message_len=len(msg),
|
| 3711 |
+
disable_local_inference=DISABLE_LOCAL_INFERENCE,
|
| 3712 |
+
is_hf_space=IS_HF_SPACE,
|
| 3713 |
+
)
|
| 3714 |
|
| 3715 |
if not DISABLE_LOCAL_INFERENCE:
|
| 3716 |
model_availability = _local_model_availability_payload()
|
| 3717 |
if not model_availability.get("has_any_local_model"):
|
| 3718 |
+
_runtime_log(
|
| 3719 |
+
"chat.request.rejected",
|
| 3720 |
+
level=logging.WARNING,
|
| 3721 |
+
trace_id=trace_id,
|
| 3722 |
+
reason="no_local_models",
|
| 3723 |
+
available_models=(model_availability.get("available_models") or []),
|
| 3724 |
+
)
|
| 3725 |
return JSONResponse(
|
| 3726 |
{
|
| 3727 |
"error": (
|
|
|
|
| 3739 |
if not chosen_model:
|
| 3740 |
model_choice = available_models[0]
|
| 3741 |
elif chosen_model not in available_models:
|
| 3742 |
+
_runtime_log(
|
| 3743 |
+
"chat.request.rejected",
|
| 3744 |
+
level=logging.WARNING,
|
| 3745 |
+
trace_id=trace_id,
|
| 3746 |
+
reason="model_not_installed",
|
| 3747 |
+
chosen_model=chosen_model,
|
| 3748 |
+
available_models=available_models,
|
| 3749 |
+
)
|
| 3750 |
return JSONResponse(
|
| 3751 |
{
|
| 3752 |
"error": (
|
|
|
|
| 3801 |
if override_prompt.strip():
|
| 3802 |
prompt = override_prompt.strip()
|
| 3803 |
|
| 3804 |
+
_runtime_log(
|
| 3805 |
+
"chat.prompt.ready",
|
| 3806 |
+
trace_id=trace_id,
|
| 3807 |
+
mode=mode,
|
| 3808 |
+
model_choice=model_choice or "",
|
| 3809 |
+
prompt_len=len(prompt or ""),
|
| 3810 |
+
override_prompt=bool(override_prompt.strip()),
|
| 3811 |
+
cfg_tokens=cfg.get("tk"),
|
| 3812 |
+
cfg_temp=cfg.get("t"),
|
| 3813 |
+
cfg_top_p=cfg.get("p"),
|
| 3814 |
+
cfg_top_k=cfg.get("k"),
|
| 3815 |
+
triage_pathway_status=prompt_meta.get("triage_pathway_status") or "",
|
| 3816 |
+
triage_pathway_reason=prompt_meta.get("triage_pathway_reason") or "",
|
| 3817 |
+
triage_pathway_supplemented=bool(prompt_meta.get("triage_pathway_supplemented")),
|
| 3818 |
+
)
|
| 3819 |
+
|
| 3820 |
# Persist the exact prompt submitted for debug visibility in Settings.
|
| 3821 |
try:
|
| 3822 |
set_settings_meta(last_prompt_verbatim=prompt)
|
| 3823 |
except Exception:
|
| 3824 |
logger.exception("Unable to persist last_prompt_verbatim")
|
| 3825 |
+
_runtime_log(
|
| 3826 |
+
"chat.prompt.persist_failed",
|
| 3827 |
+
level=logging.WARNING,
|
| 3828 |
+
trace_id=trace_id,
|
| 3829 |
+
error="Unable to persist last_prompt_verbatim",
|
| 3830 |
+
)
|
| 3831 |
|
| 3832 |
model_lock_acquired = False
|
| 3833 |
busy_meta_set = False
|
|
|
|
| 3836 |
queue_snapshot = {}
|
| 3837 |
queue_ticket, queue_snapshot = _chat_queue_enqueue(model_choice or "", mode, session_id, p_name)
|
| 3838 |
if queue_ticket is None:
|
| 3839 |
+
_runtime_log(
|
| 3840 |
+
"chat.queue.rejected",
|
| 3841 |
+
level=logging.WARNING,
|
| 3842 |
+
trace_id=trace_id,
|
| 3843 |
+
reason="queue_full",
|
| 3844 |
+
mode=mode,
|
| 3845 |
+
model_choice=model_choice or "",
|
| 3846 |
+
queue_size=(queue_snapshot or {}).get("size"),
|
| 3847 |
+
)
|
| 3848 |
return JSONResponse(
|
| 3849 |
{
|
| 3850 |
"error": (
|
|
|
|
| 3857 |
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
| 3858 |
)
|
| 3859 |
queue_position = safe_int(queue_snapshot.get("position"), 1)
|
| 3860 |
+
_runtime_log(
|
| 3861 |
+
"chat.queue.enqueued",
|
| 3862 |
+
trace_id=trace_id,
|
| 3863 |
+
mode=mode,
|
| 3864 |
+
model_choice=model_choice or "",
|
| 3865 |
+
queue_ticket=queue_ticket,
|
| 3866 |
+
queue_position=queue_position,
|
| 3867 |
+
queue_snapshot_size=(queue_snapshot or {}).get("size"),
|
| 3868 |
+
)
|
| 3869 |
if queue_position > 1 and not queue_wait_requested:
|
| 3870 |
_chat_queue_release(queue_ticket)
|
| 3871 |
active = queue_snapshot.get("active") or {}
|
| 3872 |
active_model = (active.get("model_choice") or "").strip() or "another consultation"
|
| 3873 |
+
_runtime_log(
|
| 3874 |
+
"chat.queue.wait_required",
|
| 3875 |
+
level=logging.WARNING,
|
| 3876 |
+
trace_id=trace_id,
|
| 3877 |
+
queue_position=queue_position,
|
| 3878 |
+
active_model=active_model,
|
| 3879 |
+
)
|
| 3880 |
return JSONResponse(
|
| 3881 |
{
|
| 3882 |
"error": (
|
|
|
|
| 3893 |
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
| 3894 |
)
|
| 3895 |
queue_wait_seconds = await asyncio.to_thread(_chat_queue_wait_turn, queue_ticket)
|
| 3896 |
+
_runtime_log(
|
| 3897 |
+
"chat.queue.turn_acquired",
|
| 3898 |
+
trace_id=trace_id,
|
| 3899 |
+
queue_ticket=queue_ticket,
|
| 3900 |
+
wait_seconds=queue_wait_seconds,
|
| 3901 |
+
queue_wait_requested=queue_wait_requested,
|
| 3902 |
+
)
|
| 3903 |
_set_model_busy_meta(model_choice or "", mode, session_id, p_name)
|
| 3904 |
busy_meta_set = True
|
| 3905 |
|
| 3906 |
try:
|
| 3907 |
+
_runtime_log(
|
| 3908 |
+
"chat.inference.start",
|
| 3909 |
+
trace_id=trace_id,
|
| 3910 |
+
mode=mode,
|
| 3911 |
+
model_choice=model_choice or "",
|
| 3912 |
+
disable_local_inference=DISABLE_LOCAL_INFERENCE,
|
| 3913 |
+
force_cpu_slow=force_cpu_slow,
|
| 3914 |
+
)
|
| 3915 |
if not DISABLE_LOCAL_INFERENCE:
|
| 3916 |
MODEL_MUTEX.acquire()
|
| 3917 |
model_lock_acquired = True
|
|
|
|
| 3922 |
prompt,
|
| 3923 |
cfg,
|
| 3924 |
True,
|
| 3925 |
+
trace_id,
|
| 3926 |
)
|
| 3927 |
else:
|
| 3928 |
+
res = await asyncio.to_thread(
|
| 3929 |
+
_generate_response,
|
| 3930 |
+
model_choice,
|
| 3931 |
+
force_cpu_slow,
|
| 3932 |
+
prompt,
|
| 3933 |
+
cfg,
|
| 3934 |
+
False,
|
| 3935 |
+
trace_id,
|
| 3936 |
+
)
|
| 3937 |
+
_runtime_log(
|
| 3938 |
+
"chat.inference.success",
|
| 3939 |
+
trace_id=trace_id,
|
| 3940 |
+
mode=mode,
|
| 3941 |
+
model_choice=model_choice or "",
|
| 3942 |
+
response_len=len(res or ""),
|
| 3943 |
+
)
|
| 3944 |
except RuntimeError as e:
|
| 3945 |
_dbg(f"chat runtime error: {e}")
|
| 3946 |
+
_runtime_log(
|
| 3947 |
+
"chat.inference.runtime_error",
|
| 3948 |
+
level=logging.ERROR,
|
| 3949 |
+
trace_id=trace_id,
|
| 3950 |
+
error=str(e),
|
| 3951 |
+
error_type=type(e).__name__,
|
| 3952 |
+
)
|
| 3953 |
if str(e) == "CHAT_QUEUE_TICKET_MISSING":
|
| 3954 |
return JSONResponse(
|
| 3955 |
{"error": "Queue ticket was lost. Please submit again."},
|
|
|
|
| 3976 |
},
|
| 3977 |
status_code=status.HTTP_400_BAD_REQUEST,
|
| 3978 |
)
|
| 3979 |
+
if str(e).startswith("REMOTE_INFERENCE_FAILED"):
|
| 3980 |
+
return JSONResponse(
|
| 3981 |
+
{
|
| 3982 |
+
"error": (
|
| 3983 |
+
"Remote MedGemma inference failed before a response was returned. "
|
| 3984 |
+
"Open Settings -> Runtime Debug Log (HF) for full details."
|
| 3985 |
+
),
|
| 3986 |
+
"remote_inference_failed": True,
|
| 3987 |
+
"details": str(e),
|
| 3988 |
+
},
|
| 3989 |
+
status_code=status.HTTP_502_BAD_GATEWAY,
|
| 3990 |
+
)
|
| 3991 |
if "Missing model cache" in str(e) or str(e) in {"REMOTE_TOKEN_MISSING", "LOCAL_INFERENCE_DISABLED"}:
|
| 3992 |
return JSONResponse(
|
| 3993 |
{"error": str(e), "offline_missing": True},
|
|
|
|
| 4076 |
upsert_history_entry(entry)
|
| 4077 |
|
| 4078 |
metrics = _update_chat_metrics(store, models["active_name"])
|
| 4079 |
+
_runtime_log(
|
| 4080 |
+
"chat.response.ready",
|
| 4081 |
+
trace_id=trace_id,
|
| 4082 |
+
mode=mode,
|
| 4083 |
+
model=models["active_name"],
|
| 4084 |
+
duration_ms=elapsed_ms,
|
| 4085 |
+
queue_wait_seconds=queue_wait_seconds,
|
| 4086 |
+
session_id=session_id,
|
| 4087 |
+
private=is_priv,
|
| 4088 |
+
)
|
| 4089 |
|
| 4090 |
return JSONResponse(
|
| 4091 |
{
|
|
|
|
| 4112 |
}
|
| 4113 |
)
|
| 4114 |
except Exception as e:
|
| 4115 |
+
_runtime_log(
|
| 4116 |
+
"chat.request.exception",
|
| 4117 |
+
level=logging.ERROR,
|
| 4118 |
+
trace_id=trace_id,
|
| 4119 |
+
error_type=type(e).__name__,
|
| 4120 |
+
error=str(e),
|
| 4121 |
+
traceback=traceback.format_exc(),
|
| 4122 |
+
)
|
| 4123 |
return JSONResponse({"error": str(e)}, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
| 4124 |
|
| 4125 |
|
static/js/settings.js
CHANGED
|
@@ -1382,6 +1382,96 @@ async function resetTriagePromptTreeToDefault() {
|
|
| 1382 |
}
|
| 1383 |
}
|
| 1384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1385 |
if (document.readyState !== 'loading') {
|
| 1386 |
applyStoredUserMode();
|
| 1387 |
loadSettingsUI().catch(() => {});
|
|
@@ -1486,6 +1576,7 @@ async function loadSettingsUI() {
|
|
| 1486 |
applySettingsToUI(s);
|
| 1487 |
loadPromptTemplates().catch(() => {});
|
| 1488 |
loadTriagePromptTree().catch(() => {});
|
|
|
|
| 1489 |
try { localStorage.setItem('user_mode', s.user_mode || 'user'); } catch (err) { /* ignore */ }
|
| 1490 |
if (!offlineInitialized) {
|
| 1491 |
offlineInitialized = true;
|
|
@@ -1502,6 +1593,7 @@ async function loadSettingsUI() {
|
|
| 1502 |
applySettingsToUI({ ...DEFAULT_SETTINGS, user_mode: localMode });
|
| 1503 |
loadPromptTemplates().catch(() => {});
|
| 1504 |
loadTriagePromptTree().catch(() => {});
|
|
|
|
| 1505 |
}
|
| 1506 |
}
|
| 1507 |
|
|
@@ -2758,6 +2850,9 @@ window.moveEquipmentCategory = moveEquipmentCategory;
|
|
| 2758 |
window.addConsumableCategory = addConsumableCategory;
|
| 2759 |
window.removeConsumableCategory = removeConsumableCategory;
|
| 2760 |
window.moveConsumableCategory = moveConsumableCategory;
|
|
|
|
|
|
|
|
|
|
| 2761 |
|
| 2762 |
|
| 2763 |
//
|
|
|
|
| 1382 |
}
|
| 1383 |
}
|
| 1384 |
|
| 1385 |
+
function setRuntimeLogStatus(message, isError = false) {
  // Update the inline status line shown under the runtime-log controls.
  const statusEl = document.getElementById('runtime-log-status');
  if (!statusEl) {
    return;
  }
  statusEl.textContent = message || '';
  statusEl.style.color = isError ? 'var(--red)' : '#2c3e50';
}
|
| 1391 |
+
|
| 1392 |
+
function updateRuntimeLogMeta(meta = {}) {
  // Render the log file's size (KB) next to the section header, or a
  // placeholder when the server reports no log file yet.
  const metaEl = document.getElementById('runtime-log-meta');
  if (!metaEl) return;
  const sizeBytes = Number(meta.size || 0);
  if (!meta.exists) {
    metaEl.textContent = 'No log file yet';
    return;
  }
  metaEl.textContent = `${(sizeBytes / 1024).toFixed(1)} KB`;
}
|
| 1404 |
+
|
| 1405 |
+
function getRuntimeLogLinesRequested() {
  // Read the "Lines" input; fall back to 600 when the value is missing,
  // non-numeric, or outside the allowed [50, 5000] range.
  const input = document.getElementById('runtime-log-lines');
  const requested = Number(input?.value || 600);
  const inRange = Number.isFinite(requested) && requested >= 50 && requested <= 5000;
  return inRange ? Math.floor(requested) : 600;
}
|
| 1413 |
+
|
| 1414 |
+
async function loadRuntimeLog(lines = null) {
  // Fetch the tail of the server runtime log and render it in the Settings
  // panel. `lines` overrides the count read from the "Lines" input when given.
  const textarea = document.getElementById('runtime-log-text');
  const lineCount = lines == null ? getRuntimeLogLinesRequested() : Number(lines);
  if (textarea) {
    textarea.value = '';
  }
  setRuntimeLogStatus('Loading runtime log...');
  try {
    const res = await fetch(`/api/runtime-log?lines=${encodeURIComponent(lineCount)}`, {
      credentials: 'same-origin',
    });
    // Parse defensively: error responses (e.g. a proxy's 502 HTML page) may
    // not be JSON, and an unguarded res.json() would throw a parse error that
    // masks the real HTTP status in the message shown to the user.
    let payload = null;
    try {
      payload = await res.json();
    } catch (parseErr) {
      payload = null;
    }
    if (!res.ok || payload?.error) {
      throw new Error(payload?.error || `Status ${res.status}`);
    }
    const text = String(payload?.text || '');
    if (textarea) {
      textarea.value = text || '[Runtime log is currently empty]';
      // Scroll to the newest entries at the bottom.
      textarea.scrollTop = textarea.scrollHeight;
    }
    updateRuntimeLogMeta(payload || {});
    const loadedLines = text ? text.split('\n').length : 0;
    const pathLabel = payload?.path ? ` (${payload.path})` : '';
    setRuntimeLogStatus(`Loaded ${loadedLines} line${loadedLines === 1 ? '' : 's'}${pathLabel}.`);
  } catch (err) {
    setRuntimeLogStatus(`Runtime log load failed: ${err.message}`, true);
  }
}
|
| 1442 |
+
|
| 1443 |
+
function downloadRuntimeLog() {
  // Trigger a browser download of the full runtime log via a temporary anchor.
  setRuntimeLogStatus('Downloading runtime log...');
  const link = document.createElement('a');
  link.href = '/api/runtime-log/download';
  link.download = 'runtime.log';
  document.body.appendChild(link);
  link.click();
  link.remove();
  // Short delay so the "Downloading..." message is briefly visible.
  setTimeout(() => setRuntimeLogStatus('Runtime log download requested.'), 100);
}
|
| 1455 |
+
|
| 1456 |
+
async function clearRuntimeLog() {
  // Ask the server to truncate the runtime log (after user confirmation),
  // then reload the now-empty view.
  if (!confirm('Clear the runtime log file?')) return;
  setRuntimeLogStatus('Clearing runtime log...');
  try {
    const res = await fetch('/api/runtime-log/clear', {
      method: 'POST',
      credentials: 'same-origin',
    });
    // Parse defensively: a failed request may carry a non-JSON body, and a
    // parse exception here would hide the underlying HTTP status code.
    let payload = null;
    try {
      payload = await res.json();
    } catch (parseErr) {
      payload = null;
    }
    if (!res.ok || payload?.error) {
      throw new Error(payload?.error || `Status ${res.status}`);
    }
    await loadRuntimeLog(getRuntimeLogLinesRequested());
    setRuntimeLogStatus('Runtime log cleared.');
  } catch (err) {
    setRuntimeLogStatus(`Runtime log clear failed: ${err.message}`, true);
  }
}
|
| 1474 |
+
|
| 1475 |
if (document.readyState !== 'loading') {
|
| 1476 |
applyStoredUserMode();
|
| 1477 |
loadSettingsUI().catch(() => {});
|
|
|
|
| 1576 |
applySettingsToUI(s);
|
| 1577 |
loadPromptTemplates().catch(() => {});
|
| 1578 |
loadTriagePromptTree().catch(() => {});
|
| 1579 |
+
loadRuntimeLog().catch(() => {});
|
| 1580 |
try { localStorage.setItem('user_mode', s.user_mode || 'user'); } catch (err) { /* ignore */ }
|
| 1581 |
if (!offlineInitialized) {
|
| 1582 |
offlineInitialized = true;
|
|
|
|
| 1593 |
applySettingsToUI({ ...DEFAULT_SETTINGS, user_mode: localMode });
|
| 1594 |
loadPromptTemplates().catch(() => {});
|
| 1595 |
loadTriagePromptTree().catch(() => {});
|
| 1596 |
+
loadRuntimeLog().catch(() => {});
|
| 1597 |
}
|
| 1598 |
}
|
| 1599 |
|
|
|
|
| 2850 |
window.addConsumableCategory = addConsumableCategory;
|
| 2851 |
window.removeConsumableCategory = removeConsumableCategory;
|
| 2852 |
window.moveConsumableCategory = moveConsumableCategory;
|
| 2853 |
+
window.loadRuntimeLog = loadRuntimeLog;
|
| 2854 |
+
window.downloadRuntimeLog = downloadRuntimeLog;
|
| 2855 |
+
window.clearRuntimeLog = clearRuntimeLog;
|
| 2856 |
|
| 2857 |
|
| 2858 |
//
|
templates/index.html
CHANGED
|
@@ -1217,6 +1217,26 @@
|
|
| 1217 |
<div style="font-size:12px; color:#555;">Recommended sequence: Run readiness check → Download missing models → Create backup snapshot → Enable offline mode.</div>
|
| 1218 |
</div>
|
| 1219 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1220 |
<div class="collapsible" style="margin-bottom:12px;">
|
| 1221 |
<div class="col-header crew-med-header" onclick="toggleSection(this)" style="background:#fff; justify-content:flex-start;">
|
| 1222 |
<span class="dev-tag">dev:settings-last-prompt</span><span class="detail-icon history-arrow" style="font-size:18px; margin-right:8px;">▸</span><span style="font-weight:700;">Last Prompt Verbatim</span>
|
|
@@ -1488,7 +1508,7 @@
|
|
| 1488 |
<script src="/static/js/crew.js?v=20260223"></script>
|
| 1489 |
<script src="/static/js/pharmacy.js?v=20260223"></script>
|
| 1490 |
<script src="/static/js/equipment.js?v=20260223"></script>
|
| 1491 |
-
<script src="/static/js/settings.js?v=
|
| 1492 |
<script src="/static/js/main.js?v=20260223"></script>
|
| 1493 |
<script src="/static/js/recovery.js?v=20260304"></script>
|
| 1494 |
<script>
|
|
|
|
| 1217 |
<div style="font-size:12px; color:#555;">Recommended sequence: Run readiness check → Download missing models → Create backup snapshot → Enable offline mode.</div>
|
| 1218 |
</div>
|
| 1219 |
</div>
|
| 1220 |
+
<div class="collapsible" style="margin-bottom:12px;">
|
| 1221 |
+
<div class="col-header crew-med-header" onclick="toggleSection(this)" style="background:#fff; justify-content:flex-start;">
|
| 1222 |
+
<span class="dev-tag">dev:settings-runtime-log</span><span class="detail-icon history-arrow" style="font-size:18px; margin-right:8px;">▸</span><span style="font-weight:700;">Runtime Debug Log (HF)</span>
|
| 1223 |
+
<span id="runtime-log-meta" style="margin-left:auto; font-size:12px; color:#4a5568;"></span>
|
| 1224 |
+
</div>
|
| 1225 |
+
<div class="col-body" style="padding:12px; background:#f8f9fa; display:none;">
|
| 1226 |
+
<div style="font-size:12px; color:#475569; margin-bottom:8px;">
|
| 1227 |
+
Use this log to troubleshoot stalled or failed MedGemma requests on Hugging Face.
|
| 1228 |
+
</div>
|
| 1229 |
+
<div style="display:flex; flex-wrap:wrap; gap:8px; align-items:center; margin-bottom:8px;">
|
| 1230 |
+
<label for="runtime-log-lines" style="font-size:12px; color:#333;">Lines</label>
|
| 1231 |
+
<input id="runtime-log-lines" data-settings-no-autosave="1" type="number" min="50" max="5000" step="50" value="600" style="width:90px; padding:6px;">
|
| 1232 |
+
<button class="btn btn-sm" style="background:var(--inquiry);" onclick="loadRuntimeLog()">Refresh</button>
|
| 1233 |
+
<button class="btn btn-sm" style="background:#0b8457;" onclick="downloadRuntimeLog()">Download</button>
|
| 1234 |
+
<button class="btn btn-sm" style="background:#8b0000;" onclick="clearRuntimeLog()">Clear</button>
|
| 1235 |
+
</div>
|
| 1236 |
+
<div id="runtime-log-status" style="font-size:12px; color:#2c3e50; margin-bottom:8px;">Not loaded yet.</div>
|
| 1237 |
+
<textarea id="runtime-log-text" data-settings-no-autosave="1" readonly style="width:100%; min-height:240px; font-family:Menlo, Consolas, monospace; font-size:12px; line-height:1.35; background:#fff;"></textarea>
|
| 1238 |
+
</div>
|
| 1239 |
+
</div>
|
| 1240 |
<div class="collapsible" style="margin-bottom:12px;">
|
| 1241 |
<div class="col-header crew-med-header" onclick="toggleSection(this)" style="background:#fff; justify-content:flex-start;">
|
| 1242 |
<span class="dev-tag">dev:settings-last-prompt</span><span class="detail-icon history-arrow" style="font-size:18px; margin-right:8px;">▸</span><span style="font-weight:700;">Last Prompt Verbatim</span>
|
|
|
|
| 1508 |
<script src="/static/js/crew.js?v=20260223"></script>
|
| 1509 |
<script src="/static/js/pharmacy.js?v=20260223"></script>
|
| 1510 |
<script src="/static/js/equipment.js?v=20260223"></script>
|
| 1511 |
+
<script src="/static/js/settings.js?v=20260308"></script>
|
| 1512 |
<script src="/static/js/main.js?v=20260223"></script>
|
| 1513 |
<script src="/static/js/recovery.js?v=20260304"></script>
|
| 1514 |
<script>
|