Update app.py
Browse files
app.py
CHANGED
|
@@ -295,69 +295,189 @@ def call_vlm(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tup
|
|
| 295 |
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
| 296 |
raise FileNotFoundError("Face or eye image path missing")
|
| 297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
client = get_gradio_client(GRADIO_VLM_SPACE)
|
| 299 |
message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
|
| 300 |
|
| 301 |
try:
|
| 302 |
-
logger.info("Calling VLM Space: %s", GRADIO_VLM_SPACE)
|
| 303 |
result = client.predict(message=message, history=[], api_name="/chat_fn")
|
|
|
|
|
|
|
| 304 |
except Exception as e:
|
| 305 |
logger.exception("VLM call failed")
|
| 306 |
raise RuntimeError(f"VLM call failed: {e}")
|
| 307 |
|
| 308 |
-
# Normalize result
|
| 309 |
if isinstance(result, (list, tuple)):
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
elif isinstance(result, dict):
|
|
|
|
| 312 |
out = result
|
| 313 |
else:
|
|
|
|
| 314 |
out = {"text": str(result)}
|
| 315 |
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
# Try to parse JSON
|
| 319 |
parsed = None
|
| 320 |
try:
|
| 321 |
parsed = json.loads(text_out)
|
| 322 |
if not isinstance(parsed, dict):
|
|
|
|
| 323 |
parsed = None
|
| 324 |
-
|
|
|
|
|
|
|
|
|
|
| 325 |
# Try to extract JSON from text
|
| 326 |
try:
|
| 327 |
first = text_out.find("{")
|
| 328 |
last = text_out.rfind("}")
|
| 329 |
if first != -1 and last != -1 and last > first:
|
| 330 |
-
|
|
|
|
| 331 |
if not isinstance(parsed, dict):
|
|
|
|
| 332 |
parsed = None
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
| 334 |
parsed = None
|
| 335 |
|
| 336 |
return parsed, text_out
|
| 337 |
|
| 338 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
"""Call LLM with VLM output and return structured risk assessment"""
|
| 340 |
if not GRADIO_AVAILABLE:
|
|
|
|
|
|
|
| 341 |
raise RuntimeError("gradio_client not installed")
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
# Prepare input
|
| 346 |
vlm_text = vlm_output if isinstance(vlm_output, str) else json.dumps(vlm_output, default=str)
|
| 347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
instruction = (
|
| 349 |
"\n\nSTRICT INSTRUCTIONS:\n"
|
| 350 |
"1) OUTPUT ONLY a single valid JSON object — no prose, no code fences.\n"
|
| 351 |
"2) Include keys: risk_score, jaundice_probability, anemia_probability, "
|
| 352 |
"hydration_issue_probability, neurological_issue_probability, summary, recommendation, confidence.\n"
|
| 353 |
"3) Use numeric values for probabilities (0-1) and risk_score (0-100).\n"
|
| 354 |
-
"4) Use neutral wording in summary/recommendation.\n
|
|
|
|
| 355 |
"VLM Output:\n" + vlm_text + "\n"
|
| 356 |
)
|
| 357 |
|
| 358 |
# Call with safe defaults
|
| 359 |
try:
|
|
|
|
| 360 |
logger.info("Calling LLM Space: %s", LLM_GRADIO_SPACE)
|
|
|
|
| 361 |
result = client.predict(
|
| 362 |
input_data=instruction,
|
| 363 |
max_new_tokens=1024.0,
|
|
@@ -378,10 +498,36 @@ def call_llm(vlm_output: Any) -> Dict[str, Any]:
|
|
| 378 |
parsed = extract_json_from_llm_output(text_out)
|
| 379 |
logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2))
|
| 380 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
return parsed
|
| 382 |
|
| 383 |
except Exception as e:
|
| 384 |
-
logger.exception("LLM call failed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
raise RuntimeError(f"LLM call failed: {e}")
|
| 386 |
|
| 387 |
# ============================================================================
|
|
@@ -421,9 +567,9 @@ async def process_screening(screening_id: str):
|
|
| 421 |
# Call VLM
|
| 422 |
vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
|
| 423 |
|
| 424 |
-
# Call LLM
|
| 425 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 426 |
-
structured_risk = await asyncio.to_thread(call_llm, llm_input)
|
| 427 |
|
| 428 |
# Store results
|
| 429 |
screenings_db[screening_id]["ai_results"] = {
|
|
@@ -487,12 +633,37 @@ async def read_root():
|
|
| 487 |
|
| 488 |
@app.get("/health")
|
| 489 |
async def health_check():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
return {
|
| 491 |
"status": "healthy",
|
| 492 |
"detector": detector_type or "none",
|
| 493 |
"vlm_available": GRADIO_AVAILABLE,
|
| 494 |
"vlm_space": GRADIO_VLM_SPACE,
|
| 495 |
-
"llm_space": LLM_GRADIO_SPACE
|
|
|
|
|
|
|
|
|
|
| 496 |
}
|
| 497 |
|
| 498 |
@app.post("/api/v1/validate-eye-photo")
|
|
@@ -615,14 +786,49 @@ async def get_history(user_id: str):
|
|
| 615 |
history = [s for s in screenings_db.values() if s.get("user_id") == user_id]
|
| 616 |
return {"screenings": history}
|
| 617 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 618 |
@app.post("/api/v1/get-vitals")
|
| 619 |
async def get_vitals_from_upload(
|
| 620 |
face_image: UploadFile = File(...),
|
| 621 |
eye_image: UploadFile = File(...)
|
| 622 |
):
|
| 623 |
-
"""Synchronous VLM + LLM pipeline"""
|
| 624 |
if not GRADIO_AVAILABLE:
|
| 625 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 626 |
|
| 627 |
try:
|
| 628 |
uid = str(uuid.uuid4())
|
|
@@ -634,23 +840,38 @@ async def get_vitals_from_upload(
|
|
| 634 |
with open(eye_path, "wb") as f:
|
| 635 |
f.write(await eye_image.read())
|
| 636 |
|
|
|
|
| 637 |
vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
|
|
|
|
|
|
|
| 638 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 639 |
-
structured_risk = await asyncio.to_thread(call_llm, llm_input)
|
| 640 |
|
| 641 |
return {
|
| 642 |
"vlm_features": vlm_features,
|
| 643 |
"vlm_raw": vlm_raw,
|
| 644 |
-
"structured_risk": structured_risk
|
|
|
|
| 645 |
}
|
| 646 |
|
| 647 |
except Exception as e:
|
| 648 |
logger.exception("Get vitals failed")
|
| 649 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
@app.post("/api/v1/get-vitals/{screening_id}")
|
| 652 |
async def get_vitals_for_screening(screening_id: str):
|
| 653 |
-
"""Re-run VLM + LLM on existing screening"""
|
| 654 |
if screening_id not in screenings_db:
|
| 655 |
raise HTTPException(status_code=404, detail="Screening not found")
|
| 656 |
|
|
@@ -659,31 +880,44 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 659 |
eye_path = entry.get("eye_image_path")
|
| 660 |
|
| 661 |
if not (face_path and os.path.exists(face_path) and eye_path and os.path.exists(eye_path)):
|
| 662 |
-
raise HTTPException(status_code=400, detail="Images missing")
|
| 663 |
|
| 664 |
try:
|
| 665 |
vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
|
| 666 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 667 |
-
structured_risk = await asyncio.to_thread(call_llm, llm_input)
|
| 668 |
|
| 669 |
entry.setdefault("ai_results", {}).update({
|
| 670 |
"vlm_features": vlm_features,
|
| 671 |
"vlm_raw": vlm_raw,
|
| 672 |
"structured_risk": structured_risk,
|
| 673 |
-
"last_vitals_run": datetime.utcnow().isoformat() + "Z"
|
|
|
|
| 674 |
})
|
| 675 |
|
| 676 |
return {
|
| 677 |
"screening_id": screening_id,
|
| 678 |
"vlm_features": vlm_features,
|
| 679 |
"vlm_raw": vlm_raw,
|
| 680 |
-
"structured_risk": structured_risk
|
|
|
|
| 681 |
}
|
| 682 |
|
| 683 |
except Exception as e:
|
| 684 |
logger.exception("Get vitals for screening failed")
|
| 685 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
|
| 687 |
if __name__ == "__main__":
|
| 688 |
import uvicorn
|
| 689 |
-
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=
|
|
|
|
| 295 |
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
| 296 |
raise FileNotFoundError("Face or eye image path missing")
|
| 297 |
|
| 298 |
+
logger.info("VLM Input - Face: %s (exists: %s, size: %d bytes)",
|
| 299 |
+
face_path, os.path.exists(face_path), os.path.getsize(face_path))
|
| 300 |
+
logger.info("VLM Input - Eye: %s (exists: %s, size: %d bytes)",
|
| 301 |
+
eye_path, os.path.exists(eye_path), os.path.getsize(eye_path))
|
| 302 |
+
logger.info("VLM Prompt: %s", prompt[:100])
|
| 303 |
+
|
| 304 |
client = get_gradio_client(GRADIO_VLM_SPACE)
|
| 305 |
message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
|
| 306 |
|
| 307 |
try:
|
| 308 |
+
logger.info("Calling VLM Space: %s with api_name=/chat_fn", GRADIO_VLM_SPACE)
|
| 309 |
result = client.predict(message=message, history=[], api_name="/chat_fn")
|
| 310 |
+
logger.info("VLM raw result type: %s", type(result))
|
| 311 |
+
logger.info("VLM raw result: %s", str(result)[:500])
|
| 312 |
except Exception as e:
|
| 313 |
logger.exception("VLM call failed")
|
| 314 |
raise RuntimeError(f"VLM call failed: {e}")
|
| 315 |
|
| 316 |
+
# Normalize result - handle different return formats
|
| 317 |
if isinstance(result, (list, tuple)):
|
| 318 |
+
logger.info("VLM returned list/tuple with %d elements", len(result))
|
| 319 |
+
if len(result) > 0:
|
| 320 |
+
out = result[0]
|
| 321 |
+
else:
|
| 322 |
+
out = {}
|
| 323 |
elif isinstance(result, dict):
|
| 324 |
+
logger.info("VLM returned dict with keys: %s", list(result.keys()))
|
| 325 |
out = result
|
| 326 |
else:
|
| 327 |
+
logger.info("VLM returned unknown type, converting to string")
|
| 328 |
out = {"text": str(result)}
|
| 329 |
|
| 330 |
+
# Extract text from various possible formats
|
| 331 |
+
text_out = None
|
| 332 |
+
if isinstance(out, dict):
|
| 333 |
+
text_out = out.get("text") or out.get("output") or out.get("content")
|
| 334 |
+
|
| 335 |
+
if not text_out:
|
| 336 |
+
# If still no text, try the whole result
|
| 337 |
+
if isinstance(result, str):
|
| 338 |
+
text_out = result
|
| 339 |
+
else:
|
| 340 |
+
text_out = json.dumps(out)
|
| 341 |
+
|
| 342 |
+
logger.info("VLM extracted text (first 300 chars): %s", text_out[:300] if text_out else "EMPTY")
|
| 343 |
+
|
| 344 |
+
if not text_out or len(text_out.strip()) == 0:
|
| 345 |
+
logger.warning("VLM returned empty text output!")
|
| 346 |
+
text_out = "{}" # Provide empty JSON as fallback
|
| 347 |
|
| 348 |
# Try to parse JSON
|
| 349 |
parsed = None
|
| 350 |
try:
|
| 351 |
parsed = json.loads(text_out)
|
| 352 |
if not isinstance(parsed, dict):
|
| 353 |
+
logger.warning("VLM JSON parsed but not a dict: %s", type(parsed))
|
| 354 |
parsed = None
|
| 355 |
+
else:
|
| 356 |
+
logger.info("VLM successfully parsed JSON with keys: %s", list(parsed.keys()))
|
| 357 |
+
except Exception as parse_err:
|
| 358 |
+
logger.info("VLM text is not direct JSON: %s", str(parse_err))
|
| 359 |
# Try to extract JSON from text
|
| 360 |
try:
|
| 361 |
first = text_out.find("{")
|
| 362 |
last = text_out.rfind("}")
|
| 363 |
if first != -1 and last != -1 and last > first:
|
| 364 |
+
json_str = text_out[first:last+1]
|
| 365 |
+
parsed = json.loads(json_str)
|
| 366 |
if not isinstance(parsed, dict):
|
| 367 |
+
logger.warning("Extracted JSON is not a dict")
|
| 368 |
parsed = None
|
| 369 |
+
else:
|
| 370 |
+
logger.info("Successfully extracted JSON from text with keys: %s", list(parsed.keys()))
|
| 371 |
+
except Exception as extract_err:
|
| 372 |
+
logger.warning("Could not extract JSON from VLM text: %s", str(extract_err))
|
| 373 |
parsed = None
|
| 374 |
|
| 375 |
return parsed, text_out
|
| 376 |
|
| 377 |
+
def get_fallback_risk_assessment(vlm_output: Any, reason: str = "LLM unavailable") -> Dict[str, Any]:
    """Generate a basic heuristic risk assessment when the LLM is unavailable.

    Args:
        vlm_output: Raw VLM output — a feature dict, a JSON string, or any
            other value. Non-dict payloads (including JSON arrays/scalars)
            are treated as "no data" rather than crashing.
        reason: Short machine-readable tag for why the fallback ran; echoed
            back in the ``fallback_reason`` field.

    Returns:
        A dict shaped like the LLM's structured risk output (risk_score,
        *_probability, confidence, summary, recommendation) plus the
        ``fallback_mode``/``fallback_reason`` markers consumers check.
    """
    logger.warning("Using fallback risk assessment: %s", reason)

    # Coerce VLM output into a dict. Guard against json.loads returning a
    # non-dict (list/str/number): the original code crashed with
    # AttributeError on .get() below for e.g. a JSON array payload.
    vlm_dict: Dict[str, Any] = {}
    if isinstance(vlm_output, dict):
        vlm_dict = vlm_output
    elif isinstance(vlm_output, str):
        try:
            loaded = json.loads(vlm_output)
            if isinstance(loaded, dict):
                vlm_dict = loaded
        except Exception:
            pass

    # A dict whose values are all falsy (0, "", None) carries no signal.
    has_data = bool(vlm_dict and any(vlm_dict.values()))

    if not has_data:
        logger.warning("VLM output is empty or invalid, returning minimal assessment")
        return {
            "risk_score": 0.0,
            "jaundice_probability": 0.0,
            "anemia_probability": 0.0,
            "hydration_issue_probability": 0.0,
            "neurological_issue_probability": 0.0,
            "confidence": 0.1,
            "summary": "Unable to analyze images. Please ensure photos are clear and well-lit.",
            "recommendation": "Retake photos with better lighting and clearer view of face and eyes.",
            "fallback_mode": True,
            "fallback_reason": "no_vlm_data"
        }

    # Basic heuristic risk scoring based on VLM features.
    risk_score = 20.0  # Conservative default baseline
    jaundice_prob = 0.0
    anemia_prob = 0.0
    hydration_prob = 0.0
    neuro_prob = 0.0  # No heuristic available; stays 0 in fallback mode

    # Look for color indicators the VLM may have reported.
    sclera_yellow = vlm_dict.get("sclera_yellowness", 0)
    pallor = vlm_dict.get("pallor_score", 0)
    redness = vlm_dict.get("redness", 0)

    if isinstance(sclera_yellow, (int, float)) and sclera_yellow > 0.3:
        jaundice_prob = min(0.6, sclera_yellow)
        risk_score += 15

    if isinstance(pallor, (int, float)) and pallor > 0.4:
        anemia_prob = min(0.7, pallor)
        risk_score += 20

    if isinstance(redness, (int, float)) and redness > 0.5:
        hydration_prob = min(0.5, redness * 0.8)
        risk_score += 10

    return {
        "risk_score": round(min(100.0, risk_score), 2),
        "jaundice_probability": round(jaundice_prob, 4),
        "anemia_probability": round(anemia_prob, 4),
        "hydration_issue_probability": round(hydration_prob, 4),
        "neurological_issue_probability": round(neuro_prob, 4),
        "confidence": 0.4,  # Low confidence — heuristic only, no LLM
        "summary": "Basic screening completed. Advanced AI analysis temporarily unavailable.",
        "recommendation": "Consider consulting a healthcare professional for a comprehensive assessment.",
        "fallback_mode": True,
        "fallback_reason": reason
    }
| 446 |
+
|
| 447 |
+
def call_llm(vlm_output: Any, use_fallback_on_error: bool = True) -> Dict[str, Any]:
|
| 448 |
"""Call LLM with VLM output and return structured risk assessment"""
|
| 449 |
if not GRADIO_AVAILABLE:
|
| 450 |
+
if use_fallback_on_error:
|
| 451 |
+
return get_fallback_risk_assessment(vlm_output, reason="gradio_not_available")
|
| 452 |
raise RuntimeError("gradio_client not installed")
|
| 453 |
|
| 454 |
+
# Check if VLM output is empty/useless
|
|
|
|
|
|
|
| 455 |
vlm_text = vlm_output if isinstance(vlm_output, str) else json.dumps(vlm_output, default=str)
|
| 456 |
|
| 457 |
+
# Detect empty or minimal VLM output
|
| 458 |
+
if not vlm_text or vlm_text.strip() in ["{}", "[]", ""]:
|
| 459 |
+
logger.warning("VLM output is empty, using fallback assessment")
|
| 460 |
+
if use_fallback_on_error:
|
| 461 |
+
return get_fallback_risk_assessment(vlm_output, reason="empty_vlm_output")
|
| 462 |
+
raise RuntimeError("VLM output is empty")
|
| 463 |
+
|
| 464 |
+
# Prepare input
|
| 465 |
instruction = (
|
| 466 |
"\n\nSTRICT INSTRUCTIONS:\n"
|
| 467 |
"1) OUTPUT ONLY a single valid JSON object — no prose, no code fences.\n"
|
| 468 |
"2) Include keys: risk_score, jaundice_probability, anemia_probability, "
|
| 469 |
"hydration_issue_probability, neurological_issue_probability, summary, recommendation, confidence.\n"
|
| 470 |
"3) Use numeric values for probabilities (0-1) and risk_score (0-100).\n"
|
| 471 |
+
"4) Use neutral wording in summary/recommendation.\n"
|
| 472 |
+
"5) If VLM data is minimal or unclear, set low probabilities and low confidence.\n\n"
|
| 473 |
"VLM Output:\n" + vlm_text + "\n"
|
| 474 |
)
|
| 475 |
|
| 476 |
# Call with safe defaults
|
| 477 |
try:
|
| 478 |
+
client = get_gradio_client(LLM_GRADIO_SPACE)
|
| 479 |
logger.info("Calling LLM Space: %s", LLM_GRADIO_SPACE)
|
| 480 |
+
|
| 481 |
result = client.predict(
|
| 482 |
input_data=instruction,
|
| 483 |
max_new_tokens=1024.0,
|
|
|
|
| 498 |
parsed = extract_json_from_llm_output(text_out)
|
| 499 |
logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2))
|
| 500 |
|
| 501 |
+
# Check if LLM returned essentially empty results (all zeros)
|
| 502 |
+
all_zero = all(
|
| 503 |
+
parsed.get(k, 0) == 0
|
| 504 |
+
for k in ["jaundice_probability", "anemia_probability",
|
| 505 |
+
"hydration_issue_probability", "neurological_issue_probability"]
|
| 506 |
+
)
|
| 507 |
+
|
| 508 |
+
if all_zero and parsed.get("risk_score", 0) == 0:
|
| 509 |
+
logger.warning("LLM returned all-zero assessment, likely due to poor VLM input")
|
| 510 |
+
parsed["summary"] = "Image analysis incomplete. Please ensure photos are clear and well-lit."
|
| 511 |
+
parsed["recommendation"] = "Retake photos with face clearly visible and eyes open."
|
| 512 |
+
parsed["confidence"] = 0.1
|
| 513 |
+
|
| 514 |
return parsed
|
| 515 |
|
| 516 |
except Exception as e:
|
| 517 |
+
logger.exception("LLM call failed: %s", str(e))
|
| 518 |
+
|
| 519 |
+
# Check if it's a quota error
|
| 520 |
+
error_msg = str(e).lower()
|
| 521 |
+
if "quota" in error_msg or "gpu" in error_msg:
|
| 522 |
+
logger.warning("GPU quota exceeded, using fallback assessment")
|
| 523 |
+
if use_fallback_on_error:
|
| 524 |
+
return get_fallback_risk_assessment(vlm_output, reason="gpu_quota_exceeded")
|
| 525 |
+
|
| 526 |
+
# For other errors, also use fallback if enabled
|
| 527 |
+
if use_fallback_on_error:
|
| 528 |
+
logger.warning("LLM error, using fallback assessment")
|
| 529 |
+
return get_fallback_risk_assessment(vlm_output, reason=f"llm_error: {str(e)[:100]}")
|
| 530 |
+
|
| 531 |
raise RuntimeError(f"LLM call failed: {e}")
|
| 532 |
|
| 533 |
# ============================================================================
|
|
|
|
| 567 |
# Call VLM
|
| 568 |
vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
|
| 569 |
|
| 570 |
+
# Call LLM with fallback enabled
|
| 571 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 572 |
+
structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
|
| 573 |
|
| 574 |
# Store results
|
| 575 |
screenings_db[screening_id]["ai_results"] = {
|
|
|
|
| 633 |
|
| 634 |
@app.get("/health")
async def health_check():
    """Health check with LLM availability status.

    Probes the LLM Gradio space for connectivity only (no inference) and
    classifies failures into quota-exceeded vs. generic errors so callers
    can tell whether fallback assessments will be used.
    """
    llm_status = "available"
    llm_message = None

    if GRADIO_AVAILABLE:
        try:
            # Connectivity probe only — constructing the client is enough;
            # the return value is intentionally discarded.
            get_gradio_client(LLM_GRADIO_SPACE)
        except Exception as e:
            error_msg = str(e).lower()
            if "quota" in error_msg or "gpu" in error_msg:
                llm_status = "quota_exceeded"
                llm_message = "GPU quota exceeded. Using fallback assessments."
            else:
                llm_status = "error"
                llm_message = "LLM temporarily unavailable"
    else:
        llm_status = "not_installed"
        llm_message = "Gradio client not available"

    return {
        "status": "healthy",
        "detector": detector_type or "none",
        "vlm_available": GRADIO_AVAILABLE,
        "vlm_space": GRADIO_VLM_SPACE,
        "llm_space": LLM_GRADIO_SPACE,
        "llm_status": llm_status,
        "llm_message": llm_message,
        "fallback_enabled": True
    }
|
| 668 |
|
| 669 |
@app.post("/api/v1/validate-eye-photo")
|
|
|
|
| 786 |
history = [s for s in screenings_db.values() if s.get("user_id") == user_id]
|
| 787 |
return {"screenings": history}
|
| 788 |
|
| 789 |
+
@app.get("/api/v1/debug/spaces")
async def debug_spaces():
    """Debug endpoint to test VLM and LLM spaces.

    Returns per-space connectivity results. Both spaces are probed the
    same way, so the duplicated VLM/LLM branches are folded into one loop.
    """
    results = {
        "vlm": {"available": False, "error": None},
        "llm": {"available": False, "error": None}
    }

    # Constructing a client verifies reachability without running inference.
    for key, space in (("vlm", GRADIO_VLM_SPACE), ("llm", LLM_GRADIO_SPACE)):
        if not GRADIO_AVAILABLE:
            results[key]["error"] = "Gradio not installed"
            continue
        try:
            get_gradio_client(space)
            results[key]["available"] = True
            results[key]["space"] = space
        except Exception as e:
            results[key]["error"] = str(e)

    return results
|
| 820 |
+
|
| 821 |
@app.post("/api/v1/get-vitals")
|
| 822 |
async def get_vitals_from_upload(
|
| 823 |
face_image: UploadFile = File(...),
|
| 824 |
eye_image: UploadFile = File(...)
|
| 825 |
):
|
| 826 |
+
"""Synchronous VLM + LLM pipeline with graceful fallback"""
|
| 827 |
if not GRADIO_AVAILABLE:
|
| 828 |
+
raise HTTPException(
|
| 829 |
+
status_code=503,
|
| 830 |
+
detail="AI services temporarily unavailable. Please try again later."
|
| 831 |
+
)
|
| 832 |
|
| 833 |
try:
|
| 834 |
uid = str(uuid.uuid4())
|
|
|
|
| 840 |
with open(eye_path, "wb") as f:
|
| 841 |
f.write(await eye_image.read())
|
| 842 |
|
| 843 |
+
# Call VLM
|
| 844 |
vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
|
| 845 |
+
|
| 846 |
+
# Call LLM with fallback enabled
|
| 847 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 848 |
+
structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
|
| 849 |
|
| 850 |
return {
|
| 851 |
"vlm_features": vlm_features,
|
| 852 |
"vlm_raw": vlm_raw,
|
| 853 |
+
"structured_risk": structured_risk,
|
| 854 |
+
"using_fallback": structured_risk.get("fallback_mode", False)
|
| 855 |
}
|
| 856 |
|
| 857 |
except Exception as e:
|
| 858 |
logger.exception("Get vitals failed")
|
| 859 |
+
error_msg = str(e).lower()
|
| 860 |
+
|
| 861 |
+
if "quota" in error_msg or "gpu" in error_msg:
|
| 862 |
+
raise HTTPException(
|
| 863 |
+
status_code=503,
|
| 864 |
+
detail="AI service is currently at capacity. Please try again in a few minutes."
|
| 865 |
+
)
|
| 866 |
+
|
| 867 |
+
raise HTTPException(
|
| 868 |
+
status_code=500,
|
| 869 |
+
detail="Unable to process images. Please ensure images are clear and try again."
|
| 870 |
+
)
|
| 871 |
|
| 872 |
@app.post("/api/v1/get-vitals/{screening_id}")
|
| 873 |
async def get_vitals_for_screening(screening_id: str):
|
| 874 |
+
"""Re-run VLM + LLM on existing screening with fallback support"""
|
| 875 |
if screening_id not in screenings_db:
|
| 876 |
raise HTTPException(status_code=404, detail="Screening not found")
|
| 877 |
|
|
|
|
| 880 |
eye_path = entry.get("eye_image_path")
|
| 881 |
|
| 882 |
if not (face_path and os.path.exists(face_path) and eye_path and os.path.exists(eye_path)):
|
| 883 |
+
raise HTTPException(status_code=400, detail="Images missing for this screening")
|
| 884 |
|
| 885 |
try:
|
| 886 |
vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
|
| 887 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 888 |
+
structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
|
| 889 |
|
| 890 |
entry.setdefault("ai_results", {}).update({
|
| 891 |
"vlm_features": vlm_features,
|
| 892 |
"vlm_raw": vlm_raw,
|
| 893 |
"structured_risk": structured_risk,
|
| 894 |
+
"last_vitals_run": datetime.utcnow().isoformat() + "Z",
|
| 895 |
+
"using_fallback": structured_risk.get("fallback_mode", False)
|
| 896 |
})
|
| 897 |
|
| 898 |
return {
|
| 899 |
"screening_id": screening_id,
|
| 900 |
"vlm_features": vlm_features,
|
| 901 |
"vlm_raw": vlm_raw,
|
| 902 |
+
"structured_risk": structured_risk,
|
| 903 |
+
"using_fallback": structured_risk.get("fallback_mode", False)
|
| 904 |
}
|
| 905 |
|
| 906 |
except Exception as e:
|
| 907 |
logger.exception("Get vitals for screening failed")
|
| 908 |
+
error_msg = str(e).lower()
|
| 909 |
+
|
| 910 |
+
if "quota" in error_msg or "gpu" in error_msg:
|
| 911 |
+
raise HTTPException(
|
| 912 |
+
status_code=503,
|
| 913 |
+
detail="AI service is currently at capacity. Please try again in a few minutes."
|
| 914 |
+
)
|
| 915 |
+
|
| 916 |
+
raise HTTPException(
|
| 917 |
+
status_code=500,
|
| 918 |
+
detail="Unable to re-process screening. Please try again."
|
| 919 |
+
)
|
| 920 |
|
| 921 |
if __name__ == "__main__":
|
| 922 |
import uvicorn
|
| 923 |
+
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=
|