Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
Elderly HealthWatch AI Backend (FastAPI) - Refactored with Qwen2.5-VL
|
| 3 |
-
Updated to use
|
| 4 |
"""
|
| 5 |
|
| 6 |
import io
|
|
@@ -32,8 +32,23 @@ except Exception:
|
|
| 32 |
logging.basicConfig(level=logging.INFO)
|
| 33 |
logger = logging.getLogger("elderly_healthwatch")
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
VLM_MODEL_ID = os.getenv("VLM_MODEL_ID", "Qwen/Qwen2.5-VL-7B-Instruct")
|
| 38 |
LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
|
| 39 |
HF_TOKEN = os.getenv("HF_TOKEN", None)
|
|
@@ -67,26 +82,26 @@ os.makedirs(TMP_DIR, exist_ok=True)
|
|
| 67 |
# In-memory database
|
| 68 |
screenings_db: Dict[str, Dict[str, Any]] = {}
|
| 69 |
|
|
|
|
|
|
|
|
|
|
| 70 |
# ============================================================================
|
| 71 |
# Face Detection Setup
|
| 72 |
# ============================================================================
|
| 73 |
def setup_face_detector():
|
| 74 |
"""Initialize face detector (MTCNN or OpenCV fallback)"""
|
| 75 |
-
# Try facenet-pytorch MTCNN
|
| 76 |
try:
|
| 77 |
from facenet_pytorch import MTCNN
|
| 78 |
return MTCNN(keep_all=False, device="cpu"), "facenet_pytorch"
|
| 79 |
except Exception:
|
| 80 |
pass
|
| 81 |
|
| 82 |
-
# Try classic MTCNN
|
| 83 |
try:
|
| 84 |
from mtcnn import MTCNN
|
| 85 |
return MTCNN(), "mtcnn"
|
| 86 |
except Exception:
|
| 87 |
pass
|
| 88 |
|
| 89 |
-
# OpenCV Haar cascade fallback
|
| 90 |
try:
|
| 91 |
face_path = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_default.xml")
|
| 92 |
eye_path = os.path.join(cv2.data.haarcascades, "haarcascade_eye.xml")
|
|
@@ -143,7 +158,6 @@ def detect_face_and_eyes(pil_img: Image.Image) -> Dict[str, Any]:
|
|
| 143 |
|
| 144 |
img_arr = np.asarray(pil_img)
|
| 145 |
|
| 146 |
-
# Facenet-pytorch MTCNN
|
| 147 |
if detector_type == "facenet_pytorch":
|
| 148 |
try:
|
| 149 |
boxes, probs, landmarks = face_detector.detect(pil_img, landmarks=True)
|
|
@@ -171,7 +185,6 @@ def detect_face_and_eyes(pil_img: Image.Image) -> Dict[str, Any]:
|
|
| 171 |
return {"face_detected": False, "face_confidence": 0.0, "eye_openness_score": 0.0,
|
| 172 |
"left_eye": None, "right_eye": None}
|
| 173 |
|
| 174 |
-
# Classic MTCNN
|
| 175 |
elif detector_type == "mtcnn":
|
| 176 |
try:
|
| 177 |
detections = face_detector.detect_faces(img_arr)
|
|
@@ -195,7 +208,6 @@ def detect_face_and_eyes(pil_img: Image.Image) -> Dict[str, Any]:
|
|
| 195 |
return {"face_detected": False, "face_confidence": 0.0, "eye_openness_score": 0.0,
|
| 196 |
"left_eye": None, "right_eye": None}
|
| 197 |
|
| 198 |
-
# OpenCV fallback
|
| 199 |
elif detector_type == "opencv":
|
| 200 |
try:
|
| 201 |
gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
|
|
@@ -287,7 +299,7 @@ def extract_json_from_llm_output(raw_text: str) -> Dict[str, Any]:
|
|
| 287 |
}
|
| 288 |
|
| 289 |
# ============================================================================
|
| 290 |
-
# VLM & LLM Integration - UPDATED FOR QWEN2.5-VL
|
| 291 |
# ============================================================================
|
| 292 |
def get_gradio_client(space: str) -> Client:
|
| 293 |
"""Get Gradio client with optional auth"""
|
|
@@ -295,11 +307,42 @@ def get_gradio_client(space: str) -> Client:
|
|
| 295 |
raise RuntimeError("gradio_client not installed")
|
| 296 |
return Client(space, hf_token=HF_TOKEN) if HF_TOKEN else Client(space)
|
| 297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
def call_vlm(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tuple[Optional[Dict], str]:
|
| 299 |
"""
|
| 300 |
Call Qwen2.5-VL and return (parsed_features, raw_text)
|
| 301 |
-
|
| 302 |
"""
|
|
|
|
|
|
|
| 303 |
prompt = prompt or DEFAULT_VLM_PROMPT
|
| 304 |
|
| 305 |
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
|
@@ -311,85 +354,85 @@ def call_vlm(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tup
|
|
| 311 |
eye_path, os.path.exists(eye_path), os.path.getsize(eye_path))
|
| 312 |
logger.info("VLM Prompt: %s", prompt[:100])
|
| 313 |
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
# First call with face image
|
| 320 |
-
result_face = client.predict(
|
| 321 |
-
image=handle_file(face_path),
|
| 322 |
-
text_input=prompt + " Focus on the face and overall skin condition.",
|
| 323 |
-
model_id=VLM_MODEL_ID,
|
| 324 |
-
api_name="/run_example"
|
| 325 |
-
)
|
| 326 |
-
|
| 327 |
-
# Second call with eye image
|
| 328 |
-
result_eye = client.predict(
|
| 329 |
-
image=handle_file(eye_path),
|
| 330 |
-
text_input=prompt + " Focus on the eyes, sclera color, and eye health.",
|
| 331 |
-
model_id=VLM_MODEL_ID,
|
| 332 |
-
api_name="/run_example"
|
| 333 |
-
)
|
| 334 |
-
|
| 335 |
-
logger.info("VLM Face result type: %s", type(result_face))
|
| 336 |
-
logger.info("VLM Eye result type: %s", type(result_eye))
|
| 337 |
-
|
| 338 |
-
# Extract text from tuple results (returns tuple of 2 elements)
|
| 339 |
-
face_text = result_face[0] if isinstance(result_face, (list, tuple)) and len(result_face) > 0 else str(result_face)
|
| 340 |
-
eye_text = result_eye[0] if isinstance(result_eye, (list, tuple)) and len(result_eye) > 0 else str(result_eye)
|
| 341 |
-
|
| 342 |
-
# Combine both analyses
|
| 343 |
-
combined_text = f"Face Analysis:\n{face_text}\n\nEye Analysis:\n{eye_text}"
|
| 344 |
-
|
| 345 |
-
logger.info("VLM combined text (first 500 chars): %s", combined_text[:500])
|
| 346 |
-
|
| 347 |
-
except Exception as e:
|
| 348 |
-
logger.exception("VLM call failed")
|
| 349 |
-
raise RuntimeError(f"VLM call failed: {e}")
|
| 350 |
-
|
| 351 |
-
# Try to parse JSON from the combined text
|
| 352 |
-
parsed = None
|
| 353 |
-
try:
|
| 354 |
-
# Try direct JSON parse first
|
| 355 |
-
parsed = json.loads(combined_text)
|
| 356 |
-
if not isinstance(parsed, dict):
|
| 357 |
-
logger.warning("VLM JSON parsed but not a dict: %s", type(parsed))
|
| 358 |
-
parsed = None
|
| 359 |
-
else:
|
| 360 |
-
logger.info("VLM successfully parsed JSON with keys: %s", list(parsed.keys()))
|
| 361 |
-
except Exception:
|
| 362 |
-
# Try to extract JSON block
|
| 363 |
try:
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
if not isinstance(parsed, dict):
|
| 370 |
parsed = None
|
| 371 |
else:
|
| 372 |
-
logger.info("
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
-
|
|
|
|
|
|
|
| 387 |
|
| 388 |
def get_fallback_risk_assessment(vlm_output: Any, reason: str = "LLM unavailable") -> Dict[str, Any]:
|
| 389 |
"""Generate basic risk assessment from VLM output when LLM is unavailable"""
|
| 390 |
logger.warning("Using fallback risk assessment: %s", reason)
|
| 391 |
|
| 392 |
-
# Try to extract basic info from VLM output
|
| 393 |
vlm_dict = {}
|
| 394 |
if isinstance(vlm_output, dict):
|
| 395 |
vlm_dict = vlm_output
|
|
@@ -399,7 +442,6 @@ def get_fallback_risk_assessment(vlm_output: Any, reason: str = "LLM unavailable
|
|
| 399 |
except Exception:
|
| 400 |
pass
|
| 401 |
|
| 402 |
-
# Check if VLM data is empty/invalid
|
| 403 |
has_data = bool(vlm_dict and any(vlm_dict.values()))
|
| 404 |
|
| 405 |
if not has_data:
|
|
@@ -417,14 +459,13 @@ def get_fallback_risk_assessment(vlm_output: Any, reason: str = "LLM unavailable
|
|
| 417 |
"fallback_reason": "no_vlm_data"
|
| 418 |
}
|
| 419 |
|
| 420 |
-
# Basic heuristic risk scoring
|
| 421 |
-
risk_score = 20.0
|
| 422 |
jaundice_prob = 0.0
|
| 423 |
anemia_prob = 0.0
|
| 424 |
hydration_prob = 0.0
|
| 425 |
neuro_prob = 0.0
|
| 426 |
|
| 427 |
-
# Extract VLM features if available
|
| 428 |
sclera_yellow = vlm_dict.get("sclera_yellowness", 0)
|
| 429 |
pallor = vlm_dict.get("pallor_score", 0)
|
| 430 |
redness = vlm_dict.get("redness", 0)
|
|
@@ -461,17 +502,14 @@ def call_llm(vlm_output: Any, use_fallback_on_error: bool = True) -> Dict[str, A
|
|
| 461 |
return get_fallback_risk_assessment(vlm_output, reason="gradio_not_available")
|
| 462 |
raise RuntimeError("gradio_client not installed")
|
| 463 |
|
| 464 |
-
# Check if VLM output is empty/useless
|
| 465 |
vlm_text = vlm_output if isinstance(vlm_output, str) else json.dumps(vlm_output, default=str)
|
| 466 |
|
| 467 |
-
# Detect empty or minimal VLM output
|
| 468 |
if not vlm_text or vlm_text.strip() in ["{}", "[]", ""]:
|
| 469 |
logger.warning("VLM output is empty, using fallback assessment")
|
| 470 |
if use_fallback_on_error:
|
| 471 |
return get_fallback_risk_assessment(vlm_output, reason="empty_vlm_output")
|
| 472 |
raise RuntimeError("VLM output is empty")
|
| 473 |
|
| 474 |
-
# Prepare input
|
| 475 |
instruction = (
|
| 476 |
"\n\nSTRICT INSTRUCTIONS:\n"
|
| 477 |
"1) OUTPUT ONLY a single valid JSON object — no prose, no code fences.\n"
|
|
@@ -483,7 +521,6 @@ def call_llm(vlm_output: Any, use_fallback_on_error: bool = True) -> Dict[str, A
|
|
| 483 |
"VLM Output:\n" + vlm_text + "\n"
|
| 484 |
)
|
| 485 |
|
| 486 |
-
# Call with safe defaults
|
| 487 |
try:
|
| 488 |
client = get_gradio_client(LLM_GRADIO_SPACE)
|
| 489 |
logger.info("Calling LLM Space: %s", LLM_GRADIO_SPACE)
|
|
@@ -508,7 +545,6 @@ def call_llm(vlm_output: Any, use_fallback_on_error: bool = True) -> Dict[str, A
|
|
| 508 |
parsed = extract_json_from_llm_output(text_out)
|
| 509 |
logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2))
|
| 510 |
|
| 511 |
-
# Check if LLM returned essentially empty results
|
| 512 |
all_zero = all(
|
| 513 |
parsed.get(k, 0) == 0
|
| 514 |
for k in ["jaundice_probability", "anemia_probability",
|
|
@@ -516,7 +552,7 @@ def call_llm(vlm_output: Any, use_fallback_on_error: bool = True) -> Dict[str, A
|
|
| 516 |
)
|
| 517 |
|
| 518 |
if all_zero and parsed.get("risk_score", 0) == 0:
|
| 519 |
-
logger.warning("LLM returned all-zero assessment
|
| 520 |
parsed["summary"] = "Image analysis incomplete. Please ensure photos are clear and well-lit."
|
| 521 |
parsed["recommendation"] = "Retake photos with face clearly visible and eyes open."
|
| 522 |
parsed["confidence"] = 0.1
|
|
@@ -555,7 +591,6 @@ async def process_screening(screening_id: str):
|
|
| 555 |
face_path = entry["face_image_path"]
|
| 556 |
eye_path = entry["eye_image_path"]
|
| 557 |
|
| 558 |
-
# Load images and get quality metrics
|
| 559 |
face_img = Image.open(face_path).convert("RGB")
|
| 560 |
detection_result = detect_face_and_eyes(face_img)
|
| 561 |
|
|
@@ -579,7 +614,6 @@ async def process_screening(screening_id: str):
|
|
| 579 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 580 |
structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
|
| 581 |
|
| 582 |
-
# Store results
|
| 583 |
screenings_db[screening_id]["ai_results"] = {
|
| 584 |
"vlm_features": vlm_features,
|
| 585 |
"vlm_raw": vlm_raw,
|
|
@@ -587,7 +621,6 @@ async def process_screening(screening_id: str):
|
|
| 587 |
"processing_time_ms": 1200
|
| 588 |
}
|
| 589 |
|
| 590 |
-
# Build disease predictions
|
| 591 |
disease_predictions = [
|
| 592 |
{
|
| 593 |
"condition": "Anemia-like-signs",
|
|
@@ -624,7 +657,7 @@ async def process_screening(screening_id: str):
|
|
| 624 |
screenings_db[screening_id]["error"] = str(e)
|
| 625 |
|
| 626 |
# ============================================================================
|
| 627 |
-
# FastAPI App & Routes
|
| 628 |
# ============================================================================
|
| 629 |
app = FastAPI(title="Elderly HealthWatch AI Backend")
|
| 630 |
app.add_middleware(
|
|
@@ -637,7 +670,11 @@ app.add_middleware(
|
|
| 637 |
|
| 638 |
@app.get("/")
|
| 639 |
async def read_root():
|
| 640 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
|
| 642 |
@app.get("/health")
|
| 643 |
async def health_check():
|
|
@@ -665,8 +702,8 @@ async def health_check():
|
|
| 665 |
"status": "healthy",
|
| 666 |
"detector": detector_type or "none",
|
| 667 |
"vlm_available": GRADIO_AVAILABLE,
|
| 668 |
-
"
|
| 669 |
-
"
|
| 670 |
"llm_space": LLM_GRADIO_SPACE,
|
| 671 |
"llm_status": llm_status,
|
| 672 |
"llm_message": llm_message,
|
|
@@ -797,21 +834,25 @@ async def get_history(user_id: str):
|
|
| 797 |
async def debug_spaces():
|
| 798 |
"""Debug endpoint to test VLM and LLM spaces"""
|
| 799 |
results = {
|
| 800 |
-
"
|
| 801 |
"llm": {"available": False, "error": None}
|
| 802 |
}
|
| 803 |
|
|
|
|
| 804 |
if GRADIO_AVAILABLE:
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
|
|
|
|
|
|
| 812 |
else:
|
| 813 |
-
results["
|
| 814 |
|
|
|
|
| 815 |
if GRADIO_AVAILABLE:
|
| 816 |
try:
|
| 817 |
client = get_gradio_client(LLM_GRADIO_SPACE)
|
|
@@ -822,6 +863,7 @@ async def debug_spaces():
|
|
| 822 |
else:
|
| 823 |
results["llm"]["error"] = "Gradio not installed"
|
| 824 |
|
|
|
|
| 825 |
return results
|
| 826 |
|
| 827 |
@app.post("/api/v1/get-vitals")
|
|
@@ -857,7 +899,8 @@ async def get_vitals_from_upload(
|
|
| 857 |
"vlm_features": vlm_features,
|
| 858 |
"vlm_raw": vlm_raw,
|
| 859 |
"structured_risk": structured_risk,
|
| 860 |
-
"using_fallback": structured_risk.get("fallback_mode", False)
|
|
|
|
| 861 |
}
|
| 862 |
|
| 863 |
except Exception as e:
|
|
@@ -898,7 +941,8 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 898 |
"vlm_raw": vlm_raw,
|
| 899 |
"structured_risk": structured_risk,
|
| 900 |
"last_vitals_run": datetime.utcnow().isoformat() + "Z",
|
| 901 |
-
"using_fallback": structured_risk.get("fallback_mode", False)
|
|
|
|
| 902 |
})
|
| 903 |
|
| 904 |
return {
|
|
@@ -906,7 +950,8 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 906 |
"vlm_features": vlm_features,
|
| 907 |
"vlm_raw": vlm_raw,
|
| 908 |
"structured_risk": structured_risk,
|
| 909 |
-
"using_fallback": structured_risk.get("fallback_mode", False)
|
|
|
|
| 910 |
}
|
| 911 |
|
| 912 |
except Exception as e:
|
|
|
|
| 1 |
"""
|
| 2 |
Elderly HealthWatch AI Backend (FastAPI) - Refactored with Qwen2.5-VL
|
| 3 |
+
Updated to use Qwen2.5-VL with multiple space options and fallback
|
| 4 |
"""
|
| 5 |
|
| 6 |
import io
|
|
|
|
| 32 |
logging.basicConfig(level=logging.INFO)
|
| 33 |
logger = logging.getLogger("elderly_healthwatch")
|
| 34 |
|
| 35 |
+
# Multiple VLM options - will try in order until one works.
# Each entry describes one Hugging Face Gradio Space hosting Qwen2.5-VL:
#   space    - HF Space identifier passed to gradio_client.Client
#   model_id - model identifier forwarded to the Space's predict call
#   api_name - Gradio endpoint name exposed by that Space
#   type     - selects the argument shape used in call_vlm_single_image
#              ("official" = query/image API, "demo" = image/text_input/model_id API)
VLM_SPACES = [
    {
        "space": "Qwen/Qwen2.5-VL-7B-Instruct",
        "model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
        "api_name": "/model_chat",
        "type": "official"
    },
    {
        "space": "mrdbourke/Qwen2.5-VL-Instruct-Demo",
        "model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
        "api_name": "/run_example",
        "type": "demo"
    }
]

# Environment overrides; defaults fall back to the first configured Space.
GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", VLM_SPACES[0]["space"])
VLM_MODEL_ID = os.getenv("VLM_MODEL_ID", "Qwen/Qwen2.5-VL-7B-Instruct")
LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
# Optional HF auth token; when None, Client() is created unauthenticated.
HF_TOKEN = os.getenv("HF_TOKEN", None)
|
|
|
|
| 82 |
# In-memory database
|
| 83 |
screenings_db: Dict[str, Dict[str, Any]] = {}
|
| 84 |
|
| 85 |
+
# Track which VLM space is working: caches the most recent VLM_SPACES entry
# that completed a call successfully, so call_vlm retries it first next time.
# None until the first successful VLM call.
active_vlm_config: Optional[Dict[str, str]] = None
|
| 87 |
+
|
| 88 |
# ============================================================================
|
| 89 |
# Face Detection Setup
|
| 90 |
# ============================================================================
|
| 91 |
def setup_face_detector():
|
| 92 |
"""Initialize face detector (MTCNN or OpenCV fallback)"""
|
|
|
|
| 93 |
try:
|
| 94 |
from facenet_pytorch import MTCNN
|
| 95 |
return MTCNN(keep_all=False, device="cpu"), "facenet_pytorch"
|
| 96 |
except Exception:
|
| 97 |
pass
|
| 98 |
|
|
|
|
| 99 |
try:
|
| 100 |
from mtcnn import MTCNN
|
| 101 |
return MTCNN(), "mtcnn"
|
| 102 |
except Exception:
|
| 103 |
pass
|
| 104 |
|
|
|
|
| 105 |
try:
|
| 106 |
face_path = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_default.xml")
|
| 107 |
eye_path = os.path.join(cv2.data.haarcascades, "haarcascade_eye.xml")
|
|
|
|
| 158 |
|
| 159 |
img_arr = np.asarray(pil_img)
|
| 160 |
|
|
|
|
| 161 |
if detector_type == "facenet_pytorch":
|
| 162 |
try:
|
| 163 |
boxes, probs, landmarks = face_detector.detect(pil_img, landmarks=True)
|
|
|
|
| 185 |
return {"face_detected": False, "face_confidence": 0.0, "eye_openness_score": 0.0,
|
| 186 |
"left_eye": None, "right_eye": None}
|
| 187 |
|
|
|
|
| 188 |
elif detector_type == "mtcnn":
|
| 189 |
try:
|
| 190 |
detections = face_detector.detect_faces(img_arr)
|
|
|
|
| 208 |
return {"face_detected": False, "face_confidence": 0.0, "eye_openness_score": 0.0,
|
| 209 |
"left_eye": None, "right_eye": None}
|
| 210 |
|
|
|
|
| 211 |
elif detector_type == "opencv":
|
| 212 |
try:
|
| 213 |
gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
|
|
|
|
| 299 |
}
|
| 300 |
|
| 301 |
# ============================================================================
|
| 302 |
+
# VLM & LLM Integration - UPDATED FOR QWEN2.5-VL with Fallback
|
| 303 |
# ============================================================================
|
| 304 |
def get_gradio_client(space: str) -> Client:
|
| 305 |
"""Get Gradio client with optional auth"""
|
|
|
|
| 307 |
raise RuntimeError("gradio_client not installed")
|
| 308 |
return Client(space, hf_token=HF_TOKEN) if HF_TOKEN else Client(space)
|
| 309 |
|
| 310 |
+
def _extract_vlm_text(result: Any) -> str:
    """Return the text portion of a Gradio predict() result.

    Gradio Spaces may return a tuple/list whose first element is the text
    output; otherwise the whole value is stringified.
    """
    if isinstance(result, (list, tuple)) and len(result) > 0:
        return str(result[0])
    return str(result)


def call_vlm_single_image(client: Client, image_path: str, prompt: str, config: Dict) -> str:
    """Call the VLM with a single image using the API shape for *config*.

    Args:
        client: Connected gradio_client Client for the target Space.
        image_path: Local path of the image to analyze.
        prompt: Text instruction sent alongside the image.
        config: One VLM_SPACES entry; config["type"] selects the argument
            shape ("demo" = image/text_input/model_id, otherwise the
            official query/image API), and config["api_name"] the endpoint.

    Returns:
        The VLM's text response.

    Raises:
        Exception: re-raises whatever the predict call failed with, after
        logging it, so the caller can fall through to the next Space.
    """
    try:
        if config["type"] == "demo":
            # mrdbourke-style demo API
            kwargs = {
                "image": handle_file(image_path),
                "text_input": prompt,
                "model_id": config["model_id"],
            }
        else:
            # Official Qwen Space API (if it exists)
            kwargs = {
                "query": prompt,
                "image": handle_file(image_path),
            }
        result = client.predict(api_name=config["api_name"], **kwargs)
        return _extract_vlm_text(result)
    except Exception:
        # logger.exception records the traceback; logger.error with str(e) did not.
        logger.exception("VLM single image call failed with config %s", config)
        raise
|
| 338 |
+
|
| 339 |
def call_vlm(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tuple[Optional[Dict], str]:
|
| 340 |
"""
|
| 341 |
Call Qwen2.5-VL and return (parsed_features, raw_text)
|
| 342 |
+
Tries multiple VLM spaces until one works
|
| 343 |
"""
|
| 344 |
+
global active_vlm_config
|
| 345 |
+
|
| 346 |
prompt = prompt or DEFAULT_VLM_PROMPT
|
| 347 |
|
| 348 |
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
|
|
|
| 354 |
eye_path, os.path.exists(eye_path), os.path.getsize(eye_path))
|
| 355 |
logger.info("VLM Prompt: %s", prompt[:100])
|
| 356 |
|
| 357 |
+
# Try active config first if we have one that worked before
|
| 358 |
+
configs_to_try = []
|
| 359 |
+
if active_vlm_config:
|
| 360 |
+
configs_to_try.append(active_vlm_config)
|
| 361 |
+
configs_to_try.extend([c for c in VLM_SPACES if c != active_vlm_config])
|
| 362 |
|
| 363 |
+
last_error = None
|
| 364 |
+
for config in configs_to_try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
try:
|
| 366 |
+
logger.info("Trying VLM Space: %s with api_name=%s", config["space"], config["api_name"])
|
| 367 |
+
client = get_gradio_client(config["space"])
|
| 368 |
+
|
| 369 |
+
# Call VLM twice - once for face, once for eyes
|
| 370 |
+
face_text = call_vlm_single_image(
|
| 371 |
+
client, face_path,
|
| 372 |
+
prompt + " Focus on the face and overall skin condition.",
|
| 373 |
+
config
|
| 374 |
+
)
|
| 375 |
+
|
| 376 |
+
eye_text = call_vlm_single_image(
|
| 377 |
+
client, eye_path,
|
| 378 |
+
prompt + " Focus on the eyes, sclera color, and eye health.",
|
| 379 |
+
config
|
| 380 |
+
)
|
| 381 |
+
|
| 382 |
+
# Success! Save this config
|
| 383 |
+
active_vlm_config = config
|
| 384 |
+
|
| 385 |
+
# Combine both analyses
|
| 386 |
+
combined_text = f"Face Analysis:\n{face_text}\n\nEye Analysis:\n{eye_text}"
|
| 387 |
+
logger.info("VLM combined text (first 500 chars): %s", combined_text[:500])
|
| 388 |
+
|
| 389 |
+
# Try to parse JSON
|
| 390 |
+
parsed = None
|
| 391 |
+
try:
|
| 392 |
+
parsed = json.loads(combined_text)
|
| 393 |
if not isinstance(parsed, dict):
|
| 394 |
parsed = None
|
| 395 |
else:
|
| 396 |
+
logger.info("VLM successfully parsed JSON with keys: %s", list(parsed.keys()))
|
| 397 |
+
except Exception:
|
| 398 |
+
try:
|
| 399 |
+
first = combined_text.find("{")
|
| 400 |
+
last = combined_text.rfind("}")
|
| 401 |
+
if first != -1 and last != -1 and last > first:
|
| 402 |
+
json_str = combined_text[first:last+1]
|
| 403 |
+
parsed = json.loads(json_str)
|
| 404 |
+
if not isinstance(parsed, dict):
|
| 405 |
+
parsed = None
|
| 406 |
+
else:
|
| 407 |
+
logger.info("Successfully extracted JSON from text with keys: %s", list(parsed.keys()))
|
| 408 |
+
except Exception as extract_err:
|
| 409 |
+
logger.warning("Could not extract JSON from VLM text: %s", str(extract_err))
|
| 410 |
+
parsed = None
|
| 411 |
+
|
| 412 |
+
# If no JSON found, create structured data from text
|
| 413 |
+
if parsed is None:
|
| 414 |
+
logger.info("No JSON found, creating structured data from text analysis")
|
| 415 |
+
parsed = {
|
| 416 |
+
"face_analysis": face_text[:500],
|
| 417 |
+
"eye_analysis": eye_text[:500],
|
| 418 |
+
"combined_analysis": combined_text[:1000]
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
return parsed, combined_text
|
| 422 |
+
|
| 423 |
+
except Exception as e:
|
| 424 |
+
logger.warning("VLM space %s failed: %s", config["space"], str(e))
|
| 425 |
+
last_error = e
|
| 426 |
+
continue
|
| 427 |
|
| 428 |
+
# All configs failed
|
| 429 |
+
logger.error("All VLM spaces failed. Last error: %s", str(last_error))
|
| 430 |
+
raise RuntimeError(f"All VLM spaces failed. Last error: {last_error}")
|
| 431 |
|
| 432 |
def get_fallback_risk_assessment(vlm_output: Any, reason: str = "LLM unavailable") -> Dict[str, Any]:
|
| 433 |
"""Generate basic risk assessment from VLM output when LLM is unavailable"""
|
| 434 |
logger.warning("Using fallback risk assessment: %s", reason)
|
| 435 |
|
|
|
|
| 436 |
vlm_dict = {}
|
| 437 |
if isinstance(vlm_output, dict):
|
| 438 |
vlm_dict = vlm_output
|
|
|
|
| 442 |
except Exception:
|
| 443 |
pass
|
| 444 |
|
|
|
|
| 445 |
has_data = bool(vlm_dict and any(vlm_dict.values()))
|
| 446 |
|
| 447 |
if not has_data:
|
|
|
|
| 459 |
"fallback_reason": "no_vlm_data"
|
| 460 |
}
|
| 461 |
|
| 462 |
+
# Basic heuristic risk scoring
|
| 463 |
+
risk_score = 20.0
|
| 464 |
jaundice_prob = 0.0
|
| 465 |
anemia_prob = 0.0
|
| 466 |
hydration_prob = 0.0
|
| 467 |
neuro_prob = 0.0
|
| 468 |
|
|
|
|
| 469 |
sclera_yellow = vlm_dict.get("sclera_yellowness", 0)
|
| 470 |
pallor = vlm_dict.get("pallor_score", 0)
|
| 471 |
redness = vlm_dict.get("redness", 0)
|
|
|
|
| 502 |
return get_fallback_risk_assessment(vlm_output, reason="gradio_not_available")
|
| 503 |
raise RuntimeError("gradio_client not installed")
|
| 504 |
|
|
|
|
| 505 |
vlm_text = vlm_output if isinstance(vlm_output, str) else json.dumps(vlm_output, default=str)
|
| 506 |
|
|
|
|
| 507 |
if not vlm_text or vlm_text.strip() in ["{}", "[]", ""]:
|
| 508 |
logger.warning("VLM output is empty, using fallback assessment")
|
| 509 |
if use_fallback_on_error:
|
| 510 |
return get_fallback_risk_assessment(vlm_output, reason="empty_vlm_output")
|
| 511 |
raise RuntimeError("VLM output is empty")
|
| 512 |
|
|
|
|
| 513 |
instruction = (
|
| 514 |
"\n\nSTRICT INSTRUCTIONS:\n"
|
| 515 |
"1) OUTPUT ONLY a single valid JSON object — no prose, no code fences.\n"
|
|
|
|
| 521 |
"VLM Output:\n" + vlm_text + "\n"
|
| 522 |
)
|
| 523 |
|
|
|
|
| 524 |
try:
|
| 525 |
client = get_gradio_client(LLM_GRADIO_SPACE)
|
| 526 |
logger.info("Calling LLM Space: %s", LLM_GRADIO_SPACE)
|
|
|
|
| 545 |
parsed = extract_json_from_llm_output(text_out)
|
| 546 |
logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2))
|
| 547 |
|
|
|
|
| 548 |
all_zero = all(
|
| 549 |
parsed.get(k, 0) == 0
|
| 550 |
for k in ["jaundice_probability", "anemia_probability",
|
|
|
|
| 552 |
)
|
| 553 |
|
| 554 |
if all_zero and parsed.get("risk_score", 0) == 0:
|
| 555 |
+
logger.warning("LLM returned all-zero assessment")
|
| 556 |
parsed["summary"] = "Image analysis incomplete. Please ensure photos are clear and well-lit."
|
| 557 |
parsed["recommendation"] = "Retake photos with face clearly visible and eyes open."
|
| 558 |
parsed["confidence"] = 0.1
|
|
|
|
| 591 |
face_path = entry["face_image_path"]
|
| 592 |
eye_path = entry["eye_image_path"]
|
| 593 |
|
|
|
|
| 594 |
face_img = Image.open(face_path).convert("RGB")
|
| 595 |
detection_result = detect_face_and_eyes(face_img)
|
| 596 |
|
|
|
|
| 614 |
llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
|
| 615 |
structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
|
| 616 |
|
|
|
|
| 617 |
screenings_db[screening_id]["ai_results"] = {
|
| 618 |
"vlm_features": vlm_features,
|
| 619 |
"vlm_raw": vlm_raw,
|
|
|
|
| 621 |
"processing_time_ms": 1200
|
| 622 |
}
|
| 623 |
|
|
|
|
| 624 |
disease_predictions = [
|
| 625 |
{
|
| 626 |
"condition": "Anemia-like-signs",
|
|
|
|
| 657 |
screenings_db[screening_id]["error"] = str(e)
|
| 658 |
|
| 659 |
# ============================================================================
|
| 660 |
+
# FastAPI App & Routes (REST OF THE CODE REMAINS THE SAME)
|
| 661 |
# ============================================================================
|
| 662 |
app = FastAPI(title="Elderly HealthWatch AI Backend")
|
| 663 |
app.add_middleware(
|
|
|
|
| 670 |
|
| 671 |
@app.get("/")
async def read_root():
    """Root endpoint: identify the backend and report VLM space status."""
    known_spaces = [cfg["space"] for cfg in VLM_SPACES]
    if active_vlm_config:
        current_space = active_vlm_config["space"]
    else:
        current_space = "Not yet determined"
    return {
        "message": "Elderly HealthWatch AI Backend - Using Qwen2.5-VL",
        "active_vlm": current_space,
        "available_vlm_spaces": known_spaces,
    }
|
| 678 |
|
| 679 |
@app.get("/health")
|
| 680 |
async def health_check():
|
|
|
|
| 702 |
"status": "healthy",
|
| 703 |
"detector": detector_type or "none",
|
| 704 |
"vlm_available": GRADIO_AVAILABLE,
|
| 705 |
+
"active_vlm_space": active_vlm_config["space"] if active_vlm_config else "Not yet determined",
|
| 706 |
+
"available_vlm_spaces": [c["space"] for c in VLM_SPACES],
|
| 707 |
"llm_space": LLM_GRADIO_SPACE,
|
| 708 |
"llm_status": llm_status,
|
| 709 |
"llm_message": llm_message,
|
|
|
|
| 834 |
async def debug_spaces():
|
| 835 |
"""Debug endpoint to test VLM and LLM spaces"""
|
| 836 |
results = {
|
| 837 |
+
"vlm_spaces": [],
|
| 838 |
"llm": {"available": False, "error": None}
|
| 839 |
}
|
| 840 |
|
| 841 |
+
# Test each VLM space
|
| 842 |
if GRADIO_AVAILABLE:
|
| 843 |
+
for config in VLM_SPACES:
|
| 844 |
+
space_result = {"space": config["space"], "available": False, "error": None}
|
| 845 |
+
try:
|
| 846 |
+
client = get_gradio_client(config["space"])
|
| 847 |
+
space_result["available"] = True
|
| 848 |
+
space_result["config"] = config
|
| 849 |
+
except Exception as e:
|
| 850 |
+
space_result["error"] = str(e)
|
| 851 |
+
results["vlm_spaces"].append(space_result)
|
| 852 |
else:
|
| 853 |
+
results["vlm_error"] = "Gradio not installed"
|
| 854 |
|
| 855 |
+
# Test LLM
|
| 856 |
if GRADIO_AVAILABLE:
|
| 857 |
try:
|
| 858 |
client = get_gradio_client(LLM_GRADIO_SPACE)
|
|
|
|
| 863 |
else:
|
| 864 |
results["llm"]["error"] = "Gradio not installed"
|
| 865 |
|
| 866 |
+
results["active_vlm"] = active_vlm_config
|
| 867 |
return results
|
| 868 |
|
| 869 |
@app.post("/api/v1/get-vitals")
|
|
|
|
| 899 |
"vlm_features": vlm_features,
|
| 900 |
"vlm_raw": vlm_raw,
|
| 901 |
"structured_risk": structured_risk,
|
| 902 |
+
"using_fallback": structured_risk.get("fallback_mode", False),
|
| 903 |
+
"vlm_space_used": active_vlm_config["space"] if active_vlm_config else "unknown"
|
| 904 |
}
|
| 905 |
|
| 906 |
except Exception as e:
|
|
|
|
| 941 |
"vlm_raw": vlm_raw,
|
| 942 |
"structured_risk": structured_risk,
|
| 943 |
"last_vitals_run": datetime.utcnow().isoformat() + "Z",
|
| 944 |
+
"using_fallback": structured_risk.get("fallback_mode", False),
|
| 945 |
+
"vlm_space_used": active_vlm_config["space"] if active_vlm_config else "unknown"
|
| 946 |
})
|
| 947 |
|
| 948 |
return {
|
|
|
|
| 950 |
"vlm_features": vlm_features,
|
| 951 |
"vlm_raw": vlm_raw,
|
| 952 |
"structured_risk": structured_risk,
|
| 953 |
+
"using_fallback": structured_risk.get("fallback_mode", False),
|
| 954 |
+
"vlm_space_used": active_vlm_config["space"] if active_vlm_config else "unknown"
|
| 955 |
}
|
| 956 |
|
| 957 |
except Exception as e:
|