Update app.py
Browse files
app.py
CHANGED
|
@@ -3,18 +3,19 @@
|
|
| 3 |
Elderly HealthWatch AI Backend (FastAPI)
|
| 4 |
Pipeline:
|
| 5 |
- receive images
|
| 6 |
-
- run VLM (remote gradio /chat_fn) -> JSON feature vector + raw text
|
| 7 |
- run LLM (remote gradio /chat) -> structured risk JSON (per requested schema)
|
| 8 |
- continue rest of processing and store results
|
| 9 |
|
| 10 |
Notes:
|
| 11 |
- Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
|
| 12 |
- If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
|
| 13 |
-
- This variant:
|
| 14 |
* logs raw VLM responses,
|
| 15 |
* always returns raw VLM output in API responses,
|
| 16 |
* extracts JSON from VLM via regex when possible, and
|
| 17 |
-
* sends
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
import io
|
|
@@ -29,9 +30,8 @@ import time
|
|
| 29 |
from typing import Dict, Any, Optional, Tuple
|
| 30 |
from datetime import datetime
|
| 31 |
|
| 32 |
-
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
|
| 33 |
from fastapi.middleware.cors import CORSMiddleware
|
| 34 |
-
from pydantic import BaseModel, HttpUrl
|
| 35 |
from PIL import Image
|
| 36 |
import numpy as np
|
| 37 |
import cv2 # opencv-python-headless expected installed
|
|
@@ -54,7 +54,7 @@ HF_TOKEN = os.getenv("HF_TOKEN", None)
|
|
| 54 |
|
| 55 |
# Default VLM prompt
|
| 56 |
DEFAULT_VLM_PROMPT = (
|
| 57 |
-
"From the provided face
|
| 58 |
"(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
|
| 59 |
"and output a clean JSON feature vector only with values ranging as probabilities."
|
| 60 |
)
|
|
@@ -246,7 +246,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
|
|
| 246 |
return out
|
| 247 |
|
| 248 |
# -----------------------
|
| 249 |
-
# Gradio / VLM helper (
|
| 250 |
# -----------------------
|
| 251 |
def get_gradio_client_for_space(space: str) -> Client:
|
| 252 |
if not GRADIO_AVAILABLE:
|
|
@@ -255,59 +255,25 @@ def get_gradio_client_for_space(space: str) -> Client:
|
|
| 255 |
return Client(space, hf_token=HF_TOKEN)
|
| 256 |
return Client(space)
|
| 257 |
|
| 258 |
-
def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None
|
| 259 |
-
raise_on_file_delivery_failure: bool = False
|
| 260 |
-
) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
|
| 261 |
"""
|
| 262 |
-
Synchronous call to remote VLM (gradio /chat_fn).
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
- vlm_files_seen (int or None)
|
| 267 |
-
- vlm_raw_len (int)
|
| 268 |
-
- vlm_out_object (short repr)
|
| 269 |
"""
|
| 270 |
prompt = prompt or DEFAULT_VLM_PROMPT
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
if not os.path.exists(face_path):
|
| 274 |
-
raise FileNotFoundError(f"Face image not found at: {face_path}")
|
| 275 |
-
if not os.path.exists(eye_path):
|
| 276 |
-
raise FileNotFoundError(f"Eye image not found at: {eye_path}")
|
| 277 |
-
|
| 278 |
-
face_size = os.path.getsize(face_path)
|
| 279 |
-
eye_size = os.path.getsize(eye_path)
|
| 280 |
-
logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
|
| 281 |
-
|
| 282 |
-
if face_size == 0 or eye_size == 0:
|
| 283 |
-
raise ValueError("One or both images are empty (0 bytes)")
|
| 284 |
-
|
| 285 |
if not GRADIO_AVAILABLE:
|
| 286 |
raise RuntimeError("gradio_client not available in this environment.")
|
| 287 |
|
| 288 |
client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
|
| 289 |
-
|
| 290 |
-
# Verify files can be opened as images
|
| 291 |
-
try:
|
| 292 |
-
Image.open(face_path).verify()
|
| 293 |
-
Image.open(eye_path).verify()
|
| 294 |
-
logger.info("Both images verified as valid")
|
| 295 |
-
except Exception as e:
|
| 296 |
-
raise ValueError(f"Invalid image file(s): {e}")
|
| 297 |
-
|
| 298 |
message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
|
| 299 |
-
|
| 300 |
-
logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
|
| 301 |
-
client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
|
| 302 |
-
# NOTE: only send face image to the Space
|
| 303 |
-
|
| 304 |
-
message = {"text": prompt, "files": [handle_file(face_path)]}
|
| 305 |
-
|
| 306 |
-
meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
|
| 307 |
|
| 308 |
# SINGLE CALL (no retries)
|
| 309 |
try:
|
| 310 |
-
logger.info("Calling VLM Space %s
|
| 311 |
result = client.predict(message=message, history=[], api_name="/chat_fn")
|
| 312 |
except Exception as e:
|
| 313 |
logger.exception("VLM call failed (no retries)")
|
|
@@ -315,9 +281,9 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
|
|
| 315 |
|
| 316 |
# Normalize result
|
| 317 |
raw_text = ""
|
| 318 |
-
out = None
|
| 319 |
if not result:
|
| 320 |
logger.warning("VLM returned empty result object")
|
|
|
|
| 321 |
else:
|
| 322 |
if isinstance(result, (list, tuple)):
|
| 323 |
out = result[0]
|
|
@@ -327,42 +293,12 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
|
|
| 327 |
out = {"text": str(result)}
|
| 328 |
|
| 329 |
text_out = out.get("text") or out.get("output") or ""
|
| 330 |
-
raw_text = text_out
|
| 331 |
-
|
| 332 |
-
try:
|
| 333 |
-
meta["vlm_out_object"] = str(out)[:2000]
|
| 334 |
-
except Exception:
|
| 335 |
-
meta["vlm_out_object"] = "<unreprable>"
|
| 336 |
-
|
| 337 |
-
logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
|
| 338 |
-
|
| 339 |
-
# --- Check whether the remote acknowledged receiving files (expect 1) ---
|
| 340 |
-
files_seen = None
|
| 341 |
-
try:
|
| 342 |
-
if isinstance(out, dict):
|
| 343 |
-
for key in ("files", "output_files", "files_sent", "uploaded_files", "received_files"):
|
| 344 |
-
if key in out and isinstance(out[key], (list, tuple)):
|
| 345 |
-
files_seen = len(out[key])
|
| 346 |
-
break
|
| 347 |
-
|
| 348 |
-
if files_seen is None and raw_text:
|
| 349 |
-
ext_matches = re.findall(r"\.(?:jpg|jpeg|png|bmp|gif)\b", raw_text, flags=re.IGNORECASE)
|
| 350 |
-
if ext_matches:
|
| 351 |
-
files_seen = len(ext_matches)
|
| 352 |
-
else:
|
| 353 |
-
matches = re.findall(r"\b(?:uploaded|received|file)\b", raw_text, flags=re.IGNORECASE)
|
| 354 |
-
if matches:
|
| 355 |
-
files_seen = max(1, len(matches))
|
| 356 |
-
|
| 357 |
-
meta["vlm_files_seen"] = files_seen
|
| 358 |
-
meta["vlm_file_delivery_ok"] = (files_seen is not None and files_seen >= 1)
|
| 359 |
-
except Exception:
|
| 360 |
-
meta["vlm_files_seen"] = None
|
| 361 |
-
meta["vlm_file_delivery_ok"] = False
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
|
| 367 |
# Log raw VLM output for debugging/auditing
|
| 368 |
logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
|
|
@@ -390,8 +326,8 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
|
|
| 390 |
else:
|
| 391 |
logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
|
| 392 |
|
| 393 |
-
# Always return
|
| 394 |
-
return parsed_features, (raw_text or "")
|
| 395 |
|
| 396 |
# -----------------------
|
| 397 |
# Gradio / LLM helper (defensive, with retry + clamps)
|
|
@@ -624,7 +560,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
|
|
| 624 |
is_valid = eye_openness_score >= 0.3
|
| 625 |
return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
|
| 626 |
"message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
|
| 627 |
-
"message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास
|
| 628 |
"eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
|
| 629 |
except Exception:
|
| 630 |
traceback.print_exc()
|
|
@@ -648,7 +584,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
|
|
| 648 |
is_valid = eye_openness_score >= 0.3
|
| 649 |
return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
|
| 650 |
"message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
|
| 651 |
-
"message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास
|
| 652 |
"eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
|
| 653 |
|
| 654 |
if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
|
|
@@ -675,7 +611,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
|
|
| 675 |
left_eye = {"x": cx, "y": cy}
|
| 676 |
return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
|
| 677 |
"message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
|
| 678 |
-
"message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास
|
| 679 |
"eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
|
| 680 |
except Exception:
|
| 681 |
traceback.print_exc()
|
|
@@ -774,7 +710,6 @@ async def get_vitals_from_upload(
|
|
| 774 |
"""
|
| 775 |
Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
|
| 776 |
{ vlm_parsed_features, vlm_raw_output, llm_structured_risk }
|
| 777 |
-
Note: VLM will receive only the face image (not the eye image).
|
| 778 |
"""
|
| 779 |
if not GRADIO_AVAILABLE:
|
| 780 |
raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
|
|
@@ -797,13 +732,12 @@ async def get_vitals_from_upload(
|
|
| 797 |
raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
|
| 798 |
|
| 799 |
try:
|
| 800 |
-
# Run VLM (off the event loop)
|
| 801 |
-
vlm_features, vlm_raw
|
| 802 |
|
| 803 |
-
# Log VLM outputs
|
| 804 |
logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 805 |
logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
| 806 |
-
logger.info("get_vitals_from_upload - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
|
| 807 |
|
| 808 |
# Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
|
| 809 |
if vlm_features:
|
|
@@ -816,11 +750,10 @@ async def get_vitals_from_upload(
|
|
| 816 |
# Run LLM (off the event loop)
|
| 817 |
structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
|
| 818 |
|
| 819 |
-
# Return merged result (includes raw VLM output
|
| 820 |
return {
|
| 821 |
"vlm_raw_output": vlm_raw,
|
| 822 |
"vlm_parsed_features": vlm_features,
|
| 823 |
-
"vlm_meta": vlm_meta,
|
| 824 |
"llm_structured_risk": structured_risk
|
| 825 |
}
|
| 826 |
except Exception as e:
|
|
@@ -832,7 +765,6 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 832 |
"""
|
| 833 |
Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
|
| 834 |
Useful for re-processing or debugging.
|
| 835 |
-
Note: VLM will receive only the face image (not the eye image).
|
| 836 |
"""
|
| 837 |
if screening_id not in screenings_db:
|
| 838 |
raise HTTPException(status_code=404, detail="Screening not found")
|
|
@@ -844,12 +776,11 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 844 |
raise HTTPException(status_code=400, detail="Stored images missing for this screening")
|
| 845 |
|
| 846 |
try:
|
| 847 |
-
# Run VLM off the event loop
|
| 848 |
-
vlm_features, vlm_raw
|
| 849 |
|
| 850 |
logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 851 |
logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
| 852 |
-
logger.info("get_vitals_for_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
|
| 853 |
|
| 854 |
if vlm_features:
|
| 855 |
llm_input = json.dumps(vlm_features, ensure_ascii=False)
|
|
@@ -865,7 +796,6 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 865 |
entry["ai_results"].update({
|
| 866 |
"vlm_parsed_features": vlm_features,
|
| 867 |
"vlm_raw": vlm_raw,
|
| 868 |
-
"vlm_meta": vlm_meta,
|
| 869 |
"structured_risk": structured_risk,
|
| 870 |
"last_vitals_run": datetime.utcnow().isoformat() + "Z"
|
| 871 |
})
|
|
@@ -874,113 +804,12 @@ async def get_vitals_for_screening(screening_id: str):
|
|
| 874 |
"screening_id": screening_id,
|
| 875 |
"vlm_raw_output": vlm_raw,
|
| 876 |
"vlm_parsed_features": vlm_features,
|
| 877 |
-
"vlm_meta": vlm_meta,
|
| 878 |
"llm_structured_risk": structured_risk
|
| 879 |
}
|
| 880 |
except Exception as e:
|
| 881 |
logger.exception("get_vitals_for_screening pipeline failed")
|
| 882 |
raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
|
| 883 |
|
| 884 |
-
# -----------------------
|
| 885 |
-
# URL-based vitals endpoint (optional)
|
| 886 |
-
# -----------------------
|
| 887 |
-
class ImageUrls(BaseModel):
|
| 888 |
-
face_image_url: HttpUrl
|
| 889 |
-
eye_image_url: HttpUrl
|
| 890 |
-
|
| 891 |
-
import httpx # make sure to add httpx to requirements
|
| 892 |
-
|
| 893 |
-
# helper: download URL to file with safety checks
|
| 894 |
-
async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
|
| 895 |
-
"""
|
| 896 |
-
Download an image from `url` and save to dest_path.
|
| 897 |
-
Guards:
|
| 898 |
-
- timeout
|
| 899 |
-
- max bytes
|
| 900 |
-
- basic content-type check (image/*)
|
| 901 |
-
Raises HTTPException on failure.
|
| 902 |
-
"""
|
| 903 |
-
try:
|
| 904 |
-
async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
|
| 905 |
-
resp = await client.get(url, timeout=timeout_seconds)
|
| 906 |
-
resp.raise_for_status()
|
| 907 |
-
|
| 908 |
-
content_type = resp.headers.get("Content-Type", "")
|
| 909 |
-
if not content_type.startswith("image/"):
|
| 910 |
-
raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")
|
| 911 |
-
|
| 912 |
-
total = 0
|
| 913 |
-
with open(dest_path, "wb") as f:
|
| 914 |
-
async for chunk in resp.aiter_bytes():
|
| 915 |
-
if not chunk:
|
| 916 |
-
continue
|
| 917 |
-
total += len(chunk)
|
| 918 |
-
if total > max_bytes:
|
| 919 |
-
raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
|
| 920 |
-
f.write(chunk)
|
| 921 |
-
except httpx.HTTPStatusError as e:
|
| 922 |
-
raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
|
| 923 |
-
except Exception as e:
|
| 924 |
-
raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
|
| 925 |
-
|
| 926 |
-
@app.post("/api/v1/get-vitals-by-url")
|
| 927 |
-
async def get_vitals_from_urls(payload: ImageUrls = Body(...)):
|
| 928 |
-
"""
|
| 929 |
-
Download face and eye images from given URLs, then run the same VLM -> LLM pipeline and return results.
|
| 930 |
-
Note: VLM will receive only the face image (not the eye image).
|
| 931 |
-
Body: { "face_image_url": "...", "eye_image_url": "..." }
|
| 932 |
-
"""
|
| 933 |
-
if not GRADIO_AVAILABLE:
|
| 934 |
-
raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
|
| 935 |
-
|
| 936 |
-
# prepare tmp paths
|
| 937 |
-
try:
|
| 938 |
-
tmp_dir = "/tmp/elderly_healthwatch"
|
| 939 |
-
os.makedirs(tmp_dir, exist_ok=True)
|
| 940 |
-
uid = str(uuid.uuid4())
|
| 941 |
-
face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
|
| 942 |
-
eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
|
| 943 |
-
except Exception as e:
|
| 944 |
-
logger.exception("Failed to prepare temp paths")
|
| 945 |
-
raise HTTPException(status_code=500, detail=f"Server error preparing temp files: {e}")
|
| 946 |
-
|
| 947 |
-
# download images (with guards)
|
| 948 |
-
try:
|
| 949 |
-
await download_image_to_path(str(payload.face_image_url), face_path)
|
| 950 |
-
await download_image_to_path(str(payload.eye_image_url), eye_path)
|
| 951 |
-
except HTTPException:
|
| 952 |
-
raise
|
| 953 |
-
except Exception as e:
|
| 954 |
-
logger.exception("Downloading images failed")
|
| 955 |
-
raise HTTPException(status_code=400, detail=f"Failed to download images: {e}")
|
| 956 |
-
|
| 957 |
-
# run existing pipeline (off the event loop)
|
| 958 |
-
try:
|
| 959 |
-
vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
|
| 960 |
-
|
| 961 |
-
logger.info("get_vitals_from_urls - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 962 |
-
logger.info("get_vitals_from_urls - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
| 963 |
-
logger.info("get_vitals_from_urls - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
|
| 964 |
-
|
| 965 |
-
if vlm_features:
|
| 966 |
-
llm_input = json.dumps(vlm_features, ensure_ascii=False)
|
| 967 |
-
logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
|
| 968 |
-
else:
|
| 969 |
-
llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
|
| 970 |
-
logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
|
| 971 |
-
|
| 972 |
-
structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
|
| 973 |
-
|
| 974 |
-
return {
|
| 975 |
-
"vlm_raw_output": vlm_raw,
|
| 976 |
-
"vlm_parsed_features": vlm_features,
|
| 977 |
-
"vlm_meta": vlm_meta,
|
| 978 |
-
"llm_structured_risk": structured_risk
|
| 979 |
-
}
|
| 980 |
-
except Exception as e:
|
| 981 |
-
logger.exception("get_vitals_by_url pipeline failed")
|
| 982 |
-
raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
|
| 983 |
-
|
| 984 |
# -----------------------
|
| 985 |
# Main background pipeline (upload -> process_screening)
|
| 986 |
# -----------------------
|
|
@@ -989,7 +818,7 @@ async def process_screening(screening_id: str):
|
|
| 989 |
Main pipeline:
|
| 990 |
- load images
|
| 991 |
- quick detector-based quality metrics
|
| 992 |
-
- run VLM -> vlm_features (dict or None) + vlm_raw (string)
|
| 993 |
- run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
|
| 994 |
- merge results into ai_results and finish
|
| 995 |
"""
|
|
@@ -1072,18 +901,16 @@ async def process_screening(screening_id: str):
|
|
| 1072 |
screenings_db[screening_id]["quality_metrics"] = quality_metrics
|
| 1073 |
|
| 1074 |
# --------------------------
|
| 1075 |
-
# RUN VLM -> get vlm_features + vlm_raw
|
| 1076 |
# --------------------------
|
| 1077 |
vlm_features = None
|
| 1078 |
vlm_raw = None
|
| 1079 |
-
vlm_meta = {}
|
| 1080 |
try:
|
| 1081 |
-
vlm_features, vlm_raw
|
| 1082 |
screenings_db[screening_id].setdefault("ai_results", {})
|
| 1083 |
screenings_db[screening_id]["ai_results"].update({
|
| 1084 |
"vlm_parsed_features": vlm_features,
|
| 1085 |
-
"vlm_raw": vlm_raw
|
| 1086 |
-
"vlm_meta": vlm_meta
|
| 1087 |
})
|
| 1088 |
except Exception as e:
|
| 1089 |
logger.exception("VLM feature extraction failed")
|
|
@@ -1091,12 +918,10 @@ async def process_screening(screening_id: str):
|
|
| 1091 |
screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
|
| 1092 |
vlm_features = None
|
| 1093 |
vlm_raw = ""
|
| 1094 |
-
vlm_meta = {"error": str(e)}
|
| 1095 |
|
| 1096 |
# Log VLM outputs in pipeline context
|
| 1097 |
logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 1098 |
logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
| 1099 |
-
logger.info("process_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
|
| 1100 |
|
| 1101 |
# --------------------------
|
| 1102 |
# RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
|
|
@@ -1175,4 +1000,4 @@ async def process_screening(screening_id: str):
|
|
| 1175 |
# -----------------------
|
| 1176 |
if __name__ == "__main__":
|
| 1177 |
import uvicorn
|
| 1178 |
-
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
|
|
|
|
| 3 |
Elderly HealthWatch AI Backend (FastAPI)
|
| 4 |
Pipeline:
|
| 5 |
- receive images
|
| 6 |
+
- run VLM (remote gradio /chat_fn) -> JSON feature vector + raw text
|
| 7 |
- run LLM (remote gradio /chat) -> structured risk JSON (per requested schema)
|
| 8 |
- continue rest of processing and store results
|
| 9 |
|
| 10 |
Notes:
|
| 11 |
- Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
|
| 12 |
- If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
|
| 13 |
+
- This final variant:
|
| 14 |
* logs raw VLM responses,
|
| 15 |
* always returns raw VLM output in API responses,
|
| 16 |
* extracts JSON from VLM via regex when possible, and
|
| 17 |
+
* sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
|
| 18 |
+
- VLM calls were simplified to a single call (no retries).
|
| 19 |
"""
|
| 20 |
|
| 21 |
import io
|
|
|
|
| 30 |
from typing import Dict, Any, Optional, Tuple
|
| 31 |
from datetime import datetime
|
| 32 |
|
| 33 |
+
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
|
| 34 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 35 |
from PIL import Image
|
| 36 |
import numpy as np
|
| 37 |
import cv2 # opencv-python-headless expected installed
|
|
|
|
| 54 |
|
| 55 |
# Default VLM prompt
|
| 56 |
DEFAULT_VLM_PROMPT = (
|
| 57 |
+
"From the provided face/eye images, compute the required screening features "
|
| 58 |
"(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
|
| 59 |
"and output a clean JSON feature vector only with values ranging as probabilities."
|
| 60 |
)
|
|
|
|
| 246 |
return out
|
| 247 |
|
| 248 |
# -----------------------
|
| 249 |
+
# Gradio / VLM helper (single-call, no retries)
|
| 250 |
# -----------------------
|
| 251 |
def get_gradio_client_for_space(space: str) -> Client:
|
| 252 |
if not GRADIO_AVAILABLE:
|
|
|
|
| 255 |
return Client(space, hf_token=HF_TOKEN)
|
| 256 |
return Client(space)
|
| 257 |
|
| 258 |
+
def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tuple[Optional[Dict[str, Any]], str]:
|
|
|
|
|
|
|
| 259 |
"""
|
| 260 |
+
Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
|
| 261 |
+
(parsed_features_dict_or_None, raw_text_response_str)
|
| 262 |
+
|
| 263 |
+
Simplified: single call (no retries). Attempts json.loads then regex extraction.
|
|
|
|
|
|
|
|
|
|
| 264 |
"""
|
| 265 |
prompt = prompt or DEFAULT_VLM_PROMPT
|
| 266 |
+
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
| 267 |
+
raise FileNotFoundError("Face or eye image path missing for VLM call.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
if not GRADIO_AVAILABLE:
|
| 269 |
raise RuntimeError("gradio_client not available in this environment.")
|
| 270 |
|
| 271 |
client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
| 274 |
# SINGLE CALL (no retries)
|
| 275 |
try:
|
| 276 |
+
logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
|
| 277 |
result = client.predict(message=message, history=[], api_name="/chat_fn")
|
| 278 |
except Exception as e:
|
| 279 |
logger.exception("VLM call failed (no retries)")
|
|
|
|
| 281 |
|
| 282 |
# Normalize result
|
| 283 |
raw_text = ""
|
|
|
|
| 284 |
if not result:
|
| 285 |
logger.warning("VLM returned empty result object")
|
| 286 |
+
raw_text = ""
|
| 287 |
else:
|
| 288 |
if isinstance(result, (list, tuple)):
|
| 289 |
out = result[0]
|
|
|
|
| 293 |
out = {"text": str(result)}
|
| 294 |
|
| 295 |
text_out = out.get("text") or out.get("output") or ""
|
| 296 |
+
raw_text = text_out
|
| 297 |
+
logger.info("VLM response object (debug): %s", out)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
+
# If files present but text empty, log it explicitly
|
| 300 |
+
if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
|
| 301 |
+
logger.warning("VLM returned no text AND files: %s", out.get("files"))
|
| 302 |
|
| 303 |
# Log raw VLM output for debugging/auditing
|
| 304 |
logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
|
|
|
|
| 326 |
else:
|
| 327 |
logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
|
| 328 |
|
| 329 |
+
# Always return parsed_features (or None) first, then raw_text (possibly an empty string)
|
| 330 |
+
return parsed_features, (raw_text or "")
|
| 331 |
|
| 332 |
# -----------------------
|
| 333 |
# Gradio / LLM helper (defensive, with retry + clamps)
|
|
|
|
| 560 |
is_valid = eye_openness_score >= 0.3
|
| 561 |
return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
|
| 562 |
"message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
|
| 563 |
+
"message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
|
| 564 |
"eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
|
| 565 |
except Exception:
|
| 566 |
traceback.print_exc()
|
|
|
|
| 584 |
is_valid = eye_openness_score >= 0.3
|
| 585 |
return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
|
| 586 |
"message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
|
| 587 |
+
"message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
|
| 588 |
"eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
|
| 589 |
|
| 590 |
if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
|
|
|
|
| 611 |
left_eye = {"x": cx, "y": cy}
|
| 612 |
return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
|
| 613 |
"message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
|
| 614 |
+
"message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
|
| 615 |
"eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
|
| 616 |
except Exception:
|
| 617 |
traceback.print_exc()
|
|
|
|
| 710 |
"""
|
| 711 |
Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
|
| 712 |
{ vlm_parsed_features, vlm_raw_output, llm_structured_risk }
|
|
|
|
| 713 |
"""
|
| 714 |
if not GRADIO_AVAILABLE:
|
| 715 |
raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
|
|
|
|
| 732 |
raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
|
| 733 |
|
| 734 |
try:
|
| 735 |
+
# Run VLM (off the event loop)
|
| 736 |
+
vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
|
| 737 |
|
| 738 |
+
# Log VLM outputs again for endpoint-level context (run_vlm_and_get_features already logs them)
|
| 739 |
logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 740 |
logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
|
|
|
| 741 |
|
| 742 |
# Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
|
| 743 |
if vlm_features:
|
|
|
|
| 750 |
# Run LLM (off the event loop)
|
| 751 |
structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
|
| 752 |
|
| 753 |
+
# Return merged result (includes raw VLM output for debugging)
|
| 754 |
return {
|
| 755 |
"vlm_raw_output": vlm_raw,
|
| 756 |
"vlm_parsed_features": vlm_features,
|
|
|
|
| 757 |
"llm_structured_risk": structured_risk
|
| 758 |
}
|
| 759 |
except Exception as e:
|
|
|
|
| 765 |
"""
|
| 766 |
Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
|
| 767 |
Useful for re-processing or debugging.
|
|
|
|
| 768 |
"""
|
| 769 |
if screening_id not in screenings_db:
|
| 770 |
raise HTTPException(status_code=404, detail="Screening not found")
|
|
|
|
| 776 |
raise HTTPException(status_code=400, detail="Stored images missing for this screening")
|
| 777 |
|
| 778 |
try:
|
| 779 |
+
# Run VLM off the event loop
|
| 780 |
+
vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
|
| 781 |
|
| 782 |
logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 783 |
logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
|
|
|
| 784 |
|
| 785 |
if vlm_features:
|
| 786 |
llm_input = json.dumps(vlm_features, ensure_ascii=False)
|
|
|
|
| 796 |
entry["ai_results"].update({
|
| 797 |
"vlm_parsed_features": vlm_features,
|
| 798 |
"vlm_raw": vlm_raw,
|
|
|
|
| 799 |
"structured_risk": structured_risk,
|
| 800 |
"last_vitals_run": datetime.utcnow().isoformat() + "Z"
|
| 801 |
})
|
|
|
|
| 804 |
"screening_id": screening_id,
|
| 805 |
"vlm_raw_output": vlm_raw,
|
| 806 |
"vlm_parsed_features": vlm_features,
|
|
|
|
| 807 |
"llm_structured_risk": structured_risk
|
| 808 |
}
|
| 809 |
except Exception as e:
|
| 810 |
logger.exception("get_vitals_for_screening pipeline failed")
|
| 811 |
raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
|
| 812 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
# -----------------------
|
| 814 |
# Main background pipeline (upload -> process_screening)
|
| 815 |
# -----------------------
|
|
|
|
| 818 |
Main pipeline:
|
| 819 |
- load images
|
| 820 |
- quick detector-based quality metrics
|
| 821 |
+
- run VLM -> vlm_features (dict or None) + vlm_raw (string)
|
| 822 |
- run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
|
| 823 |
- merge results into ai_results and finish
|
| 824 |
"""
|
|
|
|
| 901 |
screenings_db[screening_id]["quality_metrics"] = quality_metrics
|
| 902 |
|
| 903 |
# --------------------------
|
| 904 |
+
# RUN VLM -> get vlm_features + vlm_raw
|
| 905 |
# --------------------------
|
| 906 |
vlm_features = None
|
| 907 |
vlm_raw = None
|
|
|
|
| 908 |
try:
|
| 909 |
+
vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
|
| 910 |
screenings_db[screening_id].setdefault("ai_results", {})
|
| 911 |
screenings_db[screening_id]["ai_results"].update({
|
| 912 |
"vlm_parsed_features": vlm_features,
|
| 913 |
+
"vlm_raw": vlm_raw
|
|
|
|
| 914 |
})
|
| 915 |
except Exception as e:
|
| 916 |
logger.exception("VLM feature extraction failed")
|
|
|
|
| 918 |
screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
|
| 919 |
vlm_features = None
|
| 920 |
vlm_raw = ""
|
|
|
|
| 921 |
|
| 922 |
# Log VLM outputs in pipeline context
|
| 923 |
logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
|
| 924 |
logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
|
|
|
|
| 925 |
|
| 926 |
# --------------------------
|
| 927 |
# RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
|
|
|
|
| 1000 |
# -----------------------
|
| 1001 |
if __name__ == "__main__":
|
| 1002 |
import uvicorn
|
| 1003 |
+
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
|