Spaces:

dpv007
/

cloud

Sleeping

App Files Community

dpv007 committed on Dec 12, 2025

Commit

3bfde28

verified ·

1 Parent(s): 7088aee

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -212

app.py CHANGED Viewed

@@ -3,18 +3,19 @@
 Elderly HealthWatch AI Backend (FastAPI)
 Pipeline:
  - receive images
- - run VLM (remote gradio / chat_fn) -> JSON feature vector + raw text + meta
  - run LLM (remote gradio /chat) -> structured risk JSON (per requested schema)
  - continue rest of processing and store results
 Notes:
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
- - This variant:
     * logs raw VLM responses,
     * always returns raw VLM output in API responses,
     * extracts JSON from VLM via regex when possible, and
-    * sends only the face image to the VLM (not the eye image).
 """
 import io
@@ -29,9 +30,8 @@ import time
 from typing import Dict, Any, Optional, Tuple
 from datetime import datetime
-from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Body
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, HttpUrl
 from PIL import Image
 import numpy as np
 import cv2  # opencv-python-headless expected installed
@@ -54,7 +54,7 @@ HF_TOKEN = os.getenv("HF_TOKEN", None)
 # Default VLM prompt
 DEFAULT_VLM_PROMPT = (
-    "From the provided face image, compute the required screening features "
     "(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
     "and output a clean JSON feature vector only with values ranging as probabilities."
 )
@@ -246,7 +246,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
     return out
 # -----------------------
-# Gradio / VLM helper (sends only face image, returns meta)
 # -----------------------
 def get_gradio_client_for_space(space: str) -> Client:
     if not GRADIO_AVAILABLE:
@@ -255,59 +255,25 @@ def get_gradio_client_for_space(space: str) -> Client:
         return Client(space, hf_token=HF_TOKEN)
     return Client(space)
-def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
-                             raise_on_file_delivery_failure: bool = False
-                             ) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
     """
-    Synchronous call to remote VLM (gradio /chat_fn). Sends ONLY the face image file.
-    Returns tuple: (parsed_features_dict_or_None, raw_text_response_str, meta)
-    meta includes:
-      - vlm_file_delivery_ok (bool)  # expects ≥1 file acknowledged (face)
-      - vlm_files_seen (int or None)
-      - vlm_raw_len (int)
-      - vlm_out_object (short repr)
     """
     prompt = prompt or DEFAULT_VLM_PROMPT
-    if not os.path.exists(face_path):
-        raise FileNotFoundError(f"Face image not found at: {face_path}")
-    if not os.path.exists(eye_path):
-        raise FileNotFoundError(f"Eye image not found at: {eye_path}")
-    face_size = os.path.getsize(face_path)
-    eye_size = os.path.getsize(eye_path)
-    logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
-    if face_size == 0 or eye_size == 0:
-        raise ValueError("One or both images are empty (0 bytes)")
     if not GRADIO_AVAILABLE:
         raise RuntimeError("gradio_client not available in this environment.")
     client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
-    # Verify files can be opened as images
-    try:
-        Image.open(face_path).verify()
-        Image.open(eye_path).verify()
-        logger.info("Both images verified as valid")
-    except Exception as e:
-        raise ValueError(f"Invalid image file(s): {e}")
     message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
-    logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
-    client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
-    # NOTE: only send face image to the Space
-    message = {"text": prompt, "files": [handle_file(face_path)]}
-    meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
     # SINGLE CALL (no retries)
     try:
-        logger.info("Calling VLM Space %s with 1 file (face only)", GRADIO_VLM_SPACE)
         result = client.predict(message=message, history=[], api_name="/chat_fn")
     except Exception as e:
         logger.exception("VLM call failed (no retries)")
@@ -315,9 +281,9 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
     # Normalize result
     raw_text = ""
-    out = None
     if not result:
         logger.warning("VLM returned empty result object")
     else:
         if isinstance(result, (list, tuple)):
             out = result[0]
@@ -327,42 +293,12 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
             out = {"text": str(result)}
         text_out = out.get("text") or out.get("output") or ""
-        raw_text = text_out or ""
-        meta["vlm_raw_len"] = len(raw_text or "")
-        try:
-            meta["vlm_out_object"] = str(out)[:2000]
-        except Exception:
-            meta["vlm_out_object"] = "<unreprable>"
-        logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
-    # --- Check whether the remote acknowledged receiving files (expect 1) ---
-    files_seen = None
-    try:
-        if isinstance(out, dict):
-            for key in ("files", "output_files", "files_sent", "uploaded_files", "received_files"):
-                if key in out and isinstance(out[key], (list, tuple)):
-                    files_seen = len(out[key])
-                    break
-        if files_seen is None and raw_text:
-            ext_matches = re.findall(r"\.(?:jpg|jpeg|png|bmp|gif)\b", raw_text, flags=re.IGNORECASE)
-            if ext_matches:
-                files_seen = len(ext_matches)
-            else:
-                matches = re.findall(r"\b(?:uploaded|received|file)\b", raw_text, flags=re.IGNORECASE)
-                if matches:
-                    files_seen = max(1, len(matches))
-        meta["vlm_files_seen"] = files_seen
-        meta["vlm_file_delivery_ok"] = (files_seen is not None and files_seen >= 1)
-    except Exception:
-        meta["vlm_files_seen"] = None
-        meta["vlm_file_delivery_ok"] = False
-    if raise_on_file_delivery_failure and not meta["vlm_file_delivery_ok"]:
-        logger.error("VLM did not acknowledge receiving the face file. meta=%s", meta)
-        raise RuntimeError("VLM Space did not acknowledge receiving the face image")
     # Log raw VLM output for debugging/auditing
     logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
@@ -390,8 +326,8 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
     else:
         logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
-    # Always return parsed_features (or None), raw_text (string), and meta dict
-    return parsed_features, (raw_text or ""), meta
 # -----------------------
 # Gradio / LLM helper (defensive, with retry + clamps)
@@ -624,7 +560,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
                 is_valid = eye_openness_score >= 0.3
                 return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                         "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
-                        "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
                         "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
             except Exception:
                 traceback.print_exc()
@@ -648,7 +584,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
             is_valid = eye_openness_score >= 0.3
             return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                     "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
-                    "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
                     "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
         if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
@@ -675,7 +611,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
                     left_eye = {"x": cx, "y": cy}
                 return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                         "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
-                        "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
                         "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
             except Exception:
                 traceback.print_exc()
@@ -774,7 +710,6 @@ async def get_vitals_from_upload(
     """
     Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
       { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
-    Note: VLM will receive only the face image (not the eye image).
     """
     if not GRADIO_AVAILABLE:
         raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
@@ -797,13 +732,12 @@ async def get_vitals_from_upload(
         raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
     try:
-        # Run VLM (off the event loop) - returns (features, raw, meta)
-        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
-        # Log VLM outputs
         logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
         logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
-        logger.info("get_vitals_from_upload - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
         # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
         if vlm_features:
@@ -816,11 +750,10 @@ async def get_vitals_from_upload(
         # Run LLM (off the event loop)
         structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
-        # Return merged result (includes raw VLM output + meta for debugging)
         return {
             "vlm_raw_output": vlm_raw,
             "vlm_parsed_features": vlm_features,
-            "vlm_meta": vlm_meta,
             "llm_structured_risk": structured_risk
         }
     except Exception as e:
@@ -832,7 +765,6 @@ async def get_vitals_for_screening(screening_id: str):
     """
     Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
     Useful for re-processing or debugging.
-    Note: VLM will receive only the face image (not the eye image).
     """
     if screening_id not in screenings_db:
         raise HTTPException(status_code=404, detail="Screening not found")
@@ -844,12 +776,11 @@ async def get_vitals_for_screening(screening_id: str):
         raise HTTPException(status_code=400, detail="Stored images missing for this screening")
     try:
-        # Run VLM off the event loop (returns features, raw, meta)
-        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
         logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
         logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
-        logger.info("get_vitals_for_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
         if vlm_features:
             llm_input = json.dumps(vlm_features, ensure_ascii=False)
@@ -865,7 +796,6 @@ async def get_vitals_for_screening(screening_id: str):
         entry["ai_results"].update({
             "vlm_parsed_features": vlm_features,
             "vlm_raw": vlm_raw,
-            "vlm_meta": vlm_meta,
             "structured_risk": structured_risk,
             "last_vitals_run": datetime.utcnow().isoformat() + "Z"
         })
@@ -874,113 +804,12 @@ async def get_vitals_for_screening(screening_id: str):
             "screening_id": screening_id,
             "vlm_raw_output": vlm_raw,
             "vlm_parsed_features": vlm_features,
-            "vlm_meta": vlm_meta,
             "llm_structured_risk": structured_risk
         }
     except Exception as e:
         logger.exception("get_vitals_for_screening pipeline failed")
         raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
-# -----------------------
-# URL-based vitals endpoint (optional)
-# -----------------------
-class ImageUrls(BaseModel):
-    face_image_url: HttpUrl
-    eye_image_url: HttpUrl
-import httpx  # make sure to add httpx to requirements
-# helper: download URL to file with safety checks
-async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
-    """
-    Download an image from `url` and save to dest_path.
-    Guards:
-     - timeout
-     - max bytes
-     - basic content-type check (image/*)
-    Raises HTTPException on failure.
-    """
-    try:
-        async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
-            resp = await client.get(url, timeout=timeout_seconds)
-            resp.raise_for_status()
-            content_type = resp.headers.get("Content-Type", "")
-            if not content_type.startswith("image/"):
-                raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")
-            total = 0
-            with open(dest_path, "wb") as f:
-                async for chunk in resp.aiter_bytes():
-                    if not chunk:
-                        continue
-                    total += len(chunk)
-                    if total > max_bytes:
-                        raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
-                    f.write(chunk)
-    except httpx.HTTPStatusError as e:
-        raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
-    except Exception as e:
-        raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
-@app.post("/api/v1/get-vitals-by-url")
-async def get_vitals_from_urls(payload: ImageUrls = Body(...)):
-    """
-    Download face and eye images from given URLs, then run the same VLM -> LLM pipeline and return results.
-    Note: VLM will receive only the face image (not the eye image).
-    Body: { "face_image_url": "...", "eye_image_url": "..." }
-    """
-    if not GRADIO_AVAILABLE:
-        raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
-    # prepare tmp paths
-    try:
-        tmp_dir = "/tmp/elderly_healthwatch"
-        os.makedirs(tmp_dir, exist_ok=True)
-        uid = str(uuid.uuid4())
-        face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
-        eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
-    except Exception as e:
-        logger.exception("Failed to prepare temp paths")
-        raise HTTPException(status_code=500, detail=f"Server error preparing temp files: {e}")
-    # download images (with guards)
-    try:
-        await download_image_to_path(str(payload.face_image_url), face_path)
-        await download_image_to_path(str(payload.eye_image_url), eye_path)
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Downloading images failed")
-        raise HTTPException(status_code=400, detail=f"Failed to download images: {e}")
-    # run existing pipeline (off the event loop)
-    try:
-        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
-        logger.info("get_vitals_from_urls - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
-        logger.info("get_vitals_from_urls - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
-        logger.info("get_vitals_from_urls - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
-        if vlm_features:
-            llm_input = json.dumps(vlm_features, ensure_ascii=False)
-            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
-        else:
-            llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
-            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
-        structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
-        return {
-            "vlm_raw_output": vlm_raw,
-            "vlm_parsed_features": vlm_features,
-            "vlm_meta": vlm_meta,
-            "llm_structured_risk": structured_risk
-        }
-    except Exception as e:
-        logger.exception("get_vitals_by_url pipeline failed")
-        raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
 # -----------------------
 # Main background pipeline (upload -> process_screening)
 # -----------------------
@@ -989,7 +818,7 @@ async def process_screening(screening_id: str):
     Main pipeline:
      - load images
      - quick detector-based quality metrics
-     - run VLM -> vlm_features (dict or None) + vlm_raw (string) + vlm_meta
      - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
      - merge results into ai_results and finish
     """
@@ -1072,18 +901,16 @@ async def process_screening(screening_id: str):
         screenings_db[screening_id]["quality_metrics"] = quality_metrics
         # --------------------------
-        # RUN VLM -> get vlm_features + vlm_raw + vlm_meta
         # --------------------------
         vlm_features = None
         vlm_raw = None
-        vlm_meta = {}
         try:
-            vlm_features, vlm_raw, vlm_meta = run_vlm_and_get_features(face_path, eye_path)
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({
                 "vlm_parsed_features": vlm_features,
-                "vlm_raw": vlm_raw,
-                "vlm_meta": vlm_meta
             })
         except Exception as e:
             logger.exception("VLM feature extraction failed")
@@ -1091,12 +918,10 @@ async def process_screening(screening_id: str):
             screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
             vlm_features = None
             vlm_raw = ""
-            vlm_meta = {"error": str(e)}
         # Log VLM outputs in pipeline context
         logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
         logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
-        logger.info("process_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
         # --------------------------
         # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
@@ -1175,4 +1000,4 @@ async def process_screening(screening_id: str):
 # -----------------------
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)

 Elderly HealthWatch AI Backend (FastAPI)
 Pipeline:
  - receive images
+ - run VLM (remote gradio / chat_fn) -> JSON feature vector + raw text
  - run LLM (remote gradio /chat) -> structured risk JSON (per requested schema)
  - continue rest of processing and store results
 Notes:
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
+ - This final variant:
     * logs raw VLM responses,
     * always returns raw VLM output in API responses,
     * extracts JSON from VLM via regex when possible, and
+    * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
+ - VLM calls were simplified to a single call (no retries).
 """
 import io
 from typing import Dict, Any, Optional, Tuple
 from datetime import datetime
+from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from PIL import Image
 import numpy as np
 import cv2  # opencv-python-headless expected installed
 # Default VLM prompt
 DEFAULT_VLM_PROMPT = (
+    "From the provided face/eye images, compute the required screening features "
     "(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
     "and output a clean JSON feature vector only with values ranging as probabilities."
 )
     return out
 # -----------------------
+# Gradio / VLM helper (single-call, no retries)
 # -----------------------
 def get_gradio_client_for_space(space: str) -> Client:
     if not GRADIO_AVAILABLE:
         return Client(space, hf_token=HF_TOKEN)
     return Client(space)
+def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tuple[Optional[Dict[str, Any]], str]:
     """
+    Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
+       (parsed_features_dict_or_None, raw_text_response_str)
+    Simplified: single call (no retries). Attempts json.loads then regex extraction.
     """
     prompt = prompt or DEFAULT_VLM_PROMPT
+    if not os.path.exists(face_path) or not os.path.exists(eye_path):
+        raise FileNotFoundError("Face or eye image path missing for VLM call.")
     if not GRADIO_AVAILABLE:
         raise RuntimeError("gradio_client not available in this environment.")
     client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
     message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
     # SINGLE CALL (no retries)
     try:
+        logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
         result = client.predict(message=message, history=[], api_name="/chat_fn")
     except Exception as e:
         logger.exception("VLM call failed (no retries)")
     # Normalize result
     raw_text = ""
     if not result:
         logger.warning("VLM returned empty result object")
+        raw_text = ""
     else:
         if isinstance(result, (list, tuple)):
             out = result[0]
             out = {"text": str(result)}
         text_out = out.get("text") or out.get("output") or ""
+        raw_text = text_out
+        logger.info("VLM response object (debug): %s", out)
+        # If files present but text empty, log it explicitly
+        if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
+            logger.warning("VLM returned no text AND files: %s", out.get("files"))
     # Log raw VLM output for debugging/auditing
     logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
     else:
         logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
+    # Always return raw_text (may be empty string) and parsed_features (or None)
+    return parsed_features, (raw_text or "")
 # -----------------------
 # Gradio / LLM helper (defensive, with retry + clamps)
                 is_valid = eye_openness_score >= 0.3
                 return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                         "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
+                        "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
                         "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
             except Exception:
                 traceback.print_exc()
             is_valid = eye_openness_score >= 0.3
             return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                     "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
+                    "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
                     "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
         if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
                     left_eye = {"x": cx, "y": cy}
                 return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                         "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
+                        "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
                         "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
             except Exception:
                 traceback.print_exc()
     """
     Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
       { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
     """
     if not GRADIO_AVAILABLE:
         raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
         raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
     try:
+        # Run VLM (off the event loop)
+        vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
+        # Log VLM outputs (already logged inside run_vlm..., but additional context)
         logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
         logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
         # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
         if vlm_features:
         # Run LLM (off the event loop)
         structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
+        # Return merged result (includes raw VLM output for debugging)
         return {
             "vlm_raw_output": vlm_raw,
             "vlm_parsed_features": vlm_features,
             "llm_structured_risk": structured_risk
         }
     except Exception as e:
     """
     Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
     Useful for re-processing or debugging.
     """
     if screening_id not in screenings_db:
         raise HTTPException(status_code=404, detail="Screening not found")
         raise HTTPException(status_code=400, detail="Stored images missing for this screening")
     try:
+        # Run VLM off the event loop
+        vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
         logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
         logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
         if vlm_features:
             llm_input = json.dumps(vlm_features, ensure_ascii=False)
         entry["ai_results"].update({
             "vlm_parsed_features": vlm_features,
             "vlm_raw": vlm_raw,
             "structured_risk": structured_risk,
             "last_vitals_run": datetime.utcnow().isoformat() + "Z"
         })
             "screening_id": screening_id,
             "vlm_raw_output": vlm_raw,
             "vlm_parsed_features": vlm_features,
             "llm_structured_risk": structured_risk
         }
     except Exception as e:
         logger.exception("get_vitals_for_screening pipeline failed")
         raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
 # -----------------------
 # Main background pipeline (upload -> process_screening)
 # -----------------------
     Main pipeline:
      - load images
      - quick detector-based quality metrics
+     - run VLM -> vlm_features (dict or None) + vlm_raw (string)
      - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
      - merge results into ai_results and finish
     """
         screenings_db[screening_id]["quality_metrics"] = quality_metrics
         # --------------------------
+        # RUN VLM -> get vlm_features + vlm_raw
         # --------------------------
         vlm_features = None
         vlm_raw = None
         try:
+            vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({
                 "vlm_parsed_features": vlm_features,
+                "vlm_raw": vlm_raw
             })
         except Exception as e:
             logger.exception("VLM feature extraction failed")
             screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
             vlm_features = None
             vlm_raw = ""
         # Log VLM outputs in pipeline context
         logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
         logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
         # --------------------------
         # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
 # -----------------------
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)