dpv007 committed on
Commit
3bfde28
·
verified ·
1 Parent(s): 7088aee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -212
app.py CHANGED
@@ -3,18 +3,19 @@
3
  Elderly HealthWatch AI Backend (FastAPI)
4
  Pipeline:
5
  - receive images
6
- - run VLM (remote gradio / chat_fn) -> JSON feature vector + raw text + meta
7
  - run LLM (remote gradio /chat) -> structured risk JSON (per requested schema)
8
  - continue rest of processing and store results
9
 
10
  Notes:
11
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
12
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
13
- - This variant:
14
  * logs raw VLM responses,
15
  * always returns raw VLM output in API responses,
16
  * extracts JSON from VLM via regex when possible, and
17
- * sends only the face image to the VLM (not the eye image).
 
18
  """
19
 
20
  import io
@@ -29,9 +30,8 @@ import time
29
  from typing import Dict, Any, Optional, Tuple
30
  from datetime import datetime
31
 
32
- from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Body
33
  from fastapi.middleware.cors import CORSMiddleware
34
- from pydantic import BaseModel, HttpUrl
35
  from PIL import Image
36
  import numpy as np
37
  import cv2 # opencv-python-headless expected installed
@@ -54,7 +54,7 @@ HF_TOKEN = os.getenv("HF_TOKEN", None)
54
 
55
  # Default VLM prompt
56
  DEFAULT_VLM_PROMPT = (
57
- "From the provided face image, compute the required screening features "
58
  "(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
59
  "and output a clean JSON feature vector only with values ranging as probabilities."
60
  )
@@ -246,7 +246,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
246
  return out
247
 
248
  # -----------------------
249
- # Gradio / VLM helper (sends only face image, returns meta)
250
  # -----------------------
251
  def get_gradio_client_for_space(space: str) -> Client:
252
  if not GRADIO_AVAILABLE:
@@ -255,59 +255,25 @@ def get_gradio_client_for_space(space: str) -> Client:
255
  return Client(space, hf_token=HF_TOKEN)
256
  return Client(space)
257
 
258
- def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
259
- raise_on_file_delivery_failure: bool = False
260
- ) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
261
  """
262
- Synchronous call to remote VLM (gradio /chat_fn). Sends ONLY the face image file.
263
- Returns tuple: (parsed_features_dict_or_None, raw_text_response_str, meta)
264
- meta includes:
265
- - vlm_file_delivery_ok (bool) # expects ≥1 file acknowledged (face)
266
- - vlm_files_seen (int or None)
267
- - vlm_raw_len (int)
268
- - vlm_out_object (short repr)
269
  """
270
  prompt = prompt or DEFAULT_VLM_PROMPT
271
-
272
-
273
- if not os.path.exists(face_path):
274
- raise FileNotFoundError(f"Face image not found at: {face_path}")
275
- if not os.path.exists(eye_path):
276
- raise FileNotFoundError(f"Eye image not found at: {eye_path}")
277
-
278
- face_size = os.path.getsize(face_path)
279
- eye_size = os.path.getsize(eye_path)
280
- logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
281
-
282
- if face_size == 0 or eye_size == 0:
283
- raise ValueError("One or both images are empty (0 bytes)")
284
-
285
  if not GRADIO_AVAILABLE:
286
  raise RuntimeError("gradio_client not available in this environment.")
287
 
288
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
289
-
290
- # Verify files can be opened as images
291
- try:
292
- Image.open(face_path).verify()
293
- Image.open(eye_path).verify()
294
- logger.info("Both images verified as valid")
295
- except Exception as e:
296
- raise ValueError(f"Invalid image file(s): {e}")
297
-
298
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
299
-
300
- logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
301
- client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
302
- # NOTE: only send face image to the Space
303
-
304
- message = {"text": prompt, "files": [handle_file(face_path)]}
305
-
306
- meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
307
 
308
  # SINGLE CALL (no retries)
309
  try:
310
- logger.info("Calling VLM Space %s with 1 file (face only)", GRADIO_VLM_SPACE)
311
  result = client.predict(message=message, history=[], api_name="/chat_fn")
312
  except Exception as e:
313
  logger.exception("VLM call failed (no retries)")
@@ -315,9 +281,9 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
315
 
316
  # Normalize result
317
  raw_text = ""
318
- out = None
319
  if not result:
320
  logger.warning("VLM returned empty result object")
 
321
  else:
322
  if isinstance(result, (list, tuple)):
323
  out = result[0]
@@ -327,42 +293,12 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
327
  out = {"text": str(result)}
328
 
329
  text_out = out.get("text") or out.get("output") or ""
330
- raw_text = text_out or ""
331
- meta["vlm_raw_len"] = len(raw_text or "")
332
- try:
333
- meta["vlm_out_object"] = str(out)[:2000]
334
- except Exception:
335
- meta["vlm_out_object"] = "<unreprable>"
336
-
337
- logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
338
-
339
- # --- Check whether the remote acknowledged receiving files (expect 1) ---
340
- files_seen = None
341
- try:
342
- if isinstance(out, dict):
343
- for key in ("files", "output_files", "files_sent", "uploaded_files", "received_files"):
344
- if key in out and isinstance(out[key], (list, tuple)):
345
- files_seen = len(out[key])
346
- break
347
-
348
- if files_seen is None and raw_text:
349
- ext_matches = re.findall(r"\.(?:jpg|jpeg|png|bmp|gif)\b", raw_text, flags=re.IGNORECASE)
350
- if ext_matches:
351
- files_seen = len(ext_matches)
352
- else:
353
- matches = re.findall(r"\b(?:uploaded|received|file)\b", raw_text, flags=re.IGNORECASE)
354
- if matches:
355
- files_seen = max(1, len(matches))
356
-
357
- meta["vlm_files_seen"] = files_seen
358
- meta["vlm_file_delivery_ok"] = (files_seen is not None and files_seen >= 1)
359
- except Exception:
360
- meta["vlm_files_seen"] = None
361
- meta["vlm_file_delivery_ok"] = False
362
 
363
- if raise_on_file_delivery_failure and not meta["vlm_file_delivery_ok"]:
364
- logger.error("VLM did not acknowledge receiving the face file. meta=%s", meta)
365
- raise RuntimeError("VLM Space did not acknowledge receiving the face image")
366
 
367
  # Log raw VLM output for debugging/auditing
368
  logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
@@ -390,8 +326,8 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
390
  else:
391
  logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
392
 
393
- # Always return parsed_features (or None), raw_text (string), and meta dict
394
- return parsed_features, (raw_text or ""), meta
395
 
396
  # -----------------------
397
  # Gradio / LLM helper (defensive, with retry + clamps)
@@ -624,7 +560,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
624
  is_valid = eye_openness_score >= 0.3
625
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
626
  "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
627
- "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
628
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
629
  except Exception:
630
  traceback.print_exc()
@@ -648,7 +584,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
648
  is_valid = eye_openness_score >= 0.3
649
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
650
  "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
651
- "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
652
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
653
 
654
  if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
@@ -675,7 +611,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
675
  left_eye = {"x": cx, "y": cy}
676
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
677
  "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
678
- "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
679
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
680
  except Exception:
681
  traceback.print_exc()
@@ -774,7 +710,6 @@ async def get_vitals_from_upload(
774
  """
775
  Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
776
  { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
777
- Note: VLM will receive only the face image (not the eye image).
778
  """
779
  if not GRADIO_AVAILABLE:
780
  raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
@@ -797,13 +732,12 @@ async def get_vitals_from_upload(
797
  raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
798
 
799
  try:
800
- # Run VLM (off the event loop) - returns (features, raw, meta)
801
- vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
802
 
803
- # Log VLM outputs
804
  logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
805
  logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
806
- logger.info("get_vitals_from_upload - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
807
 
808
  # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
809
  if vlm_features:
@@ -816,11 +750,10 @@ async def get_vitals_from_upload(
816
  # Run LLM (off the event loop)
817
  structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
818
 
819
- # Return merged result (includes raw VLM output + meta for debugging)
820
  return {
821
  "vlm_raw_output": vlm_raw,
822
  "vlm_parsed_features": vlm_features,
823
- "vlm_meta": vlm_meta,
824
  "llm_structured_risk": structured_risk
825
  }
826
  except Exception as e:
@@ -832,7 +765,6 @@ async def get_vitals_for_screening(screening_id: str):
832
  """
833
  Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
834
  Useful for re-processing or debugging.
835
- Note: VLM will receive only the face image (not the eye image).
836
  """
837
  if screening_id not in screenings_db:
838
  raise HTTPException(status_code=404, detail="Screening not found")
@@ -844,12 +776,11 @@ async def get_vitals_for_screening(screening_id: str):
844
  raise HTTPException(status_code=400, detail="Stored images missing for this screening")
845
 
846
  try:
847
- # Run VLM off the event loop (returns features, raw, meta)
848
- vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
849
 
850
  logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
851
  logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
852
- logger.info("get_vitals_for_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
853
 
854
  if vlm_features:
855
  llm_input = json.dumps(vlm_features, ensure_ascii=False)
@@ -865,7 +796,6 @@ async def get_vitals_for_screening(screening_id: str):
865
  entry["ai_results"].update({
866
  "vlm_parsed_features": vlm_features,
867
  "vlm_raw": vlm_raw,
868
- "vlm_meta": vlm_meta,
869
  "structured_risk": structured_risk,
870
  "last_vitals_run": datetime.utcnow().isoformat() + "Z"
871
  })
@@ -874,113 +804,12 @@ async def get_vitals_for_screening(screening_id: str):
874
  "screening_id": screening_id,
875
  "vlm_raw_output": vlm_raw,
876
  "vlm_parsed_features": vlm_features,
877
- "vlm_meta": vlm_meta,
878
  "llm_structured_risk": structured_risk
879
  }
880
  except Exception as e:
881
  logger.exception("get_vitals_for_screening pipeline failed")
882
  raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
883
 
884
- # -----------------------
885
- # URL-based vitals endpoint (optional)
886
- # -----------------------
887
- class ImageUrls(BaseModel):
888
- face_image_url: HttpUrl
889
- eye_image_url: HttpUrl
890
-
891
- import httpx # make sure to add httpx to requirements
892
-
893
- # helper: download URL to file with safety checks
894
- async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
895
- """
896
- Download an image from `url` and save to dest_path.
897
- Guards:
898
- - timeout
899
- - max bytes
900
- - basic content-type check (image/*)
901
- Raises HTTPException on failure.
902
- """
903
- try:
904
- async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
905
- resp = await client.get(url, timeout=timeout_seconds)
906
- resp.raise_for_status()
907
-
908
- content_type = resp.headers.get("Content-Type", "")
909
- if not content_type.startswith("image/"):
910
- raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")
911
-
912
- total = 0
913
- with open(dest_path, "wb") as f:
914
- async for chunk in resp.aiter_bytes():
915
- if not chunk:
916
- continue
917
- total += len(chunk)
918
- if total > max_bytes:
919
- raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
920
- f.write(chunk)
921
- except httpx.HTTPStatusError as e:
922
- raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
923
- except Exception as e:
924
- raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
925
-
926
- @app.post("/api/v1/get-vitals-by-url")
927
- async def get_vitals_from_urls(payload: ImageUrls = Body(...)):
928
- """
929
- Download face and eye images from given URLs, then run the same VLM -> LLM pipeline and return results.
930
- Note: VLM will receive only the face image (not the eye image).
931
- Body: { "face_image_url": "...", "eye_image_url": "..." }
932
- """
933
- if not GRADIO_AVAILABLE:
934
- raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
935
-
936
- # prepare tmp paths
937
- try:
938
- tmp_dir = "/tmp/elderly_healthwatch"
939
- os.makedirs(tmp_dir, exist_ok=True)
940
- uid = str(uuid.uuid4())
941
- face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
942
- eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
943
- except Exception as e:
944
- logger.exception("Failed to prepare temp paths")
945
- raise HTTPException(status_code=500, detail=f"Server error preparing temp files: {e}")
946
-
947
- # download images (with guards)
948
- try:
949
- await download_image_to_path(str(payload.face_image_url), face_path)
950
- await download_image_to_path(str(payload.eye_image_url), eye_path)
951
- except HTTPException:
952
- raise
953
- except Exception as e:
954
- logger.exception("Downloading images failed")
955
- raise HTTPException(status_code=400, detail=f"Failed to download images: {e}")
956
-
957
- # run existing pipeline (off the event loop)
958
- try:
959
- vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
960
-
961
- logger.info("get_vitals_from_urls - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
962
- logger.info("get_vitals_from_urls - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
963
- logger.info("get_vitals_from_urls - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
964
-
965
- if vlm_features:
966
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
967
- logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
968
- else:
969
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
970
- logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
971
-
972
- structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
973
-
974
- return {
975
- "vlm_raw_output": vlm_raw,
976
- "vlm_parsed_features": vlm_features,
977
- "vlm_meta": vlm_meta,
978
- "llm_structured_risk": structured_risk
979
- }
980
- except Exception as e:
981
- logger.exception("get_vitals_by_url pipeline failed")
982
- raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
983
-
984
  # -----------------------
985
  # Main background pipeline (upload -> process_screening)
986
  # -----------------------
@@ -989,7 +818,7 @@ async def process_screening(screening_id: str):
989
  Main pipeline:
990
  - load images
991
  - quick detector-based quality metrics
992
- - run VLM -> vlm_features (dict or None) + vlm_raw (string) + vlm_meta
993
  - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
994
  - merge results into ai_results and finish
995
  """
@@ -1072,18 +901,16 @@ async def process_screening(screening_id: str):
1072
  screenings_db[screening_id]["quality_metrics"] = quality_metrics
1073
 
1074
  # --------------------------
1075
- # RUN VLM -> get vlm_features + vlm_raw + vlm_meta
1076
  # --------------------------
1077
  vlm_features = None
1078
  vlm_raw = None
1079
- vlm_meta = {}
1080
  try:
1081
- vlm_features, vlm_raw, vlm_meta = run_vlm_and_get_features(face_path, eye_path)
1082
  screenings_db[screening_id].setdefault("ai_results", {})
1083
  screenings_db[screening_id]["ai_results"].update({
1084
  "vlm_parsed_features": vlm_features,
1085
- "vlm_raw": vlm_raw,
1086
- "vlm_meta": vlm_meta
1087
  })
1088
  except Exception as e:
1089
  logger.exception("VLM feature extraction failed")
@@ -1091,12 +918,10 @@ async def process_screening(screening_id: str):
1091
  screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
1092
  vlm_features = None
1093
  vlm_raw = ""
1094
- vlm_meta = {"error": str(e)}
1095
 
1096
  # Log VLM outputs in pipeline context
1097
  logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
1098
  logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
1099
- logger.info("process_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
1100
 
1101
  # --------------------------
1102
  # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
@@ -1175,4 +1000,4 @@ async def process_screening(screening_id: str):
1175
  # -----------------------
1176
  if __name__ == "__main__":
1177
  import uvicorn
1178
- uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
3
  Elderly HealthWatch AI Backend (FastAPI)
4
  Pipeline:
5
  - receive images
6
+ - run VLM (remote gradio / chat_fn) -> JSON feature vector + raw text
7
  - run LLM (remote gradio /chat) -> structured risk JSON (per requested schema)
8
  - continue rest of processing and store results
9
 
10
  Notes:
11
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
12
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
13
+ - This final variant:
14
  * logs raw VLM responses,
15
  * always returns raw VLM output in API responses,
16
  * extracts JSON from VLM via regex when possible, and
17
+ * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
18
+ - VLM calls were simplified to a single call (no retries).
19
  """
20
 
21
  import io
 
30
  from typing import Dict, Any, Optional, Tuple
31
  from datetime import datetime
32
 
33
+ from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
34
  from fastapi.middleware.cors import CORSMiddleware
 
35
  from PIL import Image
36
  import numpy as np
37
  import cv2 # opencv-python-headless expected installed
 
54
 
55
  # Default VLM prompt
56
  DEFAULT_VLM_PROMPT = (
57
+ "From the provided face/eye images, compute the required screening features "
58
  "(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
59
  "and output a clean JSON feature vector only with values ranging as probabilities."
60
  )
 
246
  return out
247
 
248
  # -----------------------
249
+ # Gradio / VLM helper (single-call, no retries)
250
  # -----------------------
251
  def get_gradio_client_for_space(space: str) -> Client:
252
  if not GRADIO_AVAILABLE:
 
255
  return Client(space, hf_token=HF_TOKEN)
256
  return Client(space)
257
 
258
+ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tuple[Optional[Dict[str, Any]], str]:
 
 
259
  """
260
+ Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
261
+ (parsed_features_dict_or_None, raw_text_response_str)
262
+
263
+ Simplified: single call (no retries). Attempts json.loads then regex extraction.
 
 
 
264
  """
265
  prompt = prompt or DEFAULT_VLM_PROMPT
266
+ if not os.path.exists(face_path) or not os.path.exists(eye_path):
267
+ raise FileNotFoundError("Face or eye image path missing for VLM call.")
 
 
 
 
 
 
 
 
 
 
 
 
268
  if not GRADIO_AVAILABLE:
269
  raise RuntimeError("gradio_client not available in this environment.")
270
 
271
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
 
 
 
 
 
 
 
 
 
272
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
 
 
 
 
 
 
 
 
273
 
274
  # SINGLE CALL (no retries)
275
  try:
276
+ logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
277
  result = client.predict(message=message, history=[], api_name="/chat_fn")
278
  except Exception as e:
279
  logger.exception("VLM call failed (no retries)")
 
281
 
282
  # Normalize result
283
  raw_text = ""
 
284
  if not result:
285
  logger.warning("VLM returned empty result object")
286
+ raw_text = ""
287
  else:
288
  if isinstance(result, (list, tuple)):
289
  out = result[0]
 
293
  out = {"text": str(result)}
294
 
295
  text_out = out.get("text") or out.get("output") or ""
296
+ raw_text = text_out
297
+ logger.info("VLM response object (debug): %s", out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
+ # If files present but text empty, log it explicitly
300
+ if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
301
+ logger.warning("VLM returned no text AND files: %s", out.get("files"))
302
 
303
  # Log raw VLM output for debugging/auditing
304
  logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
 
326
  else:
327
  logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
328
 
329
+ # Always return raw_text (may be empty string) and parsed_features (or None)
330
+ return parsed_features, (raw_text or "")
331
 
332
  # -----------------------
333
  # Gradio / LLM helper (defensive, with retry + clamps)
 
560
  is_valid = eye_openness_score >= 0.3
561
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
562
  "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
563
+ "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
564
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
565
  except Exception:
566
  traceback.print_exc()
 
584
  is_valid = eye_openness_score >= 0.3
585
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
586
  "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
587
+ "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
588
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
589
 
590
  if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
 
611
  left_eye = {"x": cx, "y": cy}
612
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
613
  "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
614
+ "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
615
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
616
  except Exception:
617
  traceback.print_exc()
 
710
  """
711
  Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
712
  { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
 
713
  """
714
  if not GRADIO_AVAILABLE:
715
  raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
 
732
  raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
733
 
734
  try:
735
+ # Run VLM (off the event loop)
736
+ vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
737
 
738
+ # Log VLM outputs (already logged inside run_vlm..., but additional context)
739
  logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
740
  logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
 
741
 
742
  # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
743
  if vlm_features:
 
750
  # Run LLM (off the event loop)
751
  structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
752
 
753
+ # Return merged result (includes raw VLM output for debugging)
754
  return {
755
  "vlm_raw_output": vlm_raw,
756
  "vlm_parsed_features": vlm_features,
 
757
  "llm_structured_risk": structured_risk
758
  }
759
  except Exception as e:
 
765
  """
766
  Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
767
  Useful for re-processing or debugging.
 
768
  """
769
  if screening_id not in screenings_db:
770
  raise HTTPException(status_code=404, detail="Screening not found")
 
776
  raise HTTPException(status_code=400, detail="Stored images missing for this screening")
777
 
778
  try:
779
+ # Run VLM off the event loop
780
+ vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
781
 
782
  logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
783
  logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
 
784
 
785
  if vlm_features:
786
  llm_input = json.dumps(vlm_features, ensure_ascii=False)
 
796
  entry["ai_results"].update({
797
  "vlm_parsed_features": vlm_features,
798
  "vlm_raw": vlm_raw,
 
799
  "structured_risk": structured_risk,
800
  "last_vitals_run": datetime.utcnow().isoformat() + "Z"
801
  })
 
804
  "screening_id": screening_id,
805
  "vlm_raw_output": vlm_raw,
806
  "vlm_parsed_features": vlm_features,
 
807
  "llm_structured_risk": structured_risk
808
  }
809
  except Exception as e:
810
  logger.exception("get_vitals_for_screening pipeline failed")
811
  raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
813
  # -----------------------
814
  # Main background pipeline (upload -> process_screening)
815
  # -----------------------
 
818
  Main pipeline:
819
  - load images
820
  - quick detector-based quality metrics
821
+ - run VLM -> vlm_features (dict or None) + vlm_raw (string)
822
  - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
823
  - merge results into ai_results and finish
824
  """
 
901
  screenings_db[screening_id]["quality_metrics"] = quality_metrics
902
 
903
  # --------------------------
904
+ # RUN VLM -> get vlm_features + vlm_raw
905
  # --------------------------
906
  vlm_features = None
907
  vlm_raw = None
 
908
  try:
909
+ vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
910
  screenings_db[screening_id].setdefault("ai_results", {})
911
  screenings_db[screening_id]["ai_results"].update({
912
  "vlm_parsed_features": vlm_features,
913
+ "vlm_raw": vlm_raw
 
914
  })
915
  except Exception as e:
916
  logger.exception("VLM feature extraction failed")
 
918
  screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
919
  vlm_features = None
920
  vlm_raw = ""
 
921
 
922
  # Log VLM outputs in pipeline context
923
  logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
924
  logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
 
925
 
926
  # --------------------------
927
  # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
 
1000
  # -----------------------
1001
  if __name__ == "__main__":
1002
  import uvicorn
1003
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)