dpv007 committed on
Commit
d8e884b
·
verified ·
1 Parent(s): f283510

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +481 -15
app.py CHANGED
@@ -265,7 +265,7 @@ def call_space_multipart(space: str, api_name: str, prompt: str, face_path: str,
265
  # Many Spaces accept a "data" field which is a JSON array of inputs; we provide prompt as first arg
266
  # and attach the file with a 'file' key. Some Spaces expect different key names — this is a pragmatic fallback.
267
  files = {
268
- "data": (None, json.dumps([prompt, None])), # second element reserved for the file param; some spaces ignore it
269
  "file": (os.path.basename(face_path), open(face_path, "rb"), "image/jpeg")
270
  }
271
  try:
@@ -332,7 +332,6 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
332
  out = call_space_multipart(GRADIO_VLM_SPACE, "chat_fn", prompt, face_path)
333
  raw_text = ""
334
  if isinstance(out, dict):
335
- # Some spaces return {'data': [...]} or similar
336
  raw_text = json.dumps(out)
337
  else:
338
  raw_text = str(out)
@@ -377,12 +376,11 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
377
  {"message": prompt, "file": file_wrapper},
378
  {"prompt": prompt, "image": file_wrapper},
379
  {"prompt": prompt, "file": file_wrapper},
380
- {"input_data": [prompt, None]} # sometimes chat endpoints want a data array; we can't attach file here but we try
381
  ]
382
  for named_args in named_attempts:
383
  try:
384
  logger.info("Attempting gradio_client.predict named call with args: %s", list(named_args.keys()))
385
- # always pass api_name explicitly
386
  result = client.predict(api_name="/chat_fn", **named_args)
387
  meta["vlm_upload_method"] = "gradio_named:" + ",".join(list(named_args.keys()))
388
  tried_methods.append(f"gradio_named_{','.join(list(named_args.keys()))}")
@@ -415,7 +413,6 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
415
  if isinstance(out, dict):
416
  possible_text = out.get("data") or out.get("text") or out.get("output") or out.get("raw") or out.get("msg")
417
  if possible_text is None:
418
- # Some Spaces return {'data': ['...']} or {'data': [{...}]}
419
  if "data" in out and isinstance(out["data"], (list, tuple)) and len(out["data"]) > 0:
420
  possible_text = out["data"][0]
421
  if isinstance(possible_text, (dict, list)):
@@ -754,7 +751,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
754
  is_valid = eye_openness_score >= 0.3
755
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
756
  "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
757
- "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
758
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
759
 
760
  if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
@@ -781,7 +778,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
781
  left_eye = {"x": cx, "y": cy}
782
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
783
  "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
784
- "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
785
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
786
  except Exception:
787
  traceback.print_exc()
@@ -797,14 +794,483 @@ async def validate_eye_photo(image: UploadFile = File(...)):
797
  "message_hindi": "छवि प्रोसेस करने में त्रुटि। कृपया पुनः प्रयास करें।",
798
  "error": str(e)}
799
 
800
- # ... (the rest of your endpoints remain unchanged, same as original) ...
801
- # For brevity, the remainder of endpoints (upload_images, analyze_screening, get_status, get_results,
802
- # get_history, get_vitals_from_upload, get_vitals_for_screening, get_vitals_from_urls, process_screening)
803
- # are unchanged from your original file. They will call the updated run_vlm_and_get_features above.
804
- #
805
- # (Paste the remaining original code for endpoints/process_screening as-is here or keep the version you had,
806
- # because the only functional changes required were to the VLM upload logic.)
807
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  # -----------------------
809
  # Run server (for local debugging)
810
  # -----------------------
 
265
  # Many Spaces accept a "data" field which is a JSON array of inputs; we provide prompt as first arg
266
  # and attach the file with a 'file' key. Some Spaces expect different key names — this is a pragmatic fallback.
267
  files = {
268
+ "data": (None, json.dumps([prompt, None])),
269
  "file": (os.path.basename(face_path), open(face_path, "rb"), "image/jpeg")
270
  }
271
  try:
 
332
  out = call_space_multipart(GRADIO_VLM_SPACE, "chat_fn", prompt, face_path)
333
  raw_text = ""
334
  if isinstance(out, dict):
 
335
  raw_text = json.dumps(out)
336
  else:
337
  raw_text = str(out)
 
376
  {"message": prompt, "file": file_wrapper},
377
  {"prompt": prompt, "image": file_wrapper},
378
  {"prompt": prompt, "file": file_wrapper},
379
+ {"input_data": [prompt, None]}
380
  ]
381
  for named_args in named_attempts:
382
  try:
383
  logger.info("Attempting gradio_client.predict named call with args: %s", list(named_args.keys()))
 
384
  result = client.predict(api_name="/chat_fn", **named_args)
385
  meta["vlm_upload_method"] = "gradio_named:" + ",".join(list(named_args.keys()))
386
  tried_methods.append(f"gradio_named_{','.join(list(named_args.keys()))}")
 
413
  if isinstance(out, dict):
414
  possible_text = out.get("data") or out.get("text") or out.get("output") or out.get("raw") or out.get("msg")
415
  if possible_text is None:
 
416
  if "data" in out and isinstance(out["data"], (list, tuple)) and len(out["data"]) > 0:
417
  possible_text = out["data"][0]
418
  if isinstance(possible_text, (dict, list)):
 
751
  is_valid = eye_openness_score >= 0.3
752
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
753
  "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
754
+ "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
755
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
756
 
757
  if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
 
778
  left_eye = {"x": cx, "y": cy}
779
  return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
780
  "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
781
+ "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
782
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
783
  except Exception:
784
  traceback.print_exc()
 
794
  "message_hindi": "छवि प्रोसेस करने में त्रुटि। कृपया पुनः प्रयास करें।",
795
  "error": str(e)}
796
 
797
@app.post("/api/v1/upload")
async def upload_images(
    background_tasks: BackgroundTasks,
    face_image: UploadFile = File(...),
    eye_image: UploadFile = File(...)
):
    """
    Save images and enqueue background processing. VLM -> LLM runs inside process_screening.

    Both uploads are written to the temp directory, a record with status "queued" is
    stored in screenings_db, and process_screening is scheduled as a background task.

    Returns:
        {"screening_id": <uuid4 string>}

    Raises:
        HTTPException(500): if reading, saving or registering the images fails. Any file
        already written and any record already stored for this upload are removed first,
        so a failed upload leaves nothing behind.
    """
    screening_id = str(uuid.uuid4())
    written_paths = []
    try:
        now = datetime.utcnow().isoformat() + "Z"
        tmp_dir = "/tmp/elderly_healthwatch"
        os.makedirs(tmp_dir, exist_ok=True)
        face_path = os.path.join(tmp_dir, f"{screening_id}_face.jpg")
        eye_path = os.path.join(tmp_dir, f"{screening_id}_eye.jpg")
        face_bytes = await face_image.read()
        eye_bytes = await eye_image.read()
        for path, payload in ((face_path, face_bytes), (eye_path, eye_bytes)):
            # Track the path before opening so a partially written file is cleaned up too.
            written_paths.append(path)
            with open(path, "wb") as f:
                f.write(payload)
        screenings_db[screening_id] = {
            "id": screening_id,
            "timestamp": now,
            "face_image_path": face_path,
            "eye_image_path": eye_path,
            "status": "queued",
            "quality_metrics": {},
            "ai_results": {},
            "disease_predictions": [],
            "recommendations": {}
        }
        background_tasks.add_task(process_screening, screening_id)
        return {"screening_id": screening_id}
    except Exception as e:
        traceback.print_exc()
        # Roll back: drop the record and any image files created for this upload.
        screenings_db.pop(screening_id, None)
        for path in written_paths:
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort cleanup; the file may never have been created
        raise HTTPException(status_code=500, detail=f"Failed to upload images: {e}")
835
+
836
@app.post("/api/v1/analyze/{screening_id}")
async def analyze_screening(screening_id: str, background_tasks: BackgroundTasks):
    """
    Re-queue an existing screening for background processing.

    Responds 404 when the id is unknown, reports "Already processing" when the record
    is currently being processed, and otherwise marks it "queued" and schedules
    process_screening.
    """
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")

    record = screenings_db[screening_id]
    currently_processing = record.get("status") == "processing"
    if currently_processing:
        return {"message": "Already processing"}

    record["status"] = "queued"
    background_tasks.add_task(process_screening, screening_id)
    return {"message": "Analysis enqueued"}
845
+
846
@app.get("/api/v1/status/{screening_id}")
async def get_status(screening_id: str):
    """
    Report the status of a screening together with a coarse progress figure:
    50 while "processing", 100 once "completed", 0 for every other status.
    Responds 404 when the id is unknown.
    """
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")

    status = screenings_db[screening_id].get("status", "unknown")
    if status == "processing":
        progress = 50
    elif status == "completed":
        progress = 100
    else:
        progress = 0
    return {"screening_id": screening_id, "status": status, "progress": progress}
853
+
854
@app.get("/api/v1/results/{screening_id}")
async def get_results(screening_id: str):
    """
    Return the full stored record for a screening (404 when the id is unknown).

    The record is guaranteed to carry ai_results["vlm_raw"] (empty string when no
    VLM output was stored) so that clients can always inspect it for debugging.
    """
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")

    record = screenings_db[screening_id]
    ai_results = record.setdefault("ai_results", {})
    # Only fills the key when it is absent; an existing value is left untouched.
    ai_results.setdefault("vlm_raw", "")
    return record
863
+
864
@app.get("/api/v1/history/{user_id}")
async def get_history(user_id: str):
    """Return every stored screening whose "user_id" field equals `user_id`."""
    matching = []
    for record in screenings_db.values():
        if record.get("user_id") == user_id:
            matching.append(record)
    return {"screenings": matching}
868
+
869
+ # -----------------------
870
+ # Immediate VLM -> LLM routes (return vitals in one call)
871
+ # -----------------------
872
@app.post("/api/v1/get-vitals")
async def get_vitals_from_upload(
    face_image: UploadFile = File(...),
    eye_image: UploadFile = File(...)
):
    """
    Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
    { vlm_raw_output, vlm_parsed_features, vlm_meta, llm_structured_risk }
    Note: VLM will receive only the face image (not the eye image).

    The uploaded images are written to per-request temp files which are deleted
    again before the response is returned, whether the pipeline succeeded or not.

    Raises:
        HTTPException(500): VLM/LLM client unavailable, images could not be saved,
        or the pipeline failed.
    """
    if not GRADIO_AVAILABLE:
        raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")

    tmp_dir = "/tmp/elderly_healthwatch"
    uid = str(uuid.uuid4())
    face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
    eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")

    def _discard_temp_files() -> None:
        # Best-effort removal of this request's temp images; missing files are fine.
        for path in (face_path, eye_path):
            try:
                os.remove(path)
            except OSError:
                pass

    # save files to a temp directory
    try:
        os.makedirs(tmp_dir, exist_ok=True)
        face_bytes = await face_image.read()
        eye_bytes = await eye_image.read()
        with open(face_path, "wb") as f:
            f.write(face_bytes)
        with open(eye_path, "wb") as f:
            f.write(eye_bytes)
    except Exception as e:
        logger.exception("Failed saving uploaded images")
        _discard_temp_files()
        raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")

    try:
        # Run VLM (off the event loop) - returns (features, raw, meta)
        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)

        # Log VLM outputs
        logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
        logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
        logger.info("get_vitals_from_upload - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))

        # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
        if vlm_features:
            llm_input = json.dumps(vlm_features, ensure_ascii=False)
            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
        else:
            llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))

        # Run LLM (off the event loop)
        structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)

        # Return merged result (includes raw VLM output + meta for debugging)
        return {
            "vlm_raw_output": vlm_raw,
            "vlm_parsed_features": vlm_features,
            "vlm_meta": vlm_meta,
            "llm_structured_risk": structured_risk
        }
    except Exception as e:
        logger.exception("get_vitals_from_upload pipeline failed")
        raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
    finally:
        # These files are not referenced by any stored record, so nothing needs them
        # after this request; leaving them would accumulate user photos in /tmp.
        _discard_temp_files()
932
+
933
@app.post("/api/v1/get-vitals/{screening_id}")
async def get_vitals_for_screening(screening_id: str):
    """
    Re-process the images already stored for `screening_id` through VLM -> LLM.

    Intended for re-runs and debugging: the fresh outputs are written back into the
    record's "ai_results" and also returned to the caller.
    Note: VLM will receive only the face image (not the eye image).

    Responds 404 for an unknown id, 400 when either stored image is missing on disk,
    and 500 when the pipeline itself fails.
    """
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")

    record = screenings_db[screening_id]
    face_path = record.get("face_image_path")
    eye_path = record.get("eye_image_path")
    face_missing = not face_path or not os.path.exists(face_path)
    if face_missing or not eye_path or not os.path.exists(eye_path):
        raise HTTPException(status_code=400, detail="Stored images missing for this screening")

    try:
        # The VLM call blocks, so it is pushed to a worker thread.
        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)

        raw_snippet = (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>"
        features_dump = json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None"
        logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, raw_snippet)
        logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, features_dump)
        logger.info("get_vitals_for_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))

        # Parsed features win; otherwise fall back to the raw text (or an empty JSON object).
        if vlm_features:
            llm_input = json.dumps(vlm_features, ensure_ascii=False)
            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
        else:
            if vlm_raw and vlm_raw.strip():
                llm_input = vlm_raw
            else:
                llm_input = "{}"
            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))

        structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)

        # Keep this run's outputs on the record so they can be inspected later.
        record.setdefault("ai_results", {})
        record["ai_results"].update({
            "vlm_parsed_features": vlm_features,
            "vlm_raw": vlm_raw,
            "vlm_meta": vlm_meta,
            "structured_risk": structured_risk,
            "last_vitals_run": datetime.utcnow().isoformat() + "Z"
        })

        response = {
            "screening_id": screening_id,
            "vlm_raw_output": vlm_raw,
            "vlm_parsed_features": vlm_features,
            "vlm_meta": vlm_meta,
            "llm_structured_risk": structured_risk
        }
        return response
    except Exception as e:
        logger.exception("get_vitals_for_screening pipeline failed")
        raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
986
+
987
+ # -----------------------
988
+ # URL-based vitals endpoint (optional)
989
+ # -----------------------
990
# Request body for the URL-based vitals endpoint: { "face_image_url": "...", "eye_image_url": "..." }
class ImageUrls(BaseModel):
    # Location of the face photo to download.
    face_image_url: HttpUrl
    # Location of the eye photo to download.
    eye_image_url: HttpUrl
993
+
994
+ # helper: download URL to file with safety checks
995
async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
    """
    Download an image from `url` and save to dest_path.

    Guards:
    - timeout (applied through the client)
    - max bytes, enforced while streaming so an oversized body is never fully buffered
    - basic content-type check (image/*), done before any body bytes are read

    On any failure the partially written `dest_path` is removed.

    Raises:
        HTTPException(400): on a non-success HTTP status, a non-image content type,
        an oversized body, or any other download/write error.

    NOTE(review): redirects are followed and the target host is not restricted, so a
    caller-supplied URL can reach internal addresses (SSRF) — confirm whether the
    deployment needs an allow-list.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
            # Stream instead of client.get(): get() reads the whole body into memory
            # before returning, which would defeat the max_bytes guard below.
            async with client.stream("GET", url) as resp:
                resp.raise_for_status()

                content_type = resp.headers.get("Content-Type", "")
                if not content_type.startswith("image/"):
                    raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")

                total = 0
                with open(dest_path, "wb") as f:
                    async for chunk in resp.aiter_bytes():
                        if not chunk:
                            continue
                        total += len(chunk)
                        if total > max_bytes:
                            raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
                        f.write(chunk)
    except Exception as e:
        # Never leave a truncated or oversized file behind.
        try:
            os.remove(dest_path)
        except OSError:
            pass  # nothing was written, or the file is already gone
        if isinstance(e, httpx.HTTPStatusError):
            raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}") from e
        raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}") from e
1026
+
1027
@app.post("/api/v1/get-vitals-by-url")
async def get_vitals_from_urls(payload: ImageUrls = Body(...)):
    """
    Download face and eye images from given URLs, then run the same VLM -> LLM pipeline and return results.
    Note: VLM will receive only the face image (not the eye image).
    Body: { "face_image_url": "...", "eye_image_url": "..." }

    The downloaded images live in per-request temp files that are deleted before the
    response is returned, including when a download or the pipeline fails.

    Raises:
        HTTPException(500): VLM/LLM client unavailable, temp directory could not be
        prepared, or the pipeline failed.
        HTTPException(400): either image could not be downloaded.
    """
    if not GRADIO_AVAILABLE:
        raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")

    # prepare tmp paths
    try:
        tmp_dir = "/tmp/elderly_healthwatch"
        os.makedirs(tmp_dir, exist_ok=True)
        uid = str(uuid.uuid4())
        face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
        eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
    except Exception as e:
        logger.exception("Failed to prepare temp paths")
        raise HTTPException(status_code=500, detail=f"Server error preparing temp files: {e}")

    try:
        # download images (with guards)
        try:
            await download_image_to_path(str(payload.face_image_url), face_path)
            await download_image_to_path(str(payload.eye_image_url), eye_path)
        except HTTPException:
            raise
        except Exception as e:
            logger.exception("Downloading images failed")
            raise HTTPException(status_code=400, detail=f"Failed to download images: {e}")

        # run existing pipeline (off the event loop)
        try:
            vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)

            logger.info("get_vitals_from_urls - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
            logger.info("get_vitals_from_urls - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
            logger.info("get_vitals_from_urls - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))

            if vlm_features:
                llm_input = json.dumps(vlm_features, ensure_ascii=False)
                logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
            else:
                llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
                logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))

            structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)

            return {
                "vlm_raw_output": vlm_raw,
                "vlm_parsed_features": vlm_features,
                "vlm_meta": vlm_meta,
                "llm_structured_risk": structured_risk
            }
        except Exception as e:
            logger.exception("get_vitals_by_url pipeline failed")
            raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
    finally:
        # The files are not referenced by any stored record; remove them so failed or
        # finished requests do not pile up images in the temp directory.
        for path in (face_path, eye_path):
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort: the file may never have been created
1084
+
1085
+ # -----------------------
1086
+ # Main background pipeline (upload -> process_screening)
1087
+ # -----------------------
1088
async def process_screening(screening_id: str):
    """
    Main pipeline:
    - load images
    - quick detector-based quality metrics
    - run VLM -> vlm_features (dict or None) + vlm_raw (string) + vlm_meta
    - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
    - merge results into ai_results and finish

    The record in screenings_db moves "processing" -> "completed". Any unexpected error
    is caught and recorded on the record as status "failed" plus an "error" message.
    VLM or LLM failures do not fail the screening: they are stored as "vlm_error" /
    "llm_error" and neutral defaults are used for the remaining steps.

    The stored image files are left in place; get_vitals_for_screening re-reads them.
    """
    import time  # local import: only used to measure this pipeline's duration

    started = time.monotonic()

    # Neutral result used whenever the LLM step fails or yields nothing usable.
    fallback_risk = {
        "risk_score": 0.0,
        "jaundice_probability": 0.0,
        "anemia_probability": 0.0,
        "hydration_issue_probability": 0.0,
        "neurological_issue_probability": 0.0,
        "summary": "",
        "recommendation": "",
        "confidence": 0.0
    }

    try:
        if screening_id not in screenings_db:
            logger.error("[process_screening] screening %s not found", screening_id)
            return
        screenings_db[screening_id]["status"] = "processing"
        logger.info("[process_screening] Starting %s", screening_id)

        entry = screenings_db[screening_id]
        face_path = entry.get("face_image_path")
        eye_path = entry.get("eye_image_path")

        if not (face_path and os.path.exists(face_path)):
            raise RuntimeError("Face image missing")
        if not (eye_path and os.path.exists(eye_path)):
            raise RuntimeError("Eye image missing")

        face_img = Image.open(face_path).convert("RGB")
        # The eye image is not used below; decoding it still rejects unreadable uploads.
        Image.open(eye_path).convert("RGB")

        # Basic detection + quality metrics (facenet/mtcnn/opencv)
        face_detected = False
        face_confidence = 0.0
        left_eye_coord = right_eye_coord = None

        if mtcnn is not None and not isinstance(mtcnn, dict) and (_MTCNN_IMPL == "facenet_pytorch" or _MTCNN_IMPL == "mtcnn"):
            try:
                if _MTCNN_IMPL == "facenet_pytorch":
                    boxes, probs, landmarks = mtcnn.detect(face_img, landmarks=True)
                    if boxes is not None and len(boxes) > 0:
                        face_detected = True
                        face_confidence = float(probs[0]) if probs is not None else 0.0
                        if landmarks is not None:
                            lm = landmarks[0]
                            if len(lm) >= 2:
                                left_eye_coord = {"x": float(lm[0][0]), "y": float(lm[0][1])}
                                right_eye_coord = {"x": float(lm[1][0]), "y": float(lm[1][1])}
                else:
                    arr = np.asarray(face_img)
                    detections = mtcnn.detect_faces(arr)
                    if detections:
                        face_detected = True
                        face_confidence = float(detections[0].get("confidence", 0.0))
                        k = detections[0].get("keypoints", {})
                        left_eye_coord = k.get("left_eye")
                        right_eye_coord = k.get("right_eye")
            except Exception:
                # Detection is best-effort: the pipeline continues with "no face detected".
                traceback.print_exc()

        if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
            try:
                arr = np.asarray(face_img)
                gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
                face_cascade = mtcnn["face_cascade"]
                eye_cascade = mtcnn["eye_cascade"]
                faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60))
                if len(faces) > 0:
                    face_detected = True
                    (x, y, w, h) = faces[0]
                    # Confidence proxy: face area relative to image area, scaled by 4 and capped at 1.
                    face_confidence = min(1.0, (w*h) / (arr.shape[0]*arr.shape[1]) * 4.0)
                    roi_gray = gray[y:y+h, x:x+w]
                    eyes = eye_cascade.detectMultiScale(roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(20, 10))
                    if len(eyes) >= 1:
                        ex, ey, ew, eh = eyes[0]
                        left_eye_coord = {"x": float(x + ex + ew/2), "y": float(y + ey + eh/2)}
            except Exception:
                traceback.print_exc()

        face_quality_score = 0.85 if face_detected and face_confidence > 0.6 else 0.45
        gray_face = np.asarray(face_img.convert("L"))  # converted once, used for both stats
        quality_metrics = {
            "face_detected": face_detected,
            "face_confidence": round(face_confidence, 3),
            "face_quality_score": round(face_quality_score, 2),
            "eye_coords": {"left_eye": left_eye_coord, "right_eye": right_eye_coord},
            "face_brightness": int(np.mean(gray_face)),
            "face_blur_estimate": int(np.var(gray_face))
        }
        screenings_db[screening_id]["quality_metrics"] = quality_metrics

        # --------------------------
        # RUN VLM -> get vlm_features + vlm_raw + vlm_meta
        # --------------------------
        vlm_features = None
        vlm_raw = None
        vlm_meta = {}
        try:
            # Blocking call: run it in a worker thread so this background task does not
            # stall the event loop (same approach as the get-vitals endpoints).
            vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({
                "vlm_parsed_features": vlm_features,
                "vlm_raw": vlm_raw,
                "vlm_meta": vlm_meta
            })
        except Exception as e:
            logger.exception("VLM feature extraction failed")
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
            vlm_features = None
            vlm_raw = ""
            vlm_meta = {"error": str(e)}

        # Log VLM outputs in pipeline context
        logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
        logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
        logger.info("process_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))

        # --------------------------
        # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
        # --------------------------
        structured_risk = None
        try:
            if vlm_features:
                # prefer cleaned JSON
                llm_input = json.dumps(vlm_features, ensure_ascii=False)
            else:
                # fallback to raw string (may be empty)
                llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"

            structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
        except Exception as e:
            logger.exception("LLM processing failed")
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({"llm_error": str(e)})
            structured_risk = dict(fallback_risk)

        if not isinstance(structured_risk, dict):
            # The lookups below need a mapping; without this guard a None or non-dict
            # LLM result would raise and mark the whole screening as failed.
            logger.warning("process_screening(%s) - LLM returned no structured result; using defaults", screening_id)
            structured_risk = dict(fallback_risk)

        # Use structured_risk for summary recommendations & simple disease inference placeholders
        screenings_db[screening_id].setdefault("ai_results", {})
        screenings_db[screening_id]["ai_results"].update({
            "processing_time_ms": int((time.monotonic() - started) * 1000)
        })

        disease_predictions = [
            {
                "condition": "Anemia-like-signs",
                "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
                "probability": structured_risk.get("anemia_probability", 0.0),
                "confidence": structured_risk.get("confidence", 0.0)
            },
            {
                "condition": "Jaundice-like-signs",
                "risk_level": "Medium" if structured_risk.get("jaundice_probability", 0.0) > 0.5 else "Low",
                "probability": structured_risk.get("jaundice_probability", 0.0),
                "confidence": structured_risk.get("confidence", 0.0)
            }
        ]

        recommendations = {
            "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
            "message_english": structured_risk.get("recommendation", "") or "Please follow up with a health professional if concerns persist.",
            "message_hindi": ""
        }

        screenings_db[screening_id].update({
            "status": "completed",
            "disease_predictions": disease_predictions,
            "recommendations": recommendations
        })

        logger.info("[process_screening] Completed %s", screening_id)
    except Exception as e:
        traceback.print_exc()
        if screening_id in screenings_db:
            screenings_db[screening_id]["status"] = "failed"
            screenings_db[screening_id]["error"] = str(e)
        else:
            logger.error("[process_screening] Failed for unknown screening %s: %s", screening_id, str(e))
1273
+
1274
  # -----------------------
1275
  # Run server (for local debugging)
1276
  # -----------------------