dpv007 commited on
Commit
7dedfff
·
verified ·
1 Parent(s): a83b25f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -791
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  """
3
  Elderly HealthWatch AI Backend (FastAPI)
4
  Pipeline:
@@ -15,6 +14,7 @@ Notes:
15
  * always returns raw VLM output in API responses,
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends only the face image to the VLM (not the eye image).
 
18
  """
19
 
20
  import io
@@ -51,6 +51,7 @@ logger = logging.getLogger("elderly_healthwatch")
51
  GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct")
52
  LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
53
  HF_TOKEN = os.getenv("HF_TOKEN", None)
 
54
 
55
  # Default VLM prompt
56
  DEFAULT_VLM_PROMPT = (
@@ -245,6 +246,74 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
245
  }
246
  return out
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  # -----------------------
249
  # Gradio / VLM helper (sends only face image, returns meta)
250
  # -----------------------
@@ -256,58 +325,77 @@ def get_gradio_client_for_space(space: str) -> Client:
256
  return Client(space)
257
 
258
  def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
259
- raise_on_file_delivery_failure: bool = False
 
260
  ) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
261
  """
262
- Synchronous call to remote VLM (gradio /chat_fn). Sends ONLY the face image file.
 
263
  Returns tuple: (parsed_features_dict_or_None, raw_text_response_str, meta)
264
  meta includes:
265
  - vlm_file_delivery_ok (bool) # expects ≥1 file acknowledged (face)
266
  - vlm_files_seen (int or None)
267
  - vlm_raw_len (int)
268
  - vlm_out_object (short repr)
 
269
  """
270
  prompt = prompt or DEFAULT_VLM_PROMPT
271
 
272
 
273
  if not os.path.exists(face_path):
274
  raise FileNotFoundError(f"Face image not found at: {face_path}")
275
- if not os.path.exists(eye_path):
276
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
277
 
278
  face_size = os.path.getsize(face_path)
279
- eye_size = os.path.getsize(eye_path)
280
- logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
281
 
282
- if face_size == 0 or eye_size == 0:
283
- raise ValueError("One or both images are empty (0 bytes)")
284
 
285
  if not GRADIO_AVAILABLE:
286
  raise RuntimeError("gradio_client not available in this environment.")
287
 
288
- client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
289
-
290
- # Verify files can be opened as images
291
  try:
292
  Image.open(face_path).verify()
293
- Image.open(eye_path).verify()
294
- logger.info("Both images verified as valid")
295
  except Exception as e:
296
- raise ValueError(f"Invalid image file(s): {e}")
297
 
298
- message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
299
-
300
- logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
301
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
302
- # NOTE: only send face image to the Space
303
-
304
- message = {"text": prompt, "files": [handle_file(face_path)]}
 
 
 
 
305
 
306
- meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
  # SINGLE CALL (no retries)
309
  try:
310
- logger.info("Calling VLM Space %s with 1 file (face only)", GRADIO_VLM_SPACE)
311
  result = client.predict(message=message, history=[], api_name="/chat_fn")
312
  except Exception as e:
313
  logger.exception("VLM call failed (no retries)")
@@ -407,772 +495,4 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
407
  Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
408
  - Logs the VLM raw string and the chosen payload.
409
  - Sends cleaned JSON (json.dumps(vlm_features)) if vlm_features_or_raw is dict, else sends raw string.
410
- - Uses regex to extract the final JSON from LLM raw output.
411
- """
412
- if not GRADIO_AVAILABLE:
413
- raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
414
-
415
- # Try to import AppError for specific handling; fallback to Exception if unavailable
416
- try:
417
- from gradio_client import AppError # type: ignore
418
- except Exception:
419
- AppError = Exception # fallback
420
-
421
- client = get_gradio_client_for_space(LLM_GRADIO_SPACE)
422
- model_identity = model_identity or LLM_MODEL_IDENTITY
423
- system_prompt = system_prompt or LLM_SYSTEM_PROMPT
424
- developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
425
-
426
- # Decide what to send to LLM and log the raw input
427
- if isinstance(vlm_features_or_raw, str):
428
- vlm_raw_str = vlm_features_or_raw
429
- logger.info("LLM input will be RAW VLM STRING (len=%d)", len(vlm_raw_str or ""))
430
- vlm_json_str_to_send = vlm_raw_str if vlm_raw_str and vlm_raw_str.strip() else "{}"
431
- else:
432
- vlm_raw_str = json.dumps(vlm_features_or_raw, ensure_ascii=False) if vlm_features_or_raw else "{}"
433
- logger.info("LLM input will be CLEANED VLM JSON (len=%d)", len(vlm_raw_str))
434
- vlm_json_str_to_send = vlm_raw_str
435
-
436
- # Build instruction payload
437
- instruction = (
438
- "\n\nSTRICT INSTRUCTIONS (READ CAREFULLY):\n"
439
- "1) OUTPUT ONLY a single valid JSON object and nothing else — no prose, no explanation, no code fences.\n"
440
- "2) The JSON MUST include these keys: risk_score, jaundice_probability, anemia_probability, "
441
- "hydration_issue_probability, neurological_issue_probability, summary, recommendation, confidence.\n"
442
- "3) Use numeric values for probabilities (0..1) and for risk_score (0..100). Use strings for summary and recommendation.\n"
443
- "4) Do NOT mention disease names in summary or recommendation; use neutral wording only.\n"
444
- "If you cannot estimate a value, set it to null.\n\n"
445
- "Now, based on the VLM output below, produce ONLY the JSON object described above.\n\n"
446
- "===BEGIN VLM OUTPUT===\n"
447
- f"{vlm_json_str_to_send}\n"
448
- "===END VLM OUTPUT===\n\n"
449
- )
450
-
451
- # Defensive coercion / clamps
452
- try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
453
- if try_max_new_tokens <= 0:
454
- try_max_new_tokens = 1024
455
-
456
- try_temperature = float(temperature) if temperature is not None else 0.0
457
- # Some Spaces validate temperature >= 0.1
458
- if try_temperature < 0.1:
459
- try_temperature = 0.1
460
-
461
- predict_kwargs = dict(
462
- input_data=instruction,
463
- max_new_tokens=float(try_max_new_tokens),
464
- model_identity=model_identity,
465
- system_prompt=system_prompt,
466
- developer_prompt=developer_prompt,
467
- reasoning_effort=reasoning_effort,
468
- temperature=float(try_temperature),
469
- top_p=0.9,
470
- top_k=50,
471
- repetition_penalty=1.0,
472
- api_name="/chat"
473
- )
474
-
475
- last_exc = None
476
- for attempt in (1, 2):
477
- try:
478
- logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
479
- LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
480
- result = client.predict(**predict_kwargs)
481
-
482
- # normalize to string
483
- if isinstance(result, (dict, list)):
484
- text_out = json.dumps(result)
485
- else:
486
- text_out = str(result)
487
-
488
- if not text_out or len(text_out.strip()) == 0:
489
- raise RuntimeError("LLM returned empty response")
490
-
491
- logger.info("LLM raw output (len=%d):\n%s", len(text_out or ""), (text_out[:2000] + "...") if len(text_out) > 2000 else text_out)
492
-
493
- # parse with regex extractor (may raise)
494
- parsed = None
495
- try:
496
- parsed = extract_json_via_regex(text_out)
497
- except Exception:
498
- # fallback: attempt json.loads naive
499
- try:
500
- parsed = json.loads(text_out)
501
- if not isinstance(parsed, dict):
502
- parsed = None
503
- except Exception:
504
- parsed = None
505
-
506
- if parsed is None:
507
- raise ValueError("Failed to extract JSON from LLM output")
508
-
509
- # pretty log parsed JSON
510
- try:
511
- logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2, ensure_ascii=False))
512
- except Exception:
513
- logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
514
-
515
- # defensive clamps (same as extractor expectations)
516
- def safe_prob(val):
517
- try:
518
- v = float(val)
519
- return max(0.0, min(1.0, v))
520
- except Exception:
521
- return 0.0
522
-
523
- for k in [
524
- "jaundice_probability",
525
- "anemia_probability",
526
- "hydration_issue_probability",
527
- "neurological_issue_probability"
528
- ]:
529
- parsed[k] = safe_prob(parsed.get(k, 0.0))
530
-
531
- try:
532
- rs = float(parsed.get("risk_score", 0.0))
533
- parsed["risk_score"] = round(max(0.0, min(100.0, rs)), 2)
534
- except Exception:
535
- parsed["risk_score"] = 0.0
536
-
537
- parsed["confidence"] = safe_prob(parsed.get("confidence", 0.0))
538
- parsed["summary"] = str(parsed.get("summary", "") or "").strip()
539
- parsed["recommendation"] = str(parsed.get("recommendation", "") or "").strip()
540
-
541
- for k in [
542
- "jaundice_probability",
543
- "anemia_probability",
544
- "hydration_issue_probability",
545
- "neurological_issue_probability",
546
- "confidence",
547
- "risk_score"
548
- ]:
549
- parsed[f"{k}_was_missing"] = False
550
-
551
- return parsed
552
-
553
- except AppError as app_e:
554
- logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
555
- last_exc = app_e
556
- if attempt == 1:
557
- predict_kwargs["temperature"] = 0.2
558
- predict_kwargs["max_new_tokens"] = float(512)
559
- logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
560
- continue
561
- else:
562
- raise RuntimeError(f"LLM call failed (AppError): {app_e}")
563
- except Exception as e:
564
- logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
565
- last_exc = e
566
- if attempt == 1:
567
- predict_kwargs["temperature"] = 0.2
568
- predict_kwargs["max_new_tokens"] = float(512)
569
- continue
570
- raise RuntimeError(f"LLM call failed: {e}")
571
-
572
- raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
573
-
574
- # -----------------------
575
- # API endpoints
576
- # -----------------------
577
- @app.get("/")
578
- async def read_root():
579
- return {"message": "Elderly HealthWatch AI Backend"}
580
-
581
- @app.get("/health")
582
- async def health_check():
583
- impl = None
584
- if mtcnn is None:
585
- impl = "none"
586
- elif isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
587
- impl = "opencv_haar_fallback"
588
- else:
589
- impl = _MTCNN_IMPL
590
- return {
591
- "status": "healthy",
592
- "detector": impl,
593
- "vlm_available": GRADIO_AVAILABLE,
594
- "vlm_space": GRADIO_VLM_SPACE,
595
- "llm_space": LLM_GRADIO_SPACE
596
- }
597
-
598
- @app.post("/api/v1/validate-eye-photo")
599
- async def validate_eye_photo(image: UploadFile = File(...)):
600
- if mtcnn is None:
601
- raise HTTPException(status_code=500, detail="No face detector available in this deployment.")
602
- try:
603
- content = await image.read()
604
- if not content:
605
- raise HTTPException(status_code=400, detail="Empty file uploaded.")
606
- pil_img = load_image_from_bytes(content)
607
- img_arr = np.asarray(pil_img) # RGB
608
-
609
- if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "facenet_pytorch":
610
- try:
611
- boxes, probs, landmarks = mtcnn.detect(pil_img, landmarks=True)
612
- if boxes is None or len(boxes) == 0:
613
- return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
614
- "message_english": "No face detected. Please ensure your face is clearly visible in the frame.",
615
- "message_hindi": "कोई चेहरा नहीं मिला। कृपया सुनिश्चित करें कि आपका चेहरा फ्रेम में स्पष्ट रूप से दिखाई दे रहा है।"}
616
- prob = float(probs[0]) if probs is not None else 0.0
617
- lm = landmarks[0] if landmarks is not None else None
618
- if lm is not None and len(lm) >= 2:
619
- left_eye = {"x": float(lm[0][0]), "y": float(lm[0][1])}
620
- right_eye = {"x": float(lm[1][0]), "y": float(lm[1][1])}
621
- else:
622
- left_eye = right_eye = None
623
- eye_openness_score = estimate_eye_openness_from_detection(prob)
624
- is_valid = eye_openness_score >= 0.3
625
- return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
626
- "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
627
- "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
628
- "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
629
- except Exception:
630
- traceback.print_exc()
631
- raise HTTPException(status_code=500, detail="Face detector failed during inference.")
632
-
633
- if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "mtcnn":
634
- try:
635
- detections = mtcnn.detect_faces(img_arr)
636
- except Exception:
637
- detections = mtcnn.detect_faces(pil_img)
638
- if not detections:
639
- return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
640
- "message_english": "No face detected. Please ensure your face is clearly visible in the frame.",
641
- "message_hindi": "कोई चेहरा नहीं मिला। कृपया सुनिश्चित करें कि आपका चेहरा फ्रेम में स्पष्ट रूप से दिखाई दे रहा है।"}
642
- face = detections[0]
643
- keypoints = face.get("keypoints", {})
644
- left_eye = keypoints.get("left_eye")
645
- right_eye = keypoints.get("right_eye")
646
- confidence = float(face.get("confidence", 0.0))
647
- eye_openness_score = estimate_eye_openness_from_detection(confidence)
648
- is_valid = eye_openness_score >= 0.3
649
- return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
650
- "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
651
- "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
652
- "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
653
-
654
- if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
655
- try:
656
- gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
657
- face_cascade = mtcnn["face_cascade"]
658
- eye_cascade = mtcnn["eye_cascade"]
659
- faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60))
660
- if len(faces) == 0:
661
- return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
662
- "message_english": "No face detected. Please ensure your face is clearly visible in the frame.",
663
- "message_hindi": "कोई चेहरा नहीं मिला। कृपया सुनिश्चित करें कि आपका चेहरा फ्रेम में स्पष्ट रूप से दिखाई दे रहा है।"}
664
- (x, y, w, h) = faces[0]
665
- roi_gray = gray[y:y+h, x:x+w]
666
- eyes = eye_cascade.detectMultiScale(roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(20, 10))
667
- eye_openness_score = 1.0 if len(eyes) >= 1 else 0.0
668
- is_valid = eye_openness_score >= 0.3
669
- left_eye = None
670
- right_eye = None
671
- if len(eyes) >= 1:
672
- ex, ey, ew, eh = eyes[0]
673
- cx = float(x + ex + ew/2)
674
- cy = float(y + ey + eh/2)
675
- left_eye = {"x": cx, "y": cy}
676
- return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
677
- "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
678
- "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
679
- "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
680
- except Exception:
681
- traceback.print_exc()
682
- raise HTTPException(status_code=500, detail="OpenCV fallback detector failed.")
683
-
684
- raise HTTPException(status_code=500, detail="Invalid detector configuration.")
685
- except HTTPException:
686
- raise
687
- except Exception as e:
688
- traceback.print_exc()
689
- return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
690
- "message_english": "Error processing image. Please try again.",
691
- "message_hindi": "छवि प्रोसेस करने में त्रुटि। कृपया पुनः प्रयास करें।",
692
- "error": str(e)}
693
-
694
- @app.post("/api/v1/upload")
695
- async def upload_images(
696
- background_tasks: BackgroundTasks,
697
- face_image: UploadFile = File(...),
698
- eye_image: UploadFile = File(...)
699
- ):
700
- """
701
- Save images and enqueue background processing. VLM -> LLM runs inside process_screening.
702
- """
703
- try:
704
- screening_id = str(uuid.uuid4())
705
- now = datetime.utcnow().isoformat() + "Z"
706
- tmp_dir = "/tmp/elderly_healthwatch"
707
- os.makedirs(tmp_dir, exist_ok=True)
708
- face_path = os.path.join(tmp_dir, f"{screening_id}_face.jpg")
709
- eye_path = os.path.join(tmp_dir, f"{screening_id}_eye.jpg")
710
- face_bytes = await face_image.read()
711
- eye_bytes = await eye_image.read()
712
- with open(face_path, "wb") as f:
713
- f.write(face_bytes)
714
- with open(eye_path, "wb") as f:
715
- f.write(eye_bytes)
716
- screenings_db[screening_id] = {
717
- "id": screening_id,
718
- "timestamp": now,
719
- "face_image_path": face_path,
720
- "eye_image_path": eye_path,
721
- "status": "queued",
722
- "quality_metrics": {},
723
- "ai_results": {},
724
- "disease_predictions": [],
725
- "recommendations": {}
726
- }
727
- background_tasks.add_task(process_screening, screening_id)
728
- return {"screening_id": screening_id}
729
- except Exception as e:
730
- traceback.print_exc()
731
- raise HTTPException(status_code=500, detail=f"Failed to upload images: {e}")
732
-
733
- @app.post("/api/v1/analyze/{screening_id}")
734
- async def analyze_screening(screening_id: str, background_tasks: BackgroundTasks):
735
- if screening_id not in screenings_db:
736
- raise HTTPException(status_code=404, detail="Screening not found")
737
- if screenings_db[screening_id].get("status") == "processing":
738
- return {"message": "Already processing"}
739
- screenings_db[screening_id]["status"] = "queued"
740
- background_tasks.add_task(process_screening, screening_id)
741
- return {"message": "Analysis enqueued"}
742
-
743
- @app.get("/api/v1/status/{screening_id}")
744
- async def get_status(screening_id: str):
745
- if screening_id not in screenings_db:
746
- raise HTTPException(status_code=404, detail="Screening not found")
747
- status = screenings_db[screening_id].get("status", "unknown")
748
- progress = 50 if status == "processing" else (100 if status == "completed" else 0)
749
- return {"screening_id": screening_id, "status": status, "progress": progress}
750
-
751
- @app.get("/api/v1/results/{screening_id}")
752
- async def get_results(screening_id: str):
753
- if screening_id not in screenings_db:
754
- raise HTTPException(status_code=404, detail="Screening not found")
755
- # Ensure vlm_raw is always present in ai_results for debugging
756
- entry = screenings_db[screening_id]
757
- entry.setdefault("ai_results", {})
758
- entry["ai_results"].setdefault("vlm_raw", entry.get("ai_results", {}).get("vlm_raw", ""))
759
- return entry
760
-
761
- @app.get("/api/v1/history/{user_id}")
762
- async def get_history(user_id: str):
763
- history = [s for s in screenings_db.values() if s.get("user_id") == user_id]
764
- return {"screenings": history}
765
-
766
- # -----------------------
767
- # Immediate VLM -> LLM routes (return vitals in one call)
768
- # -----------------------
769
- @app.post("/api/v1/get-vitals")
770
- async def get_vitals_from_upload(
771
- face_image: UploadFile = File(...),
772
- eye_image: UploadFile = File(...)
773
- ):
774
- """
775
- Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
776
- { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
777
- Note: VLM will receive only the face image (not the eye image).
778
- """
779
- if not GRADIO_AVAILABLE:
780
- raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
781
-
782
- # save files to a temp directory
783
- try:
784
- tmp_dir = "/tmp/elderly_healthwatch"
785
- os.makedirs(tmp_dir, exist_ok=True)
786
- uid = str(uuid.uuid4())
787
- face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
788
- eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
789
- face_bytes = await face_image.read()
790
- eye_bytes = await eye_image.read()
791
- with open(face_path, "wb") as f:
792
- f.write(face_bytes)
793
- with open(eye_path, "wb") as f:
794
- f.write(eye_bytes)
795
- except Exception as e:
796
- logger.exception("Failed saving uploaded images")
797
- raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")
798
-
799
- try:
800
- # Run VLM (off the event loop) - returns (features, raw, meta)
801
- vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
802
-
803
- # Log VLM outputs
804
- logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
805
- logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
806
- logger.info("get_vitals_from_upload - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
807
-
808
- # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
809
- if vlm_features:
810
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
811
- logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
812
- else:
813
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
814
- logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
815
-
816
- # Run LLM (off the event loop)
817
- structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
818
-
819
- # Return merged result (includes raw VLM output + meta for debugging)
820
- return {
821
- "vlm_raw_output": vlm_raw,
822
- "vlm_parsed_features": vlm_features,
823
- "vlm_meta": vlm_meta,
824
- "llm_structured_risk": structured_risk
825
- }
826
- except Exception as e:
827
- logger.exception("get_vitals_from_upload pipeline failed")
828
- raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
829
-
830
- @app.post("/api/v1/get-vitals/{screening_id}")
831
- async def get_vitals_for_screening(screening_id: str):
832
- """
833
- Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
834
- Useful for re-processing or debugging.
835
- Note: VLM will receive only the face image (not the eye image).
836
- """
837
- if screening_id not in screenings_db:
838
- raise HTTPException(status_code=404, detail="Screening not found")
839
-
840
- entry = screenings_db[screening_id]
841
- face_path = entry.get("face_image_path")
842
- eye_path = entry.get("eye_image_path")
843
- if not (face_path and os.path.exists(face_path) and eye_path and os.path.exists(eye_path)):
844
- raise HTTPException(status_code=400, detail="Stored images missing for this screening")
845
-
846
- try:
847
- # Run VLM off the event loop (returns features, raw, meta)
848
- vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
849
-
850
- logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
851
- logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
852
- logger.info("get_vitals_for_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
853
-
854
- if vlm_features:
855
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
856
- logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
857
- else:
858
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
859
- logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
860
-
861
- structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
862
-
863
- # Optionally store this run's outputs back into the DB for inspection
864
- entry.setdefault("ai_results", {})
865
- entry["ai_results"].update({
866
- "vlm_parsed_features": vlm_features,
867
- "vlm_raw": vlm_raw,
868
- "vlm_meta": vlm_meta,
869
- "structured_risk": structured_risk,
870
- "last_vitals_run": datetime.utcnow().isoformat() + "Z"
871
- })
872
-
873
- return {
874
- "screening_id": screening_id,
875
- "vlm_raw_output": vlm_raw,
876
- "vlm_parsed_features": vlm_features,
877
- "vlm_meta": vlm_meta,
878
- "llm_structured_risk": structured_risk
879
- }
880
- except Exception as e:
881
- logger.exception("get_vitals_for_screening pipeline failed")
882
- raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
883
-
884
- # -----------------------
885
- # URL-based vitals endpoint (optional)
886
- # -----------------------
887
- class ImageUrls(BaseModel):
888
- face_image_url: HttpUrl
889
- eye_image_url: HttpUrl
890
-
891
- import httpx # make sure to add httpx to requirements
892
-
893
- # helper: download URL to file with safety checks
894
- async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
895
- """
896
- Download an image from `url` and save to dest_path.
897
- Guards:
898
- - timeout
899
- - max bytes
900
- - basic content-type check (image/*)
901
- Raises HTTPException on failure.
902
- """
903
- try:
904
- async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
905
- resp = await client.get(url, timeout=timeout_seconds)
906
- resp.raise_for_status()
907
-
908
- content_type = resp.headers.get("Content-Type", "")
909
- if not content_type.startswith("image/"):
910
- raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")
911
-
912
- total = 0
913
- with open(dest_path, "wb") as f:
914
- async for chunk in resp.aiter_bytes():
915
- if not chunk:
916
- continue
917
- total += len(chunk)
918
- if total > max_bytes:
919
- raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
920
- f.write(chunk)
921
- except httpx.HTTPStatusError as e:
922
- raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
923
- except Exception as e:
924
- raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
925
-
926
- @app.post("/api/v1/get-vitals-by-url")
927
- async def get_vitals_from_urls(payload: ImageUrls = Body(...)):
928
- """
929
- Download face and eye images from given URLs, then run the same VLM -> LLM pipeline and return results.
930
- Note: VLM will receive only the face image (not the eye image).
931
- Body: { "face_image_url": "...", "eye_image_url": "..." }
932
- """
933
- if not GRADIO_AVAILABLE:
934
- raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
935
-
936
- # prepare tmp paths
937
- try:
938
- tmp_dir = "/tmp/elderly_healthwatch"
939
- os.makedirs(tmp_dir, exist_ok=True)
940
- uid = str(uuid.uuid4())
941
- face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
942
- eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
943
- except Exception as e:
944
- logger.exception("Failed to prepare temp paths")
945
- raise HTTPException(status_code=500, detail=f"Server error preparing temp files: {e}")
946
-
947
- # download images (with guards)
948
- try:
949
- await download_image_to_path(str(payload.face_image_url), face_path)
950
- await download_image_to_path(str(payload.eye_image_url), eye_path)
951
- except HTTPException:
952
- raise
953
- except Exception as e:
954
- logger.exception("Downloading images failed")
955
- raise HTTPException(status_code=400, detail=f"Failed to download images: {e}")
956
-
957
- # run existing pipeline (off the event loop)
958
- try:
959
- vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
960
-
961
- logger.info("get_vitals_from_urls - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
962
- logger.info("get_vitals_from_urls - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
963
- logger.info("get_vitals_from_urls - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))
964
-
965
- if vlm_features:
966
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
967
- logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
968
- else:
969
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
970
- logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
971
-
972
- structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
973
-
974
- return {
975
- "vlm_raw_output": vlm_raw,
976
- "vlm_parsed_features": vlm_features,
977
- "vlm_meta": vlm_meta,
978
- "llm_structured_risk": structured_risk
979
- }
980
- except Exception as e:
981
- logger.exception("get_vitals_by_url pipeline failed")
982
- raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
983
-
984
- # -----------------------
985
- # Main background pipeline (upload -> process_screening)
986
- # -----------------------
987
- async def process_screening(screening_id: str):
988
- """
989
- Main pipeline:
990
- - load images
991
- - quick detector-based quality metrics
992
- - run VLM -> vlm_features (dict or None) + vlm_raw (string) + vlm_meta
993
- - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
994
- - merge results into ai_results and finish
995
- """
996
- try:
997
- if screening_id not in screenings_db:
998
- logger.error("[process_screening] screening %s not found", screening_id)
999
- return
1000
- screenings_db[screening_id]["status"] = "processing"
1001
- logger.info("[process_screening] Starting %s", screening_id)
1002
-
1003
- entry = screenings_db[screening_id]
1004
- face_path = entry.get("face_image_path")
1005
- eye_path = entry.get("eye_image_path")
1006
-
1007
- if not (face_path and os.path.exists(face_path)):
1008
- raise RuntimeError("Face image missing")
1009
- if not (eye_path and os.path.exists(eye_path)):
1010
- raise RuntimeError("Eye image missing")
1011
-
1012
- face_img = Image.open(face_path).convert("RGB")
1013
- eye_img = Image.open(eye_path).convert("RGB")
1014
-
1015
- # Basic detection + quality metrics (facenet/mtcnn/opencv)
1016
- face_detected = False
1017
- face_confidence = 0.0
1018
- left_eye_coord = right_eye_coord = None
1019
-
1020
- if mtcnn is not None and not isinstance(mtcnn, dict) and (_MTCNN_IMPL == "facenet_pytorch" or _MTCNN_IMPL == "mtcnn"):
1021
- try:
1022
- if _MTCNN_IMPL == "facenet_pytorch":
1023
- boxes, probs, landmarks = mtcnn.detect(face_img, landmarks=True)
1024
- if boxes is not None and len(boxes) > 0:
1025
- face_detected = True
1026
- face_confidence = float(probs[0]) if probs is not None else 0.0
1027
- if landmarks is not None:
1028
- lm = landmarks[0]
1029
- if len(lm) >= 2:
1030
- left_eye_coord = {"x": float(lm[0][0]), "y": float(lm[0][1])}
1031
- right_eye_coord = {"x": float(lm[1][0]), "y": float(lm[1][1])}
1032
- else:
1033
- arr = np.asarray(face_img)
1034
- detections = mtcnn.detect_faces(arr)
1035
- if detections:
1036
- face_detected = True
1037
- face_confidence = float(detections[0].get("confidence", 0.0))
1038
- k = detections[0].get("keypoints", {})
1039
- left_eye_coord = k.get("left_eye")
1040
- right_eye_coord = k.get("right_eye")
1041
- except Exception:
1042
- traceback.print_exc()
1043
-
1044
- if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
1045
- try:
1046
- arr = np.asarray(face_img)
1047
- gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
1048
- face_cascade = mtcnn["face_cascade"]
1049
- eye_cascade = mtcnn["eye_cascade"]
1050
- faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60))
1051
- if len(faces) > 0:
1052
- face_detected = True
1053
- (x, y, w, h) = faces[0]
1054
- face_confidence = min(1.0, (w*h) / (arr.shape[0]*arr.shape[1]) * 4.0)
1055
- roi_gray = gray[y:y+h, x:x+w]
1056
- eyes = eye_cascade.detectMultiScale(roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(20, 10))
1057
- if len(eyes) >= 1:
1058
- ex, ey, ew, eh = eyes[0]
1059
- left_eye_coord = {"x": float(x + ex + ew/2), "y": float(y + ey + eh/2)}
1060
- except Exception:
1061
- traceback.print_exc()
1062
-
1063
- face_quality_score = 0.85 if face_detected and face_confidence > 0.6 else 0.45
1064
- quality_metrics = {
1065
- "face_detected": face_detected,
1066
- "face_confidence": round(face_confidence, 3),
1067
- "face_quality_score": round(face_quality_score, 2),
1068
- "eye_coords": {"left_eye": left_eye_coord, "right_eye": right_eye_coord},
1069
- "face_brightness": int(np.mean(np.asarray(face_img.convert("L")))),
1070
- "face_blur_estimate": int(np.var(np.asarray(face_img.convert("L"))))
1071
- }
1072
- screenings_db[screening_id]["quality_metrics"] = quality_metrics
1073
-
1074
- # --------------------------
1075
- # RUN VLM -> get vlm_features + vlm_raw + vlm_meta
1076
- # --------------------------
1077
- vlm_features = None
1078
- vlm_raw = None
1079
- vlm_meta = {}
1080
- try:
1081
- vlm_features, vlm_raw, vlm_meta = run_vlm_and_get_features(face_path, eye_path)
1082
- screenings_db[screening_id].setdefault("ai_results", {})
1083
- screenings_db[screening_id]["ai_results"].update({
1084
- "vlm_parsed_features": vlm_features,
1085
- "vlm_raw": vlm_raw,
1086
- "vlm_meta": vlm_meta
1087
- })
1088
- except Exception as e:
1089
- logger.exception("VLM feature extraction failed")
1090
- screenings_db[screening_id].setdefault("ai_results", {})
1091
- screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
1092
- vlm_features = None
1093
- vlm_raw = ""
1094
- vlm_meta = {"error": str(e)}
1095
-
1096
- # Log VLM outputs in pipeline context
1097
- logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
1098
- logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
1099
- logger.info("process_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))
1100
-
1101
- # --------------------------
1102
- # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
1103
- # --------------------------
1104
- structured_risk = None
1105
- try:
1106
- if vlm_features:
1107
- # prefer cleaned JSON
1108
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
1109
- else:
1110
- # fallback to raw string (may be empty)
1111
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
1112
-
1113
- structured_risk = run_llm_on_vlm(llm_input)
1114
- screenings_db[screening_id].setdefault("ai_results", {})
1115
- screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
1116
- except Exception as e:
1117
- logger.exception("LLM processing failed")
1118
- screenings_db[screening_id].setdefault("ai_results", {})
1119
- screenings_db[screening_id]["ai_results"].update({"llm_error": str(e)})
1120
- structured_risk = {
1121
- "risk_score": 0.0,
1122
- "jaundice_probability": 0.0,
1123
- "anemia_probability": 0.0,
1124
- "hydration_issue_probability": 0.0,
1125
- "neurological_issue_probability": 0.0,
1126
- "summary": "",
1127
- "recommendation": "",
1128
- "confidence": 0.0
1129
- }
1130
-
1131
- # Use structured_risk for summary recommendations & simple disease inference placeholders
1132
- screenings_db[screening_id].setdefault("ai_results", {})
1133
- screenings_db[screening_id]["ai_results"].update({
1134
- "processing_time_ms": 1200
1135
- })
1136
-
1137
- disease_predictions = [
1138
- {
1139
- "condition": "Anemia-like-signs",
1140
- "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
1141
- "probability": structured_risk.get("anemia_probability", 0.0),
1142
- "confidence": structured_risk.get("confidence", 0.0)
1143
- },
1144
- {
1145
- "condition": "Jaundice-like-signs",
1146
- "risk_level": "Medium" if structured_risk.get("jaundice_probability", 0.0) > 0.5 else "Low",
1147
- "probability": structured_risk.get("jaundice_probability", 0.0),
1148
- "confidence": structured_risk.get("confidence", 0.0)
1149
- }
1150
- ]
1151
-
1152
- recommendations = {
1153
- "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
1154
- "message_english": structured_risk.get("recommendation", "") or f"Please follow up with a health professional if concerns persist.",
1155
- "message_hindi": ""
1156
- }
1157
-
1158
- screenings_db[screening_id].update({
1159
- "status": "completed",
1160
- "disease_predictions": disease_predictions,
1161
- "recommendations": recommendations
1162
- })
1163
-
1164
- logger.info("[process_screening] Completed %s", screening_id)
1165
- except Exception as e:
1166
- traceback.print_exc()
1167
- if screening_id in screenings_db:
1168
- screenings_db[screening_id]["status"] = "failed"
1169
- screenings_db[screening_id]["error"] = str(e)
1170
- else:
1171
- logger.error("[process_screening] Failed for unknown screening %s: %s", screening_id, str(e))
1172
-
1173
# -----------------------
# Run server (for local debugging)
# -----------------------
if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    # reload=False: this entry point is for plain local runs, not dev hot-reload.
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
 
1
  """
2
  Elderly HealthWatch AI Backend (FastAPI)
3
  Pipeline:
 
14
  * always returns raw VLM output in API responses,
15
  * extracts JSON from VLM via regex when possible, and
16
  * sends only the face image to the VLM (not the eye image).
17
+ * uploads face image to temp hosting and uses URL instead of file path
18
  """
19
 
20
  import io
 
51
  GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct")
52
  LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
53
  HF_TOKEN = os.getenv("HF_TOKEN", None)
54
USE_IMAGE_URLS = True  # Always use URLs instead of files for VLM
# NOTE(review): run_vlm_and_get_features() defaults to use_url=False and this
# flag is not referenced in the visible code — confirm the caller actually
# consults USE_IMAGE_URLS, otherwise this is dead configuration.
55
 
56
  # Default VLM prompt
57
  DEFAULT_VLM_PROMPT = (
 
246
  }
247
  return out
248
 
249
+ # -----------------------
250
+ # Image upload to temp hosting
251
+ # -----------------------
252
+ import httpx # make sure to add httpx to requirements
253
+ import base64
254
+
255
# helper: upload image to temporary hosting and get URL
async def upload_image_to_temp_host(image_path: str) -> str:
    """
    Upload an image to a temporary hosting service (tmpfiles.org, ~24h retention).

    Args:
        image_path: Local filesystem path of the image to upload.

    Returns:
        Public direct-download URL of the uploaded image.

    Raises:
        HTTPException: status 500 if the upload fails for any reason.

    Alternative services: catbox.moe, 0x0.st, etc.
    """
    try:
        with open(image_path, "rb") as f:
            # Give the multipart part an explicit filename so the host keeps
            # the original extension (helps downstream content-type sniffing).
            files = {"file": (os.path.basename(image_path), f)}
            async with httpx.AsyncClient(timeout=30.0) as client:
                # tmpfiles.org: free temporary host, ~24 hour retention.
                response = await client.post("https://tmpfiles.org/api/v1/upload", files=files)
                response.raise_for_status()
                result = response.json()

        # tmpfiles.org returns: {"status": "success", "data": {"url": "..."}}
        if result.get("status") == "success":
            url = result["data"]["url"]
            # Convert the landing-page URL to a direct-download URL.
            # Replace only the FIRST occurrence: a filename/path segment that
            # happens to contain "tmpfiles.org/" must not be rewritten too.
            url = url.replace("tmpfiles.org/", "tmpfiles.org/dl/", 1)
            logger.info(f"Image uploaded successfully: {url}")
            return url
        raise ValueError(f"Upload failed: {result}")
    except Exception as e:
        logger.exception(f"Failed to upload image to temp host: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to upload image: {e}")
283
+
284
# helper: download URL to file with safety checks
def _cleanup_partial_download(path: str) -> None:
    """Best-effort removal of a partially written download artifact."""
    try:
        if os.path.exists(path):
            os.remove(path)
    except OSError:
        # Cleanup is advisory only; the original error is what matters.
        pass

async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
    """
    Download an image from `url` and save it to `dest_path`.

    Guards:
      - request timeout
      - maximum size, enforced WHILE streaming, so an oversized response is
        aborted mid-transfer instead of after it has been fully downloaded
      - basic Content-Type check (must be image/*)

    Raises:
        HTTPException: status 400 on any fetch/validation failure. Any
        partially written destination file is removed before raising.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
            # client.stream() leaves the body unread until iterated, unlike
            # client.get() which buffers the whole payload first and would
            # defeat the max_bytes guard below.
            async with client.stream("GET", url) as resp:
                resp.raise_for_status()

                content_type = resp.headers.get("Content-Type", "")
                if not content_type.startswith("image/"):
                    raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")

                total = 0
                with open(dest_path, "wb") as f:
                    async for chunk in resp.aiter_bytes():
                        if not chunk:
                            continue
                        total += len(chunk)
                        if total > max_bytes:
                            raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
                        f.write(chunk)
    except httpx.HTTPStatusError as e:
        _cleanup_partial_download(dest_path)
        raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
    except Exception as e:
        _cleanup_partial_download(dest_path)
        raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
316
+
317
  # -----------------------
318
  # Gradio / VLM helper (sends only face image, returns meta)
319
  # -----------------------
 
325
  return Client(space)
326
 
327
  def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
328
+ raise_on_file_delivery_failure: bool = False,
329
+ use_url: bool = False
330
  ) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
331
  """
332
+ Synchronous call to remote VLM (gradio /chat_fn). Sends ONLY the face image.
333
+ If use_url=True, uploads image to temp host and sends URL instead of file path.
334
  Returns tuple: (parsed_features_dict_or_None, raw_text_response_str, meta)
335
  meta includes:
336
  - vlm_file_delivery_ok (bool) # expects ≥1 file acknowledged (face)
337
  - vlm_files_seen (int or None)
338
  - vlm_raw_len (int)
339
  - vlm_out_object (short repr)
340
+ - face_url (str, if use_url=True)
341
  """
342
  prompt = prompt or DEFAULT_VLM_PROMPT
343
 
344
 
345
  if not os.path.exists(face_path):
346
  raise FileNotFoundError(f"Face image not found at: {face_path}")
347
+ if eye_path and not os.path.exists(eye_path):
348
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
349
 
350
  face_size = os.path.getsize(face_path)
351
+ logger.info(f"VLM input file - Face: {face_size} bytes")
 
352
 
353
+ if face_size == 0:
354
+ raise ValueError("Face image is empty (0 bytes)")
355
 
356
  if not GRADIO_AVAILABLE:
357
  raise RuntimeError("gradio_client not available in this environment.")
358
 
359
+ # Verify file can be opened as image
 
 
360
  try:
361
  Image.open(face_path).verify()
362
+ logger.info("Face image verified as valid")
 
363
  except Exception as e:
364
+ raise ValueError(f"Invalid image file: {e}")
365
 
 
 
 
366
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
367
+
368
+ meta: Dict[str, Any] = {
369
+ "vlm_file_delivery_ok": False,
370
+ "vlm_files_seen": None,
371
+ "vlm_raw_len": 0,
372
+ "vlm_out_object": None
373
+ }
374
 
375
+ # Upload to temp host if use_url=True
376
+ if use_url:
377
+ try:
378
+ # Run async upload in sync context using asyncio
379
+ loop = asyncio.new_event_loop()
380
+ asyncio.set_event_loop(loop)
381
+ face_url = loop.run_until_complete(upload_image_to_temp_host(face_path))
382
+ loop.close()
383
+
384
+ meta["face_url"] = face_url
385
+ logger.info(f"Using image URL for VLM: {face_url}")
386
+
387
+ # Pass URL directly to Gradio client using handle_file
388
+ message = {"text": prompt, "files": [handle_file(face_url)]}
389
+ except Exception as e:
390
+ logger.exception("Failed to upload image to temp host")
391
+ raise RuntimeError(f"Image upload failed: {e}")
392
+ else:
393
+ # Original behavior: use file path
394
+ message = {"text": prompt, "files": [handle_file(face_path)]}
395
 
396
  # SINGLE CALL (no retries)
397
  try:
398
+ logger.info("Calling VLM Space %s with %s", GRADIO_VLM_SPACE, "URL" if use_url else "file")
399
  result = client.predict(message=message, history=[], api_name="/chat_fn")
400
  except Exception as e:
401
  logger.exception("VLM call failed (no retries)")
 
495
  Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
496
  - Logs the VLM raw string and the chosen payload.
497
  - Sends cleaned JSON (json.dumps(vlm_features)) if vlm_features_or_raw is dict, else sends raw string.
498
+ - Uses regex to extract the final JSON from