dpv007 committed on
Commit
56f3b6f
·
verified ·
1 Parent(s): 6d0113b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +262 -28
app.py CHANGED
@@ -295,69 +295,189 @@ def call_vlm(face_path: str, eye_path: str, prompt: Optional[str] = None) -> Tup
295
  if not os.path.exists(face_path) or not os.path.exists(eye_path):
296
  raise FileNotFoundError("Face or eye image path missing")
297
 
 
 
 
 
 
 
298
  client = get_gradio_client(GRADIO_VLM_SPACE)
299
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
300
 
301
  try:
302
- logger.info("Calling VLM Space: %s", GRADIO_VLM_SPACE)
303
  result = client.predict(message=message, history=[], api_name="/chat_fn")
 
 
304
  except Exception as e:
305
  logger.exception("VLM call failed")
306
  raise RuntimeError(f"VLM call failed: {e}")
307
 
308
- # Normalize result
309
  if isinstance(result, (list, tuple)):
310
- out = result[0]
 
 
 
 
311
  elif isinstance(result, dict):
 
312
  out = result
313
  else:
 
314
  out = {"text": str(result)}
315
 
316
- text_out = out.get("text") or out.get("output") or json.dumps(out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
 
318
  # Try to parse JSON
319
  parsed = None
320
  try:
321
  parsed = json.loads(text_out)
322
  if not isinstance(parsed, dict):
 
323
  parsed = None
324
- except Exception:
 
 
 
325
  # Try to extract JSON from text
326
  try:
327
  first = text_out.find("{")
328
  last = text_out.rfind("}")
329
  if first != -1 and last != -1 and last > first:
330
- parsed = json.loads(text_out[first:last+1])
 
331
  if not isinstance(parsed, dict):
 
332
  parsed = None
333
- except Exception:
 
 
 
334
  parsed = None
335
 
336
  return parsed, text_out
337
 
338
- def call_llm(vlm_output: Any) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  """Call LLM with VLM output and return structured risk assessment"""
340
  if not GRADIO_AVAILABLE:
 
 
341
  raise RuntimeError("gradio_client not installed")
342
 
343
- client = get_gradio_client(LLM_GRADIO_SPACE)
344
-
345
- # Prepare input
346
  vlm_text = vlm_output if isinstance(vlm_output, str) else json.dumps(vlm_output, default=str)
347
 
 
 
 
 
 
 
 
 
348
  instruction = (
349
  "\n\nSTRICT INSTRUCTIONS:\n"
350
  "1) OUTPUT ONLY a single valid JSON object — no prose, no code fences.\n"
351
  "2) Include keys: risk_score, jaundice_probability, anemia_probability, "
352
  "hydration_issue_probability, neurological_issue_probability, summary, recommendation, confidence.\n"
353
  "3) Use numeric values for probabilities (0-1) and risk_score (0-100).\n"
354
- "4) Use neutral wording in summary/recommendation.\n\n"
 
355
  "VLM Output:\n" + vlm_text + "\n"
356
  )
357
 
358
  # Call with safe defaults
359
  try:
 
360
  logger.info("Calling LLM Space: %s", LLM_GRADIO_SPACE)
 
361
  result = client.predict(
362
  input_data=instruction,
363
  max_new_tokens=1024.0,
@@ -378,10 +498,36 @@ def call_llm(vlm_output: Any) -> Dict[str, Any]:
378
  parsed = extract_json_from_llm_output(text_out)
379
  logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2))
380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  return parsed
382
 
383
  except Exception as e:
384
- logger.exception("LLM call failed")
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  raise RuntimeError(f"LLM call failed: {e}")
386
 
387
  # ============================================================================
@@ -421,9 +567,9 @@ async def process_screening(screening_id: str):
421
  # Call VLM
422
  vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
423
 
424
- # Call LLM
425
  llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
426
- structured_risk = await asyncio.to_thread(call_llm, llm_input)
427
 
428
  # Store results
429
  screenings_db[screening_id]["ai_results"] = {
@@ -487,12 +633,37 @@ async def read_root():
487
 
488
  @app.get("/health")
489
  async def health_check():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  return {
491
  "status": "healthy",
492
  "detector": detector_type or "none",
493
  "vlm_available": GRADIO_AVAILABLE,
494
  "vlm_space": GRADIO_VLM_SPACE,
495
- "llm_space": LLM_GRADIO_SPACE
 
 
 
496
  }
497
 
498
  @app.post("/api/v1/validate-eye-photo")
@@ -615,14 +786,49 @@ async def get_history(user_id: str):
615
  history = [s for s in screenings_db.values() if s.get("user_id") == user_id]
616
  return {"screenings": history}
617
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
  @app.post("/api/v1/get-vitals")
619
  async def get_vitals_from_upload(
620
  face_image: UploadFile = File(...),
621
  eye_image: UploadFile = File(...)
622
  ):
623
- """Synchronous VLM + LLM pipeline"""
624
  if not GRADIO_AVAILABLE:
625
- raise HTTPException(status_code=500, detail="VLM/LLM not available")
 
 
 
626
 
627
  try:
628
  uid = str(uuid.uuid4())
@@ -634,23 +840,38 @@ async def get_vitals_from_upload(
634
  with open(eye_path, "wb") as f:
635
  f.write(await eye_image.read())
636
 
 
637
  vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
 
 
638
  llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
639
- structured_risk = await asyncio.to_thread(call_llm, llm_input)
640
 
641
  return {
642
  "vlm_features": vlm_features,
643
  "vlm_raw": vlm_raw,
644
- "structured_risk": structured_risk
 
645
  }
646
 
647
  except Exception as e:
648
  logger.exception("Get vitals failed")
649
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
650
 
651
  @app.post("/api/v1/get-vitals/{screening_id}")
652
  async def get_vitals_for_screening(screening_id: str):
653
- """Re-run VLM + LLM on existing screening"""
654
  if screening_id not in screenings_db:
655
  raise HTTPException(status_code=404, detail="Screening not found")
656
 
@@ -659,31 +880,44 @@ async def get_vitals_for_screening(screening_id: str):
659
  eye_path = entry.get("eye_image_path")
660
 
661
  if not (face_path and os.path.exists(face_path) and eye_path and os.path.exists(eye_path)):
662
- raise HTTPException(status_code=400, detail="Images missing")
663
 
664
  try:
665
  vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
666
  llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
667
- structured_risk = await asyncio.to_thread(call_llm, llm_input)
668
 
669
  entry.setdefault("ai_results", {}).update({
670
  "vlm_features": vlm_features,
671
  "vlm_raw": vlm_raw,
672
  "structured_risk": structured_risk,
673
- "last_vitals_run": datetime.utcnow().isoformat() + "Z"
 
674
  })
675
 
676
  return {
677
  "screening_id": screening_id,
678
  "vlm_features": vlm_features,
679
  "vlm_raw": vlm_raw,
680
- "structured_risk": structured_risk
 
681
  }
682
 
683
  except Exception as e:
684
  logger.exception("Get vitals for screening failed")
685
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
686
 
687
  if __name__ == "__main__":
688
  import uvicorn
689
- uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
295
  if not os.path.exists(face_path) or not os.path.exists(eye_path):
296
  raise FileNotFoundError("Face or eye image path missing")
297
 
298
+ logger.info("VLM Input - Face: %s (exists: %s, size: %d bytes)",
299
+ face_path, os.path.exists(face_path), os.path.getsize(face_path))
300
+ logger.info("VLM Input - Eye: %s (exists: %s, size: %d bytes)",
301
+ eye_path, os.path.exists(eye_path), os.path.getsize(eye_path))
302
+ logger.info("VLM Prompt: %s", prompt[:100])
303
+
304
  client = get_gradio_client(GRADIO_VLM_SPACE)
305
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
306
 
307
  try:
308
+ logger.info("Calling VLM Space: %s with api_name=/chat_fn", GRADIO_VLM_SPACE)
309
  result = client.predict(message=message, history=[], api_name="/chat_fn")
310
+ logger.info("VLM raw result type: %s", type(result))
311
+ logger.info("VLM raw result: %s", str(result)[:500])
312
  except Exception as e:
313
  logger.exception("VLM call failed")
314
  raise RuntimeError(f"VLM call failed: {e}")
315
 
316
+ # Normalize result - handle different return formats
317
  if isinstance(result, (list, tuple)):
318
+ logger.info("VLM returned list/tuple with %d elements", len(result))
319
+ if len(result) > 0:
320
+ out = result[0]
321
+ else:
322
+ out = {}
323
  elif isinstance(result, dict):
324
+ logger.info("VLM returned dict with keys: %s", list(result.keys()))
325
  out = result
326
  else:
327
+ logger.info("VLM returned unknown type, converting to string")
328
  out = {"text": str(result)}
329
 
330
+ # Extract text from various possible formats
331
+ text_out = None
332
+ if isinstance(out, dict):
333
+ text_out = out.get("text") or out.get("output") or out.get("content")
334
+
335
+ if not text_out:
336
+ # If still no text, try the whole result
337
+ if isinstance(result, str):
338
+ text_out = result
339
+ else:
340
+ text_out = json.dumps(out)
341
+
342
+ logger.info("VLM extracted text (first 300 chars): %s", text_out[:300] if text_out else "EMPTY")
343
+
344
+ if not text_out or len(text_out.strip()) == 0:
345
+ logger.warning("VLM returned empty text output!")
346
+ text_out = "{}" # Provide empty JSON as fallback
347
 
348
  # Try to parse JSON
349
  parsed = None
350
  try:
351
  parsed = json.loads(text_out)
352
  if not isinstance(parsed, dict):
353
+ logger.warning("VLM JSON parsed but not a dict: %s", type(parsed))
354
  parsed = None
355
+ else:
356
+ logger.info("VLM successfully parsed JSON with keys: %s", list(parsed.keys()))
357
+ except Exception as parse_err:
358
+ logger.info("VLM text is not direct JSON: %s", str(parse_err))
359
  # Try to extract JSON from text
360
  try:
361
  first = text_out.find("{")
362
  last = text_out.rfind("}")
363
  if first != -1 and last != -1 and last > first:
364
+ json_str = text_out[first:last+1]
365
+ parsed = json.loads(json_str)
366
  if not isinstance(parsed, dict):
367
+ logger.warning("Extracted JSON is not a dict")
368
  parsed = None
369
+ else:
370
+ logger.info("Successfully extracted JSON from text with keys: %s", list(parsed.keys()))
371
+ except Exception as extract_err:
372
+ logger.warning("Could not extract JSON from VLM text: %s", str(extract_err))
373
  parsed = None
374
 
375
  return parsed, text_out
376
 
377
def get_fallback_risk_assessment(vlm_output: Any, reason: str = "LLM unavailable") -> Dict[str, Any]:
    """Generate a basic heuristic risk assessment from VLM output when the LLM is unavailable.

    Args:
        vlm_output: Raw VLM result — either a dict of features or a JSON string.
        reason: Short tag recorded in the response as ``fallback_reason``.

    Returns:
        Dict with the same keys the LLM would produce (risk_score, per-condition
        probabilities, confidence, summary, recommendation) plus ``fallback_mode``
        and ``fallback_reason`` markers so callers can detect degraded results.
    """
    logger.warning("Using fallback risk assessment: %s", reason)

    # Coerce the VLM output into a dict of features; tolerate malformed input.
    vlm_dict: Dict[str, Any] = {}
    if isinstance(vlm_output, dict):
        vlm_dict = vlm_output
    elif isinstance(vlm_output, str):
        try:
            loaded = json.loads(vlm_output)
            # BUG FIX: json.loads may return a list/str/number; calling
            # .values() on it below would crash this never-fail fallback.
            if isinstance(loaded, dict):
                vlm_dict = loaded
        except Exception:
            pass

    # Check if VLM data is empty/invalid
    has_data = bool(vlm_dict and any(vlm_dict.values()))

    if not has_data:
        logger.warning("VLM output is empty or invalid, returning minimal assessment")
        return {
            "risk_score": 0.0,
            "jaundice_probability": 0.0,
            "anemia_probability": 0.0,
            "hydration_issue_probability": 0.0,
            "neurological_issue_probability": 0.0,
            "confidence": 0.1,
            "summary": "Unable to analyze images. Please ensure photos are clear and well-lit.",
            "recommendation": "Retake photos with better lighting and clearer view of face and eyes.",
            "fallback_mode": True,
            "fallback_reason": "no_vlm_data"
        }

    # Basic heuristic risk scoring based on VLM features
    risk_score = 20.0  # Conservative default
    jaundice_prob = 0.0
    anemia_prob = 0.0
    hydration_prob = 0.0
    neuro_prob = 0.0  # no neurological heuristic is derivable from these features

    # Look for color indicators reported by the VLM, if present.
    sclera_yellow = vlm_dict.get("sclera_yellowness", 0)
    pallor = vlm_dict.get("pallor_score", 0)
    redness = vlm_dict.get("redness", 0)

    if isinstance(sclera_yellow, (int, float)) and sclera_yellow > 0.3:
        jaundice_prob = min(0.6, sclera_yellow)
        risk_score += 15

    if isinstance(pallor, (int, float)) and pallor > 0.4:
        anemia_prob = min(0.7, pallor)
        risk_score += 20

    if isinstance(redness, (int, float)) and redness > 0.5:
        hydration_prob = min(0.5, redness * 0.8)
        risk_score += 10

    return {
        "risk_score": round(min(100.0, risk_score), 2),
        "jaundice_probability": round(jaundice_prob, 4),
        "anemia_probability": round(anemia_prob, 4),
        "hydration_issue_probability": round(hydration_prob, 4),
        "neurological_issue_probability": round(neuro_prob, 4),
        "confidence": 0.4,  # Low confidence for fallback
        "summary": "Basic screening completed. Advanced AI analysis temporarily unavailable.",
        "recommendation": "Consider consulting a healthcare professional for a comprehensive assessment.",
        "fallback_mode": True,
        "fallback_reason": reason
    }
446
+
447
+ def call_llm(vlm_output: Any, use_fallback_on_error: bool = True) -> Dict[str, Any]:
448
  """Call LLM with VLM output and return structured risk assessment"""
449
  if not GRADIO_AVAILABLE:
450
+ if use_fallback_on_error:
451
+ return get_fallback_risk_assessment(vlm_output, reason="gradio_not_available")
452
  raise RuntimeError("gradio_client not installed")
453
 
454
+ # Check if VLM output is empty/useless
 
 
455
  vlm_text = vlm_output if isinstance(vlm_output, str) else json.dumps(vlm_output, default=str)
456
 
457
+ # Detect empty or minimal VLM output
458
+ if not vlm_text or vlm_text.strip() in ["{}", "[]", ""]:
459
+ logger.warning("VLM output is empty, using fallback assessment")
460
+ if use_fallback_on_error:
461
+ return get_fallback_risk_assessment(vlm_output, reason="empty_vlm_output")
462
+ raise RuntimeError("VLM output is empty")
463
+
464
+ # Prepare input
465
  instruction = (
466
  "\n\nSTRICT INSTRUCTIONS:\n"
467
  "1) OUTPUT ONLY a single valid JSON object — no prose, no code fences.\n"
468
  "2) Include keys: risk_score, jaundice_probability, anemia_probability, "
469
  "hydration_issue_probability, neurological_issue_probability, summary, recommendation, confidence.\n"
470
  "3) Use numeric values for probabilities (0-1) and risk_score (0-100).\n"
471
+ "4) Use neutral wording in summary/recommendation.\n"
472
+ "5) If VLM data is minimal or unclear, set low probabilities and low confidence.\n\n"
473
  "VLM Output:\n" + vlm_text + "\n"
474
  )
475
 
476
  # Call with safe defaults
477
  try:
478
+ client = get_gradio_client(LLM_GRADIO_SPACE)
479
  logger.info("Calling LLM Space: %s", LLM_GRADIO_SPACE)
480
+
481
  result = client.predict(
482
  input_data=instruction,
483
  max_new_tokens=1024.0,
 
498
  parsed = extract_json_from_llm_output(text_out)
499
  logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2))
500
 
501
+ # Check if LLM returned essentially empty results (all zeros)
502
+ all_zero = all(
503
+ parsed.get(k, 0) == 0
504
+ for k in ["jaundice_probability", "anemia_probability",
505
+ "hydration_issue_probability", "neurological_issue_probability"]
506
+ )
507
+
508
+ if all_zero and parsed.get("risk_score", 0) == 0:
509
+ logger.warning("LLM returned all-zero assessment, likely due to poor VLM input")
510
+ parsed["summary"] = "Image analysis incomplete. Please ensure photos are clear and well-lit."
511
+ parsed["recommendation"] = "Retake photos with face clearly visible and eyes open."
512
+ parsed["confidence"] = 0.1
513
+
514
  return parsed
515
 
516
  except Exception as e:
517
+ logger.exception("LLM call failed: %s", str(e))
518
+
519
+ # Check if it's a quota error
520
+ error_msg = str(e).lower()
521
+ if "quota" in error_msg or "gpu" in error_msg:
522
+ logger.warning("GPU quota exceeded, using fallback assessment")
523
+ if use_fallback_on_error:
524
+ return get_fallback_risk_assessment(vlm_output, reason="gpu_quota_exceeded")
525
+
526
+ # For other errors, also use fallback if enabled
527
+ if use_fallback_on_error:
528
+ logger.warning("LLM error, using fallback assessment")
529
+ return get_fallback_risk_assessment(vlm_output, reason=f"llm_error: {str(e)[:100]}")
530
+
531
  raise RuntimeError(f"LLM call failed: {e}")
532
 
533
  # ============================================================================
 
567
  # Call VLM
568
  vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
569
 
570
+ # Call LLM with fallback enabled
571
  llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
572
+ structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
573
 
574
  # Store results
575
  screenings_db[screening_id]["ai_results"] = {
 
633
 
634
@app.get("/health")
async def health_check():
    """Report service health, including a connectivity-only LLM status probe."""
    llm_status, llm_message = "available", None

    if not GRADIO_AVAILABLE:
        llm_status, llm_message = "not_installed", "Gradio client not available"
    else:
        # Probe reachability only — no inference is run here.
        try:
            get_gradio_client(LLM_GRADIO_SPACE)
        except Exception as probe_err:
            lowered = str(probe_err).lower()
            if "quota" in lowered or "gpu" in lowered:
                llm_status = "quota_exceeded"
                llm_message = "GPU quota exceeded. Using fallback assessments."
            else:
                llm_status = "error"
                llm_message = "LLM temporarily unavailable"

    return {
        "status": "healthy",
        "detector": detector_type or "none",
        "vlm_available": GRADIO_AVAILABLE,
        "vlm_space": GRADIO_VLM_SPACE,
        "llm_space": LLM_GRADIO_SPACE,
        "llm_status": llm_status,
        "llm_message": llm_message,
        "fallback_enabled": True
    }
668
 
669
  @app.post("/api/v1/validate-eye-photo")
 
786
  history = [s for s in screenings_db.values() if s.get("user_id") == user_id]
787
  return {"screenings": history}
788
 
789
@app.get("/api/v1/debug/spaces")
async def debug_spaces():
    """Debug endpoint: probe connectivity to the VLM and LLM Gradio spaces.

    Returns:
        Dict keyed by "vlm"/"llm"; each entry has ``available`` (bool),
        ``error`` (str or None), and ``space`` (set only on success).
    """
    results = {
        "vlm": {"available": False, "error": None},
        "llm": {"available": False, "error": None}
    }

    # Probe both spaces with one loop instead of two copy-pasted blocks.
    for key, space in (("vlm", GRADIO_VLM_SPACE), ("llm", LLM_GRADIO_SPACE)):
        if not GRADIO_AVAILABLE:
            results[key]["error"] = "Gradio not installed"
            continue
        try:
            get_gradio_client(space)  # connectivity check only, no inference
            results[key]["available"] = True
            results[key]["space"] = space
        except Exception as e:
            results[key]["error"] = str(e)

    return results
820
+
821
  @app.post("/api/v1/get-vitals")
822
  async def get_vitals_from_upload(
823
  face_image: UploadFile = File(...),
824
  eye_image: UploadFile = File(...)
825
  ):
826
+ """Synchronous VLM + LLM pipeline with graceful fallback"""
827
  if not GRADIO_AVAILABLE:
828
+ raise HTTPException(
829
+ status_code=503,
830
+ detail="AI services temporarily unavailable. Please try again later."
831
+ )
832
 
833
  try:
834
  uid = str(uuid.uuid4())
 
840
  with open(eye_path, "wb") as f:
841
  f.write(await eye_image.read())
842
 
843
+ # Call VLM
844
  vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
845
+
846
+ # Call LLM with fallback enabled
847
  llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
848
+ structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
849
 
850
  return {
851
  "vlm_features": vlm_features,
852
  "vlm_raw": vlm_raw,
853
+ "structured_risk": structured_risk,
854
+ "using_fallback": structured_risk.get("fallback_mode", False)
855
  }
856
 
857
  except Exception as e:
858
  logger.exception("Get vitals failed")
859
+ error_msg = str(e).lower()
860
+
861
+ if "quota" in error_msg or "gpu" in error_msg:
862
+ raise HTTPException(
863
+ status_code=503,
864
+ detail="AI service is currently at capacity. Please try again in a few minutes."
865
+ )
866
+
867
+ raise HTTPException(
868
+ status_code=500,
869
+ detail="Unable to process images. Please ensure images are clear and try again."
870
+ )
871
 
872
  @app.post("/api/v1/get-vitals/{screening_id}")
873
  async def get_vitals_for_screening(screening_id: str):
874
+ """Re-run VLM + LLM on existing screening with fallback support"""
875
  if screening_id not in screenings_db:
876
  raise HTTPException(status_code=404, detail="Screening not found")
877
 
 
880
  eye_path = entry.get("eye_image_path")
881
 
882
  if not (face_path and os.path.exists(face_path) and eye_path and os.path.exists(eye_path)):
883
+ raise HTTPException(status_code=400, detail="Images missing for this screening")
884
 
885
  try:
886
  vlm_features, vlm_raw = await asyncio.to_thread(call_vlm, face_path, eye_path)
887
  llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
888
+ structured_risk = await asyncio.to_thread(call_llm, llm_input, use_fallback_on_error=True)
889
 
890
  entry.setdefault("ai_results", {}).update({
891
  "vlm_features": vlm_features,
892
  "vlm_raw": vlm_raw,
893
  "structured_risk": structured_risk,
894
+ "last_vitals_run": datetime.utcnow().isoformat() + "Z",
895
+ "using_fallback": structured_risk.get("fallback_mode", False)
896
  })
897
 
898
  return {
899
  "screening_id": screening_id,
900
  "vlm_features": vlm_features,
901
  "vlm_raw": vlm_raw,
902
+ "structured_risk": structured_risk,
903
+ "using_fallback": structured_risk.get("fallback_mode", False)
904
  }
905
 
906
  except Exception as e:
907
  logger.exception("Get vitals for screening failed")
908
+ error_msg = str(e).lower()
909
+
910
+ if "quota" in error_msg or "gpu" in error_msg:
911
+ raise HTTPException(
912
+ status_code=503,
913
+ detail="AI service is currently at capacity. Please try again in a few minutes."
914
+ )
915
+
916
+ raise HTTPException(
917
+ status_code=500,
918
+ detail="Unable to re-process screening. Please try again."
919
+ )
920
 
921
  if __name__ == "__main__":
922
  import uvicorn
923
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)