dpv007 commited on
Commit
0baec85
·
verified ·
1 Parent(s): 7dedfff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +791 -111
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  """
2
  Elderly HealthWatch AI Backend (FastAPI)
3
  Pipeline:
@@ -14,7 +15,6 @@ Notes:
14
  * always returns raw VLM output in API responses,
15
  * extracts JSON from VLM via regex when possible, and
16
  * sends only the face image to the VLM (not the eye image).
17
- * uploads face image to temp hosting and uses URL instead of file path
18
  """
19
 
20
  import io
@@ -51,7 +51,6 @@ logger = logging.getLogger("elderly_healthwatch")
51
  GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct")
52
  LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
53
  HF_TOKEN = os.getenv("HF_TOKEN", None)
54
- USE_IMAGE_URLS = True # Always use URLs instead of files for VLM
55
 
56
  # Default VLM prompt
57
  DEFAULT_VLM_PROMPT = (
@@ -246,74 +245,6 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
246
  }
247
  return out
248
 
249
- # -----------------------
250
- # Image upload to temp hosting
251
- # -----------------------
252
- import httpx # make sure to add httpx to requirements
253
- import base64
254
-
255
- # helper: upload image to temporary hosting and get URL
256
- async def upload_image_to_temp_host(image_path: str) -> str:
257
- """
258
- Upload an image to a temporary hosting service (using tmpfiles.org as example).
259
- Returns the public URL of the uploaded image.
260
- Alternative services: catbox.moe, 0x0.st, etc.
261
- """
262
- try:
263
- with open(image_path, 'rb') as f:
264
- files = {'file': f}
265
- async with httpx.AsyncClient(timeout=30.0) as client:
266
- # Using tmpfiles.org as temporary host (24 hour retention)
267
- response = await client.post('https://tmpfiles.org/api/v1/upload', files=files)
268
- response.raise_for_status()
269
- result = response.json()
270
-
271
- # tmpfiles.org returns: {"status": "success", "data": {"url": "..."}}
272
- if result.get('status') == 'success':
273
- url = result['data']['url']
274
- # Convert download URL to direct URL
275
- url = url.replace('tmpfiles.org/', 'tmpfiles.org/dl/')
276
- logger.info(f"Image uploaded successfully: {url}")
277
- return url
278
- else:
279
- raise ValueError(f"Upload failed: {result}")
280
- except Exception as e:
281
- logger.exception(f"Failed to upload image to temp host: {e}")
282
- raise HTTPException(status_code=500, detail=f"Failed to upload image: {e}")
283
-
284
- # helper: download URL to file with safety checks
285
- async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
286
- """
287
- Download an image from `url` and save to dest_path.
288
- Guards:
289
- - timeout
290
- - max bytes
291
- - basic content-type check (image/*)
292
- Raises HTTPException on failure.
293
- """
294
- try:
295
- async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
296
- resp = await client.get(url, timeout=timeout_seconds)
297
- resp.raise_for_status()
298
-
299
- content_type = resp.headers.get("Content-Type", "")
300
- if not content_type.startswith("image/"):
301
- raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")
302
-
303
- total = 0
304
- with open(dest_path, "wb") as f:
305
- async for chunk in resp.aiter_bytes():
306
- if not chunk:
307
- continue
308
- total += len(chunk)
309
- if total > max_bytes:
310
- raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
311
- f.write(chunk)
312
- except httpx.HTTPStatusError as e:
313
- raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
314
- except Exception as e:
315
- raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
316
-
317
  # -----------------------
318
  # Gradio / VLM helper (sends only face image, returns meta)
319
  # -----------------------
@@ -325,77 +256,58 @@ def get_gradio_client_for_space(space: str) -> Client:
325
  return Client(space)
326
 
327
  def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
328
- raise_on_file_delivery_failure: bool = False,
329
- use_url: bool = False
330
  ) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
331
  """
332
- Synchronous call to remote VLM (gradio /chat_fn). Sends ONLY the face image.
333
- If use_url=True, uploads image to temp host and sends URL instead of file path.
334
  Returns tuple: (parsed_features_dict_or_None, raw_text_response_str, meta)
335
  meta includes:
336
  - vlm_file_delivery_ok (bool) # expects ≥1 file acknowledged (face)
337
  - vlm_files_seen (int or None)
338
  - vlm_raw_len (int)
339
  - vlm_out_object (short repr)
340
- - face_url (str, if use_url=True)
341
  """
342
  prompt = prompt or DEFAULT_VLM_PROMPT
343
 
344
 
345
  if not os.path.exists(face_path):
346
  raise FileNotFoundError(f"Face image not found at: {face_path}")
347
- if eye_path and not os.path.exists(eye_path):
348
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
349
 
350
  face_size = os.path.getsize(face_path)
351
- logger.info(f"VLM input file - Face: {face_size} bytes")
 
352
 
353
- if face_size == 0:
354
- raise ValueError("Face image is empty (0 bytes)")
355
 
356
  if not GRADIO_AVAILABLE:
357
  raise RuntimeError("gradio_client not available in this environment.")
358
 
359
- # Verify file can be opened as image
 
 
360
  try:
361
  Image.open(face_path).verify()
362
- logger.info("Face image verified as valid")
 
363
  except Exception as e:
364
- raise ValueError(f"Invalid image file: {e}")
365
 
366
- client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
367
 
368
- meta: Dict[str, Any] = {
369
- "vlm_file_delivery_ok": False,
370
- "vlm_files_seen": None,
371
- "vlm_raw_len": 0,
372
- "vlm_out_object": None
373
- }
374
 
375
- # Upload to temp host if use_url=True
376
- if use_url:
377
- try:
378
- # Run async upload in sync context using asyncio
379
- loop = asyncio.new_event_loop()
380
- asyncio.set_event_loop(loop)
381
- face_url = loop.run_until_complete(upload_image_to_temp_host(face_path))
382
- loop.close()
383
-
384
- meta["face_url"] = face_url
385
- logger.info(f"Using image URL for VLM: {face_url}")
386
-
387
- # Pass URL directly to Gradio client using handle_file
388
- message = {"text": prompt, "files": [handle_file(face_url)]}
389
- except Exception as e:
390
- logger.exception("Failed to upload image to temp host")
391
- raise RuntimeError(f"Image upload failed: {e}")
392
- else:
393
- # Original behavior: use file path
394
- message = {"text": prompt, "files": [handle_file(face_path)]}
395
 
396
  # SINGLE CALL (no retries)
397
  try:
398
- logger.info("Calling VLM Space %s with %s", GRADIO_VLM_SPACE, "URL" if use_url else "file")
399
  result = client.predict(message=message, history=[], api_name="/chat_fn")
400
  except Exception as e:
401
  logger.exception("VLM call failed (no retries)")
@@ -495,4 +407,772 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
495
  Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
496
  - Logs the VLM raw string and the chosen payload.
497
  - Sends cleaned JSON (json.dumps(vlm_features)) if vlm_features_or_raw is dict, else sends raw string.
498
- - Uses regex to extract the final JSON from
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
  """
3
  Elderly HealthWatch AI Backend (FastAPI)
4
  Pipeline:
 
15
  * always returns raw VLM output in API responses,
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends only the face image to the VLM (not the eye image).
 
18
  """
19
 
20
  import io
 
51
  GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct")
52
  LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
53
  HF_TOKEN = os.getenv("HF_TOKEN", None)
 
54
 
55
  # Default VLM prompt
56
  DEFAULT_VLM_PROMPT = (
 
245
  }
246
  return out
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  # -----------------------
249
  # Gradio / VLM helper (sends only face image, returns meta)
250
  # -----------------------
 
256
  return Client(space)
257
 
258
  def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
259
+ raise_on_file_delivery_failure: bool = False
 
260
  ) -> Tuple[Optional[Dict[str, Any]], str, Dict[str, Any]]:
261
  """
262
+ Synchronous call to remote VLM (gradio /chat_fn). Sends ONLY the face image file.
 
263
  Returns tuple: (parsed_features_dict_or_None, raw_text_response_str, meta)
264
  meta includes:
265
  - vlm_file_delivery_ok (bool) # expects ≥1 file acknowledged (face)
266
  - vlm_files_seen (int or None)
267
  - vlm_raw_len (int)
268
  - vlm_out_object (short repr)
 
269
  """
270
  prompt = prompt or DEFAULT_VLM_PROMPT
271
 
272
 
273
  if not os.path.exists(face_path):
274
  raise FileNotFoundError(f"Face image not found at: {face_path}")
275
+ if not os.path.exists(eye_path):
276
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
277
 
278
  face_size = os.path.getsize(face_path)
279
+ eye_size = os.path.getsize(eye_path)
280
+ logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
281
 
282
+ if face_size == 0 or eye_size == 0:
283
+ raise ValueError("One or both images are empty (0 bytes)")
284
 
285
  if not GRADIO_AVAILABLE:
286
  raise RuntimeError("gradio_client not available in this environment.")
287
 
288
+ client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
289
+
290
+ # Verify files can be opened as images
291
  try:
292
  Image.open(face_path).verify()
293
+ Image.open(eye_path).verify()
294
+ logger.info("Both images verified as valid")
295
  except Exception as e:
296
+ raise ValueError(f"Invalid image file(s): {e}")
297
 
298
+ message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
299
 
300
+ logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
301
+ client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
302
+ # NOTE: only send face image to the Space
 
 
 
303
 
304
+ message = {"text": prompt, "files": [handle_file(face_path)]}
305
+
306
+ meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
  # SINGLE CALL (no retries)
309
  try:
310
+ logger.info("Calling VLM Space %s with 1 file (face only)", GRADIO_VLM_SPACE)
311
  result = client.predict(message=message, history=[], api_name="/chat_fn")
312
  except Exception as e:
313
  logger.exception("VLM call failed (no retries)")
 
407
  Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
408
  - Logs the VLM raw string and the chosen payload.
409
  - Sends cleaned JSON (json.dumps(vlm_features)) if vlm_features_or_raw is dict, else sends raw string.
410
+ - Uses regex to extract the final JSON from LLM raw output.
411
+ """
412
+ if not GRADIO_AVAILABLE:
413
+ raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
414
+
415
+ # Try to import AppError for specific handling; fallback to Exception if unavailable
416
+ try:
417
+ from gradio_client import AppError # type: ignore
418
+ except Exception:
419
+ AppError = Exception # fallback
420
+
421
+ client = get_gradio_client_for_space(LLM_GRADIO_SPACE)
422
+ model_identity = model_identity or LLM_MODEL_IDENTITY
423
+ system_prompt = system_prompt or LLM_SYSTEM_PROMPT
424
+ developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
425
+
426
+ # Decide what to send to LLM and log the raw input
427
+ if isinstance(vlm_features_or_raw, str):
428
+ vlm_raw_str = vlm_features_or_raw
429
+ logger.info("LLM input will be RAW VLM STRING (len=%d)", len(vlm_raw_str or ""))
430
+ vlm_json_str_to_send = vlm_raw_str if vlm_raw_str and vlm_raw_str.strip() else "{}"
431
+ else:
432
+ vlm_raw_str = json.dumps(vlm_features_or_raw, ensure_ascii=False) if vlm_features_or_raw else "{}"
433
+ logger.info("LLM input will be CLEANED VLM JSON (len=%d)", len(vlm_raw_str))
434
+ vlm_json_str_to_send = vlm_raw_str
435
+
436
+ # Build instruction payload
437
+ instruction = (
438
+ "\n\nSTRICT INSTRUCTIONS (READ CAREFULLY):\n"
439
+ "1) OUTPUT ONLY a single valid JSON object and nothing else — no prose, no explanation, no code fences.\n"
440
+ "2) The JSON MUST include these keys: risk_score, jaundice_probability, anemia_probability, "
441
+ "hydration_issue_probability, neurological_issue_probability, summary, recommendation, confidence.\n"
442
+ "3) Use numeric values for probabilities (0..1) and for risk_score (0..100). Use strings for summary and recommendation.\n"
443
+ "4) Do NOT mention disease names in summary or recommendation; use neutral wording only.\n"
444
+ "If you cannot estimate a value, set it to null.\n\n"
445
+ "Now, based on the VLM output below, produce ONLY the JSON object described above.\n\n"
446
+ "===BEGIN VLM OUTPUT===\n"
447
+ f"{vlm_json_str_to_send}\n"
448
+ "===END VLM OUTPUT===\n\n"
449
+ )
450
+
451
+ # Defensive coercion / clamps
452
+ try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
453
+ if try_max_new_tokens <= 0:
454
+ try_max_new_tokens = 1024
455
+
456
+ try_temperature = float(temperature) if temperature is not None else 0.0
457
+ # Some Spaces validate temperature >= 0.1
458
+ if try_temperature < 0.1:
459
+ try_temperature = 0.1
460
+
461
+ predict_kwargs = dict(
462
+ input_data=instruction,
463
+ max_new_tokens=float(try_max_new_tokens),
464
+ model_identity=model_identity,
465
+ system_prompt=system_prompt,
466
+ developer_prompt=developer_prompt,
467
+ reasoning_effort=reasoning_effort,
468
+ temperature=float(try_temperature),
469
+ top_p=0.9,
470
+ top_k=50,
471
+ repetition_penalty=1.0,
472
+ api_name="/chat"
473
+ )
474
+
475
+ last_exc = None
476
+ for attempt in (1, 2):
477
+ try:
478
+ logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
479
+ LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
480
+ result = client.predict(**predict_kwargs)
481
+
482
+ # normalize to string
483
+ if isinstance(result, (dict, list)):
484
+ text_out = json.dumps(result)
485
+ else:
486
+ text_out = str(result)
487
+
488
+ if not text_out or len(text_out.strip()) == 0:
489
+ raise RuntimeError("LLM returned empty response")
490
+
491
+ logger.info("LLM raw output (len=%d):\n%s", len(text_out or ""), (text_out[:2000] + "...") if len(text_out) > 2000 else text_out)
492
+
493
+ # parse with regex extractor (may raise)
494
+ parsed = None
495
+ try:
496
+ parsed = extract_json_via_regex(text_out)
497
+ except Exception:
498
+ # fallback: attempt json.loads naive
499
+ try:
500
+ parsed = json.loads(text_out)
501
+ if not isinstance(parsed, dict):
502
+ parsed = None
503
+ except Exception:
504
+ parsed = None
505
+
506
+ if parsed is None:
507
+ raise ValueError("Failed to extract JSON from LLM output")
508
+
509
+ # pretty log parsed JSON
510
+ try:
511
+ logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2, ensure_ascii=False))
512
+ except Exception:
513
+ logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
514
+
515
+ # defensive clamps (same as extractor expectations)
516
+ def safe_prob(val):
517
+ try:
518
+ v = float(val)
519
+ return max(0.0, min(1.0, v))
520
+ except Exception:
521
+ return 0.0
522
+
523
+ for k in [
524
+ "jaundice_probability",
525
+ "anemia_probability",
526
+ "hydration_issue_probability",
527
+ "neurological_issue_probability"
528
+ ]:
529
+ parsed[k] = safe_prob(parsed.get(k, 0.0))
530
+
531
+ try:
532
+ rs = float(parsed.get("risk_score", 0.0))
533
+ parsed["risk_score"] = round(max(0.0, min(100.0, rs)), 2)
534
+ except Exception:
535
+ parsed["risk_score"] = 0.0
536
+
537
+ parsed["confidence"] = safe_prob(parsed.get("confidence", 0.0))
538
+ parsed["summary"] = str(parsed.get("summary", "") or "").strip()
539
+ parsed["recommendation"] = str(parsed.get("recommendation", "") or "").strip()
540
+
541
+ for k in [
542
+ "jaundice_probability",
543
+ "anemia_probability",
544
+ "hydration_issue_probability",
545
+ "neurological_issue_probability",
546
+ "confidence",
547
+ "risk_score"
548
+ ]:
549
+ parsed[f"{k}_was_missing"] = False
550
+
551
+ return parsed
552
+
553
+ except AppError as app_e:
554
+ logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
555
+ last_exc = app_e
556
+ if attempt == 1:
557
+ predict_kwargs["temperature"] = 0.2
558
+ predict_kwargs["max_new_tokens"] = float(512)
559
+ logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
560
+ continue
561
+ else:
562
+ raise RuntimeError(f"LLM call failed (AppError): {app_e}")
563
+ except Exception as e:
564
+ logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
565
+ last_exc = e
566
+ if attempt == 1:
567
+ predict_kwargs["temperature"] = 0.2
568
+ predict_kwargs["max_new_tokens"] = float(512)
569
+ continue
570
+ raise RuntimeError(f"LLM call failed: {e}")
571
+
572
+ raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
573
+
574
+ # -----------------------
575
+ # API endpoints
576
+ # -----------------------
577
@app.get("/")
async def read_root():
    """Root endpoint: return a simple service-identification message."""
    payload = {"message": "Elderly HealthWatch AI Backend"}
    return payload
580
+
581
@app.get("/health")
async def health_check():
    """Health probe: report which face detector is active and whether the
    remote VLM/LLM Spaces are reachable via the gradio client."""
    # Resolve a human-readable detector label from the module-level `mtcnn`
    # object (None, an OpenCV-fallback dict, or a real detector instance).
    if mtcnn is None:
        detector_impl = "none"
    elif isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
        detector_impl = "opencv_haar_fallback"
    else:
        detector_impl = _MTCNN_IMPL
    return {
        "status": "healthy",
        "detector": detector_impl,
        "vlm_available": GRADIO_AVAILABLE,
        "vlm_space": GRADIO_VLM_SPACE,
        "llm_space": LLM_GRADIO_SPACE,
    }
597
+
598
@app.post("/api/v1/validate-eye-photo")
async def validate_eye_photo(image: UploadFile = File(...)):
    """
    Validate an uploaded eye/face photo before screening.

    Runs whichever face detector is configured at module level (`mtcnn`):
    facenet_pytorch MTCNN, the `mtcnn` package, or an OpenCV Haar-cascade
    fallback (stored as a dict). Returns a JSON dict with:
      - valid (bool), face_detected (bool), eye_openness_score (float 0..1)
      - bilingual user messages (English / Hindi)
      - eye_landmarks when the detector provides them
    Raises HTTPException 400 on empty upload, 500 on detector failure.
    Unexpected errors are reported as a non-valid result rather than a 500.
    """
    if mtcnn is None:
        raise HTTPException(status_code=500, detail="No face detector available in this deployment.")
    try:
        content = await image.read()
        if not content:
            raise HTTPException(status_code=400, detail="Empty file uploaded.")
        pil_img = load_image_from_bytes(content)
        img_arr = np.asarray(pil_img)  # RGB ndarray view of the PIL image

        # --- Branch 1: facenet_pytorch MTCNN (detect() returns boxes/probs/landmarks) ---
        if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "facenet_pytorch":
            try:
                boxes, probs, landmarks = mtcnn.detect(pil_img, landmarks=True)
                if boxes is None or len(boxes) == 0:
                    return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
                            "message_english": "No face detected. Please ensure your face is clearly visible in the frame.",
                            "message_hindi": "कोई चेहरा नहीं मिला। कृपया सुनिश्चित करें कि आपका चेहरा फ्रेम में स्पष्ट रूप से दिखाई दे रहा है।"}
                # Use only the first (highest-confidence) detection.
                prob = float(probs[0]) if probs is not None else 0.0
                lm = landmarks[0] if landmarks is not None else None
                # facenet landmarks: index 0 = left eye, index 1 = right eye
                # (presumed from usage here — TODO confirm against library docs).
                if lm is not None and len(lm) >= 2:
                    left_eye = {"x": float(lm[0][0]), "y": float(lm[0][1])}
                    right_eye = {"x": float(lm[1][0]), "y": float(lm[1][1])}
                else:
                    left_eye = right_eye = None
                # Openness is derived from detection confidence, not eyelid geometry.
                eye_openness_score = estimate_eye_openness_from_detection(prob)
                is_valid = eye_openness_score >= 0.3
                return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                        "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
                        "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
                        "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
            except Exception:
                traceback.print_exc()
                raise HTTPException(status_code=500, detail="Face detector failed during inference.")

        # --- Branch 2: `mtcnn` package (detect_faces() returns list of dicts) ---
        if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "mtcnn":
            try:
                detections = mtcnn.detect_faces(img_arr)
            except Exception:
                # Some versions accept a PIL image instead of an ndarray.
                detections = mtcnn.detect_faces(pil_img)
            if not detections:
                return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
                        "message_english": "No face detected. Please ensure your face is clearly visible in the frame.",
                        "message_hindi": "कोई चेहरा नहीं मिला। कृपया सुनिश्चित करें कि आपका चेहरा फ्रेम में स्पष्ट रूप से दिखाई दे रहा है।"}
            face = detections[0]
            keypoints = face.get("keypoints", {})
            left_eye = keypoints.get("left_eye")
            right_eye = keypoints.get("right_eye")
            confidence = float(face.get("confidence", 0.0))
            eye_openness_score = estimate_eye_openness_from_detection(confidence)
            is_valid = eye_openness_score >= 0.3
            return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                    "message_english": "Photo looks good! Eyes are properly open." if is_valid else "Eyes appear to be closed or partially closed. Please open your eyes wide and try again.",
                    "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
                    "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}

        # --- Branch 3: OpenCV Haar-cascade fallback (mtcnn is a config dict) ---
        if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
            try:
                gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
                face_cascade = mtcnn["face_cascade"]
                eye_cascade = mtcnn["eye_cascade"]
                faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60))
                if len(faces) == 0:
                    return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
                            "message_english": "No face detected. Please ensure your face is clearly visible in the frame.",
                            "message_hindi": "कोई चेहरा नहीं मिला। कृपया सुनिश्चित करें कि आपका चेहरा फ्रेम में स्पष्ट रूप से दिखाई दे रहा है।"}
                (x, y, w, h) = faces[0]
                # Search for eyes only inside the first detected face region.
                roi_gray = gray[y:y+h, x:x+w]
                eyes = eye_cascade.detectMultiScale(roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(20, 10))
                # Haar cascades give no openness estimate: any detected eye counts as open.
                eye_openness_score = 1.0 if len(eyes) >= 1 else 0.0
                is_valid = eye_openness_score >= 0.3
                left_eye = None
                right_eye = None
                if len(eyes) >= 1:
                    # Report the center of the first detected eye in full-image coordinates.
                    ex, ey, ew, eh = eyes[0]
                    cx = float(x + ex + ew/2)
                    cy = float(y + ey + eh/2)
                    left_eye = {"x": cx, "y": cy}
                return {"valid": bool(is_valid), "face_detected": True, "eye_openness_score": round(eye_openness_score, 2),
                        "message_english": "Photo looks good! Eyes are detected." if is_valid else "Eyes not detected. Please open your eyes wide and try again.",
                        "message_hindi": "फोटो अच्छी है! आंखें मिलीं।" if is_valid else "आंखें नहीं मिलीं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें。",
                        "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
            except Exception:
                traceback.print_exc()
                raise HTTPException(status_code=500, detail="OpenCV fallback detector failed.")

        # No branch matched: the module-level detector state is inconsistent.
        raise HTTPException(status_code=500, detail="Invalid detector configuration.")
    except HTTPException:
        raise
    except Exception as e:
        traceback.print_exc()
        # Unexpected errors are returned as a soft failure so the client UI
        # can prompt a retry instead of surfacing a 500.
        return {"valid": False, "face_detected": False, "eye_openness_score": 0.0,
                "message_english": "Error processing image. Please try again.",
                "message_hindi": "छवि प्रोसेस करने में त्रुटि। कृपया पुनः प्रयास करें।",
                "error": str(e)}
693
+
694
@app.post("/api/v1/upload")
async def upload_images(
    background_tasks: BackgroundTasks,
    face_image: UploadFile = File(...),
    eye_image: UploadFile = File(...)
):
    """
    Save images and enqueue background processing. VLM -> LLM runs inside process_screening.
    """
    try:
        screening_id = str(uuid.uuid4())
        now = datetime.utcnow().isoformat() + "Z"
        tmp_dir = "/tmp/elderly_healthwatch"
        os.makedirs(tmp_dir, exist_ok=True)

        # Persist both uploads under deterministic per-screening names.
        face_path = os.path.join(tmp_dir, f"{screening_id}_face.jpg")
        eye_path = os.path.join(tmp_dir, f"{screening_id}_eye.jpg")
        face_bytes = await face_image.read()
        eye_bytes = await eye_image.read()
        for dest_path, blob in ((face_path, face_bytes), (eye_path, eye_bytes)):
            with open(dest_path, "wb") as fh:
                fh.write(blob)

        # Register the screening record before queueing the pipeline.
        record = {
            "id": screening_id,
            "timestamp": now,
            "face_image_path": face_path,
            "eye_image_path": eye_path,
            "status": "queued",
            "quality_metrics": {},
            "ai_results": {},
            "disease_predictions": [],
            "recommendations": {}
        }
        screenings_db[screening_id] = record
        background_tasks.add_task(process_screening, screening_id)
        return {"screening_id": screening_id}
    except Exception as e:
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"Failed to upload images: {e}")
732
+
733
@app.post("/api/v1/analyze/{screening_id}")
async def analyze_screening(screening_id: str, background_tasks: BackgroundTasks):
    """Re-queue the processing pipeline for an existing screening record."""
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")
    record = screenings_db[screening_id]
    # Don't double-enqueue a run that is currently in flight.
    if record.get("status") == "processing":
        return {"message": "Already processing"}
    record["status"] = "queued"
    background_tasks.add_task(process_screening, screening_id)
    return {"message": "Analysis enqueued"}
742
+
743
@app.get("/api/v1/status/{screening_id}")
async def get_status(screening_id: str):
    """Return coarse progress for a screening: 0 queued, 50 processing, 100 completed."""
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")
    current = screenings_db[screening_id].get("status", "unknown")
    if current == "completed":
        progress = 100
    elif current == "processing":
        progress = 50
    else:
        progress = 0
    return {"screening_id": screening_id, "status": current, "progress": progress}
750
+
751
@app.get("/api/v1/results/{screening_id}")
async def get_results(screening_id: str):
    """Return the full screening record, guaranteeing ai_results.vlm_raw exists."""
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")
    record = screenings_db[screening_id]
    # Ensure vlm_raw is always present in ai_results for debugging
    results = record.setdefault("ai_results", {})
    results.setdefault("vlm_raw", "")
    return record
760
+
761
@app.get("/api/v1/history/{user_id}")
async def get_history(user_id: str):
    """List all stored screenings whose user_id matches the given user."""
    matching = []
    for record in screenings_db.values():
        if record.get("user_id") == user_id:
            matching.append(record)
    return {"screenings": matching}
765
+
766
+ # -----------------------
767
+ # Immediate VLM -> LLM routes (return vitals in one call)
768
+ # -----------------------
769
@app.post("/api/v1/get-vitals")
async def get_vitals_from_upload(
    face_image: UploadFile = File(...),
    eye_image: UploadFile = File(...)
):
    """
    Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
    { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
    Note: VLM will receive only the face image (not the eye image).

    Raises HTTPException 500 when the gradio client is unavailable, when the
    uploads cannot be persisted, or when any pipeline stage fails.
    """
    if not GRADIO_AVAILABLE:
        raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")

    # save files to a temp directory
    try:
        tmp_dir = "/tmp/elderly_healthwatch"
        os.makedirs(tmp_dir, exist_ok=True)
        uid = str(uuid.uuid4())
        face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
        eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
        face_bytes = await face_image.read()
        eye_bytes = await eye_image.read()
        with open(face_path, "wb") as f:
            f.write(face_bytes)
        with open(eye_path, "wb") as f:
            f.write(eye_bytes)
    except Exception as e:
        logger.exception("Failed saving uploaded images")
        raise HTTPException(status_code=500, detail=f"Failed saving images: {e}")

    try:
        # Run VLM (off the event loop) - returns (features, raw, meta)
        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)

        # Log VLM outputs
        logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
        logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
        logger.info("get_vitals_from_upload - VLM meta: %s", json.dumps(vlm_meta, ensure_ascii=False))

        # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
        if vlm_features:
            llm_input = json.dumps(vlm_features, ensure_ascii=False)
            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
        else:
            # Fall back to the raw VLM text; "{}" keeps the LLM prompt well-formed
            # when the VLM produced nothing usable.
            llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))

        # Run LLM (off the event loop)
        structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)

        # Return merged result (includes raw VLM output + meta for debugging)
        return {
            "vlm_raw_output": vlm_raw,
            "vlm_parsed_features": vlm_features,
            "vlm_meta": vlm_meta,
            "llm_structured_risk": structured_risk
        }
    except Exception as e:
        logger.exception("get_vitals_from_upload pipeline failed")
        raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
829
+
830
@app.post("/api/v1/get-vitals/{screening_id}")
async def get_vitals_for_screening(screening_id: str):
    """
    Re-run VLM->LLM on images already stored for `screening_id` in screenings_db.
    Useful for re-processing or debugging.
    Note: VLM will receive only the face image (not the eye image).

    Raises HTTPException 404 for an unknown screening, 400 when the stored
    image files are missing on disk, 500 on pipeline failure.
    """
    if screening_id not in screenings_db:
        raise HTTPException(status_code=404, detail="Screening not found")

    entry = screenings_db[screening_id]
    face_path = entry.get("face_image_path")
    eye_path = entry.get("eye_image_path")
    # Both stored files must still exist; temp storage may have been cleaned.
    if not (face_path and os.path.exists(face_path) and eye_path and os.path.exists(eye_path)):
        raise HTTPException(status_code=400, detail="Stored images missing for this screening")

    try:
        # Run VLM off the event loop (returns features, raw, meta)
        vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)

        logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
        logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
        logger.info("get_vitals_for_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))

        # Prefer cleaned VLM JSON for the LLM; fall back to the raw string.
        if vlm_features:
            llm_input = json.dumps(vlm_features, ensure_ascii=False)
            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
        else:
            llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))

        structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)

        # Optionally store this run's outputs back into the DB for inspection
        entry.setdefault("ai_results", {})
        entry["ai_results"].update({
            "vlm_parsed_features": vlm_features,
            "vlm_raw": vlm_raw,
            "vlm_meta": vlm_meta,
            "structured_risk": structured_risk,
            "last_vitals_run": datetime.utcnow().isoformat() + "Z"
        })

        return {
            "screening_id": screening_id,
            "vlm_raw_output": vlm_raw,
            "vlm_parsed_features": vlm_features,
            "vlm_meta": vlm_meta,
            "llm_structured_risk": structured_risk
        }
    except Exception as e:
        logger.exception("get_vitals_for_screening pipeline failed")
        raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
883
+
884
+ # -----------------------
885
+ # URL-based vitals endpoint (optional)
886
+ # -----------------------
887
class ImageUrls(BaseModel):
    """Request body for /api/v1/get-vitals-by-url: public URLs of the two input images."""
    # Pydantic's HttpUrl validates both fields as well-formed HTTP(S) URLs.
    face_image_url: HttpUrl
    eye_image_url: HttpUrl
890
+
891
+ import httpx # make sure to add httpx to requirements
892
+
893
+ # helper: download URL to file with safety checks
894
def _cleanup_partial(path: str) -> None:
    """Best-effort removal of a partially written download (see download_image_to_path)."""
    try:
        os.remove(path)
    except OSError:
        pass

async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
    """
    Download an image from `url` and save it to `dest_path`.

    Guards:
      - request timeout
      - maximum size (`max_bytes`), enforced while streaming
      - basic content-type check (must be image/*)

    Fix: the original used `client.get(url)`, which buffers the ENTIRE response
    body in memory before `aiter_bytes()` yields anything — so the size guard
    never actually prevented a large download. `client.stream("GET", ...)`
    reads the body incrementally, making the guard effective. A partially
    written destination file is removed on failure.

    Raises:
        HTTPException 400: on any fetch, validation, or size failure.
    """
    try:
        async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True) as client:
            async with client.stream("GET", url) as resp:
                resp.raise_for_status()

                content_type = resp.headers.get("Content-Type", "")
                if not content_type.startswith("image/"):
                    raise ValueError(f"URL does not appear to be an image (Content-Type={content_type})")

                # Reject early when the server declares an oversized body.
                declared = resp.headers.get("Content-Length")
                if declared and declared.isdigit() and int(declared) > max_bytes:
                    raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")

                total = 0
                with open(dest_path, "wb") as f:
                    async for chunk in resp.aiter_bytes():
                        if not chunk:
                            continue
                        total += len(chunk)
                        if total > max_bytes:
                            raise ValueError(f"Image exceeds max allowed size ({max_bytes} bytes)")
                        f.write(chunk)
    except httpx.HTTPStatusError as e:
        _cleanup_partial(dest_path)
        raise HTTPException(status_code=400, detail=f"Failed to fetch image: {e.response.status_code} {str(e)}")
    except Exception as e:
        _cleanup_partial(dest_path)
        raise HTTPException(status_code=400, detail=f"Failed to download image: {str(e)}")
925
+
926
@app.post("/api/v1/get-vitals-by-url")
async def get_vitals_from_urls(payload: ImageUrls = Body(...)):
    """
    Download face and eye images from the given URLs, then run the same
    VLM -> LLM pipeline and return the results.

    Note: the VLM receives only the face image (not the eye image).
    Body: { "face_image_url": "...", "eye_image_url": "..." }

    Raises:
        HTTPException 500: VLM/LLM client unavailable, temp-file setup failure,
            or pipeline failure.
        HTTPException 400: image download/validation failure.
    """
    if not GRADIO_AVAILABLE:
        raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")

    # Prepare unique temp paths for the two downloads.
    try:
        tmp_dir = "/tmp/elderly_healthwatch"
        os.makedirs(tmp_dir, exist_ok=True)
        uid = str(uuid.uuid4())
        face_path = os.path.join(tmp_dir, f"{uid}_face.jpg")
        eye_path = os.path.join(tmp_dir, f"{uid}_eye.jpg")
    except Exception as e:
        logger.exception("Failed to prepare temp paths")
        raise HTTPException(status_code=500, detail=f"Server error preparing temp files: {e}")

    try:
        # Download images (download_image_to_path enforces size/type guards).
        try:
            await download_image_to_path(str(payload.face_image_url), face_path)
            await download_image_to_path(str(payload.eye_image_url), eye_path)
        except HTTPException:
            raise
        except Exception as e:
            logger.exception("Downloading images failed")
            raise HTTPException(status_code=400, detail=f"Failed to download images: {e}")

        # Run the existing pipeline off the event loop.
        try:
            vlm_features, vlm_raw, vlm_meta = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)

            # Only append "..." when the raw output was actually truncated.
            snippet = (vlm_raw[:500] + "...") if vlm_raw and len(vlm_raw) > 500 else (vlm_raw or "<EMPTY>")
            logger.info("get_vitals_from_urls - VLM raw (snippet): %s", snippet)
            logger.info("get_vitals_from_urls - VLM parsed features: %s",
                        json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
            logger.info("get_vitals_from_urls - VLM meta: %s",
                        json.dumps(vlm_meta, ensure_ascii=False, default=str))

            # Prefer the cleaned JSON features; fall back to the raw VLM string.
            if vlm_features:
                llm_input = json.dumps(vlm_features, ensure_ascii=False)
                logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
            else:
                llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
                logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))

            structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)

            return {
                "vlm_raw_output": vlm_raw,
                "vlm_parsed_features": vlm_features,
                "vlm_meta": vlm_meta,
                "llm_structured_risk": structured_risk
            }
        except Exception as e:
            logger.exception("get_vitals_by_url pipeline failed")
            raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
    finally:
        # Fix: remove the downloaded temp files so repeated calls do not leak
        # disk space. These files are not referenced by screenings_db, so
        # deleting them after the synchronous pipeline finishes is safe.
        for p in (face_path, eye_path):
            try:
                os.remove(p)
            except OSError:
                pass
983
+
984
+ # -----------------------
985
+ # Main background pipeline (upload -> process_screening)
986
+ # -----------------------
987
async def process_screening(screening_id: str):
    """
    Main background pipeline for an uploaded screening.

    Steps:
      - load the stored face and eye images
      - compute quick detector-based quality metrics (facenet-pytorch MTCNN,
        `mtcnn` package, or an OpenCV Haar-cascade fallback)
      - run VLM -> vlm_features (dict or None) + vlm_raw (string) + vlm_meta
      - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
      - merge results into screenings_db[screening_id]["ai_results"] and
        mark the screening "completed" (or "failed" with an error string)

    Never raises: all failures are recorded on the screening entry instead.
    """
    try:
        if screening_id not in screenings_db:
            logger.error("[process_screening] screening %s not found", screening_id)
            return
        screenings_db[screening_id]["status"] = "processing"
        logger.info("[process_screening] Starting %s", screening_id)

        entry = screenings_db[screening_id]
        face_path = entry.get("face_image_path")
        eye_path = entry.get("eye_image_path")

        if not (face_path and os.path.exists(face_path)):
            raise RuntimeError("Face image missing")
        if not (eye_path and os.path.exists(eye_path)):
            raise RuntimeError("Eye image missing")

        face_img = Image.open(face_path).convert("RGB")
        # NOTE(review): eye_img is loaded and validated but never used below
        # (the VLM path takes eye_path directly) — confirm whether the load is
        # intended only as an integrity check.
        eye_img = Image.open(eye_path).convert("RGB")

        # Basic detection + quality metrics (facenet/mtcnn/opencv)
        face_detected = False
        face_confidence = 0.0
        left_eye_coord = right_eye_coord = None

        # Branch 1: a real MTCNN object (either implementation). `mtcnn` is a
        # dict only in the OpenCV-fallback configuration handled below.
        if mtcnn is not None and not isinstance(mtcnn, dict) and (_MTCNN_IMPL == "facenet_pytorch" or _MTCNN_IMPL == "mtcnn"):
            try:
                if _MTCNN_IMPL == "facenet_pytorch":
                    # facenet_pytorch returns (boxes, probs, landmarks);
                    # landmarks[0] holds 5 points, the first two being the eyes.
                    boxes, probs, landmarks = mtcnn.detect(face_img, landmarks=True)
                    if boxes is not None and len(boxes) > 0:
                        face_detected = True
                        face_confidence = float(probs[0]) if probs is not None else 0.0
                        if landmarks is not None:
                            lm = landmarks[0]
                            if len(lm) >= 2:
                                left_eye_coord = {"x": float(lm[0][0]), "y": float(lm[0][1])}
                                right_eye_coord = {"x": float(lm[1][0]), "y": float(lm[1][1])}
                else:
                    # `mtcnn` package API: detect_faces() on a numpy array,
                    # keypoints dict includes left_eye / right_eye.
                    arr = np.asarray(face_img)
                    detections = mtcnn.detect_faces(arr)
                    if detections:
                        face_detected = True
                        face_confidence = float(detections[0].get("confidence", 0.0))
                        k = detections[0].get("keypoints", {})
                        left_eye_coord = k.get("left_eye")
                        right_eye_coord = k.get("right_eye")
            except Exception:
                # Detection is best-effort; quality metrics degrade gracefully.
                traceback.print_exc()

        # Branch 2: OpenCV Haar-cascade fallback (mtcnn is a dict of cascades).
        if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
            try:
                arr = np.asarray(face_img)
                gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
                face_cascade = mtcnn["face_cascade"]
                eye_cascade = mtcnn["eye_cascade"]
                faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60))
                if len(faces) > 0:
                    face_detected = True
                    (x, y, w, h) = faces[0]
                    # Haar cascades give no confidence score; approximate one
                    # from the face-box-to-image area ratio, capped at 1.0.
                    face_confidence = min(1.0, (w*h) / (arr.shape[0]*arr.shape[1]) * 4.0)
                    roi_gray = gray[y:y+h, x:x+w]
                    eyes = eye_cascade.detectMultiScale(roi_gray, scaleFactor=1.1, minNeighbors=5, minSize=(20, 10))
                    if len(eyes) >= 1:
                        # Eye coords are converted back to full-image space.
                        ex, ey, ew, eh = eyes[0]
                        left_eye_coord = {"x": float(x + ex + ew/2), "y": float(y + ey + eh/2)}
            except Exception:
                traceback.print_exc()

        # Coarse two-level quality score based on detection confidence.
        face_quality_score = 0.85 if face_detected and face_confidence > 0.6 else 0.45
        quality_metrics = {
            "face_detected": face_detected,
            "face_confidence": round(face_confidence, 3),
            "face_quality_score": round(face_quality_score, 2),
            "eye_coords": {"left_eye": left_eye_coord, "right_eye": right_eye_coord},
            "face_brightness": int(np.mean(np.asarray(face_img.convert("L")))),
            # NOTE(review): this is the variance of grayscale intensities — a
            # contrast measure, not the usual Laplacian-variance blur metric.
            # Confirm whether Laplacian variance was intended.
            "face_blur_estimate": int(np.var(np.asarray(face_img.convert("L"))))
        }
        screenings_db[screening_id]["quality_metrics"] = quality_metrics

        # --------------------------
        # RUN VLM -> get vlm_features + vlm_raw + vlm_meta
        # --------------------------
        vlm_features = None
        vlm_raw = None
        vlm_meta = {}
        try:
            # Note: unlike the HTTP endpoints, this background task calls the
            # VLM synchronously (no asyncio.to_thread) — it blocks this coroutine.
            vlm_features, vlm_raw, vlm_meta = run_vlm_and_get_features(face_path, eye_path)
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({
                "vlm_parsed_features": vlm_features,
                "vlm_raw": vlm_raw,
                "vlm_meta": vlm_meta
            })
        except Exception as e:
            # Record the VLM failure but continue — the LLM can still run on
            # the empty fallback input below.
            logger.exception("VLM feature extraction failed")
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
            vlm_features = None
            vlm_raw = ""
            vlm_meta = {"error": str(e)}

        # Log VLM outputs in pipeline context
        logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
        logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
        logger.info("process_screening(%s) - VLM meta: %s", screening_id, json.dumps(vlm_meta, ensure_ascii=False))

        # --------------------------
        # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
        # --------------------------
        structured_risk = None
        try:
            if vlm_features:
                # prefer cleaned JSON
                llm_input = json.dumps(vlm_features, ensure_ascii=False)
            else:
                # fallback to raw string (may be empty)
                llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"

            structured_risk = run_llm_on_vlm(llm_input)
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
        except Exception as e:
            # On LLM failure, substitute an all-zero risk structure so the
            # downstream summary code can still run without KeyErrors.
            logger.exception("LLM processing failed")
            screenings_db[screening_id].setdefault("ai_results", {})
            screenings_db[screening_id]["ai_results"].update({"llm_error": str(e)})
            structured_risk = {
                "risk_score": 0.0,
                "jaundice_probability": 0.0,
                "anemia_probability": 0.0,
                "hydration_issue_probability": 0.0,
                "neurological_issue_probability": 0.0,
                "summary": "",
                "recommendation": "",
                "confidence": 0.0
            }

        # Use structured_risk for summary recommendations & simple disease inference placeholders
        screenings_db[screening_id].setdefault("ai_results", {})
        screenings_db[screening_id]["ai_results"].update({
            # NOTE(review): hardcoded placeholder — not a measured duration.
            "processing_time_ms": 1200
        })

        # Simple threshold-based summaries derived from the LLM probabilities
        # (0.5 probability cut-off for Medium vs Low).
        disease_predictions = [
            {
                "condition": "Anemia-like-signs",
                "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
                "probability": structured_risk.get("anemia_probability", 0.0),
                "confidence": structured_risk.get("confidence", 0.0)
            },
            {
                "condition": "Jaundice-like-signs",
                "risk_level": "Medium" if structured_risk.get("jaundice_probability", 0.0) > 0.5 else "Low",
                "probability": structured_risk.get("jaundice_probability", 0.0),
                "confidence": structured_risk.get("confidence", 0.0)
            }
        ]

        recommendations = {
            # risk_score is assumed to be on a 0-100 scale here (threshold 30) —
            # confirm against the LLM prompt's output contract.
            "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
            # NOTE(review): f-prefix is unnecessary (no placeholders).
            "message_english": structured_risk.get("recommendation", "") or f"Please follow up with a health professional if concerns persist.",
            "message_hindi": ""
        }

        screenings_db[screening_id].update({
            "status": "completed",
            "disease_predictions": disease_predictions,
            "recommendations": recommendations
        })

        logger.info("[process_screening] Completed %s", screening_id)
    except Exception as e:
        # Top-level guard: mark the screening failed rather than propagate,
        # since this runs as a background task with no caller to catch.
        traceback.print_exc()
        if screening_id in screenings_db:
            screenings_db[screening_id]["status"] = "failed"
            screenings_db[screening_id]["error"] = str(e)
        else:
            logger.error("[process_screening] Failed for unknown screening %s: %s", screening_id, str(e))
1172
+
1173
+ # -----------------------
1174
+ # Run server (for local debugging)
1175
+ # -----------------------
1176
+ if __name__ == "__main__":
1177
+ import uvicorn
1178
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)