Spaces:

dpv007
/

cloud

Sleeping

App Files Files Community

dpv007 committed on Dec 12, 2025

Commit

f37add2

verified ·

1 Parent(s): d1faea8

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -161

app.py CHANGED Viewed

@@ -10,10 +10,11 @@ Pipeline:
 Notes:
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
- - This version includes a robust regex-based extractor that finds the outermost {...} block
-   in the LLM output, extracts numeric values for the required keys, and always returns
-   numeric defaults (no NaN) so frontends will not receive null/None for numeric fields.
- - This variant logs raw VLM & LLM outputs and the parsed JSON using Python logging.
 """
 import io
@@ -24,6 +25,7 @@ import asyncio
 import logging
 import traceback
 import re
 from typing import Dict, Any, Optional, Tuple
 from datetime import datetime
@@ -49,6 +51,10 @@ GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct
 LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 # Default VLM prompt
 DEFAULT_VLM_PROMPT = (
     "From the provided face/eye images, compute the required screening features "
@@ -140,34 +146,32 @@ def estimate_eye_openness_from_detection(confidence: float) -> float:
         return 0.0
 # -----------------------
-# Regex-based robust extractor
 # -----------------------
 def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
     """
-    1) Finds the outermost { ... } block in raw_text.
-    2) Extracts numeric values after the listed keys using regex, tolerating:
-       - quotes, spaces, percent signs, percent numbers like "55%", strings like "0.12", integers, or numbers in quotes.
-    3) Returns a dict with numeric fields GUARANTEED to be floats (no None/NaN), and string fields for summary/recommendation.
     """
-    # Find the first {...} block (outermost approximation)
     match = re.search(r"\{[\s\S]*\}", raw_text)
     if not match:
-        raise ValueError("No JSON-like block found in LLM output")
     block = match.group(0)
     def find_number_for_key(key: str) -> Optional[float]:
-        """
-        Returns a float in range 0..1 for probabilities, and raw numeric for other keys depending on usage.
-        This helper returns None if not found; caller will replace with defaults (0.0).
-        """
-        # Try multiple patterns to be robust
-        # Pattern captures numbers possibly with % and optional quotes, e.g. "45%", '0.12', 0.5, " 87 "
         patterns = [
-            rf'"{key}"\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?',  # "key": "45%" or "key": 0.45
             rf"'{key}'\s*:\s*['\"]?\s*([-+]?\d+(\.\d+)?)\s*%?\s*['\"]?",
-            rf'\b{key}\b\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?',  # key: 45%
-            rf'"{key}"\s*:\s*["\']([^"\']+)["\']',  # capture quoted text (for non-numeric attempts)
             rf"'{key}'\s*:\s*['\"]([^'\"]+)['\"]"
         ]
         for pat in patterns:
@@ -177,33 +181,25 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
             g = m.group(1)
             if g is None:
                 continue
-            s = str(g).strip()
-            # Remove percent sign if present
-            s = s.replace("%", "").strip()
-            # Try to coerce to float
             try:
-                val = float(s)
-                return val
             except Exception:
-                # not numeric
                 return None
         return None
     def find_text_for_key(key: str) -> str:
-        # capture "key": "some text" allowing single/double quotes and also unquoted until comma/}
         m = re.search(rf'"{key}"\s*:\s*"([^"]*)"', block, flags=re.IGNORECASE)
         if m:
             return m.group(1).strip()
         m = re.search(rf"'{key}'\s*:\s*'([^']*)'", block, flags=re.IGNORECASE)
         if m:
             return m.group(1).strip()
-        # fallback: key: some text (unquoted) up to comma or }
         m = re.search(rf'\b{key}\b\s*:\s*([^\n,}}]+)', block, flags=re.IGNORECASE)
         if m:
             return m.group(1).strip().strip('",')
         return ""
-    # Extract raw numeric candidates
     raw_risk = find_number_for_key("risk_score")
     raw_jaundice = find_number_for_key("jaundice_probability")
     raw_anemia = find_number_for_key("anemia_probability")
@@ -211,17 +207,13 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
     raw_neuro = find_number_for_key("neurological_issue_probability")
     raw_conf = find_number_for_key("confidence")
-    # Normalize:
-    # - For probabilities: if value > 1 and <=100 => treat as percent -> divide by 100. If <=1 treat as fraction.
     def normalize_prob(v: Optional[float]) -> float:
         if v is None:
             return 0.0
         if v > 1.0 and v <= 100.0:
             return max(0.0, min(1.0, v / 100.0))
-        # if v is large >100, clamp to 1.0
         if v > 100.0:
             return 1.0
-        # otherwise assume already 0..1
         return max(0.0, min(1.0, v))
     jaundice_probability = normalize_prob(raw_jaundice)
@@ -230,17 +222,13 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
     neurological_issue_probability = normalize_prob(raw_neuro)
     confidence = normalize_prob(raw_conf)
-    # risk_score: return in 0..100
     def normalize_risk(v: Optional[float]) -> float:
         if v is None:
             return 0.0
         if v <= 1.0:
-            # fraction given -> scale to 0..100
             return round(max(0.0, min(100.0, v * 100.0)), 2)
-        # if between 1 and 100, assume it's already 0..100
         if v > 1.0 and v <= 100.0:
             return round(max(0.0, min(100.0, v)), 2)
-        # clamp anything insane
         return round(max(0.0, min(100.0, v if v < float('inf') else 100.0)), 2)
     risk_score = normalize_risk(raw_risk)
@@ -275,10 +263,10 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
     Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
        (parsed_features_dict_or_None, raw_text_response_str)
-    We attempt to parse JSON as before, but always return the original raw text so it can be
-    forwarded verbatim to the LLM if desired. This function now also tries the robust
-    regex extractor (extract_json_via_regex) on the raw text if json.loads fails, and logs
-    the extracted values.
     """
     prompt = prompt or DEFAULT_VLM_PROMPT
     if not os.path.exists(face_path) or not os.path.exists(eye_path):
@@ -289,66 +277,82 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
     client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
     message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
-    try:
-        logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
-        result = client.predict(message=message, history=[], api_name="/chat_fn")
-    except Exception as e:
-        logger.exception("VLM call failed")
-        raise RuntimeError(f"VLM call failed: {e}")
-    if not result:
-        raise RuntimeError("Empty response from VLM")
-    # Normalize result
-    if isinstance(result, (list, tuple)):
-        out = result[0]
-    elif isinstance(result, dict):
-        out = result
-    else:
-        out = {"text": str(result)}
-    if not isinstance(out, dict):
-        raise RuntimeError("Unexpected VLM output format (expected dict with 'text' key)")
-    text_out = out.get("text") or out.get("output") or None
-    if not text_out:
-        text_out = json.dumps(out)
     # Log raw VLM output for debugging/auditing
-    try:
-        logger.info("VLM raw output:\n%s", text_out)
-    except Exception:
-        logger.info("VLM raw output (could not pretty print)")
-    # Try to parse JSON first (as before)
     parsed_features = None
     try:
-        parsed_features = json.loads(text_out)
-        if not isinstance(parsed_features, dict):
             parsed_features = None
     except Exception:
         parsed_features = None
-    # If json.loads failed, try regex-based extraction (robust)
-    if parsed_features is None:
         try:
             parsed_features = extract_json_via_regex(text_out)
             logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
         except Exception as e:
-            # No JSON-like block or extraction failed: keep parsed_features as None and log
-            logger.info("VLM regex extraction did not find structured JSON (this may be fine): %s", str(e))
             parsed_features = None
-    # Log parsed features if available
-    if parsed_features is not None:
-        try:
-            logger.info("VLM parsed features (final):\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
-        except Exception:
-            logger.info("VLM parsed features (raw): %s", str(parsed_features))
     else:
-        logger.info("VLM parsed features: None (raw output kept)")
-    return parsed_features, text_out
 # -----------------------
 # Gradio / LLM helper (defensive, with retry + clamps)
@@ -362,9 +366,9 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
                    developer_prompt: Optional[str] = None) -> Dict[str, Any]:
     """
     Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
-    - Coerces types (int for tokens), clamps ranges where remote spaces often expect them.
-    - Retries once with safe defaults if the Space rejects the inputs (e.g. temperature too low).
-    - Logs and returns regex-extracted JSON as before.
     """
     if not GRADIO_AVAILABLE:
         raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
@@ -380,13 +384,17 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
     system_prompt = system_prompt or LLM_SYSTEM_PROMPT
     developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
-    # Prepare the combined prompt: use raw string as-is, otherwise json.dumps the dict
     if isinstance(vlm_features_or_raw, str):
-        vlm_json_str = vlm_features_or_raw
     else:
-        vlm_json_str = json.dumps(vlm_features_or_raw, default=str)
-    # Strong, explicit instruction to output only JSON
     instruction = (
         "\n\nSTRICT INSTRUCTIONS (READ CAREFULLY):\n"
         "1) OUTPUT ONLY a single valid JSON object and nothing else — no prose, no explanation, no code fences.\n"
@@ -397,10 +405,9 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
         "If you cannot estimate a value, set it to null.\n\n"
         "Now, based on the VLM output below, produce ONLY the JSON object described above.\n\n"
         "===BEGIN VLM OUTPUT===\n"
-        f"{vlm_json_str}\n"
         "===END VLM OUTPUT===\n\n"
     )
-    input_payload_str = instruction
     # Defensive coercion / clamps
     try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
@@ -408,13 +415,12 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
         try_max_new_tokens = 1024
     try_temperature = float(temperature) if temperature is not None else 0.0
-    # Many demos require temperature >= 0.1; clamp to 0.1 minimum to avoid validation failures
     if try_temperature < 0.1:
         try_temperature = 0.1
-    # prepare kwargs for predict
     predict_kwargs = dict(
-        input_data=input_payload_str,
         max_new_tokens=float(try_max_new_tokens),
         model_identity=model_identity,
         system_prompt=system_prompt,
@@ -427,26 +433,39 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
         api_name="/chat"
     )
-    # attempt + one retry with safer defaults if AppError occurs
     last_exc = None
     for attempt in (1, 2):
         try:
             logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
                         LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
             result = client.predict(**predict_kwargs)
             # normalize to string
             if isinstance(result, (dict, list)):
                 text_out = json.dumps(result)
             else:
                 text_out = str(result)
             if not text_out or len(text_out.strip()) == 0:
                 raise RuntimeError("LLM returned empty response")
-            logger.info("LLM raw output:\n%s", text_out)
             # parse with regex extractor (may raise)
-            parsed = extract_json_via_regex(text_out)
-            if not isinstance(parsed, dict):
-                raise ValueError("Parsed LLM output is not a JSON object/dict")
             # pretty log parsed JSON
             try:
@@ -454,7 +473,7 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
             except Exception:
                 logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
-            # defensive clamps (same as before)
             def safe_prob(val):
                 try:
                     v = float(val)
@@ -493,29 +512,24 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
             return parsed
         except AppError as app_e:
-            # Specific remote validation error: log and attempt a single retry with ultra-safe defaults
             logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
             last_exc = app_e
             if attempt == 1:
-                # tighten inputs and retry: force temperature=0.2, max_new_tokens=512
                 predict_kwargs["temperature"] = 0.2
                 predict_kwargs["max_new_tokens"] = float(512)
                 logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
                 continue
             else:
-                # no more retries
                 raise RuntimeError(f"LLM call failed (AppError): {app_e}")
         except Exception as e:
             logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
             last_exc = e
-            # try one retry only for non-AppError exceptions
             if attempt == 1:
                 predict_kwargs["temperature"] = 0.2
                 predict_kwargs["max_new_tokens"] = float(512)
                 continue
             raise RuntimeError(f"LLM call failed: {e}")
-    # if we reach here, raise last caught exception
     raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
 # -----------------------
@@ -544,13 +558,8 @@ async def health_check():
 @app.post("/api/v1/validate-eye-photo")
 async def validate_eye_photo(image: UploadFile = File(...)):
-    """
-    Lightweight validation endpoint. Uses available detector (facenet/mtcnn/opencv) to check face/eye detection.
-    For full pipeline, use /api/v1/upload which invokes VLM+LLM in background.
-    """
     if mtcnn is None:
         raise HTTPException(status_code=500, detail="No face detector available in this deployment.")
     try:
         content = await image.read()
         if not content:
@@ -558,7 +567,6 @@ async def validate_eye_photo(image: UploadFile = File(...)):
         pil_img = load_image_from_bytes(content)
         img_arr = np.asarray(pil_img)  # RGB
-        # facenet-pytorch branch
         if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "facenet_pytorch":
             try:
                 boxes, probs, landmarks = mtcnn.detect(pil_img, landmarks=True)
@@ -583,7 +591,6 @@ async def validate_eye_photo(image: UploadFile = File(...)):
                 traceback.print_exc()
                 raise HTTPException(status_code=500, detail="Face detector failed during inference.")
-        # classic mtcnn branch
         if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "mtcnn":
             try:
                 detections = mtcnn.detect_faces(img_arr)
@@ -605,7 +612,6 @@ async def validate_eye_photo(image: UploadFile = File(...)):
                     "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
                     "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
-        # OpenCV Haar cascade fallback
         if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
             try:
                 gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
@@ -707,7 +713,11 @@ async def get_status(screening_id: str):
 async def get_results(screening_id: str):
     if screening_id not in screenings_db:
         raise HTTPException(status_code=404, detail="Screening not found")
-    return screenings_db[screening_id]
 @app.get("/api/v1/history/{user_id}")
 async def get_history(user_id: str):
@@ -724,7 +734,7 @@ async def get_vitals_from_upload(
 ):
     """
     Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
-      { vlm_features, vlm_raw, structured_risk }
     """
     if not GRADIO_AVAILABLE:
         raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
@@ -750,21 +760,26 @@ async def get_vitals_from_upload(
         # Run VLM (off the event loop)
         vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
-        # Log VLM outputs (already logged inside run_vlm..., but log again with context)
-        logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:100] + "...") if vlm_raw else "None")
-        logger.info("get_vitals_from_upload - VLM parsed features: %s", vlm_features if vlm_features is not None else "None")
-        # Prefer sending raw vlm text to LLM (same behavior as process_screening)
-        llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
         # Run LLM (off the event loop)
         structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
-        # Return merged result
         return {
-            "vlm_features": vlm_features,
-            "vlm_raw": vlm_raw,
-            "structured_risk": structured_risk
         }
     except Exception as e:
         logger.exception("get_vitals_from_upload pipeline failed")
@@ -789,17 +804,22 @@ async def get_vitals_for_screening(screening_id: str):
         # Run VLM off the event loop
         vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
-        # Log VLM outputs
-        logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:100] + "...") if vlm_raw else "None")
-        logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, vlm_features if vlm_features is not None else "None")
-        llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
         structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
         # Optionally store this run's outputs back into the DB for inspection
         entry.setdefault("ai_results", {})
         entry["ai_results"].update({
-            "vlm_features": vlm_features,
             "vlm_raw": vlm_raw,
             "structured_risk": structured_risk,
             "last_vitals_run": datetime.utcnow().isoformat() + "Z"
@@ -807,16 +827,16 @@ async def get_vitals_for_screening(screening_id: str):
         return {
             "screening_id": screening_id,
-            "vlm_features": vlm_features,
-            "vlm_raw": vlm_raw,
-            "structured_risk": structured_risk
         }
     except Exception as e:
         logger.exception("get_vitals_for_screening pipeline failed")
         raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
 # -----------------------
-# Main processing pipeline
 # -----------------------
 async def process_screening(screening_id: str):
     """
@@ -824,7 +844,7 @@ async def process_screening(screening_id: str):
      - load images
      - quick detector-based quality metrics
      - run VLM -> vlm_features (dict or None) + vlm_raw (string)
-     - run LLM on vlm_raw (preferred) or vlm_features -> structured risk JSON
      - merge results into ai_results and finish
     """
     try:
@@ -914,7 +934,7 @@ async def process_screening(screening_id: str):
             vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({
-                "vlm_features": vlm_features,
                 "vlm_raw": vlm_raw
             })
         except Exception as e:
@@ -922,33 +942,25 @@ async def process_screening(screening_id: str):
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
             vlm_features = None
-            vlm_raw = None
         # Log VLM outputs in pipeline context
-        logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:100] + "...") if vlm_raw else "None")
-        logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, vlm_features if vlm_features is not None else "None")
         # --------------------------
-        # RUN LLM on vlm_raw (preferred) or vlm_features -> structured risk JSON
         # --------------------------
         structured_risk = None
         try:
-            if vlm_raw:
-                structured_risk = run_llm_on_vlm(vlm_raw)
-            elif vlm_features:
-                structured_risk = run_llm_on_vlm(vlm_features)
             else:
-                # Fallback if VLM failed: produce conservative defaults
-                structured_risk = {
-                    "risk_score": 0.0,
-                    "jaundice_probability": 0.0,
-                    "anemia_probability": 0.0,
-                    "hydration_issue_probability": 0.0,
-                    "neurological_issue_probability": 0.0,
-                    "summary": "",
-                    "recommendation": "",
-                    "confidence": 0.0
-                }
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
         except Exception as e:
@@ -967,19 +979,14 @@ async def process_screening(screening_id: str):
             }
         # Use structured_risk for summary recommendations & simple disease inference placeholders
-        hem = screenings_db[screening_id]["ai_results"].get("medical_insights", {}).get("hemoglobin_estimate", None)
-        bil = screenings_db[screening_id]["ai_results"].get("medical_insights", {}).get("bilirubin_estimate", None)
-        # Keep older ai_results shape for backward compatibility (if you want)
         screenings_db[screening_id].setdefault("ai_results", {})
         screenings_db[screening_id]["ai_results"].update({
             "processing_time_ms": 1200
         })
-        # disease_predictions & recommendations can be built from structured_risk if needed
         disease_predictions = [
             {
-                "condition": "Anemia-like-signs",  # internal tag (not surfaced in LLM summary)
                 "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
                 "probability": structured_risk.get("anemia_probability", 0.0),
                 "confidence": structured_risk.get("confidence", 0.0)
@@ -995,7 +1002,7 @@ async def process_screening(screening_id: str):
         recommendations = {
             "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
             "message_english": structured_risk.get("recommendation", "") or f"Please follow up with a health professional if concerns persist.",
-            "message_hindi": ""  # could be auto-translated if desired
         }
         screenings_db[screening_id].update({

 Notes:
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
+ - This final variant:
+    * logs raw VLM responses,
+    * always returns raw VLM output in API responses,
+    * extracts JSON from VLM via regex when possible, and
+    * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
 """
 import io
 import logging
 import traceback
 import re
+import time
 from typing import Dict, Any, Optional, Tuple
 from datetime import datetime
 LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
 HF_TOKEN = os.getenv("HF_TOKEN", None)
+# VLM retry config (if VLM returns empty text)
+VLM_EMPTY_RETRIES = int(os.getenv("VLM_EMPTY_RETRIES", "2"))
+VLM_EMPTY_RETRY_SLEEP_S = float(os.getenv("VLM_EMPTY_RETRY_SLEEP_S", "0.5"))
 # Default VLM prompt
 DEFAULT_VLM_PROMPT = (
     "From the provided face/eye images, compute the required screening features "
         return 0.0
 # -----------------------
+# Regex-based robust extractor (used for both VLM raw parsing & LLM raw parsing)
 # -----------------------
 def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
     """
+    Extract numeric fields and text fields from the first {...} block found in raw_text.
+    Returns a dict with:
+      - risk_score (0..100)
+      - jaundice_probability (0..1)
+      - anemia_probability (0..1)
+      - hydration_issue_probability (0..1)
+      - neurological_issue_probability (0..1)
+      - confidence (0..1)
+      - summary (string)
+      - recommendation (string)
     """
     match = re.search(r"\{[\s\S]*\}", raw_text)
     if not match:
+        raise ValueError("No JSON-like block found in text")
     block = match.group(0)
     def find_number_for_key(key: str) -> Optional[float]:
         patterns = [
+            rf'"{key}"\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?',
             rf"'{key}'\s*:\s*['\"]?\s*([-+]?\d+(\.\d+)?)\s*%?\s*['\"]?",
+            rf'\b{key}\b\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?',
+            rf'"{key}"\s*:\s*["\']([^"\']+)["\']',
             rf"'{key}'\s*:\s*['\"]([^'\"]+)['\"]"
         ]
         for pat in patterns:
             g = m.group(1)
             if g is None:
                 continue
+            s = str(g).strip().replace("%", "").strip()
             try:
+                return float(s)
             except Exception:
                 return None
         return None
     def find_text_for_key(key: str) -> str:
         m = re.search(rf'"{key}"\s*:\s*"([^"]*)"', block, flags=re.IGNORECASE)
         if m:
             return m.group(1).strip()
         m = re.search(rf"'{key}'\s*:\s*'([^']*)'", block, flags=re.IGNORECASE)
         if m:
             return m.group(1).strip()
         m = re.search(rf'\b{key}\b\s*:\s*([^\n,}}]+)', block, flags=re.IGNORECASE)
         if m:
             return m.group(1).strip().strip('",')
         return ""
     raw_risk = find_number_for_key("risk_score")
     raw_jaundice = find_number_for_key("jaundice_probability")
     raw_anemia = find_number_for_key("anemia_probability")
     raw_neuro = find_number_for_key("neurological_issue_probability")
     raw_conf = find_number_for_key("confidence")
     def normalize_prob(v: Optional[float]) -> float:
         if v is None:
             return 0.0
         if v > 1.0 and v <= 100.0:
             return max(0.0, min(1.0, v / 100.0))
         if v > 100.0:
             return 1.0
         return max(0.0, min(1.0, v))
     jaundice_probability = normalize_prob(raw_jaundice)
     neurological_issue_probability = normalize_prob(raw_neuro)
     confidence = normalize_prob(raw_conf)
     def normalize_risk(v: Optional[float]) -> float:
         if v is None:
             return 0.0
         if v <= 1.0:
             return round(max(0.0, min(100.0, v * 100.0)), 2)
         if v > 1.0 and v <= 100.0:
             return round(max(0.0, min(100.0, v)), 2)
         return round(max(0.0, min(100.0, v if v < float('inf') else 100.0)), 2)
     risk_score = normalize_risk(raw_risk)
     Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
        (parsed_features_dict_or_None, raw_text_response_str)
+    Robustness improvements:
+    - Retries a few times if raw text is empty.
+    - Attempts json.loads first, then extract_json_via_regex.
+    - Logs raw output and parsed features for debugging.
     """
     prompt = prompt or DEFAULT_VLM_PROMPT
     if not os.path.exists(face_path) or not os.path.exists(eye_path):
     client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
     message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
+    last_exc = None
+    raw_text = None
+    for attempt in range(1, VLM_EMPTY_RETRIES + 2):  # attempts = retries+1
+        try:
+            logger.info("Calling VLM Space %s (attempt %d)", GRADIO_VLM_SPACE, attempt)
+            result = client.predict(message=message, history=[], api_name="/chat_fn")
+        except Exception as e:
+            logger.exception("VLM call failed on attempt %d", attempt)
+            last_exc = e
+            if attempt <= VLM_EMPTY_RETRIES:
+                time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
+                continue
+            raise RuntimeError(f"VLM call ultimately failed: {e}")
+        if not result:
+            logger.warning("VLM returned empty result object on attempt %d", attempt)
+            raw_text = ""
+        else:
+            # normalize result object
+            if isinstance(result, (list, tuple)):
+                out = result[0]
+            elif isinstance(result, dict):
+                out = result
+            else:
+                out = {"text": str(result)}
+            text_out = out.get("text") or out.get("output") or ""
+            # if files key exists but text is empty, log it
+            if isinstance(out, dict) and (out.get("files") == [] or not out.get("files")) and (not text_out.strip()):
+                logger.warning("VLM returned no text AND no files in response on attempt %d: %s", attempt, str(out))
+            raw_text = text_out
+        # if raw_text is non-empty, break; otherwise retry up to retries
+        if raw_text and raw_text.strip():
+            break
+        else:
+            logger.warning("VLM returned empty text on attempt %d. Retrying (%d remaining)...", attempt, max(0, VLM_EMPTY_RETRIES - (attempt - 1)))
+            if attempt <= VLM_EMPTY_RETRIES:
+                time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
+                continue
+            # no more retries
+            break
+    if raw_text is None:
+        raise RuntimeError(f"VLM returned no response (last error: {last_exc})")
+    text_out = raw_text
     # Log raw VLM output for debugging/auditing
+    logger.info("VLM raw output (length=%d):\n%s", len(text_out or ""), (text_out[:1000] + "...") if text_out and len(text_out) > 1000 else (text_out or "<EMPTY>"))
+    # Try to parse JSON first (fast path)
     parsed_features = None
     try:
+        parsed_features = json.loads(text_out) if text_out and text_out.strip() else None
+        if parsed_features is not None and not isinstance(parsed_features, dict):
             parsed_features = None
     except Exception:
         parsed_features = None
+    # If json.loads failed or returned None, try regex-based extraction
+    if parsed_features is None and text_out and text_out.strip():
         try:
             parsed_features = extract_json_via_regex(text_out)
             logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
         except Exception as e:
+            logger.info("VLM regex extraction failed or found nothing: %s", str(e))
             parsed_features = None
+    if parsed_features is None:
+        logger.info("VLM parsed features: None (will fallback to sending '{}' or raw string to LLM).")
     else:
+        logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
+    # Always return a (parsed_features, raw_text) tuple: parsed_features may be None, raw_text may be an empty string
+    return parsed_features, (text_out or "")
 # -----------------------
 # Gradio / LLM helper (defensive, with retry + clamps)
                    developer_prompt: Optional[str] = None) -> Dict[str, Any]:
     """
     Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
+    - Logs the VLM raw string and the chosen payload.
+    - Sends cleaned JSON (json.dumps(vlm_features)) if vlm_features_or_raw is dict, else sends raw string.
+    - Uses regex to extract the final JSON from LLM raw output.
     """
     if not GRADIO_AVAILABLE:
         raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
     system_prompt = system_prompt or LLM_SYSTEM_PROMPT
     developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
+    # Decide what to send to LLM and log the raw input
     if isinstance(vlm_features_or_raw, str):
+        vlm_raw_str = vlm_features_or_raw
+        logger.info("LLM input will be RAW VLM STRING (len=%d)", len(vlm_raw_str or ""))
+        vlm_json_str_to_send = vlm_raw_str if vlm_raw_str and vlm_raw_str.strip() else "{}"
     else:
+        vlm_raw_str = json.dumps(vlm_features_or_raw, ensure_ascii=False) if vlm_features_or_raw else "{}"
+        logger.info("LLM input will be CLEANED VLM JSON (len=%d)", len(vlm_raw_str))
+        vlm_json_str_to_send = vlm_raw_str
+    # Build instruction payload
     instruction = (
         "\n\nSTRICT INSTRUCTIONS (READ CAREFULLY):\n"
         "1) OUTPUT ONLY a single valid JSON object and nothing else — no prose, no explanation, no code fences.\n"
         "If you cannot estimate a value, set it to null.\n\n"
         "Now, based on the VLM output below, produce ONLY the JSON object described above.\n\n"
         "===BEGIN VLM OUTPUT===\n"
+        f"{vlm_json_str_to_send}\n"
         "===END VLM OUTPUT===\n\n"
     )
     # Defensive coercion / clamps
     try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
         try_max_new_tokens = 1024
     try_temperature = float(temperature) if temperature is not None else 0.0
+    # Some Spaces validate temperature >= 0.1
     if try_temperature < 0.1:
         try_temperature = 0.1
     predict_kwargs = dict(
+        input_data=instruction,
         max_new_tokens=float(try_max_new_tokens),
         model_identity=model_identity,
         system_prompt=system_prompt,
         api_name="/chat"
     )
     last_exc = None
     for attempt in (1, 2):
         try:
             logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
                         LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
             result = client.predict(**predict_kwargs)
             # normalize to string
             if isinstance(result, (dict, list)):
                 text_out = json.dumps(result)
             else:
                 text_out = str(result)
             if not text_out or len(text_out.strip()) == 0:
                 raise RuntimeError("LLM returned empty response")
+            logger.info("LLM raw output (len=%d):\n%s", len(text_out or ""), (text_out[:2000] + "...") if len(text_out) > 2000 else text_out)
             # parse with regex extractor (may raise)
+            parsed = None
+            try:
+                parsed = extract_json_via_regex(text_out)
+            except Exception:
+                # fallback: attempt json.loads naive
+                try:
+                    parsed = json.loads(text_out)
+                    if not isinstance(parsed, dict):
+                        parsed = None
+                except Exception:
+                    parsed = None
+            if parsed is None:
+                raise ValueError("Failed to extract JSON from LLM output")
             # pretty log parsed JSON
             try:
             except Exception:
                 logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
+            # defensive clamps (same as extractor expectations)
             def safe_prob(val):
                 try:
                     v = float(val)
             return parsed
         except AppError as app_e:
             logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
             last_exc = app_e
             if attempt == 1:
                 predict_kwargs["temperature"] = 0.2
                 predict_kwargs["max_new_tokens"] = float(512)
                 logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
                 continue
             else:
                 raise RuntimeError(f"LLM call failed (AppError): {app_e}")
         except Exception as e:
             logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
             last_exc = e
             if attempt == 1:
                 predict_kwargs["temperature"] = 0.2
                 predict_kwargs["max_new_tokens"] = float(512)
                 continue
             raise RuntimeError(f"LLM call failed: {e}")
     raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
 # -----------------------
 @app.post("/api/v1/validate-eye-photo")
 async def validate_eye_photo(image: UploadFile = File(...)):
     if mtcnn is None:
         raise HTTPException(status_code=500, detail="No face detector available in this deployment.")
     try:
         content = await image.read()
         if not content:
         pil_img = load_image_from_bytes(content)
         img_arr = np.asarray(pil_img)  # RGB
         if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "facenet_pytorch":
             try:
                 boxes, probs, landmarks = mtcnn.detect(pil_img, landmarks=True)
                 traceback.print_exc()
                 raise HTTPException(status_code=500, detail="Face detector failed during inference.")
         if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "mtcnn":
             try:
                 detections = mtcnn.detect_faces(img_arr)
                     "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
                     "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
         if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
             try:
                 gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
 async def get_results(screening_id: str):
     """Return the stored screening record for ``screening_id``.

     Responds with HTTP 404 when the id is not present in ``screenings_db``.
     Before returning, the record is updated in place so that it has an
     ``ai_results`` mapping containing a ``vlm_raw`` key (an empty string
     when nothing was stored), keeping that debugging field always present.
     """
     if screening_id not in screenings_db:
         raise HTTPException(status_code=404, detail="Screening not found")
     record = screenings_db[screening_id]
     # setdefault returns the existing mapping, or installs and returns a new one.
     ai_results = record.setdefault("ai_results", {})
     # Only fills the key when it is missing; an existing value is left untouched.
     ai_results.setdefault("vlm_raw", "")
     return record
 @app.get("/api/v1/history/{user_id}")
 async def get_history(user_id: str):
 ):
     """
     Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
+      { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
     """
     if not GRADIO_AVAILABLE:
         raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
         # Run VLM (off the event loop)
         vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
+        # Log the VLM outputs again with endpoint context (the VLM helper already logs them itself)
+        logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
+        logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
+        # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
+        if vlm_features:
+            llm_input = json.dumps(vlm_features, ensure_ascii=False)
+            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
+        else:
+            llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
+            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
         # Run LLM (off the event loop)
         structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
+        # Return merged result (includes raw VLM output for debugging)
         return {
+            "vlm_raw_output": vlm_raw,
+            "vlm_parsed_features": vlm_features,
+            "llm_structured_risk": structured_risk
         }
     except Exception as e:
         logger.exception("get_vitals_from_upload pipeline failed")
         # Run VLM off the event loop
         vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
+        logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
+        logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
+        if vlm_features:
+            llm_input = json.dumps(vlm_features, ensure_ascii=False)
+            logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
+        else:
+            llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
+            logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
         structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
         # Optionally store this run's outputs back into the DB for inspection
         entry.setdefault("ai_results", {})
         entry["ai_results"].update({
+            "vlm_parsed_features": vlm_features,
             "vlm_raw": vlm_raw,
             "structured_risk": structured_risk,
             "last_vitals_run": datetime.utcnow().isoformat() + "Z"
         return {
             "screening_id": screening_id,
+            "vlm_raw_output": vlm_raw,
+            "vlm_parsed_features": vlm_features,
+            "llm_structured_risk": structured_risk
         }
     except Exception as e:
         logger.exception("get_vitals_for_screening pipeline failed")
         raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
 # -----------------------
+# Main background pipeline (upload -> process_screening)
 # -----------------------
 async def process_screening(screening_id: str):
     """
      - load images
      - quick detector-based quality metrics
      - run VLM -> vlm_features (dict or None) + vlm_raw (string)
+     - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
      - merge results into ai_results and finish
     """
     try:
             vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({
+                "vlm_parsed_features": vlm_features,
                 "vlm_raw": vlm_raw
             })
         except Exception as e:
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
             vlm_features = None
+            vlm_raw = ""
         # Log VLM outputs in pipeline context
+        logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
+        logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
         # --------------------------
+        # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
         # --------------------------
         structured_risk = None
         try:
+            if vlm_features:
+                # prefer cleaned JSON
+                llm_input = json.dumps(vlm_features, ensure_ascii=False)
             else:
+                # fallback to raw string (may be empty)
+                llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
+            structured_risk = run_llm_on_vlm(llm_input)
             screenings_db[screening_id].setdefault("ai_results", {})
             screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
         except Exception as e:
             }
         # Use structured_risk for summary recommendations & simple disease inference placeholders
         screenings_db[screening_id].setdefault("ai_results", {})
         screenings_db[screening_id]["ai_results"].update({
             "processing_time_ms": 1200
         })
         disease_predictions = [
             {
+                "condition": "Anemia-like-signs",
                 "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
                 "probability": structured_risk.get("anemia_probability", 0.0),
                 "confidence": structured_risk.get("confidence", 0.0)
         recommendations = {
             "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
             "message_english": structured_risk.get("recommendation", "") or f"Please follow up with a health professional if concerns persist.",
+            "message_hindi": ""
         }
         screenings_db[screening_id].update({