dpv007 commited on
Commit
2d03b8b
·
verified ·
1 Parent(s): 3bfde28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -152
app.py CHANGED
@@ -10,12 +10,10 @@ Pipeline:
10
  Notes:
11
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
12
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
13
- - This final variant:
14
- * logs raw VLM responses,
15
- * always returns raw VLM output in API responses,
16
- * extracts JSON from VLM via regex when possible, and
17
- * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
18
- - VLM calls were simplified to a single call (no retries).
19
  """
20
 
21
  import io
@@ -26,7 +24,6 @@ import asyncio
26
  import logging
27
  import traceback
28
  import re
29
- import time
30
  from typing import Dict, Any, Optional, Tuple
31
  from datetime import datetime
32
 
@@ -56,7 +53,7 @@ HF_TOKEN = os.getenv("HF_TOKEN", None)
56
  DEFAULT_VLM_PROMPT = (
57
  "From the provided face/eye images, compute the required screening features "
58
  "(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
59
- "and output a clean JSON feature vector only with values ranging as probabilities."
60
  )
61
 
62
  # Default LLM prompts / metadata (stricter: force JSON-only output)
@@ -143,32 +140,34 @@ def estimate_eye_openness_from_detection(confidence: float) -> float:
143
  return 0.0
144
 
145
  # -----------------------
146
- # Regex-based robust extractor (used for both VLM raw parsing & LLM raw parsing)
147
  # -----------------------
148
  def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
149
  """
150
- Extract numeric fields and text fields from the first {...} block found in raw_text.
151
- Returns a dict with:
152
- - risk_score (0..100)
153
- - jaundice_probability (0..1)
154
- - anemia_probability (0..1)
155
- - hydration_issue_probability (0..1)
156
- - neurological_issue_probability (0..1)
157
- - confidence (0..1)
158
- - summary (string)
159
- - recommendation (string)
160
  """
 
161
  match = re.search(r"\{[\s\S]*\}", raw_text)
162
  if not match:
163
- raise ValueError("No JSON-like block found in text")
 
164
  block = match.group(0)
165
 
166
  def find_number_for_key(key: str) -> Optional[float]:
 
 
 
 
 
 
167
  patterns = [
168
- rf'"{key}"\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?',
169
  rf"'{key}'\s*:\s*['\"]?\s*([-+]?\d+(\.\d+)?)\s*%?\s*['\"]?",
170
- rf'\b{key}\b\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?',
171
- rf'"{key}"\s*:\s*["\']([^"\']+)["\']',
172
  rf"'{key}'\s*:\s*['\"]([^'\"]+)['\"]"
173
  ]
174
  for pat in patterns:
@@ -178,25 +177,33 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
178
  g = m.group(1)
179
  if g is None:
180
  continue
181
- s = str(g).strip().replace("%", "").strip()
 
 
 
182
  try:
183
- return float(s)
 
184
  except Exception:
 
185
  return None
186
  return None
187
 
188
  def find_text_for_key(key: str) -> str:
 
189
  m = re.search(rf'"{key}"\s*:\s*"([^"]*)"', block, flags=re.IGNORECASE)
190
  if m:
191
  return m.group(1).strip()
192
  m = re.search(rf"'{key}'\s*:\s*'([^']*)'", block, flags=re.IGNORECASE)
193
  if m:
194
  return m.group(1).strip()
 
195
  m = re.search(rf'\b{key}\b\s*:\s*([^\n,}}]+)', block, flags=re.IGNORECASE)
196
  if m:
197
  return m.group(1).strip().strip('",')
198
  return ""
199
 
 
200
  raw_risk = find_number_for_key("risk_score")
201
  raw_jaundice = find_number_for_key("jaundice_probability")
202
  raw_anemia = find_number_for_key("anemia_probability")
@@ -204,13 +211,17 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
204
  raw_neuro = find_number_for_key("neurological_issue_probability")
205
  raw_conf = find_number_for_key("confidence")
206
 
 
 
207
  def normalize_prob(v: Optional[float]) -> float:
208
  if v is None:
209
  return 0.0
210
  if v > 1.0 and v <= 100.0:
211
  return max(0.0, min(1.0, v / 100.0))
 
212
  if v > 100.0:
213
  return 1.0
 
214
  return max(0.0, min(1.0, v))
215
 
216
  jaundice_probability = normalize_prob(raw_jaundice)
@@ -219,13 +230,17 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
219
  neurological_issue_probability = normalize_prob(raw_neuro)
220
  confidence = normalize_prob(raw_conf)
221
 
 
222
  def normalize_risk(v: Optional[float]) -> float:
223
  if v is None:
224
  return 0.0
225
  if v <= 1.0:
 
226
  return round(max(0.0, min(100.0, v * 100.0)), 2)
 
227
  if v > 1.0 and v <= 100.0:
228
  return round(max(0.0, min(100.0, v)), 2)
 
229
  return round(max(0.0, min(100.0, v if v < float('inf') else 100.0)), 2)
230
 
231
  risk_score = normalize_risk(raw_risk)
@@ -246,7 +261,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
246
  return out
247
 
248
  # -----------------------
249
- # Gradio / VLM helper (single-call, no retries)
250
  # -----------------------
251
  def get_gradio_client_for_space(space: str) -> Client:
252
  if not GRADIO_AVAILABLE:
@@ -260,7 +275,8 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
260
  Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
261
  (parsed_features_dict_or_None, raw_text_response_str)
262
 
263
- Simplified: single call (no retries). Attempts json.loads then regex extraction.
 
264
  """
265
  prompt = prompt or DEFAULT_VLM_PROMPT
266
  if not os.path.exists(face_path) or not os.path.exists(eye_path):
@@ -271,63 +287,53 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
271
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
272
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
273
 
274
- # SINGLE CALL (no retries)
275
  try:
276
  logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
277
  result = client.predict(message=message, history=[], api_name="/chat_fn")
278
  except Exception as e:
279
- logger.exception("VLM call failed (no retries)")
280
  raise RuntimeError(f"VLM call failed: {e}")
281
 
282
- # Normalize result
283
- raw_text = ""
284
  if not result:
285
- logger.warning("VLM returned empty result object")
286
- raw_text = ""
287
- else:
288
- if isinstance(result, (list, tuple)):
289
- out = result[0]
290
- elif isinstance(result, dict):
291
- out = result
292
- else:
293
- out = {"text": str(result)}
294
 
295
- text_out = out.get("text") or out.get("output") or ""
296
- raw_text = text_out
297
- logger.info("VLM response object (debug): %s", out)
 
 
 
 
298
 
299
- # If files present but text empty, log it explicitly
300
- if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
301
- logger.warning("VLM returned no text AND files: %s", out.get("files"))
302
 
303
- # Log raw VLM output for debugging/auditing
304
- logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
 
305
 
306
- # Try to parse JSON first (fast path)
307
  parsed_features = None
308
  try:
309
- parsed_features = json.loads(raw_text) if raw_text and raw_text.strip() else None
310
- if parsed_features is not None and not isinstance(parsed_features, dict):
311
  parsed_features = None
312
  except Exception:
313
- parsed_features = None
314
-
315
- # If json.loads failed or returned None, try regex-based extraction
316
- if parsed_features is None and raw_text and raw_text.strip():
317
  try:
318
- parsed_features = extract_json_via_regex(raw_text)
319
- logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
320
- except Exception as e:
321
- logger.info("VLM regex extraction failed or found nothing: %s", str(e))
 
 
 
 
 
 
 
322
  parsed_features = None
323
 
324
- if parsed_features is None:
325
- logger.info("VLM parsed features: None (will fallback to sending '{}' or raw string to LLM).")
326
- else:
327
- logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
328
-
329
- # Always return raw_text (may be empty string) and parsed_features (or None)
330
- return parsed_features, (raw_text or "")
331
 
332
  # -----------------------
333
  # Gradio / LLM helper (defensive, with retry + clamps)
@@ -341,9 +347,9 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
341
  developer_prompt: Optional[str] = None) -> Dict[str, Any]:
342
  """
343
  Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
344
- - Logs the VLM raw string and the chosen payload.
345
- - Sends cleaned JSON (json.dumps(vlm_features)) if vlm_features_or_raw is dict, else sends raw string.
346
- - Uses regex to extract the final JSON from LLM raw output.
347
  """
348
  if not GRADIO_AVAILABLE:
349
  raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
@@ -359,17 +365,13 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
359
  system_prompt = system_prompt or LLM_SYSTEM_PROMPT
360
  developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
361
 
362
- # Decide what to send to LLM and log the raw input
363
  if isinstance(vlm_features_or_raw, str):
364
- vlm_raw_str = vlm_features_or_raw
365
- logger.info("LLM input will be RAW VLM STRING (len=%d)", len(vlm_raw_str or ""))
366
- vlm_json_str_to_send = vlm_raw_str if vlm_raw_str and vlm_raw_str.strip() else "{}"
367
  else:
368
- vlm_raw_str = json.dumps(vlm_features_or_raw, ensure_ascii=False) if vlm_features_or_raw else "{}"
369
- logger.info("LLM input will be CLEANED VLM JSON (len=%d)", len(vlm_raw_str))
370
- vlm_json_str_to_send = vlm_raw_str
371
 
372
- # Build instruction payload
373
  instruction = (
374
  "\n\nSTRICT INSTRUCTIONS (READ CAREFULLY):\n"
375
  "1) OUTPUT ONLY a single valid JSON object and nothing else — no prose, no explanation, no code fences.\n"
@@ -380,9 +382,10 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
380
  "If you cannot estimate a value, set it to null.\n\n"
381
  "Now, based on the VLM output below, produce ONLY the JSON object described above.\n\n"
382
  "===BEGIN VLM OUTPUT===\n"
383
- f"{vlm_json_str_to_send}\n"
384
  "===END VLM OUTPUT===\n\n"
385
  )
 
386
 
387
  # Defensive coercion / clamps
388
  try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
@@ -390,12 +393,13 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
390
  try_max_new_tokens = 1024
391
 
392
  try_temperature = float(temperature) if temperature is not None else 0.0
393
- # Some Spaces validate temperature >= 0.1
394
  if try_temperature < 0.1:
395
  try_temperature = 0.1
396
 
 
397
  predict_kwargs = dict(
398
- input_data=instruction,
399
  max_new_tokens=float(try_max_new_tokens),
400
  model_identity=model_identity,
401
  system_prompt=system_prompt,
@@ -408,39 +412,26 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
408
  api_name="/chat"
409
  )
410
 
 
411
  last_exc = None
412
  for attempt in (1, 2):
413
  try:
414
  logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
415
  LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
416
  result = client.predict(**predict_kwargs)
417
-
418
  # normalize to string
419
  if isinstance(result, (dict, list)):
420
  text_out = json.dumps(result)
421
  else:
422
  text_out = str(result)
423
-
424
  if not text_out or len(text_out.strip()) == 0:
425
  raise RuntimeError("LLM returned empty response")
426
-
427
- logger.info("LLM raw output (len=%d):\n%s", len(text_out or ""), (text_out[:2000] + "...") if len(text_out) > 2000 else text_out)
428
 
429
  # parse with regex extractor (may raise)
430
- parsed = None
431
- try:
432
- parsed = extract_json_via_regex(text_out)
433
- except Exception:
434
- # fallback: attempt json.loads naive
435
- try:
436
- parsed = json.loads(text_out)
437
- if not isinstance(parsed, dict):
438
- parsed = None
439
- except Exception:
440
- parsed = None
441
-
442
- if parsed is None:
443
- raise ValueError("Failed to extract JSON from LLM output")
444
 
445
  # pretty log parsed JSON
446
  try:
@@ -448,7 +439,7 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
448
  except Exception:
449
  logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
450
 
451
- # defensive clamps (same as extractor expectations)
452
  def safe_prob(val):
453
  try:
454
  v = float(val)
@@ -487,24 +478,29 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
487
  return parsed
488
 
489
  except AppError as app_e:
 
490
  logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
491
  last_exc = app_e
492
  if attempt == 1:
 
493
  predict_kwargs["temperature"] = 0.2
494
  predict_kwargs["max_new_tokens"] = float(512)
495
  logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
496
  continue
497
  else:
 
498
  raise RuntimeError(f"LLM call failed (AppError): {app_e}")
499
  except Exception as e:
500
  logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
501
  last_exc = e
 
502
  if attempt == 1:
503
  predict_kwargs["temperature"] = 0.2
504
  predict_kwargs["max_new_tokens"] = float(512)
505
  continue
506
  raise RuntimeError(f"LLM call failed: {e}")
507
 
 
508
  raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
509
 
510
  # -----------------------
@@ -533,8 +529,13 @@ async def health_check():
533
 
534
  @app.post("/api/v1/validate-eye-photo")
535
  async def validate_eye_photo(image: UploadFile = File(...)):
 
 
 
 
536
  if mtcnn is None:
537
  raise HTTPException(status_code=500, detail="No face detector available in this deployment.")
 
538
  try:
539
  content = await image.read()
540
  if not content:
@@ -542,6 +543,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
542
  pil_img = load_image_from_bytes(content)
543
  img_arr = np.asarray(pil_img) # RGB
544
 
 
545
  if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "facenet_pytorch":
546
  try:
547
  boxes, probs, landmarks = mtcnn.detect(pil_img, landmarks=True)
@@ -566,6 +568,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
566
  traceback.print_exc()
567
  raise HTTPException(status_code=500, detail="Face detector failed during inference.")
568
 
 
569
  if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "mtcnn":
570
  try:
571
  detections = mtcnn.detect_faces(img_arr)
@@ -587,6 +590,7 @@ async def validate_eye_photo(image: UploadFile = File(...)):
587
  "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
588
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
589
 
 
590
  if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
591
  try:
592
  gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
@@ -688,11 +692,7 @@ async def get_status(screening_id: str):
688
  async def get_results(screening_id: str):
689
  if screening_id not in screenings_db:
690
  raise HTTPException(status_code=404, detail="Screening not found")
691
- # Ensure vlm_raw is always present in ai_results for debugging
692
- entry = screenings_db[screening_id]
693
- entry.setdefault("ai_results", {})
694
- entry["ai_results"].setdefault("vlm_raw", entry.get("ai_results", {}).get("vlm_raw", ""))
695
- return entry
696
 
697
  @app.get("/api/v1/history/{user_id}")
698
  async def get_history(user_id: str):
@@ -709,7 +709,7 @@ async def get_vitals_from_upload(
709
  ):
710
  """
711
  Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
712
- { vlm_parsed_features, vlm_raw_output, llm_structured_risk }
713
  """
714
  if not GRADIO_AVAILABLE:
715
  raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
@@ -735,26 +735,17 @@ async def get_vitals_from_upload(
735
  # Run VLM (off the event loop)
736
  vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
737
 
738
- # Log VLM outputs (already logged inside run_vlm..., but additional context)
739
- logger.info("get_vitals_from_upload - VLM raw (snippet): %s", (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
740
- logger.info("get_vitals_from_upload - VLM parsed features: %s", json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
741
-
742
- # Decide what to feed to LLM: prefer cleaned JSON if available, else raw VLM string
743
- if vlm_features:
744
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
745
- logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
746
- else:
747
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
748
- logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
749
 
750
  # Run LLM (off the event loop)
751
  structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
752
 
753
- # Return merged result (includes raw VLM output for debugging)
754
  return {
755
- "vlm_raw_output": vlm_raw,
756
- "vlm_parsed_features": vlm_features,
757
- "llm_structured_risk": structured_risk
758
  }
759
  except Exception as e:
760
  logger.exception("get_vitals_from_upload pipeline failed")
@@ -779,22 +770,13 @@ async def get_vitals_for_screening(screening_id: str):
779
  # Run VLM off the event loop
780
  vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
781
 
782
- logger.info("get_vitals_for_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
783
- logger.info("get_vitals_for_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
784
-
785
- if vlm_features:
786
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
787
- logger.info("Feeding CLEANED VLM JSON to LLM (len=%d).", len(llm_input))
788
- else:
789
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
790
- logger.info("Feeding RAW VLM STRING to LLM (len=%d).", len(llm_input))
791
-
792
  structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
793
 
794
  # Optionally store this run's outputs back into the DB for inspection
795
  entry.setdefault("ai_results", {})
796
  entry["ai_results"].update({
797
- "vlm_parsed_features": vlm_features,
798
  "vlm_raw": vlm_raw,
799
  "structured_risk": structured_risk,
800
  "last_vitals_run": datetime.utcnow().isoformat() + "Z"
@@ -802,16 +784,16 @@ async def get_vitals_for_screening(screening_id: str):
802
 
803
  return {
804
  "screening_id": screening_id,
805
- "vlm_raw_output": vlm_raw,
806
- "vlm_parsed_features": vlm_features,
807
- "llm_structured_risk": structured_risk
808
  }
809
  except Exception as e:
810
  logger.exception("get_vitals_for_screening pipeline failed")
811
  raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
812
 
813
  # -----------------------
814
- # Main background pipeline (upload -> process_screening)
815
  # -----------------------
816
  async def process_screening(screening_id: str):
817
  """
@@ -819,7 +801,7 @@ async def process_screening(screening_id: str):
819
  - load images
820
  - quick detector-based quality metrics
821
  - run VLM -> vlm_features (dict or None) + vlm_raw (string)
822
- - run LLM on vlm_features (preferred) or vlm_raw -> structured risk JSON
823
  - merge results into ai_results and finish
824
  """
825
  try:
@@ -909,7 +891,7 @@ async def process_screening(screening_id: str):
909
  vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
910
  screenings_db[screening_id].setdefault("ai_results", {})
911
  screenings_db[screening_id]["ai_results"].update({
912
- "vlm_parsed_features": vlm_features,
913
  "vlm_raw": vlm_raw
914
  })
915
  except Exception as e:
@@ -917,25 +899,29 @@ async def process_screening(screening_id: str):
917
  screenings_db[screening_id].setdefault("ai_results", {})
918
  screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
919
  vlm_features = None
920
- vlm_raw = ""
921
-
922
- # Log VLM outputs in pipeline context
923
- logger.info("process_screening(%s) - VLM raw (snippet): %s", screening_id, (vlm_raw[:500] + "...") if vlm_raw else "<EMPTY>")
924
- logger.info("process_screening(%s) - VLM parsed features: %s", screening_id, json.dumps(vlm_features, indent=2, ensure_ascii=False) if vlm_features else "None")
925
 
926
  # --------------------------
927
- # RUN LLM on vlm_parsed (preferred) or vlm_raw -> structured risk JSON
928
  # --------------------------
929
  structured_risk = None
930
  try:
931
- if vlm_features:
932
- # prefer cleaned JSON
933
- llm_input = json.dumps(vlm_features, ensure_ascii=False)
 
934
  else:
935
- # fallback to raw string (may be empty)
936
- llm_input = vlm_raw if vlm_raw and vlm_raw.strip() else "{}"
937
-
938
- structured_risk = run_llm_on_vlm(llm_input)
 
 
 
 
 
 
 
939
  screenings_db[screening_id].setdefault("ai_results", {})
940
  screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
941
  except Exception as e:
@@ -954,14 +940,19 @@ async def process_screening(screening_id: str):
954
  }
955
 
956
  # Use structured_risk for summary recommendations & simple disease inference placeholders
 
 
 
 
957
  screenings_db[screening_id].setdefault("ai_results", {})
958
  screenings_db[screening_id]["ai_results"].update({
959
  "processing_time_ms": 1200
960
  })
961
 
 
962
  disease_predictions = [
963
  {
964
- "condition": "Anemia-like-signs",
965
  "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
966
  "probability": structured_risk.get("anemia_probability", 0.0),
967
  "confidence": structured_risk.get("confidence", 0.0)
@@ -977,7 +968,7 @@ async def process_screening(screening_id: str):
977
  recommendations = {
978
  "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
979
  "message_english": structured_risk.get("recommendation", "") or f"Please follow up with a health professional if concerns persist.",
980
- "message_hindi": ""
981
  }
982
 
983
  screenings_db[screening_id].update({
 
10
  Notes:
11
  - Add gradio_client==1.13.2 (or another compatible 1.x) to requirements.txt
12
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
13
+ - This version includes a robust regex-based extractor that finds the outermost {...} block
14
+ in the LLM output, extracts numeric values for the required keys, and always returns
15
+ numeric defaults (no NaN) so frontends will not receive null/None for numeric fields.
16
+ - This variant logs raw LLM output and the parsed JSON using Python logging.
 
 
17
  """
18
 
19
  import io
 
24
  import logging
25
  import traceback
26
  import re
 
27
  from typing import Dict, Any, Optional, Tuple
28
  from datetime import datetime
29
 
 
53
  DEFAULT_VLM_PROMPT = (
54
  "From the provided face/eye images, compute the required screening features "
55
  "(pallor, sclera yellowness, redness, mobility metrics, quality checks) "
56
+ "and output a clean JSON feature vector only."
57
  )
58
 
59
  # Default LLM prompts / metadata (stricter: force JSON-only output)
 
140
  return 0.0
141
 
142
  # -----------------------
143
+ # Regex-based robust extractor
144
  # -----------------------
145
  def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
146
  """
147
+ 1) Finds the outermost { ... } block in raw_text.
148
+ 2) Extracts numeric values after the listed keys using regex, tolerating:
149
+ - quotes, spaces, percent signs, percent numbers like "55%", strings like "0.12", integers, or numbers in quotes.
150
+ 3) Returns a dict with numeric fields GUARANTEED to be floats (no None/NaN), and string fields for summary/recommendation.
 
 
 
 
 
 
151
  """
152
+ # Find the first {...} block (outermost approximation)
153
  match = re.search(r"\{[\s\S]*\}", raw_text)
154
  if not match:
155
+ raise ValueError("No JSON-like block found in LLM output")
156
+
157
  block = match.group(0)
158
 
159
  def find_number_for_key(key: str) -> Optional[float]:
160
+ """
161
+ Returns a float in range 0..1 for probabilities, and raw numeric for other keys depending on usage.
162
+ This helper returns None if not found; caller will replace with defaults (0.0).
163
+ """
164
+ # Try multiple patterns to be robust
165
+ # Pattern captures numbers possibly with % and optional quotes, e.g. "45%", '0.12', 0.5, " 87 "
166
  patterns = [
167
+ rf'"{key}"\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?', # "key": "45%" or "key": 0.45
168
  rf"'{key}'\s*:\s*['\"]?\s*([-+]?\d+(\.\d+)?)\s*%?\s*['\"]?",
169
+ rf'\b{key}\b\s*:\s*["\']?\s*([-+]?\d+(\.\d+)?)\s*%?\s*["\']?', # key: 45%
170
+ rf'"{key}"\s*:\s*["\']([^"\']+)["\']', # capture quoted text (for non-numeric attempts)
171
  rf"'{key}'\s*:\s*['\"]([^'\"]+)['\"]"
172
  ]
173
  for pat in patterns:
 
177
  g = m.group(1)
178
  if g is None:
179
  continue
180
+ s = str(g).strip()
181
+ # Remove percent sign if present
182
+ s = s.replace("%", "").strip()
183
+ # Try to coerce to float
184
  try:
185
+ val = float(s)
186
+ return val
187
  except Exception:
188
+ # not numeric
189
  return None
190
  return None
191
 
192
  def find_text_for_key(key: str) -> str:
193
+ # capture "key": "some text" allowing single/double quotes and also unquoted until comma/}
194
  m = re.search(rf'"{key}"\s*:\s*"([^"]*)"', block, flags=re.IGNORECASE)
195
  if m:
196
  return m.group(1).strip()
197
  m = re.search(rf"'{key}'\s*:\s*'([^']*)'", block, flags=re.IGNORECASE)
198
  if m:
199
  return m.group(1).strip()
200
+ # fallback: key: some text (unquoted) up to comma or }
201
  m = re.search(rf'\b{key}\b\s*:\s*([^\n,}}]+)', block, flags=re.IGNORECASE)
202
  if m:
203
  return m.group(1).strip().strip('",')
204
  return ""
205
 
206
+ # Extract raw numeric candidates
207
  raw_risk = find_number_for_key("risk_score")
208
  raw_jaundice = find_number_for_key("jaundice_probability")
209
  raw_anemia = find_number_for_key("anemia_probability")
 
211
  raw_neuro = find_number_for_key("neurological_issue_probability")
212
  raw_conf = find_number_for_key("confidence")
213
 
214
+ # Normalize:
215
+ # - For probabilities: if value > 1 and <=100 => treat as percent -> divide by 100. If <=1 treat as fraction.
216
  def normalize_prob(v: Optional[float]) -> float:
217
  if v is None:
218
  return 0.0
219
  if v > 1.0 and v <= 100.0:
220
  return max(0.0, min(1.0, v / 100.0))
221
+ # if v is large >100, clamp to 1.0
222
  if v > 100.0:
223
  return 1.0
224
+ # otherwise assume already 0..1
225
  return max(0.0, min(1.0, v))
226
 
227
  jaundice_probability = normalize_prob(raw_jaundice)
 
230
  neurological_issue_probability = normalize_prob(raw_neuro)
231
  confidence = normalize_prob(raw_conf)
232
 
233
+ # risk_score: return in 0..100
234
  def normalize_risk(v: Optional[float]) -> float:
235
  if v is None:
236
  return 0.0
237
  if v <= 1.0:
238
+ # fraction given -> scale to 0..100
239
  return round(max(0.0, min(100.0, v * 100.0)), 2)
240
+ # if between 1 and 100, assume it's already 0..100
241
  if v > 1.0 and v <= 100.0:
242
  return round(max(0.0, min(100.0, v)), 2)
243
+ # clamp anything insane
244
  return round(max(0.0, min(100.0, v if v < float('inf') else 100.0)), 2)
245
 
246
  risk_score = normalize_risk(raw_risk)
 
261
  return out
262
 
263
  # -----------------------
264
+ # Gradio / VLM helper (returns parsed dict OR None, plus raw text)
265
  # -----------------------
266
  def get_gradio_client_for_space(space: str) -> Client:
267
  if not GRADIO_AVAILABLE:
 
275
  Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
276
  (parsed_features_dict_or_None, raw_text_response_str)
277
 
278
+ We attempt to parse JSON as before, but always return the original raw text so it can be
279
+ forwarded verbatim to the LLM if desired.
280
  """
281
  prompt = prompt or DEFAULT_VLM_PROMPT
282
  if not os.path.exists(face_path) or not os.path.exists(eye_path):
 
287
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
288
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
289
 
 
290
  try:
291
  logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
292
  result = client.predict(message=message, history=[], api_name="/chat_fn")
293
  except Exception as e:
294
+ logger.exception("VLM call failed")
295
  raise RuntimeError(f"VLM call failed: {e}")
296
 
 
 
297
  if not result:
298
+ raise RuntimeError("Empty response from VLM")
 
 
 
 
 
 
 
 
299
 
300
+ # Normalize result
301
+ if isinstance(result, (list, tuple)):
302
+ out = result[0]
303
+ elif isinstance(result, dict):
304
+ out = result
305
+ else:
306
+ out = {"text": str(result)}
307
 
308
+ if not isinstance(out, dict):
309
+ raise RuntimeError("Unexpected VLM output format (expected dict with 'text' key)")
 
310
 
311
+ text_out = out.get("text") or out.get("output") or None
312
+ if not text_out:
313
+ text_out = json.dumps(out)
314
 
315
+ # Try to parse JSON but remember raw text always
316
  parsed_features = None
317
  try:
318
+ parsed_features = json.loads(text_out)
319
+ if not isinstance(parsed_features, dict):
320
  parsed_features = None
321
  except Exception:
 
 
 
 
322
  try:
323
+ s = text_out
324
+ first = s.find("{")
325
+ last = s.rfind("}")
326
+ if first != -1 and last != -1 and last > first:
327
+ maybe = s[first:last+1]
328
+ parsed_features = json.loads(maybe)
329
+ if not isinstance(parsed_features, dict):
330
+ parsed_features = None
331
+ else:
332
+ parsed_features = None
333
+ except Exception:
334
  parsed_features = None
335
 
336
+ return parsed_features, text_out
 
 
 
 
 
 
337
 
338
  # -----------------------
339
  # Gradio / LLM helper (defensive, with retry + clamps)
 
347
  developer_prompt: Optional[str] = None) -> Dict[str, Any]:
348
  """
349
  Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
350
+ - Coerces types (int for tokens), clamps ranges where remote spaces often expect them.
351
+ - Retries once with safe defaults if the Space rejects the inputs (e.g. temperature too low).
352
+ - Logs and returns regex-extracted JSON as before.
353
  """
354
  if not GRADIO_AVAILABLE:
355
  raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
 
365
  system_prompt = system_prompt or LLM_SYSTEM_PROMPT
366
  developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
367
 
368
+ # Prepare the combined prompt: use raw string as-is, otherwise json.dumps the dict
369
  if isinstance(vlm_features_or_raw, str):
370
+ vlm_json_str = vlm_features_or_raw
 
 
371
  else:
372
+ vlm_json_str = json.dumps(vlm_features_or_raw, default=str)
 
 
373
 
374
+ # Strong, explicit instruction to output only JSON
375
  instruction = (
376
  "\n\nSTRICT INSTRUCTIONS (READ CAREFULLY):\n"
377
  "1) OUTPUT ONLY a single valid JSON object and nothing else — no prose, no explanation, no code fences.\n"
 
382
  "If you cannot estimate a value, set it to null.\n\n"
383
  "Now, based on the VLM output below, produce ONLY the JSON object described above.\n\n"
384
  "===BEGIN VLM OUTPUT===\n"
385
+ f"{vlm_json_str}\n"
386
  "===END VLM OUTPUT===\n\n"
387
  )
388
+ input_payload_str = instruction
389
 
390
  # Defensive coercion / clamps
391
  try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
 
393
  try_max_new_tokens = 1024
394
 
395
  try_temperature = float(temperature) if temperature is not None else 0.0
396
+ # Many demos require temperature >= 0.1; clamp to 0.1 minimum to avoid validation failures
397
  if try_temperature < 0.1:
398
  try_temperature = 0.1
399
 
400
+ # prepare kwargs for predict
401
  predict_kwargs = dict(
402
+ input_data=input_payload_str,
403
  max_new_tokens=float(try_max_new_tokens),
404
  model_identity=model_identity,
405
  system_prompt=system_prompt,
 
412
  api_name="/chat"
413
  )
414
 
415
+ # attempt + one retry with safer defaults if AppError occurs
416
  last_exc = None
417
  for attempt in (1, 2):
418
  try:
419
  logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
420
  LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
421
  result = client.predict(**predict_kwargs)
 
422
  # normalize to string
423
  if isinstance(result, (dict, list)):
424
  text_out = json.dumps(result)
425
  else:
426
  text_out = str(result)
 
427
  if not text_out or len(text_out.strip()) == 0:
428
  raise RuntimeError("LLM returned empty response")
429
+ logger.info("LLM raw output:\n%s", text_out)
 
430
 
431
  # parse with regex extractor (may raise)
432
+ parsed = extract_json_via_regex(text_out)
433
+ if not isinstance(parsed, dict):
434
+ raise ValueError("Parsed LLM output is not a JSON object/dict")
 
 
 
 
 
 
 
 
 
 
 
435
 
436
  # pretty log parsed JSON
437
  try:
 
439
  except Exception:
440
  logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
441
 
442
+ # defensive clamps (same as before)
443
  def safe_prob(val):
444
  try:
445
  v = float(val)
 
478
  return parsed
479
 
480
  except AppError as app_e:
481
+ # Specific remote validation error: log and attempt a single retry with ultra-safe defaults
482
  logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
483
  last_exc = app_e
484
  if attempt == 1:
485
+ # tighten inputs and retry: force temperature=0.2, max_new_tokens=512
486
  predict_kwargs["temperature"] = 0.2
487
  predict_kwargs["max_new_tokens"] = float(512)
488
  logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
489
  continue
490
  else:
491
+ # no more retries
492
  raise RuntimeError(f"LLM call failed (AppError): {app_e}")
493
  except Exception as e:
494
  logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
495
  last_exc = e
496
+ # try one retry only for non-AppError exceptions
497
  if attempt == 1:
498
  predict_kwargs["temperature"] = 0.2
499
  predict_kwargs["max_new_tokens"] = float(512)
500
  continue
501
  raise RuntimeError(f"LLM call failed: {e}")
502
 
503
+ # if we reach here, raise last caught exception
504
  raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
505
 
506
  # -----------------------
 
529
 
530
  @app.post("/api/v1/validate-eye-photo")
531
  async def validate_eye_photo(image: UploadFile = File(...)):
532
+ """
533
+ Lightweight validation endpoint. Uses available detector (facenet/mtcnn/opencv) to check face/eye detection.
534
+ For full pipeline, use /api/v1/upload which invokes VLM+LLM in background.
535
+ """
536
  if mtcnn is None:
537
  raise HTTPException(status_code=500, detail="No face detector available in this deployment.")
538
+
539
  try:
540
  content = await image.read()
541
  if not content:
 
543
  pil_img = load_image_from_bytes(content)
544
  img_arr = np.asarray(pil_img) # RGB
545
 
546
+ # facenet-pytorch branch
547
  if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "facenet_pytorch":
548
  try:
549
  boxes, probs, landmarks = mtcnn.detect(pil_img, landmarks=True)
 
568
  traceback.print_exc()
569
  raise HTTPException(status_code=500, detail="Face detector failed during inference.")
570
 
571
+ # classic mtcnn branch
572
  if not isinstance(mtcnn, dict) and _MTCNN_IMPL == "mtcnn":
573
  try:
574
  detections = mtcnn.detect_faces(img_arr)
 
590
  "message_hindi": "फोटो अच्छी है! आंखें ठीक से खुली हैं।" if is_valid else "आंखें बंद या आंशिक रूप से बंद दिखाई दे रही हैं। कृपया अपनी आंखें चौड़ी खोलें और पुनः प्रयास करें।",
591
  "eye_landmarks": {"left_eye": left_eye, "right_eye": right_eye}}
592
 
593
+ # OpenCV Haar cascade fallback
594
  if isinstance(mtcnn, dict) and mtcnn.get("impl") == "opencv":
595
  try:
596
  gray = cv2.cvtColor(img_arr, cv2.COLOR_RGB2GRAY)
 
async def get_results(screening_id: str):
    """Return the stored screening record for *screening_id*.

    Raises:
        HTTPException: 404 when no screening with that id exists.
    """
    try:
        # EAFP: a single dict lookup instead of a membership test + lookup.
        return screenings_db[screening_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Screening not found")
 
 
 
 
696
 
697
  @app.get("/api/v1/history/{user_id}")
698
  async def get_history(user_id: str):
 
709
  ):
710
  """
711
  Run VLM -> LLM pipeline synchronously (but off the event loop) and return:
712
+ { vlm_features, vlm_raw, structured_risk }
713
  """
714
  if not GRADIO_AVAILABLE:
715
  raise HTTPException(status_code=500, detail="VLM/LLM client not available in this deployment.")
 
735
  # Run VLM (off the event loop)
736
  vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
737
 
738
+ # Prefer sending raw vlm text to LLM (same behavior as process_screening)
739
+ llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
 
 
 
 
 
 
 
 
 
740
 
741
  # Run LLM (off the event loop)
742
  structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
743
 
744
+ # Return merged result
745
  return {
746
+ "vlm_features": vlm_features,
747
+ "vlm_raw": vlm_raw,
748
+ "structured_risk": structured_risk
749
  }
750
  except Exception as e:
751
  logger.exception("get_vitals_from_upload pipeline failed")
 
770
  # Run VLM off the event loop
771
  vlm_features, vlm_raw = await asyncio.to_thread(run_vlm_and_get_features, face_path, eye_path)
772
 
773
+ llm_input = vlm_raw if vlm_raw else (vlm_features if vlm_features else "{}")
 
 
 
 
 
 
 
 
 
774
  structured_risk = await asyncio.to_thread(run_llm_on_vlm, llm_input)
775
 
776
  # Optionally store this run's outputs back into the DB for inspection
777
  entry.setdefault("ai_results", {})
778
  entry["ai_results"].update({
779
+ "vlm_features": vlm_features,
780
  "vlm_raw": vlm_raw,
781
  "structured_risk": structured_risk,
782
  "last_vitals_run": datetime.utcnow().isoformat() + "Z"
 
784
 
785
  return {
786
  "screening_id": screening_id,
787
+ "vlm_features": vlm_features,
788
+ "vlm_raw": vlm_raw,
789
+ "structured_risk": structured_risk
790
  }
791
  except Exception as e:
792
  logger.exception("get_vitals_for_screening pipeline failed")
793
  raise HTTPException(status_code=500, detail=f"Pipeline failed: {e}")
794
 
795
  # -----------------------
796
+ # Main processing pipeline
797
  # -----------------------
798
  async def process_screening(screening_id: str):
799
  """
 
801
  - load images
802
  - quick detector-based quality metrics
803
  - run VLM -> vlm_features (dict or None) + vlm_raw (string)
804
+ - run LLM on vlm_raw (preferred) or vlm_features -> structured risk JSON
805
  - merge results into ai_results and finish
806
  """
807
  try:
 
891
  vlm_features, vlm_raw = run_vlm_and_get_features(face_path, eye_path)
892
  screenings_db[screening_id].setdefault("ai_results", {})
893
  screenings_db[screening_id]["ai_results"].update({
894
+ "vlm_features": vlm_features,
895
  "vlm_raw": vlm_raw
896
  })
897
  except Exception as e:
 
899
  screenings_db[screening_id].setdefault("ai_results", {})
900
  screenings_db[screening_id]["ai_results"].update({"vlm_error": str(e)})
901
  vlm_features = None
902
+ vlm_raw = None
 
 
 
 
903
 
904
  # --------------------------
905
+ # RUN LLM on vlm_raw (preferred) or vlm_features -> structured risk JSON
906
  # --------------------------
907
  structured_risk = None
908
  try:
909
+ if vlm_raw:
910
+ structured_risk = run_llm_on_vlm(vlm_raw)
911
+ elif vlm_features:
912
+ structured_risk = run_llm_on_vlm(vlm_features)
913
  else:
914
+ # Fallback if VLM failed: produce conservative defaults
915
+ structured_risk = {
916
+ "risk_score": 0.0,
917
+ "jaundice_probability": 0.0,
918
+ "anemia_probability": 0.0,
919
+ "hydration_issue_probability": 0.0,
920
+ "neurological_issue_probability": 0.0,
921
+ "summary": "",
922
+ "recommendation": "",
923
+ "confidence": 0.0
924
+ }
925
  screenings_db[screening_id].setdefault("ai_results", {})
926
  screenings_db[screening_id]["ai_results"].update({"structured_risk": structured_risk})
927
  except Exception as e:
 
940
  }
941
 
942
  # Use structured_risk for summary recommendations & simple disease inference placeholders
943
+ hem = screenings_db[screening_id]["ai_results"].get("medical_insights", {}).get("hemoglobin_estimate", None)
944
+ bil = screenings_db[screening_id]["ai_results"].get("medical_insights", {}).get("bilirubin_estimate", None)
945
+
946
+ # Keep older ai_results shape for backward compatibility (if you want)
947
  screenings_db[screening_id].setdefault("ai_results", {})
948
  screenings_db[screening_id]["ai_results"].update({
949
  "processing_time_ms": 1200
950
  })
951
 
952
+ # disease_predictions & recommendations can be built from structured_risk if needed
953
  disease_predictions = [
954
  {
955
+ "condition": "Anemia-like-signs", # internal tag (not surfaced in LLM summary)
956
  "risk_level": "Medium" if structured_risk.get("anemia_probability", 0.0) > 0.5 else "Low",
957
  "probability": structured_risk.get("anemia_probability", 0.0),
958
  "confidence": structured_risk.get("confidence", 0.0)
 
968
  recommendations = {
969
  "action_needed": "consult" if structured_risk.get("risk_score", 0.0) > 30.0 else "monitor",
970
  "message_english": structured_risk.get("recommendation", "") or f"Please follow up with a health professional if concerns persist.",
971
+ "message_hindi": "" # could be auto-translated if desired
972
  }
973
 
974
  screenings_db[screening_id].update({