Spaces:

dpv007
/

cloud

Sleeping

App Files Files Community

dpv007 committed on Dec 12, 2025

Commit

d5eb738

verified ·

1 Parent(s): e464210

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -100

app.py CHANGED Viewed

@@ -336,7 +336,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
     return parsed_features, text_out
 # -----------------------
-# Gradio / LLM helper (uses regex extractor on LLM output)
 # -----------------------
 def run_llm_on_vlm(vlm_features_or_raw: Any,
                    max_new_tokens: int = 1024,
@@ -346,19 +346,21 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
                    system_prompt: Optional[str] = None,
                    developer_prompt: Optional[str] = None) -> Dict[str, Any]:
     """
-    Call the remote LLM Space's /chat endpoint.
-    Accepts either:
-      - a dict (parsed VLM features) -> will be JSON-dumped (backwards compatible)
-      - a raw string (the exact VLM text output) -> will be forwarded AS-IS (no extra JSON quoting)
-    After the LLM returns, we use a regex-based extractor to pull numeric values and strings,
-    reconstruct a clean JSON dict with numeric defaults (no NaN).
     """
     if not GRADIO_AVAILABLE:
         raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
-    client = get_gradio_client_for_space(LLM_GRADIO_SPACE)
     model_identity = model_identity or LLM_MODEL_IDENTITY
     system_prompt = system_prompt or LLM_SYSTEM_PROMPT
     developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
@@ -383,103 +385,123 @@ def run_llm_on_vlm(vlm_features_or_raw: Any,
         f"{vlm_json_str}\n"
         "===END VLM OUTPUT===\n\n"
     )
-    input_payload_str = instruction  # we feed only the instruction (which contains the VLM output)
-    try:
-        logger.info("Calling LLM Space %s with strict JSON-only prompt", LLM_GRADIO_SPACE)
-        result = client.predict(
-            input_data=input_payload_str,
-            max_new_tokens=float(max_new_tokens),
-            model_identity=model_identity,
-            system_prompt=system_prompt,
-            developer_prompt=developer_prompt,
-            reasoning_effort=reasoning_effort,
-            temperature=float(temperature),
-            top_p=0.9,
-            top_k=50,
-            repetition_penalty=1.0,
-            api_name="/chat"
-        )
-    except Exception as e:
-        logger.exception("LLM call failed")
-        raise RuntimeError(f"LLM call failed: {e}")
-    # Normalize result to string
-    if isinstance(result, (dict, list)):
-        text_out = json.dumps(result)
-    else:
-        text_out = str(result)
-    if not text_out or len(text_out.strip()) == 0:
-        raise RuntimeError("LLM returned empty response")
-    # LOG raw output for debugging / auditing
-    logger.info("LLM raw output:\n%s", text_out)
-    # Use regex-based extraction (robust)
-    try:
-        parsed = extract_json_via_regex(text_out)
-    except Exception as e:
-        logger.exception("Regex JSON extraction failed")
-        # As a last fallback, attempt naive JSON parsing; if that fails, raise with raw output
         try:
-            parsed = json.loads(text_out)
-        except Exception:
-            # include raw output in the exception text so logs contain it
-            raise ValueError(f"Failed to extract JSON from LLM output: {e}\nRaw Output:\n{text_out}")
-    if not isinstance(parsed, dict):
-        raise ValueError("Parsed LLM output is not a JSON object/dict")
-    # LOG parsed JSON (pretty-printed)
-    try:
-        logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2, ensure_ascii=False))
-    except Exception:
-        logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
-    # Final safety clamps (already ensured by extractor, but keep defensive checks)
-    def safe_prob(val):
-        try:
-            v = float(val)
-            return max(0.0, min(1.0, v))
-        except Exception:
-            return 0.0
-    for k in [
-        "jaundice_probability",
-        "anemia_probability",
-        "hydration_issue_probability",
-        "neurological_issue_probability"
-    ]:
-        parsed[k] = safe_prob(parsed.get(k, 0.0))
-    # risk_score clamp 0..100
-    try:
-        rs = float(parsed.get("risk_score", 0.0))
-        parsed["risk_score"] = round(max(0.0, min(100.0, rs)), 2)
-    except Exception:
-        parsed["risk_score"] = 0.0
-    # confidence clamp 0..1
-    parsed["confidence"] = safe_prob(parsed.get("confidence", 0.0))
-    # Ensure summary/recommendation are strings
-    parsed["summary"] = str(parsed.get("summary", "") or "").strip()
-    parsed["recommendation"] = str(parsed.get("recommendation", "") or "").strip()
-    # Optional: add flags indicating missing values (useful for frontend)
-    for k in [
-        "jaundice_probability",
-        "anemia_probability",
-        "hydration_issue_probability",
-        "neurological_issue_probability",
-        "confidence",
-        "risk_score"
-    ]:
-        parsed[f"{k}_was_missing"] = False  # extractor already returned defaults; mark as False
-    return parsed
 # -----------------------
 # API endpoints

     return parsed_features, text_out
 # -----------------------
+# Gradio / LLM helper (defensive, with retry + clamps)
 # -----------------------
 def run_llm_on_vlm(vlm_features_or_raw: Any,
                    max_new_tokens: int = 1024,
                    system_prompt: Optional[str] = None,
                    developer_prompt: Optional[str] = None) -> Dict[str, Any]:
     """
+    Call the remote LLM Space's /chat endpoint with defensive input handling and a single retry.
+    - Coerces types (int for tokens), clamps ranges where remote spaces often expect them.
+    - Retries once with safe defaults if the Space rejects the inputs (e.g. temperature too low).
+    - Logs and returns regex-extracted JSON as before.
     """
     if not GRADIO_AVAILABLE:
         raise RuntimeError("gradio_client not installed. Add gradio_client to requirements.txt")
+    # Try to import AppError for specific handling; fallback to Exception if unavailable
+    try:
+        from gradio_client import AppError  # type: ignore
+    except Exception:
+        AppError = Exception  # fallback
+    client = get_gradio_client_for_space(LLM_GRADIO_SPACE)
     model_identity = model_identity or LLM_MODEL_IDENTITY
     system_prompt = system_prompt or LLM_SYSTEM_PROMPT
     developer_prompt = developer_prompt or LLM_DEVELOPER_PROMPT
         f"{vlm_json_str}\n"
         "===END VLM OUTPUT===\n\n"
     )
+    input_payload_str = instruction
+    # Defensive coercion / clamps
+    try_max_new_tokens = int(max_new_tokens) if max_new_tokens is not None else 1024
+    if try_max_new_tokens <= 0:
+        try_max_new_tokens = 1024
+    try_temperature = float(temperature) if temperature is not None else 0.0
+    # Many demos require temperature >= 0.1; clamp to 0.1 minimum to avoid validation failures
+    if try_temperature < 0.1:
+        try_temperature = 0.1
+    # prepare kwargs for predict
+    predict_kwargs = dict(
+        input_data=input_payload_str,
+        max_new_tokens=float(try_max_new_tokens),
+        model_identity=model_identity,
+        system_prompt=system_prompt,
+        developer_prompt=developer_prompt,
+        reasoning_effort=reasoning_effort,
+        temperature=float(try_temperature),
+        top_p=0.9,
+        top_k=50,
+        repetition_penalty=1.0,
+        api_name="/chat"
+    )
+    # attempt + one retry with safer defaults if AppError occurs
+    last_exc = None
+    for attempt in (1, 2):
         try:
+            logger.info("Calling LLM Space %s (attempt %d) with temperature=%s, max_new_tokens=%s",
+                        LLM_GRADIO_SPACE, attempt, predict_kwargs.get("temperature"), predict_kwargs.get("max_new_tokens"))
+            result = client.predict(**predict_kwargs)
+            # normalize to string
+            if isinstance(result, (dict, list)):
+                text_out = json.dumps(result)
+            else:
+                text_out = str(result)
+            if not text_out or len(text_out.strip()) == 0:
+                raise RuntimeError("LLM returned empty response")
+            logger.info("LLM raw output:\n%s", text_out)
+            # parse with regex extractor (may raise)
+            parsed = extract_json_via_regex(text_out)
+            if not isinstance(parsed, dict):
+                raise ValueError("Parsed LLM output is not a JSON object/dict")
+            # pretty log parsed JSON
+            try:
+                logger.info("LLM parsed JSON:\n%s", json.dumps(parsed, indent=2, ensure_ascii=False))
+            except Exception:
+                logger.info("LLM parsed JSON (raw dict): %s", str(parsed))
+            # defensive clamps (same as before)
+            def safe_prob(val):
+                try:
+                    v = float(val)
+                    return max(0.0, min(1.0, v))
+                except Exception:
+                    return 0.0
+            for k in [
+                "jaundice_probability",
+                "anemia_probability",
+                "hydration_issue_probability",
+                "neurological_issue_probability"
+            ]:
+                parsed[k] = safe_prob(parsed.get(k, 0.0))
+            try:
+                rs = float(parsed.get("risk_score", 0.0))
+                parsed["risk_score"] = round(max(0.0, min(100.0, rs)), 2)
+            except Exception:
+                parsed["risk_score"] = 0.0
+            parsed["confidence"] = safe_prob(parsed.get("confidence", 0.0))
+            parsed["summary"] = str(parsed.get("summary", "") or "").strip()
+            parsed["recommendation"] = str(parsed.get("recommendation", "") or "").strip()
+            for k in [
+                "jaundice_probability",
+                "anemia_probability",
+                "hydration_issue_probability",
+                "neurological_issue_probability",
+                "confidence",
+                "risk_score"
+            ]:
+                parsed[f"{k}_was_missing"] = False
+            return parsed
+        except AppError as app_e:
+            # Specific remote validation error: log and attempt a single retry with ultra-safe defaults
+            logger.exception("LLM AppError (remote validation failed) on attempt %d: %s", attempt, str(app_e))
+            last_exc = app_e
+            if attempt == 1:
+                # tighten inputs and retry: force temperature=0.2, max_new_tokens=512
+                predict_kwargs["temperature"] = 0.2
+                predict_kwargs["max_new_tokens"] = float(512)
+                logger.info("Retrying LLM call with temperature=0.2 and max_new_tokens=512")
+                continue
+            else:
+                # no more retries
+                raise RuntimeError(f"LLM call failed (AppError): {app_e}")
+        except Exception as e:
+            logger.exception("LLM call failed on attempt %d: %s", attempt, str(e))
+            last_exc = e
+            # try one retry only for non-AppError exceptions
+            if attempt == 1:
+                predict_kwargs["temperature"] = 0.2
+                predict_kwargs["max_new_tokens"] = float(512)
+                continue
+            raise RuntimeError(f"LLM call failed: {e}")
+    # if we reach here, raise last caught exception
+    raise RuntimeError(f"LLM call ultimately failed: {last_exc}")
 # -----------------------
 # API endpoints