Spaces:

dpv007
/

cloud

Sleeping

App Files Files Community

dpv007 committed on Dec 12, 2025

Commit

96c51cc

verified ·

1 Parent(s): f37add2

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -58

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ Notes:
     * always returns raw VLM output in API responses,
     * extracts JSON from VLM via regex when possible, and
     * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
 """
 import io
@@ -51,10 +52,6 @@ GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct
 LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
 HF_TOKEN = os.getenv("HF_TOKEN", None)
-# VLM retry config (if VLM returns empty text)
-VLM_EMPTY_RETRIES = int(os.getenv("VLM_EMPTY_RETRIES", "2"))
-VLM_EMPTY_RETRY_SLEEP_S = float(os.getenv("VLM_EMPTY_RETRY_SLEEP_S", "0.5"))
 # Default VLM prompt
 DEFAULT_VLM_PROMPT = (
     "From the provided face/eye images, compute the required screening features "
@@ -249,7 +246,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
     return out
 # -----------------------
-# Gradio / VLM helper (returns parsed dict OR None, plus raw text)
 # -----------------------
 def get_gradio_client_for_space(space: str) -> Client:
     if not GRADIO_AVAILABLE:
@@ -263,10 +260,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
     Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
        (parsed_features_dict_or_None, raw_text_response_str)
-    Robustness improvements:
-    - Retries a few times if raw text is empty.
-    - Attempts json.loads first, then extract_json_via_regex.
-    - Logs raw output and parsed features for debugging.
     """
     prompt = prompt or DEFAULT_VLM_PROMPT
     if not os.path.exists(face_path) or not os.path.exists(eye_path):
@@ -277,70 +271,51 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
     client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
     message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
-    last_exc = None
-    raw_text = None
-    for attempt in range(1, VLM_EMPTY_RETRIES + 2):  # attempts = retries+1
-        try:
-            logger.info("Calling VLM Space %s (attempt %d)", GRADIO_VLM_SPACE, attempt)
-            result = client.predict(message=message, history=[], api_name="/chat_fn")
-        except Exception as e:
-            logger.exception("VLM call failed on attempt %d", attempt)
-            last_exc = e
-            if attempt <= VLM_EMPTY_RETRIES:
-                time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
-                continue
-            raise RuntimeError(f"VLM call ultimately failed: {e}")
-        if not result:
-            logger.warning("VLM returned empty result object on attempt %d", attempt)
-            raw_text = ""
-        else:
-            # normalize result object
-            if isinstance(result, (list, tuple)):
-                out = result[0]
-            elif isinstance(result, dict):
-                out = result
-            else:
-                out = {"text": str(result)}
-            text_out = out.get("text") or out.get("output") or ""
-            # if files key exists but text is empty, log it
-            if isinstance(out, dict) and (out.get("files") == [] or not out.get("files")) and (not text_out.strip()):
-                logger.warning("VLM returned no text AND no files in response on attempt %d: %s", attempt, str(out))
-            raw_text = text_out
-        # if raw_text is non-empty, break; otherwise retry up to retries
-        if raw_text and raw_text.strip():
-            break
         else:
-            logger.warning("VLM returned empty text on attempt %d. Retrying (%d remaining)...", attempt, max(0, VLM_EMPTY_RETRIES - (attempt - 1)))
-            if attempt <= VLM_EMPTY_RETRIES:
-                time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
-                continue
-            # no more retries
-            break
-    if raw_text is None:
-        raise RuntimeError(f"VLM returned no response (last error: {last_exc})")
-    text_out = raw_text
     # Log raw VLM output for debugging/auditing
-    logger.info("VLM raw output (length=%d):\n%s", len(text_out or ""), (text_out[:1000] + "...") if text_out and len(text_out) > 1000 else (text_out or "<EMPTY>"))
     # Try to parse JSON first (fast path)
     parsed_features = None
     try:
-        parsed_features = json.loads(text_out) if text_out and text_out.strip() else None
         if parsed_features is not None and not isinstance(parsed_features, dict):
             parsed_features = None
     except Exception:
         parsed_features = None
     # If json.loads failed or returned None, try regex-based extraction
-    if parsed_features is None and text_out and text_out.strip():
         try:
-            parsed_features = extract_json_via_regex(text_out)
             logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
         except Exception as e:
             logger.info("VLM regex extraction failed or found nothing: %s", str(e))
@@ -352,7 +327,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
         logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
     # Always return raw_text (may be empty string) and parsed_features (or None)
-    return parsed_features, (text_out or "")
 # -----------------------
 # Gradio / LLM helper (defensive, with retry + clamps)

     * always returns raw VLM output in API responses,
     * extracts JSON from VLM via regex when possible, and
     * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
+ - VLM calls were simplified to a single call (no retries).
 """
 import io
 LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 # Default VLM prompt
 DEFAULT_VLM_PROMPT = (
     "From the provided face/eye images, compute the required screening features "
     return out
 # -----------------------
+# Gradio / VLM helper (single-call, no retries)
 # -----------------------
 def get_gradio_client_for_space(space: str) -> Client:
     if not GRADIO_AVAILABLE:
     Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
        (parsed_features_dict_or_None, raw_text_response_str)
+    Simplified: single call (no retries). Attempts json.loads then regex extraction.
     """
     prompt = prompt or DEFAULT_VLM_PROMPT
     if not os.path.exists(face_path) or not os.path.exists(eye_path):
     client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
     message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
+    # SINGLE CALL (no retries)
+    try:
+        logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
+        result = client.predict(message=message, history=[], api_name="/chat_fn")
+    except Exception as e:
+        logger.exception("VLM call failed (no retries)")
+        raise RuntimeError(f"VLM call failed: {e}")
+    # Normalize result
+    raw_text = ""
+    if not result:
+        logger.warning("VLM returned empty result object")
+        raw_text = ""
+    else:
+        if isinstance(result, (list, tuple)):
+            out = result[0]
+        elif isinstance(result, dict):
+            out = result
         else:
+            out = {"text": str(result)}
+        text_out = out.get("text") or out.get("output") or ""
+        raw_text = text_out
+        logger.info("VLM response object (debug): %s", out)
+        # If files present but text empty, log it explicitly
+        if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
+            logger.warning("VLM returned no text AND files: %s", out.get("files"))
     # Log raw VLM output for debugging/auditing
+    logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
     # Try to parse JSON first (fast path)
     parsed_features = None
     try:
+        parsed_features = json.loads(raw_text) if raw_text and raw_text.strip() else None
         if parsed_features is not None and not isinstance(parsed_features, dict):
             parsed_features = None
     except Exception:
         parsed_features = None
     # If json.loads failed or returned None, try regex-based extraction
+    if parsed_features is None and raw_text and raw_text.strip():
         try:
+            parsed_features = extract_json_via_regex(raw_text)
             logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
         except Exception as e:
             logger.info("VLM regex extraction failed or found nothing: %s", str(e))
         logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
     # Always return raw_text (may be empty string) and parsed_features (or None)
+    return parsed_features, (raw_text or "")
 # -----------------------
 # Gradio / LLM helper (defensive, with retry + clamps)