Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ Notes:
|
|
| 15 |
* always returns raw VLM output in API responses,
|
| 16 |
* extracts JSON from VLM via regex when possible, and
|
| 17 |
* sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
import io
|
|
@@ -51,10 +52,6 @@ GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct
|
|
| 51 |
LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
|
| 52 |
HF_TOKEN = os.getenv("HF_TOKEN", None)
|
| 53 |
|
| 54 |
-
# VLM retry config (if VLM returns empty text)
|
| 55 |
-
VLM_EMPTY_RETRIES = int(os.getenv("VLM_EMPTY_RETRIES", "2"))
|
| 56 |
-
VLM_EMPTY_RETRY_SLEEP_S = float(os.getenv("VLM_EMPTY_RETRY_SLEEP_S", "0.5"))
|
| 57 |
-
|
| 58 |
# Default VLM prompt
|
| 59 |
DEFAULT_VLM_PROMPT = (
|
| 60 |
"From the provided face/eye images, compute the required screening features "
|
|
@@ -249,7 +246,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
|
|
| 249 |
return out
|
| 250 |
|
| 251 |
# -----------------------
|
| 252 |
-
# Gradio / VLM helper (
|
| 253 |
# -----------------------
|
| 254 |
def get_gradio_client_for_space(space: str) -> Client:
|
| 255 |
if not GRADIO_AVAILABLE:
|
|
@@ -263,10 +260,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
|
|
| 263 |
Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
|
| 264 |
(parsed_features_dict_or_None, raw_text_response_str)
|
| 265 |
|
| 266 |
-
|
| 267 |
-
- Retries a few times if raw text is empty.
|
| 268 |
-
- Attempts json.loads first, then extract_json_via_regex.
|
| 269 |
-
- Logs raw output and parsed features for debugging.
|
| 270 |
"""
|
| 271 |
prompt = prompt or DEFAULT_VLM_PROMPT
|
| 272 |
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
|
@@ -277,70 +271,51 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
|
|
| 277 |
client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
|
| 278 |
message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
if
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
# normalize result object
|
| 299 |
-
if isinstance(result, (list, tuple)):
|
| 300 |
-
out = result[0]
|
| 301 |
-
elif isinstance(result, dict):
|
| 302 |
-
out = result
|
| 303 |
-
else:
|
| 304 |
-
out = {"text": str(result)}
|
| 305 |
-
|
| 306 |
-
text_out = out.get("text") or out.get("output") or ""
|
| 307 |
-
# if files key exists but text is empty, log it
|
| 308 |
-
if isinstance(out, dict) and (out.get("files") == [] or not out.get("files")) and (not text_out.strip()):
|
| 309 |
-
logger.warning("VLM returned no text AND no files in response on attempt %d: %s", attempt, str(out))
|
| 310 |
-
raw_text = text_out
|
| 311 |
-
|
| 312 |
-
# if raw_text is non-empty, break; otherwise retry up to retries
|
| 313 |
-
if raw_text and raw_text.strip():
|
| 314 |
-
break
|
| 315 |
else:
|
| 316 |
-
|
| 317 |
-
if attempt <= VLM_EMPTY_RETRIES:
|
| 318 |
-
time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
|
| 319 |
-
continue
|
| 320 |
-
# no more retries
|
| 321 |
-
break
|
| 322 |
|
| 323 |
-
|
| 324 |
-
|
|
|
|
| 325 |
|
| 326 |
-
|
|
|
|
|
|
|
| 327 |
|
| 328 |
# Log raw VLM output for debugging/auditing
|
| 329 |
-
logger.info("VLM raw output (length=%d):\n%s", len(
|
| 330 |
|
| 331 |
# Try to parse JSON first (fast path)
|
| 332 |
parsed_features = None
|
| 333 |
try:
|
| 334 |
-
parsed_features = json.loads(
|
| 335 |
if parsed_features is not None and not isinstance(parsed_features, dict):
|
| 336 |
parsed_features = None
|
| 337 |
except Exception:
|
| 338 |
parsed_features = None
|
| 339 |
|
| 340 |
# If json.loads failed or returned None, try regex-based extraction
|
| 341 |
-
if parsed_features is None and
|
| 342 |
try:
|
| 343 |
-
parsed_features = extract_json_via_regex(
|
| 344 |
logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
|
| 345 |
except Exception as e:
|
| 346 |
logger.info("VLM regex extraction failed or found nothing: %s", str(e))
|
|
@@ -352,7 +327,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
|
|
| 352 |
logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
|
| 353 |
|
| 354 |
# Always return parsed_features (or None) first, then raw_text (may be empty string)
|
| 355 |
-
return parsed_features, (
|
| 356 |
|
| 357 |
# -----------------------
|
| 358 |
# Gradio / LLM helper (defensive, with retry + clamps)
|
|
|
|
| 15 |
* always returns raw VLM output in API responses,
|
| 16 |
* extracts JSON from VLM via regex when possible, and
|
| 17 |
* sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
|
| 18 |
+
- VLM calls were simplified to a single call (no retries).
|
| 19 |
"""
|
| 20 |
|
| 21 |
import io
|
|
|
|
| 52 |
LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
|
| 53 |
HF_TOKEN = os.getenv("HF_TOKEN", None)
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# Default VLM prompt
|
| 56 |
DEFAULT_VLM_PROMPT = (
|
| 57 |
"From the provided face/eye images, compute the required screening features "
|
|
|
|
| 246 |
return out
|
| 247 |
|
| 248 |
# -----------------------
|
| 249 |
+
# Gradio / VLM helper (single-call, no retries)
|
| 250 |
# -----------------------
|
| 251 |
def get_gradio_client_for_space(space: str) -> Client:
|
| 252 |
if not GRADIO_AVAILABLE:
|
|
|
|
| 260 |
Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
|
| 261 |
(parsed_features_dict_or_None, raw_text_response_str)
|
| 262 |
|
| 263 |
+
Simplified: single call (no retries). Attempts json.loads then regex extraction.
|
|
|
|
|
|
|
|
|
|
| 264 |
"""
|
| 265 |
prompt = prompt or DEFAULT_VLM_PROMPT
|
| 266 |
if not os.path.exists(face_path) or not os.path.exists(eye_path):
|
|
|
|
| 271 |
client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
|
| 272 |
message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
|
| 273 |
|
| 274 |
+
# SINGLE CALL (no retries)
|
| 275 |
+
try:
|
| 276 |
+
logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
|
| 277 |
+
result = client.predict(message=message, history=[], api_name="/chat_fn")
|
| 278 |
+
except Exception as e:
|
| 279 |
+
logger.exception("VLM call failed (no retries)")
|
| 280 |
+
raise RuntimeError(f"VLM call failed: {e}")
|
| 281 |
+
|
| 282 |
+
# Normalize result
|
| 283 |
+
raw_text = ""
|
| 284 |
+
if not result:
|
| 285 |
+
logger.warning("VLM returned empty result object")
|
| 286 |
+
raw_text = ""
|
| 287 |
+
else:
|
| 288 |
+
if isinstance(result, (list, tuple)):
|
| 289 |
+
out = result[0]
|
| 290 |
+
elif isinstance(result, dict):
|
| 291 |
+
out = result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
else:
|
| 293 |
+
out = {"text": str(result)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
+
text_out = out.get("text") or out.get("output") or ""
|
| 296 |
+
raw_text = text_out
|
| 297 |
+
logger.info("VLM response object (debug): %s", out)
|
| 298 |
|
| 299 |
+
# If files present but text empty, log it explicitly
|
| 300 |
+
if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
|
| 301 |
+
logger.warning("VLM returned no text AND files: %s", out.get("files"))
|
| 302 |
|
| 303 |
# Log raw VLM output for debugging/auditing
|
| 304 |
+
logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
|
| 305 |
|
| 306 |
# Try to parse JSON first (fast path)
|
| 307 |
parsed_features = None
|
| 308 |
try:
|
| 309 |
+
parsed_features = json.loads(raw_text) if raw_text and raw_text.strip() else None
|
| 310 |
if parsed_features is not None and not isinstance(parsed_features, dict):
|
| 311 |
parsed_features = None
|
| 312 |
except Exception:
|
| 313 |
parsed_features = None
|
| 314 |
|
| 315 |
# If json.loads failed or returned None, try regex-based extraction
|
| 316 |
+
if parsed_features is None and raw_text and raw_text.strip():
|
| 317 |
try:
|
| 318 |
+
parsed_features = extract_json_via_regex(raw_text)
|
| 319 |
logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
|
| 320 |
except Exception as e:
|
| 321 |
logger.info("VLM regex extraction failed or found nothing: %s", str(e))
|
|
|
|
| 327 |
logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
|
| 328 |
|
| 329 |
# Always return parsed_features (or None) first, then raw_text (may be empty string)
|
| 330 |
+
return parsed_features, (raw_text or "")
|
| 331 |
|
| 332 |
# -----------------------
|
| 333 |
# Gradio / LLM helper (defensive, with retry + clamps)
|