dpv007 committed on
Commit
96c51cc
·
verified ·
1 Parent(s): f37add2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -58
app.py CHANGED
@@ -15,6 +15,7 @@ Notes:
15
  * always returns raw VLM output in API responses,
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
 
18
  """
19
 
20
  import io
@@ -51,10 +52,6 @@ GRADIO_VLM_SPACE = os.getenv("GRADIO_SPACE", "developer0hye/Qwen3-VL-8B-Instruct
51
  LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
52
  HF_TOKEN = os.getenv("HF_TOKEN", None)
53
 
54
- # VLM retry config (if VLM returns empty text)
55
- VLM_EMPTY_RETRIES = int(os.getenv("VLM_EMPTY_RETRIES", "2"))
56
- VLM_EMPTY_RETRY_SLEEP_S = float(os.getenv("VLM_EMPTY_RETRY_SLEEP_S", "0.5"))
57
-
58
  # Default VLM prompt
59
  DEFAULT_VLM_PROMPT = (
60
  "From the provided face/eye images, compute the required screening features "
@@ -249,7 +246,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
249
  return out
250
 
251
  # -----------------------
252
- # Gradio / VLM helper (returns parsed dict OR None, plus raw text)
253
  # -----------------------
254
  def get_gradio_client_for_space(space: str) -> Client:
255
  if not GRADIO_AVAILABLE:
@@ -263,10 +260,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
263
  Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
264
  (parsed_features_dict_or_None, raw_text_response_str)
265
 
266
- Robustness improvements:
267
- - Retries a few times if raw text is empty.
268
- - Attempts json.loads first, then extract_json_via_regex.
269
- - Logs raw output and parsed features for debugging.
270
  """
271
  prompt = prompt or DEFAULT_VLM_PROMPT
272
  if not os.path.exists(face_path) or not os.path.exists(eye_path):
@@ -277,70 +271,51 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
277
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
278
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
279
 
280
- last_exc = None
281
- raw_text = None
282
- for attempt in range(1, VLM_EMPTY_RETRIES + 2): # attempts = retries+1
283
- try:
284
- logger.info("Calling VLM Space %s (attempt %d)", GRADIO_VLM_SPACE, attempt)
285
- result = client.predict(message=message, history=[], api_name="/chat_fn")
286
- except Exception as e:
287
- logger.exception("VLM call failed on attempt %d", attempt)
288
- last_exc = e
289
- if attempt <= VLM_EMPTY_RETRIES:
290
- time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
291
- continue
292
- raise RuntimeError(f"VLM call ultimately failed: {e}")
293
-
294
- if not result:
295
- logger.warning("VLM returned empty result object on attempt %d", attempt)
296
- raw_text = ""
297
- else:
298
- # normalize result object
299
- if isinstance(result, (list, tuple)):
300
- out = result[0]
301
- elif isinstance(result, dict):
302
- out = result
303
- else:
304
- out = {"text": str(result)}
305
-
306
- text_out = out.get("text") or out.get("output") or ""
307
- # if files key exists but text is empty, log it
308
- if isinstance(out, dict) and (out.get("files") == [] or not out.get("files")) and (not text_out.strip()):
309
- logger.warning("VLM returned no text AND no files in response on attempt %d: %s", attempt, str(out))
310
- raw_text = text_out
311
-
312
- # if raw_text is non-empty, break; otherwise retry up to retries
313
- if raw_text and raw_text.strip():
314
- break
315
  else:
316
- logger.warning("VLM returned empty text on attempt %d. Retrying (%d remaining)...", attempt, max(0, VLM_EMPTY_RETRIES - (attempt - 1)))
317
- if attempt <= VLM_EMPTY_RETRIES:
318
- time.sleep(VLM_EMPTY_RETRY_SLEEP_S)
319
- continue
320
- # no more retries
321
- break
322
 
323
- if raw_text is None:
324
- raise RuntimeError(f"VLM returned no response (last error: {last_exc})")
 
325
 
326
- text_out = raw_text
 
 
327
 
328
  # Log raw VLM output for debugging/auditing
329
- logger.info("VLM raw output (length=%d):\n%s", len(text_out or ""), (text_out[:1000] + "...") if text_out and len(text_out) > 1000 else (text_out or "<EMPTY>"))
330
 
331
  # Try to parse JSON first (fast path)
332
  parsed_features = None
333
  try:
334
- parsed_features = json.loads(text_out) if text_out and text_out.strip() else None
335
  if parsed_features is not None and not isinstance(parsed_features, dict):
336
  parsed_features = None
337
  except Exception:
338
  parsed_features = None
339
 
340
  # If json.loads failed or returned None, try regex-based extraction
341
- if parsed_features is None and text_out and text_out.strip():
342
  try:
343
- parsed_features = extract_json_via_regex(text_out)
344
  logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
345
  except Exception as e:
346
  logger.info("VLM regex extraction failed or found nothing: %s", str(e))
@@ -352,7 +327,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: str, prompt: Optional[str
352
  logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
353
 
354
  # Always return parsed_features (or None) and raw_text (may be empty string)
355
- return parsed_features, (text_out or "")
356
 
357
  # -----------------------
358
  # Gradio / LLM helper (defensive, with retry + clamps)
 
15
  * always returns raw VLM output in API responses,
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends either cleaned JSON or raw VLM string into LLM (and logs which was used).
18
+ - VLM calls were simplified to a single call (no retries).
19
  """
20
 
21
  import io
 
52
  LLM_GRADIO_SPACE = os.getenv("LLM_GRADIO_SPACE", "Tonic/med-gpt-oss-20b-demo")
53
  HF_TOKEN = os.getenv("HF_TOKEN", None)
54
 
 
 
 
 
55
  # Default VLM prompt
56
  DEFAULT_VLM_PROMPT = (
57
  "From the provided face/eye images, compute the required screening features "
 
246
  return out
247
 
248
  # -----------------------
249
+ # Gradio / VLM helper (single-call, no retries)
250
  # -----------------------
251
  def get_gradio_client_for_space(space: str) -> Client:
252
  if not GRADIO_AVAILABLE:
 
260
  Synchronous call to remote VLM (gradio /chat_fn). Returns tuple:
261
  (parsed_features_dict_or_None, raw_text_response_str)
262
 
263
+ Simplified: single call (no retries). Attempts json.loads then regex extraction.
 
 
 
264
  """
265
  prompt = prompt or DEFAULT_VLM_PROMPT
266
  if not os.path.exists(face_path) or not os.path.exists(eye_path):
 
271
  client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
272
  message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
273
 
274
+ # SINGLE CALL (no retries)
275
+ try:
276
+ logger.info("Calling VLM Space %s", GRADIO_VLM_SPACE)
277
+ result = client.predict(message=message, history=[], api_name="/chat_fn")
278
+ except Exception as e:
279
+ logger.exception("VLM call failed (no retries)")
280
+ raise RuntimeError(f"VLM call failed: {e}")
281
+
282
+ # Normalize result
283
+ raw_text = ""
284
+ if not result:
285
+ logger.warning("VLM returned empty result object")
286
+ raw_text = ""
287
+ else:
288
+ if isinstance(result, (list, tuple)):
289
+ out = result[0]
290
+ elif isinstance(result, dict):
291
+ out = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  else:
293
+ out = {"text": str(result)}
 
 
 
 
 
294
 
295
+ text_out = out.get("text") or out.get("output") or ""
296
+ raw_text = text_out
297
+ logger.info("VLM response object (debug): %s", out)
298
 
299
+ # If files present but text empty, log it explicitly
300
+ if isinstance(out, dict) and ("files" in out) and (not text_out.strip()):
301
+ logger.warning("VLM returned no text AND files: %s", out.get("files"))
302
 
303
  # Log raw VLM output for debugging/auditing
304
+ logger.info("VLM raw output (length=%d):\n%s", len(raw_text or ""), (raw_text[:1000] + "...") if raw_text and len(raw_text) > 1000 else (raw_text or "<EMPTY>"))
305
 
306
  # Try to parse JSON first (fast path)
307
  parsed_features = None
308
  try:
309
+ parsed_features = json.loads(raw_text) if raw_text and raw_text.strip() else None
310
  if parsed_features is not None and not isinstance(parsed_features, dict):
311
  parsed_features = None
312
  except Exception:
313
  parsed_features = None
314
 
315
  # If json.loads failed or returned None, try regex-based extraction
316
+ if parsed_features is None and raw_text and raw_text.strip():
317
  try:
318
+ parsed_features = extract_json_via_regex(raw_text)
319
  logger.info("VLM regex-extracted features:\n%s", json.dumps(parsed_features, indent=2, ensure_ascii=False))
320
  except Exception as e:
321
  logger.info("VLM regex extraction failed or found nothing: %s", str(e))
 
327
  logger.info("VLM parsed features (final): %s", json.dumps(parsed_features, ensure_ascii=False))
328
 
329
  # Always return parsed_features (or None) and raw_text (may be empty string)
330
+ return parsed_features, (raw_text or "")
331
 
332
  # -----------------------
333
  # Gradio / LLM helper (defensive, with retry + clamps)