dpv007 committed on
Commit
879187e
·
verified ·
1 Parent(s): 45ec08d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -109
app.py CHANGED
@@ -12,7 +12,7 @@ Notes:
12
  - Add httpx to requirements.txt for VLM POST/GET flow
13
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
14
  - This variant:
15
- * sends the face image to the HF Space using the POST/GET event flow
16
  * returns raw VLM output and meta (no VLM-side JSON extraction)
17
  """
18
 
@@ -250,6 +250,7 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
250
 
251
  # -----------------------
252
  # VLM helper using HF Spaces POST/GET event flow (gradio_api/call/chat)
 
253
  # -----------------------
254
  def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
255
  raise_on_file_delivery_failure: bool = False
@@ -259,30 +260,27 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
259
  1) POST -> returns an EVENT_ID
260
  2) GET /gradio_api/call/chat/{EVENT_ID} -> fetch result
261
 
262
- This function:
263
- - Loads face image, encodes as base64 and embeds in JSON payload as a single file
264
- - POSTs to the Space endpoint to create an event
265
- - GETs the event result and extracts text/output
266
- - Returns (parsed_features_or_None, raw_text, meta)
267
- NOTE: The function returns parsed_features=None (no JSON extraction here) and raw_text for LLM downstream.
268
  """
269
  prompt = prompt or DEFAULT_VLM_PROMPT
270
 
271
  if not os.path.exists(face_path):
272
  raise FileNotFoundError(f"Face image not found at: {face_path}")
273
 
274
- # Read and base64-encode the face image for embedding in JSON
275
  with open(face_path, "rb") as f:
276
  face_bytes = f.read()
277
  if not face_bytes:
278
  raise ValueError("Face image is empty (0 bytes)")
279
 
280
  face_b64 = base64.b64encode(face_bytes).decode("ascii")
281
- # prefix with MIME type (assume jpeg)
282
  face_data_uri = f"data:image/jpeg;base64,{face_b64}"
283
 
284
- # Build the JSON payload consistent with gradio multimodal style:
285
- payload = {
286
  "data": [
287
  {
288
  "text": prompt,
@@ -296,8 +294,7 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
296
  if SPACE_HOST:
297
  base_url = SPACE_HOST.rstrip("/")
298
  else:
299
- # infer from GRADIO_VLM_SPACE if it's of form "owner/space-name"
300
- # Many public HF spaces also map to {owner}-{space}.hf.space
301
  if "/" in GRADIO_VLM_SPACE:
302
  base_url = f"https://{GRADIO_VLM_SPACE.replace('/', '-')}.hf.space"
303
  else:
@@ -306,125 +303,178 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
306
  post_url = f"{base_url}/gradio_api/call/chat"
307
  get_url_template = f"{base_url}/gradio_api/call/chat/{{event_id}}"
308
 
309
- headers = {"Content-Type": "application/json"}
310
  if HF_TOKEN:
311
- headers["Authorization"] = f"Bearer {HF_TOKEN}"
312
 
313
  meta: Dict[str, Any] = {
314
  "vlm_file_delivery_ok": False,
315
  "vlm_files_seen": None,
316
  "vlm_raw_len": 0,
317
  "vlm_out_object": None,
318
- "post_url": post_url
 
319
  }
320
 
321
- try:
322
- logger.info("VLM POST -> %s (payload text len=%d, files=1)", post_url, len(prompt))
323
- with httpx.Client(timeout=30.0) as client:
324
- resp = client.post(post_url, headers=headers, json=payload)
325
- resp.raise_for_status()
 
 
 
 
 
 
 
 
 
 
326
 
327
- # Try to robustly extract an event id from the POST response
328
- event_id = None
 
 
 
 
 
329
  try:
330
- rj = resp.json()
331
  except Exception:
332
- rj = {}
333
 
334
- if isinstance(rj, dict):
335
- event_id = rj.get("event_id") or rj.get("id") or rj.get("job")
336
- if not event_id:
337
- # try to extract using regex from resp.text
338
- m = re.search(r'"([^"]{8,})"', resp.text or "")
339
- if m:
340
- event_id = m.group(1)
341
- if not event_id:
342
- parts = re.split(r'"', resp.text or "")
343
- if len(parts) >= 5:
344
- event_id_candidate = parts[3].strip()
345
- if event_id_candidate:
346
- event_id = event_id_candidate
347
  if not event_id:
348
- raise RuntimeError(f"Failed to obtain EVENT_ID from VLM POST response: {resp.text[:1000]}")
349
-
350
  meta["event_id"] = event_id
351
- logger.info("VLM event created: %s", event_id)
352
-
353
- # Poll the GET result endpoint
354
- get_url = get_url_template.format(event_id=event_id)
355
- logger.info("Polling VLM event result at %s", get_url)
356
-
357
- max_polls = 6
358
- poll_delay = 0.5
359
- final_text = ""
360
- last_response_json = None
361
- for attempt in range(max_polls):
362
- r2 = client.get(get_url, headers=headers, timeout=30.0)
363
- if r2.status_code == 204 or not (r2.text and r2.text.strip()):
364
- time.sleep(poll_delay)
365
- continue
366
- try:
367
- r2j = r2.json()
368
- last_response_json = r2j
369
- except Exception:
370
- r2j = None
371
-
372
- text_out = ""
373
- if isinstance(r2j, dict):
374
- if "data" in r2j and isinstance(r2j["data"], list) and len(r2j["data"]) > 0:
375
- first = r2j["data"][0]
376
- if isinstance(first, dict):
377
- text_out = first.get("text") or first.get("output") or json.dumps(first)
378
- elif isinstance(first, str):
379
- text_out = first
380
- text_out = text_out or r2j.get("text") or r2j.get("msg") or r2j.get("output", "") or ""
381
- else:
382
- text_out = r2.text or ""
383
 
384
- if text_out and text_out.strip():
385
- final_text = text_out
386
- break
387
- else:
388
- time.sleep(poll_delay)
389
- continue
390
-
391
- if not final_text:
392
- final_text = (r2.text or "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
- meta["vlm_raw_len"] = len(final_text)
395
- meta["vlm_out_object"] = (final_text[:2000] + "...") if len(final_text) > 2000 else final_text
 
396
 
397
- # Best-effort: detect whether server mentions receiving a file
398
- files_seen = None
399
  try:
400
- if isinstance(last_response_json, dict):
401
- for key in ("files", "output_files", "files_sent", "uploaded_files", "received_files"):
402
- if key in last_response_json and isinstance(last_response_json[key], (list, tuple)):
403
- files_seen = len(last_response_json[key])
404
- break
405
- if files_seen is None and final_text:
406
- ext_matches = re.findall(r"\.(?:jpg|jpeg|png|bmp|gif)\b", final_text, flags=re.IGNORECASE)
407
- if ext_matches:
408
- files_seen = len(ext_matches)
409
- else:
410
- matches = re.findall(r"\b(?:uploaded|received|file)\b", final_text, flags=re.IGNORECASE)
411
- if matches:
412
- files_seen = max(1, len(matches))
413
  except Exception:
414
- files_seen = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
- meta["vlm_files_seen"] = files_seen
417
- meta["vlm_file_delivery_ok"] = (files_seen is not None and files_seen >= 1)
 
418
 
419
- parsed_features = None
420
- return parsed_features, (final_text or ""), meta
421
 
422
- except httpx.HTTPStatusError as he:
423
- logger.exception("VLM HTTP error")
424
- raise RuntimeError(f"VLM http error: {he.response.status_code} {str(he)}")
425
- except Exception as e:
426
- logger.exception("VLM call (httpx) failed")
427
- raise RuntimeError(f"VLM call failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
 
429
  # -----------------------
430
  # Gradio / LLM helper (defensive, with retry + clamps)
 
12
  - Add httpx to requirements.txt for VLM POST/GET flow
13
  - If VLM/LLM Spaces are private, set HF_TOKEN in the environment for authentication.
14
  - This variant:
15
+ * sends the face image to the HF Space using the POST/GET event flow (tries JSON data-uri first, then multipart fallback)
16
  * returns raw VLM output and meta (no VLM-side JSON extraction)
17
  """
18
 
 
250
 
251
  # -----------------------
252
  # VLM helper using HF Spaces POST/GET event flow (gradio_api/call/chat)
253
+ # Robust: try JSON (data-uri) POST first; if 5xx, fall back to multipart/form-data file upload.
254
  # -----------------------
255
  def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, prompt: Optional[str] = None,
256
  raise_on_file_delivery_failure: bool = False
 
260
  1) POST -> returns an EVENT_ID
261
  2) GET /gradio_api/call/chat/{EVENT_ID} -> fetch result
262
 
263
+ Behavior:
264
+ - Try JSON payload with data URI (fast path)
265
+ - If JSON POST yields server error (5xx), retry with multipart/form-data attaching the face image
266
+ - Poll GET endpoint a few times for result
267
+ - Return (parsed_features_or_None, raw_text, meta)
268
+ - parsed_features is None (we avoid parsing JSON here)
269
  """
270
  prompt = prompt or DEFAULT_VLM_PROMPT
271
 
272
  if not os.path.exists(face_path):
273
  raise FileNotFoundError(f"Face image not found at: {face_path}")
274
 
 
275
  with open(face_path, "rb") as f:
276
  face_bytes = f.read()
277
  if not face_bytes:
278
  raise ValueError("Face image is empty (0 bytes)")
279
 
280
  face_b64 = base64.b64encode(face_bytes).decode("ascii")
 
281
  face_data_uri = f"data:image/jpeg;base64,{face_b64}"
282
 
283
+ payload_json = {
 
284
  "data": [
285
  {
286
  "text": prompt,
 
294
  if SPACE_HOST:
295
  base_url = SPACE_HOST.rstrip("/")
296
  else:
297
+ # Many public HF spaces map to {owner}-{space}.hf.space when used in hostnames.
 
298
  if "/" in GRADIO_VLM_SPACE:
299
  base_url = f"https://{GRADIO_VLM_SPACE.replace('/', '-')}.hf.space"
300
  else:
 
303
  post_url = f"{base_url}/gradio_api/call/chat"
304
  get_url_template = f"{base_url}/gradio_api/call/chat/{{event_id}}"
305
 
306
+ headers_json = {"Content-Type": "application/json"}
307
  if HF_TOKEN:
308
+ headers_json["Authorization"] = f"Bearer {HF_TOKEN}"
309
 
310
  meta: Dict[str, Any] = {
311
  "vlm_file_delivery_ok": False,
312
  "vlm_files_seen": None,
313
  "vlm_raw_len": 0,
314
  "vlm_out_object": None,
315
+ "post_url": post_url,
316
+ "attempts": []
317
  }
318
 
319
+ def _extract_event_id(resp_text: str, resp_json: Optional[Dict[str, Any]]) -> Optional[str]:
320
+ if isinstance(resp_json, dict):
321
+ for k in ("event_id", "id", "job"):
322
+ if k in resp_json and resp_json[k]:
323
+ return resp_json[k]
324
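+ # Fallback when the JSON lookup finds no id: take the first double-quoted token of 8+ characters from the raw body (an awk-style field-splitting heuristic)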
325
+ m = re.search(r'"([^"]{8,})"', resp_text or "")
326
+ if m:
327
+ return m.group(1)
328
+ parts = re.split(r'"', resp_text or "")
329
+ if len(parts) >= 5:
330
+ candidate = parts[3].strip()
331
+ if candidate:
332
+ return candidate
333
+ return None
334
 
335
+ with httpx.Client(timeout=30.0) as client:
336
+ # Attempt 1: JSON data-uri POST
337
+ try:
338
+ logger.info("VLM POST (JSON data-uri) -> %s (prompt len=%d)", post_url, len(prompt))
339
+ resp = client.post(post_url, headers=headers_json, json=payload_json)
340
+ resp.raise_for_status()
341
+ meta["attempts"].append({"mode": "json", "status_code": resp.status_code})
342
  try:
343
+ resp_json = resp.json()
344
  except Exception:
345
+ resp_json = None
346
 
347
+ event_id = _extract_event_id(resp.text, resp_json)
 
 
 
 
 
 
 
 
 
 
 
 
348
  if not event_id:
349
+ raise RuntimeError(f"Failed to obtain EVENT_ID from VLM POST (json) response: {resp.text[:1000]}")
 
350
  meta["event_id"] = event_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
+ except httpx.HTTPStatusError as he:
353
+ # Record the failed attempt, then fall back to multipart if the failure is server-side (5xx) or has no status code
354
+ status = he.response.status_code if he.response is not None else None
355
+ body_excerpt = (he.response.text[:1000] if he.response is not None else str(he))
356
+ logger.warning("VLM JSON POST failed (status=%s). Response excerpt: %s", status, body_excerpt[:400])
357
+ meta["attempts"].append({"mode": "json", "status_code": status, "error": body_excerpt})
358
+ if status is None or 500 <= status < 600:
359
+ # Try multipart fallback
360
+ try:
361
+ logger.info("Attempting multipart/form-data fallback to %s", post_url)
362
+ # Some Spaces expect the 'data' field to be a JSON array describing the inputs and the files they reference.
363
+ # Send 'data' as a JSON string with a None placeholder in 'files', and attach the image bytes in the 'file' part.
364
+ data_field = json.dumps([{"text": prompt, "files": [None]}])
365
+ files = {
366
+ "data": (None, data_field, "application/json"),
367
+ "file": (os.path.basename(face_path), face_bytes, "image/jpeg")
368
+ }
369
+ # Authorization header only; content-type will be set by httpx for multipart
370
+ headers_mp = {}
371
+ if HF_TOKEN:
372
+ headers_mp["Authorization"] = f"Bearer {HF_TOKEN}"
373
+
374
+ resp2 = client.post(post_url, headers=headers_mp, files=files)
375
+ resp2.raise_for_status()
376
+ meta["attempts"].append({"mode": "multipart", "status_code": resp2.status_code})
377
+ try:
378
+ resp2_json = resp2.json()
379
+ except Exception:
380
+ resp2_json = None
381
+ event_id = _extract_event_id(resp2.text, resp2_json)
382
+ if not event_id:
383
+ raise RuntimeError(f"Failed to obtain EVENT_ID from VLM POST (multipart) response: {resp2.text[:1000]}")
384
+ meta["event_id"] = event_id
385
+ except Exception as e_mp:
386
+ logger.exception("Multipart fallback failed")
387
+ meta["attempts"].append({"mode": "multipart", "error": str(e_mp)})
388
+ raise RuntimeError(f"VLM POST failed (json then multipart): {body_excerpt[:1000]} | multipart error: {str(e_mp)}")
389
+ else:
390
+ # Non-5xx error — surface it
391
+ raise RuntimeError(f"VLM POST failed with status {status}: {body_excerpt[:1000]}")
392
+ except Exception as e:
393
+ logger.exception("VLM POST unexpected failure")
394
+ meta["attempts"].append({"mode": "json", "error": str(e)})
395
+ raise RuntimeError(f"VLM POST failed: {e}")
396
+
397
+ # If we have event_id, poll GET endpoint for result
398
+ event_id = meta.get("event_id")
399
+ if not event_id:
400
+ raise RuntimeError("No event_id obtained from VLM POST (unexpected)")
401
+
402
+ get_url = get_url_template.format(event_id=event_id)
403
+ logger.info("Polling VLM event result at %s", get_url)
404
+
405
+ max_polls = 8
406
+ poll_delay = 0.5
407
+ final_text = ""
408
+ last_response_json = None
409
+ for attempt in range(max_polls):
410
+ try:
411
+ r2 = client.get(get_url, timeout=30.0)
412
+ except Exception as e_get:
413
+ logger.warning("GET attempt %d failed: %s", attempt + 1, str(e_get))
414
+ time.sleep(poll_delay)
415
+ continue
416
 
417
+ if r2.status_code == 204 or not (r2.text and r2.text.strip()):
418
+ time.sleep(poll_delay)
419
+ continue
420
 
 
 
421
  try:
422
+ r2j = r2.json()
423
+ last_response_json = r2j
 
 
 
 
 
 
 
 
 
 
 
424
  except Exception:
425
+ r2j = None
426
+
427
+ text_out = ""
428
+ if isinstance(r2j, dict):
429
+ if "data" in r2j and isinstance(r2j["data"], list) and len(r2j["data"]) > 0:
430
+ first = r2j["data"][0]
431
+ if isinstance(first, dict):
432
+ text_out = first.get("text") or first.get("output") or json.dumps(first)
433
+ elif isinstance(first, str):
434
+ text_out = first
435
+ text_out = text_out or r2j.get("text") or r2j.get("msg") or r2j.get("output", "") or ""
436
+ else:
437
+ text_out = r2.text or ""
438
+
439
+ if text_out and text_out.strip():
440
+ final_text = text_out
441
+ meta["attempts"].append({"mode": "get", "status_code": r2.status_code})
442
+ break
443
+ else:
444
+ time.sleep(poll_delay)
445
+ continue
446
 
447
+ if not final_text:
448
+ final_text = (r2.text or "").strip()
449
+ meta["attempts"].append({"mode": "get_last", "status_code": r2.status_code if 'r2' in locals() and r2 is not None else None, "raw": final_text[:500]})
450
 
451
+ meta["vlm_raw_len"] = len(final_text)
452
+ meta["vlm_out_object"] = (final_text[:2000] + "...") if len(final_text) > 2000 else final_text
453
 
454
+ # Best-effort: detect whether server mentions receiving a file
455
+ files_seen = None
456
+ try:
457
+ if isinstance(last_response_json, dict):
458
+ for key in ("files", "output_files", "files_sent", "uploaded_files", "received_files"):
459
+ if key in last_response_json and isinstance(last_response_json[key], (list, tuple)):
460
+ files_seen = len(last_response_json[key])
461
+ break
462
+ if files_seen is None and final_text:
463
+ ext_matches = re.findall(r"\.(?:jpg|jpeg|png|bmp|gif)\b", final_text, flags=re.IGNORECASE)
464
+ if ext_matches:
465
+ files_seen = len(ext_matches)
466
+ else:
467
+ matches = re.findall(r"\b(?:uploaded|received|file)\b", final_text, flags=re.IGNORECASE)
468
+ if matches:
469
+ files_seen = max(1, len(matches))
470
+ except Exception:
471
+ files_seen = None
472
+
473
+ meta["vlm_files_seen"] = files_seen
474
+ meta["vlm_file_delivery_ok"] = (files_seen is not None and files_seen >= 1)
475
+
476
+ parsed_features = None
477
+ return parsed_features, (final_text or ""), meta
478
 
479
  # -----------------------
480
  # Gradio / LLM helper (defensive, with retry + clamps)