dpv007 committed on
Commit
7088aee
·
verified ·
1 Parent(s): d8e884b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -158
app.py CHANGED
@@ -16,6 +16,7 @@ Notes:
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends only the face image to the VLM (not the eye image).
18
  """
 
19
  import io
20
  import os
21
  import uuid
@@ -35,9 +36,6 @@ from PIL import Image
35
  import numpy as np
36
  import cv2 # opencv-python-headless expected installed
37
 
38
- # httpx used for multipart fallback when gradio_client cannot reliably upload
39
- import httpx # ensure httpx added to requirements
40
-
41
  # Optional gradio client (for VLM + LLM calls)
42
  try:
43
  from gradio_client import Client, handle_file # type: ignore
@@ -247,43 +245,8 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
247
  }
248
  return out
249
 
250
- # -----------------------
251
- # Helper: multipart call to HF Space inference endpoint (fallback)
252
- # -----------------------
253
- def call_space_multipart(space: str, api_name: str, prompt: str, face_path: str, timeout: float = 30.0) -> Dict[str, Any]:
254
- """
255
- Multipart POST to Hugging Face Space inference endpoint.
256
- - Attempts to send an explicit multipart upload that many Spaces accept.
257
- - Returns parsed JSON when available or a dict with 'raw' text.
258
- """
259
- # Build embed-style endpoint
260
- endpoint = f"https://hf.space/embed/{space}/api/{api_name.lstrip('/')}"
261
- headers = {}
262
- if HF_TOKEN:
263
- headers["Authorization"] = f"Bearer {HF_TOKEN}"
264
-
265
- # Many Spaces accept a "data" field which is a JSON array of inputs; we provide prompt as first arg
266
- # and attach the file with a 'file' key. Some Spaces expect different key names — this is a pragmatic fallback.
267
- files = {
268
- "data": (None, json.dumps([prompt, None])),
269
- "file": (os.path.basename(face_path), open(face_path, "rb"), "image/jpeg")
270
- }
271
- try:
272
- with httpx.Client(timeout=timeout) as client:
273
- resp = client.post(endpoint, headers=headers, files=files)
274
- resp.raise_for_status()
275
- try:
276
- return resp.json()
277
- except Exception:
278
- # return raw text if JSON is unavailable
279
- return {"raw": resp.text}
280
- except Exception as e:
281
- logger.exception("call_space_multipart failed: %s", e)
282
- raise
283
-
284
  # -----------------------
285
  # Gradio / VLM helper (sends only face image, returns meta)
286
- # Robust: tries predictable gradio_client signatures; if that fails, falls back to multipart HTTP
287
  # -----------------------
288
  def get_gradio_client_for_space(space: str) -> Client:
289
  if not GRADIO_AVAILABLE:
@@ -303,140 +266,75 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
303
  - vlm_files_seen (int or None)
304
  - vlm_raw_len (int)
305
  - vlm_out_object (short repr)
306
- - vlm_upload_method (which method was used)
307
  """
308
  prompt = prompt or DEFAULT_VLM_PROMPT
309
 
 
310
  if not os.path.exists(face_path):
311
  raise FileNotFoundError(f"Face image not found at: {face_path}")
312
- if eye_path is not None and not os.path.exists(eye_path):
313
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
314
-
315
  face_size = os.path.getsize(face_path)
316
- logger.info(f"VLM input files - Face: {face_size} bytes")
317
- if face_size == 0:
318
- raise ValueError("Face image is empty (0 bytes)")
319
-
320
- meta: Dict[str, Any] = {
321
- "vlm_file_delivery_ok": False,
322
- "vlm_files_seen": None,
323
- "vlm_raw_len": 0,
324
- "vlm_out_object": None,
325
- "vlm_upload_method": None
326
- }
327
-
328
- # If gradio_client is not available, directly use multipart fallback
329
  if not GRADIO_AVAILABLE:
330
- logger.warning("gradio_client not available; using httpx multipart fallback to upload image.")
331
- try:
332
- out = call_space_multipart(GRADIO_VLM_SPACE, "chat_fn", prompt, face_path)
333
- raw_text = ""
334
- if isinstance(out, dict):
335
- raw_text = json.dumps(out)
336
- else:
337
- raw_text = str(out)
338
- meta["vlm_upload_method"] = "httpx_multipart"
339
- meta["vlm_raw_len"] = len(raw_text)
340
- meta["vlm_out_object"] = (raw_text[:2000] + "...") if len(raw_text) > 2000 else raw_text
341
- except Exception as e:
342
- logger.exception("Multipart fallback failed")
343
- raise RuntimeError(f"VLM multipart fallback failed: {e}")
344
 
345
- else:
346
- # Try using gradio_client with predictable argument patterns
347
- client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
- # Prepare handle_file wrapper (gradio_client helper)
350
- try:
351
- file_wrapper = handle_file(face_path)
352
- except Exception:
353
- # Some versions expect an open file-like; attempt that
354
- file_wrapper = open(face_path, "rb")
355
 
356
- # We'll try several invocation styles until one works:
357
- tried_methods = []
358
- result = None
359
- # 1) Positional: client.predict(prompt, file, api_name="/chat_fn")
360
- try:
361
- logger.info("Attempting gradio_client.predict positional call (prompt, file) to %s", GRADIO_VLM_SPACE)
362
- result = client.predict(prompt, file_wrapper, api_name="/chat_fn")
363
- meta["vlm_upload_method"] = "gradio_positional"
364
- tried_methods.append("gradio_positional")
365
- except TypeError as te:
366
- logger.info("Positional call TypeError: %s", te)
367
- tried_methods.append("gradio_positional_failed")
368
- except Exception as e:
369
- logger.exception("Positional gradio_client.predict failed: %s", e)
370
- tried_methods.append("gradio_positional_failed_general")
371
-
372
- # 2) Named common alternatives
373
- if result is None:
374
- named_attempts = [
375
- {"text": prompt, "image": file_wrapper},
376
- {"message": prompt, "file": file_wrapper},
377
- {"prompt": prompt, "image": file_wrapper},
378
- {"prompt": prompt, "file": file_wrapper},
379
- {"input_data": [prompt, None]}
380
- ]
381
- for named_args in named_attempts:
382
- try:
383
- logger.info("Attempting gradio_client.predict named call with args: %s", list(named_args.keys()))
384
- result = client.predict(api_name="/chat_fn", **named_args)
385
- meta["vlm_upload_method"] = "gradio_named:" + ",".join(list(named_args.keys()))
386
- tried_methods.append(f"gradio_named_{','.join(list(named_args.keys()))}")
387
- break
388
- except TypeError as te:
389
- logger.info("Named call TypeError with keys %s: %s", list(named_args.keys()), te)
390
- except Exception as e:
391
- logger.info("Named call failed with keys %s: %s", list(named_args.keys()), e)
392
-
393
- # 3) If still None, fallback to httpx multipart
394
- if result is None:
395
- logger.warning("gradio_client attempts did not yield a usable response; falling back to httpx multipart upload.")
396
- tried_methods.append("httpx_multipart_fallback")
397
- try:
398
- out = call_space_multipart(GRADIO_VLM_SPACE, "chat_fn", prompt, face_path)
399
- result = out
400
- meta["vlm_upload_method"] = "httpx_multipart"
401
- except Exception as e:
402
- logger.exception("httpx multipart fallback failed: %s", e)
403
- raise RuntimeError(f"All VLM upload methods failed: {e}. Tried: {tried_methods}")
404
-
405
- # Normalize result into raw_text and out object
406
  raw_text = ""
407
  out = None
408
- try:
409
- # If result is an httpx/json dict from call_space_multipart or gradio returned a dict/list
410
- if isinstance(result, (dict, list)):
 
 
 
411
  out = result
412
- # Try to extract textual outputs in common keys
413
- if isinstance(out, dict):
414
- possible_text = out.get("data") or out.get("text") or out.get("output") or out.get("raw") or out.get("msg")
415
- if possible_text is None:
416
- if "data" in out and isinstance(out["data"], (list, tuple)) and len(out["data"]) > 0:
417
- possible_text = out["data"][0]
418
- if isinstance(possible_text, (dict, list)):
419
- raw_text = json.dumps(possible_text)
420
- else:
421
- raw_text = str(possible_text or "")
422
- else:
423
- raw_text = json.dumps(out)
424
  else:
425
- # not dict/list -> string-like
426
- raw_text = str(result or "")
427
- out = {"text": raw_text}
428
- except Exception as e:
429
- logger.exception("Normalization of VLM result failed: %s", e)
430
- raw_text = str(result or "")
431
- out = {"text": raw_text}
432
 
433
- meta["vlm_raw_len"] = len(raw_text or "")
434
- try:
435
- meta["vlm_out_object"] = str(out)[:2000]
436
- except Exception:
437
- meta["vlm_out_object"] = "<unreprable>"
 
 
438
 
439
- logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
440
 
441
  # --- Check whether the remote acknowledged receiving files (expect 1) ---
442
  files_seen = None
@@ -694,8 +592,7 @@ async def health_check():
694
  "detector": impl,
695
  "vlm_available": GRADIO_AVAILABLE,
696
  "vlm_space": GRADIO_VLM_SPACE,
697
- "llm_space": LLM_GRADIO_SPACE,
698
- "hf_token_present": bool(HF_TOKEN)
699
  }
700
 
701
  @app.post("/api/v1/validate-eye-photo")
@@ -991,6 +888,8 @@ class ImageUrls(BaseModel):
991
  face_image_url: HttpUrl
992
  eye_image_url: HttpUrl
993
 
 
 
994
  # helper: download URL to file with safety checks
995
  async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
996
  """
@@ -1276,4 +1175,4 @@ async def process_screening(screening_id: str):
1276
  # -----------------------
1277
  if __name__ == "__main__":
1278
  import uvicorn
1279
- uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends only the face image to the VLM (not the eye image).
18
  """
19
+
20
  import io
21
  import os
22
  import uuid
 
36
  import numpy as np
37
  import cv2 # opencv-python-headless expected installed
38
 
 
 
 
39
  # Optional gradio client (for VLM + LLM calls)
40
  try:
41
  from gradio_client import Client, handle_file # type: ignore
 
245
  }
246
  return out
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  # -----------------------
249
  # Gradio / VLM helper (sends only face image, returns meta)
 
250
  # -----------------------
251
  def get_gradio_client_for_space(space: str) -> Client:
252
  if not GRADIO_AVAILABLE:
 
266
  - vlm_files_seen (int or None)
267
  - vlm_raw_len (int)
268
  - vlm_out_object (short repr)
 
269
  """
270
  prompt = prompt or DEFAULT_VLM_PROMPT
271
 
272
+
273
  if not os.path.exists(face_path):
274
  raise FileNotFoundError(f"Face image not found at: {face_path}")
275
+ if not os.path.exists(eye_path):
276
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
277
+
278
  face_size = os.path.getsize(face_path)
279
+ eye_size = os.path.getsize(eye_path)
280
+ logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
281
+
282
+ if face_size == 0 or eye_size == 0:
283
+ raise ValueError("One or both images are empty (0 bytes)")
284
+
 
 
 
 
 
 
 
285
  if not GRADIO_AVAILABLE:
286
+ raise RuntimeError("gradio_client not available in this environment.")
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
289
+
290
+ # Verify files can be opened as images
291
+ try:
292
+ Image.open(face_path).verify()
293
+ Image.open(eye_path).verify()
294
+ logger.info("Both images verified as valid")
295
+ except Exception as e:
296
+ raise ValueError(f"Invalid image file(s): {e}")
297
+
298
+ message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
299
+
300
+ logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
301
+ client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
302
+ # NOTE: only send face image to the Space
303
 
304
+ message = {"text": prompt, "files": [handle_file(face_path)]}
 
 
 
 
 
305
 
306
+ meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
307
+
308
+ # SINGLE CALL (no retries)
309
+ try:
310
+ logger.info("Calling VLM Space %s with 1 file (face only)", GRADIO_VLM_SPACE)
311
+ result = client.predict(message=message, history=[], api_name="/chat_fn")
312
+ except Exception as e:
313
+ logger.exception("VLM call failed (no retries)")
314
+ raise RuntimeError(f"VLM call failed: {e}")
315
+
316
+ # Normalize result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  raw_text = ""
318
  out = None
319
+ if not result:
320
+ logger.warning("VLM returned empty result object")
321
+ else:
322
+ if isinstance(result, (list, tuple)):
323
+ out = result[0]
324
+ elif isinstance(result, dict):
325
  out = result
 
 
 
 
 
 
 
 
 
 
 
 
326
  else:
327
+ out = {"text": str(result)}
 
 
 
 
 
 
328
 
329
+ text_out = out.get("text") or out.get("output") or ""
330
+ raw_text = text_out or ""
331
+ meta["vlm_raw_len"] = len(raw_text or "")
332
+ try:
333
+ meta["vlm_out_object"] = str(out)[:2000]
334
+ except Exception:
335
+ meta["vlm_out_object"] = "<unreprable>"
336
 
337
+ logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
338
 
339
  # --- Check whether the remote acknowledged receiving files (expect 1) ---
340
  files_seen = None
 
592
  "detector": impl,
593
  "vlm_available": GRADIO_AVAILABLE,
594
  "vlm_space": GRADIO_VLM_SPACE,
595
+ "llm_space": LLM_GRADIO_SPACE
 
596
  }
597
 
598
  @app.post("/api/v1/validate-eye-photo")
 
888
  face_image_url: HttpUrl
889
  eye_image_url: HttpUrl
890
 
891
+ import httpx # make sure to add httpx to requirements
892
+
893
  # helper: download URL to file with safety checks
894
  async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
895
  """
 
1175
  # -----------------------
1176
  if __name__ == "__main__":
1177
  import uvicorn
1178
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)