dpv007 committed on
Commit
7088aee
·
verified ·
1 Parent(s): d8e884b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -158
app.py CHANGED
@@ -16,6 +16,7 @@ Notes:
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends only the face image to the VLM (not the eye image).
18
  """
 
19
  import io
20
  import os
21
  import uuid
@@ -35,9 +36,6 @@ from PIL import Image
35
  import numpy as np
36
  import cv2 # opencv-python-headless expected installed
37
 
38
- # httpx used for multipart fallback when gradio_client cannot reliably upload
39
- import httpx # ensure httpx added to requirements
40
-
41
  # Optional gradio client (for VLM + LLM calls)
42
  try:
43
  from gradio_client import Client, handle_file # type: ignore
@@ -247,43 +245,8 @@ def extract_json_via_regex(raw_text: str) -> Dict[str, Any]:
247
  }
248
  return out
249
 
250
- # -----------------------
251
- # Helper: multipart call to HF Space inference endpoint (fallback)
252
- # -----------------------
253
- def call_space_multipart(space: str, api_name: str, prompt: str, face_path: str, timeout: float = 30.0) -> Dict[str, Any]:
254
- """
255
- Multipart POST to Hugging Face Space inference endpoint.
256
- - Attempts to send an explicit multipart upload that many Spaces accept.
257
- - Returns parsed JSON when available or a dict with 'raw' text.
258
- """
259
- # Build embed-style endpoint
260
- endpoint = f"https://hf.space/embed/{space}/api/{api_name.lstrip('/')}"
261
- headers = {}
262
- if HF_TOKEN:
263
- headers["Authorization"] = f"Bearer {HF_TOKEN}"
264
-
265
- # Many Spaces accept a "data" field which is a JSON array of inputs; we provide prompt as first arg
266
- # and attach the file with a 'file' key. Some Spaces expect different key names — this is a pragmatic fallback.
267
- files = {
268
- "data": (None, json.dumps([prompt, None])),
269
- "file": (os.path.basename(face_path), open(face_path, "rb"), "image/jpeg")
270
- }
271
- try:
272
- with httpx.Client(timeout=timeout) as client:
273
- resp = client.post(endpoint, headers=headers, files=files)
274
- resp.raise_for_status()
275
- try:
276
- return resp.json()
277
- except Exception:
278
- # return raw text if JSON is unavailable
279
- return {"raw": resp.text}
280
- except Exception as e:
281
- logger.exception("call_space_multipart failed: %s", e)
282
- raise
283
-
284
  # -----------------------
285
  # Gradio / VLM helper (sends only face image, returns meta)
286
- # Robust: tries predictable gradio_client signatures; if that fails, falls back to multipart HTTP
287
  # -----------------------
288
  def get_gradio_client_for_space(space: str) -> Client:
289
  if not GRADIO_AVAILABLE:
@@ -303,140 +266,75 @@ def run_vlm_and_get_features(face_path: str, eye_path: Optional[str] = None, pro
303
  - vlm_files_seen (int or None)
304
  - vlm_raw_len (int)
305
  - vlm_out_object (short repr)
306
- - vlm_upload_method (which method was used)
307
  """
308
  prompt = prompt or DEFAULT_VLM_PROMPT
309
 
 
310
  if not os.path.exists(face_path):
311
  raise FileNotFoundError(f"Face image not found at: {face_path}")
312
- if eye_path is not None and not os.path.exists(eye_path):
313
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
314
-
315
  face_size = os.path.getsize(face_path)
316
- logger.info(f"VLM input files - Face: {face_size} bytes")
317
- if face_size == 0:
318
- raise ValueError("Face image is empty (0 bytes)")
319
-
320
- meta: Dict[str, Any] = {
321
- "vlm_file_delivery_ok": False,
322
- "vlm_files_seen": None,
323
- "vlm_raw_len": 0,
324
- "vlm_out_object": None,
325
- "vlm_upload_method": None
326
- }
327
-
328
- # If gradio_client is not available, directly use multipart fallback
329
  if not GRADIO_AVAILABLE:
330
- logger.warning("gradio_client not available; using httpx multipart fallback to upload image.")
331
- try:
332
- out = call_space_multipart(GRADIO_VLM_SPACE, "chat_fn", prompt, face_path)
333
- raw_text = ""
334
- if isinstance(out, dict):
335
- raw_text = json.dumps(out)
336
- else:
337
- raw_text = str(out)
338
- meta["vlm_upload_method"] = "httpx_multipart"
339
- meta["vlm_raw_len"] = len(raw_text)
340
- meta["vlm_out_object"] = (raw_text[:2000] + "...") if len(raw_text) > 2000 else raw_text
341
- except Exception as e:
342
- logger.exception("Multipart fallback failed")
343
- raise RuntimeError(f"VLM multipart fallback failed: {e}")
344
 
345
- else:
346
- # Try using gradio_client with predictable argument patterns
347
- client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
- # Prepare handle_file wrapper (gradio_client helper)
350
- try:
351
- file_wrapper = handle_file(face_path)
352
- except Exception:
353
- # Some versions expect an open file-like; attempt that
354
- file_wrapper = open(face_path, "rb")
355
 
356
- # We'll try several invocation styles until one works:
357
- tried_methods = []
358
- result = None
359
- # 1) Positional: client.predict(prompt, file, api_name="/chat_fn")
360
- try:
361
- logger.info("Attempting gradio_client.predict positional call (prompt, file) to %s", GRADIO_VLM_SPACE)
362
- result = client.predict(prompt, file_wrapper, api_name="/chat_fn")
363
- meta["vlm_upload_method"] = "gradio_positional"
364
- tried_methods.append("gradio_positional")
365
- except TypeError as te:
366
- logger.info("Positional call TypeError: %s", te)
367
- tried_methods.append("gradio_positional_failed")
368
- except Exception as e:
369
- logger.exception("Positional gradio_client.predict failed: %s", e)
370
- tried_methods.append("gradio_positional_failed_general")
371
-
372
- # 2) Named common alternatives
373
- if result is None:
374
- named_attempts = [
375
- {"text": prompt, "image": file_wrapper},
376
- {"message": prompt, "file": file_wrapper},
377
- {"prompt": prompt, "image": file_wrapper},
378
- {"prompt": prompt, "file": file_wrapper},
379
- {"input_data": [prompt, None]}
380
- ]
381
- for named_args in named_attempts:
382
- try:
383
- logger.info("Attempting gradio_client.predict named call with args: %s", list(named_args.keys()))
384
- result = client.predict(api_name="/chat_fn", **named_args)
385
- meta["vlm_upload_method"] = "gradio_named:" + ",".join(list(named_args.keys()))
386
- tried_methods.append(f"gradio_named_{','.join(list(named_args.keys()))}")
387
- break
388
- except TypeError as te:
389
- logger.info("Named call TypeError with keys %s: %s", list(named_args.keys()), te)
390
- except Exception as e:
391
- logger.info("Named call failed with keys %s: %s", list(named_args.keys()), e)
392
-
393
- # 3) If still None, fallback to httpx multipart
394
- if result is None:
395
- logger.warning("gradio_client attempts did not yield a usable response; falling back to httpx multipart upload.")
396
- tried_methods.append("httpx_multipart_fallback")
397
- try:
398
- out = call_space_multipart(GRADIO_VLM_SPACE, "chat_fn", prompt, face_path)
399
- result = out
400
- meta["vlm_upload_method"] = "httpx_multipart"
401
- except Exception as e:
402
- logger.exception("httpx multipart fallback failed: %s", e)
403
- raise RuntimeError(f"All VLM upload methods failed: {e}. Tried: {tried_methods}")
404
-
405
- # Normalize result into raw_text and out object
406
  raw_text = ""
407
  out = None
408
- try:
409
- # If result is an httpx/json dict from call_space_multipart or gradio returned a dict/list
410
- if isinstance(result, (dict, list)):
 
 
 
411
  out = result
412
- # Try to extract textual outputs in common keys
413
- if isinstance(out, dict):
414
- possible_text = out.get("data") or out.get("text") or out.get("output") or out.get("raw") or out.get("msg")
415
- if possible_text is None:
416
- if "data" in out and isinstance(out["data"], (list, tuple)) and len(out["data"]) > 0:
417
- possible_text = out["data"][0]
418
- if isinstance(possible_text, (dict, list)):
419
- raw_text = json.dumps(possible_text)
420
- else:
421
- raw_text = str(possible_text or "")
422
- else:
423
- raw_text = json.dumps(out)
424
  else:
425
- # not dict/list -> string-like
426
- raw_text = str(result or "")
427
- out = {"text": raw_text}
428
- except Exception as e:
429
- logger.exception("Normalization of VLM result failed: %s", e)
430
- raw_text = str(result or "")
431
- out = {"text": raw_text}
432
 
433
- meta["vlm_raw_len"] = len(raw_text or "")
434
- try:
435
- meta["vlm_out_object"] = str(out)[:2000]
436
- except Exception:
437
- meta["vlm_out_object"] = "<unreprable>"
 
 
438
 
439
- logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
440
 
441
  # --- Check whether the remote acknowledged receiving files (expect 1) ---
442
  files_seen = None
@@ -694,8 +592,7 @@ async def health_check():
694
  "detector": impl,
695
  "vlm_available": GRADIO_AVAILABLE,
696
  "vlm_space": GRADIO_VLM_SPACE,
697
- "llm_space": LLM_GRADIO_SPACE,
698
- "hf_token_present": bool(HF_TOKEN)
699
  }
700
 
701
  @app.post("/api/v1/validate-eye-photo")
@@ -991,6 +888,8 @@ class ImageUrls(BaseModel):
991
  face_image_url: HttpUrl
992
  eye_image_url: HttpUrl
993
 
 
 
994
  # helper: download URL to file with safety checks
995
  async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
996
  """
@@ -1276,4 +1175,4 @@ async def process_screening(screening_id: str):
1276
  # -----------------------
1277
  if __name__ == "__main__":
1278
  import uvicorn
1279
- uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
16
  * extracts JSON from VLM via regex when possible, and
17
  * sends only the face image to the VLM (not the eye image).
18
  """
19
+
20
  import io
21
  import os
22
  import uuid
 
36
  import numpy as np
37
  import cv2 # opencv-python-headless expected installed
38
 
 
 
 
39
  # Optional gradio client (for VLM + LLM calls)
40
  try:
41
  from gradio_client import Client, handle_file # type: ignore
 
245
  }
246
  return out
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  # -----------------------
249
  # Gradio / VLM helper (sends only face image, returns meta)
 
250
  # -----------------------
251
  def get_gradio_client_for_space(space: str) -> Client:
252
  if not GRADIO_AVAILABLE:
 
266
  - vlm_files_seen (int or None)
267
  - vlm_raw_len (int)
268
  - vlm_out_object (short repr)
 
269
  """
270
  prompt = prompt or DEFAULT_VLM_PROMPT
271
 
272
+
273
  if not os.path.exists(face_path):
274
  raise FileNotFoundError(f"Face image not found at: {face_path}")
275
+ if not os.path.exists(eye_path):
276
  raise FileNotFoundError(f"Eye image not found at: {eye_path}")
277
+
278
  face_size = os.path.getsize(face_path)
279
+ eye_size = os.path.getsize(eye_path)
280
+ logger.info(f"VLM input files - Face: {face_size} bytes, Eye: {eye_size} bytes")
281
+
282
+ if face_size == 0 or eye_size == 0:
283
+ raise ValueError("One or both images are empty (0 bytes)")
284
+
 
 
 
 
 
 
 
285
  if not GRADIO_AVAILABLE:
286
+ raise RuntimeError("gradio_client not available in this environment.")
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
289
+
290
+ # Verify files can be opened as images
291
+ try:
292
+ Image.open(face_path).verify()
293
+ Image.open(eye_path).verify()
294
+ logger.info("Both images verified as valid")
295
+ except Exception as e:
296
+ raise ValueError(f"Invalid image file(s): {e}")
297
+
298
+ message = {"text": prompt, "files": [handle_file(face_path), handle_file(eye_path)]}
299
+
300
+ logger.info(f"Calling VLM with message structure: text={len(prompt)} chars, files=2")
301
+ client = get_gradio_client_for_space(GRADIO_VLM_SPACE)
302
+ # NOTE: only send face image to the Space
303
 
304
+ message = {"text": prompt, "files": [handle_file(face_path)]}
 
 
 
 
 
305
 
306
+ meta: Dict[str, Any] = {"vlm_file_delivery_ok": False, "vlm_files_seen": None, "vlm_raw_len": 0, "vlm_out_object": None}
307
+
308
+ # SINGLE CALL (no retries)
309
+ try:
310
+ logger.info("Calling VLM Space %s with 1 file (face only)", GRADIO_VLM_SPACE)
311
+ result = client.predict(message=message, history=[], api_name="/chat_fn")
312
+ except Exception as e:
313
+ logger.exception("VLM call failed (no retries)")
314
+ raise RuntimeError(f"VLM call failed: {e}")
315
+
316
+ # Normalize result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  raw_text = ""
318
  out = None
319
+ if not result:
320
+ logger.warning("VLM returned empty result object")
321
+ else:
322
+ if isinstance(result, (list, tuple)):
323
+ out = result[0]
324
+ elif isinstance(result, dict):
325
  out = result
 
 
 
 
 
 
 
 
 
 
 
 
326
  else:
327
+ out = {"text": str(result)}
 
 
 
 
 
 
328
 
329
+ text_out = out.get("text") or out.get("output") or ""
330
+ raw_text = text_out or ""
331
+ meta["vlm_raw_len"] = len(raw_text or "")
332
+ try:
333
+ meta["vlm_out_object"] = str(out)[:2000]
334
+ except Exception:
335
+ meta["vlm_out_object"] = "<unreprable>"
336
 
337
+ logger.info("VLM response object (debug snippet): %s", meta["vlm_out_object"])
338
 
339
  # --- Check whether the remote acknowledged receiving files (expect 1) ---
340
  files_seen = None
 
592
  "detector": impl,
593
  "vlm_available": GRADIO_AVAILABLE,
594
  "vlm_space": GRADIO_VLM_SPACE,
595
+ "llm_space": LLM_GRADIO_SPACE
 
596
  }
597
 
598
  @app.post("/api/v1/validate-eye-photo")
 
888
  face_image_url: HttpUrl
889
  eye_image_url: HttpUrl
890
 
891
+ import httpx # make sure to add httpx to requirements
892
+
893
  # helper: download URL to file with safety checks
894
  async def download_image_to_path(url: str, dest_path: str, max_bytes: int = 5_000_000, timeout_seconds: int = 10) -> None:
895
  """
 
1175
  # -----------------------
1176
  if __name__ == "__main__":
1177
  import uvicorn
1178
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)