Spaces:

Seth0330
/

OCR_VISION

Sleeping

App Files Files Community

Seth0330 commited on Aug 18, 2025

Commit

ab98649

verified ·

1 Parent(s): 0fac414

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -10

app.py CHANGED Viewed

@@ -134,21 +134,51 @@ def _hf_client(model_id: str):
 def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
     """
-    Calls Hugging Face Hosted Inference API for a VLM (e.g., LLaVA v1.6 Mistral-7B).
-    Uses the Visual Question Answering interface: (image + question) -> text.
     """
     client = _hf_client(model_id)
     image_bytes = base64.b64decode(image_base64)
-    # Some deployments return list[{'answer': '...'}]; others return str
-    result = client.visual_question_answering(
-        image=image_bytes,
-        question=prompt,
-        max_new_tokens=512
-    )
-    if isinstance(result, list) and result and isinstance(result[0], dict) and "answer" in result[0]:
-        return result[0]["answer"]
     if isinstance(result, str):
         return result
     return str(result)
 # ---------------------------

 def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
     """
+    Calls Hugging Face Hosted Inference API for VQA without extra kwargs that
+    some client versions don’t support. Includes robust fallbacks for return types.
     """
     client = _hf_client(model_id)
     image_bytes = base64.b64decode(image_base64)
+    # Primary: simple VQA call (most deployments support this signature)
+    try:
+        result = client.visual_question_answering(
+            image=image_bytes,
+            question=prompt
+        )
+    except TypeError:
+        # Fallback for older/newer client variants that don’t expose the helper
+        # or expect a different signature. Try the generic .request() path.
+        result = client.request(
+            task="visual_question_answering",
+            data={"inputs": {"question": prompt}},
+            files={"image": image_bytes}
+        )
+    # Normalize result into a string
+    # Possible shapes:
+    #  - str
+    #  - [{"answer": "..."}]
+    #  - {"answer": "..."}
+    #  - [{"generated_text": "..."}] (some backends)
     if isinstance(result, str):
         return result
+    if isinstance(result, dict):
+        if "answer" in result:
+            return result["answer"]
+        if "generated_text" in result:
+            return result["generated_text"]
+    if isinstance(result, list) and result:
+        first = result[0]
+        if isinstance(first, dict):
+            if "answer" in first:
+                return first["answer"]
+            if "generated_text" in first:
+                return first["generated_text"]
+    # Last resort
     return str(result)
 # ---------------------------