Seth0330 commited on
Commit
ab98649
·
verified ·
1 Parent(s): 0fac414

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -10
app.py CHANGED
@@ -134,21 +134,51 @@ def _hf_client(model_id: str):
134
 
135
  def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
136
  """
137
- Calls Hugging Face Hosted Inference API for a VLM (e.g., LLaVA v1.6 Mistral-7B).
138
- Uses the Visual Question Answering interface: (image + question) -> text.
139
  """
140
  client = _hf_client(model_id)
141
  image_bytes = base64.b64decode(image_base64)
142
- # Some deployments return list[{'answer': '...'}]; others return str
143
- result = client.visual_question_answering(
144
- image=image_bytes,
145
- question=prompt,
146
- max_new_tokens=512
147
- )
148
- if isinstance(result, list) and result and isinstance(result[0], dict) and "answer" in result[0]:
149
- return result[0]["answer"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  if isinstance(result, str):
151
  return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  return str(result)
153
 
154
  # ---------------------------
 
134
 
135
  def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
136
  """
137
+ Calls Hugging Face Hosted Inference API for VQA without extra kwargs that
138
+ some client versions don’t support. Includes robust fallbacks for return types.
139
  """
140
  client = _hf_client(model_id)
141
  image_bytes = base64.b64decode(image_base64)
142
+
143
+ # Primary: simple VQA call (most deployments support this signature)
144
+ try:
145
+ result = client.visual_question_answering(
146
+ image=image_bytes,
147
+ question=prompt
148
+ )
149
+ except TypeError:
150
+ # Fallback for older/newer client variants that don’t expose the helper
151
+ # or expect a different signature. Try the generic .request() path.
152
+ result = client.request(
153
+ task="visual_question_answering",
154
+ data={"inputs": {"question": prompt}},
155
+ files={"image": image_bytes}
156
+ )
157
+
158
+ # Normalize result into a string
159
+ # Possible shapes:
160
+ # - str
161
+ # - [{"answer": "..."}]
162
+ # - {"answer": "..."}
163
+ # - [{"generated_text": "..."}] (some backends)
164
  if isinstance(result, str):
165
  return result
166
+
167
+ if isinstance(result, dict):
168
+ if "answer" in result:
169
+ return result["answer"]
170
+ if "generated_text" in result:
171
+ return result["generated_text"]
172
+
173
+ if isinstance(result, list) and result:
174
+ first = result[0]
175
+ if isinstance(first, dict):
176
+ if "answer" in first:
177
+ return first["answer"]
178
+ if "generated_text" in first:
179
+ return first["generated_text"]
180
+
181
+ # Last resort
182
  return str(result)
183
 
184
  # ---------------------------