Spaces:
Runtime error
Runtime error
Leonardo
committed on
Update scripts/visual_qa.py
Browse files- scripts/visual_qa.py +8 -7
scripts/visual_qa.py
CHANGED
|
@@ -118,8 +118,8 @@ class VisualQATool(Tool):
|
|
| 118 |
"question": {"description": "the question to answer", "type": "string", "nullable": True},
|
| 119 |
}
|
| 120 |
output_type = "string"
|
| 121 |
-
|
| 122 |
-
client = InferenceClient("
|
| 123 |
|
| 124 |
def forward(self, image_path: str, question: Optional[str] = None) -> str:
|
| 125 |
output = ""
|
|
@@ -151,7 +151,6 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
|
|
| 151 |
image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
|
| 152 |
question: The question to answer.
|
| 153 |
"""
|
| 154 |
-
|
| 155 |
add_note = False
|
| 156 |
if not question:
|
| 157 |
add_note = True
|
|
@@ -161,21 +160,21 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
|
|
| 161 |
|
| 162 |
mime_type, _ = mimetypes.guess_type(image_path)
|
| 163 |
base64_image = encode_image(image_path)
|
| 164 |
-
|
| 165 |
payload = {
|
| 166 |
-
"model": "
|
| 167 |
"messages": [
|
| 168 |
{
|
| 169 |
"role": "user",
|
| 170 |
"content": [
|
| 171 |
-
{"type": "text", "text": question},
|
| 172 |
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
|
| 173 |
],
|
| 174 |
}
|
| 175 |
],
|
| 176 |
"max_tokens": 1000,
|
| 177 |
}
|
| 178 |
-
response = requests.post("https://
|
| 179 |
try:
|
| 180 |
output = response.json()["choices"][0]["message"]["content"]
|
| 181 |
except Exception:
|
|
@@ -184,4 +183,6 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
|
|
| 184 |
if add_note:
|
| 185 |
output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
|
| 186 |
|
|
|
|
|
|
|
| 187 |
return output
|
|
|
|
| 118 |
"question": {"description": "the question to answer", "type": "string", "nullable": True},
|
| 119 |
}
|
| 120 |
output_type = "string"
|
| 121 |
+
# try use the same model with two different endpoints
|
| 122 |
+
client = InferenceClient("google/gemma-3-27b-it")
|
| 123 |
|
| 124 |
def forward(self, image_path: str, question: Optional[str] = None) -> str:
|
| 125 |
output = ""
|
|
|
|
| 151 |
image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
|
| 152 |
question: The question to answer.
|
| 153 |
"""
|
|
|
|
| 154 |
add_note = False
|
| 155 |
if not question:
|
| 156 |
add_note = True
|
|
|
|
| 160 |
|
| 161 |
mime_type, _ = mimetypes.guess_type(image_path)
|
| 162 |
base64_image = encode_image(image_path)
|
| 163 |
+
# we try use the same model with two different endpoints; here openrouter
|
| 164 |
payload = {
|
| 165 |
+
"model": "google/gemma-3-27b-it:free",
|
| 166 |
"messages": [
|
| 167 |
{
|
| 168 |
"role": "user",
|
| 169 |
"content": [
|
| 170 |
+
{"type": "text", "text": "what is in this image" + question},
|
| 171 |
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
|
| 172 |
],
|
| 173 |
}
|
| 174 |
],
|
| 175 |
"max_tokens": 1000,
|
| 176 |
}
|
| 177 |
+
response = requests.post("https://openrouter.ai/api/v1", headers=headers, json=payload)
|
| 178 |
try:
|
| 179 |
output = response.json()["choices"][0]["message"]["content"]
|
| 180 |
except Exception:
|
|
|
|
| 183 |
if add_note:
|
| 184 |
output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
|
| 185 |
|
| 186 |
+
# TO DO: write to yaml or chromadb -> HF Dataset in due course...
|
| 187 |
+
|
| 188 |
return output
|