Leonardo committed on
Commit
1c103fe
·
verified ·
1 Parent(s): 58844af

Update scripts/visual_qa.py

Browse files
Files changed (1) hide show
  1. scripts/visual_qa.py +8 -7
scripts/visual_qa.py CHANGED
@@ -118,8 +118,8 @@ class VisualQATool(Tool):
118
  "question": {"description": "the question to answer", "type": "string", "nullable": True},
119
  }
120
  output_type = "string"
121
-
122
- client = InferenceClient("HuggingFaceM4/idefics2-8b-chatty")
123
 
124
  def forward(self, image_path: str, question: Optional[str] = None) -> str:
125
  output = ""
@@ -151,7 +151,6 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
151
  image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
152
  question: The question to answer.
153
  """
154
-
155
  add_note = False
156
  if not question:
157
  add_note = True
@@ -161,21 +160,21 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
161
 
162
  mime_type, _ = mimetypes.guess_type(image_path)
163
  base64_image = encode_image(image_path)
164
-
165
  payload = {
166
- "model": "gpt-4o",
167
  "messages": [
168
  {
169
  "role": "user",
170
  "content": [
171
- {"type": "text", "text": question},
172
  {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
173
  ],
174
  }
175
  ],
176
  "max_tokens": 1000,
177
  }
178
- response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
179
  try:
180
  output = response.json()["choices"][0]["message"]["content"]
181
  except Exception:
@@ -184,4 +183,6 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
184
  if add_note:
185
  output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
186
 
 
 
187
  return output
 
118
  "question": {"description": "the question to answer", "type": "string", "nullable": True},
119
  }
120
  output_type = "string"
121
+ # try to use the same model with two different endpoints
122
+ client = InferenceClient("google/gemma-3-27b-it")
123
 
124
  def forward(self, image_path: str, question: Optional[str] = None) -> str:
125
  output = ""
 
151
  image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
152
  question: The question to answer.
153
  """
 
154
  add_note = False
155
  if not question:
156
  add_note = True
 
160
 
161
  mime_type, _ = mimetypes.guess_type(image_path)
162
  base64_image = encode_image(image_path)
163
+ # we try to use the same model with two different endpoints; here OpenRouter
164
  payload = {
165
+ "model": "google/gemma-3-27b-it:free",
166
  "messages": [
167
  {
168
  "role": "user",
169
  "content": [
170
+ {"type": "text", "text": "what is in this image" + question},
171
  {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
172
  ],
173
  }
174
  ],
175
  "max_tokens": 1000,
176
  }
177
+ response = requests.post("https://openrouter.ai/api/v1", headers=headers, json=payload)
178
  try:
179
  output = response.json()["choices"][0]["message"]["content"]
180
  except Exception:
 
183
  if add_note:
184
  output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
185
 
186
+ # TODO: write to YAML or ChromaDB -> HF Dataset in due course...
187
+
188
  return output