Leonardo committed on
Commit
1c103fe
·
verified ·
1 Parent(s): 58844af

Update scripts/visual_qa.py

Browse files
Files changed (1) hide show
  1. scripts/visual_qa.py +8 -7
scripts/visual_qa.py CHANGED
@@ -118,8 +118,8 @@ class VisualQATool(Tool):
118
  "question": {"description": "the question to answer", "type": "string", "nullable": True},
119
  }
120
  output_type = "string"
121
-
122
- client = InferenceClient("HuggingFaceM4/idefics2-8b-chatty")
123
 
124
  def forward(self, image_path: str, question: Optional[str] = None) -> str:
125
  output = ""
@@ -151,7 +151,6 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
151
  image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
152
  question: The question to answer.
153
  """
154
-
155
  add_note = False
156
  if not question:
157
  add_note = True
@@ -161,21 +160,21 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
161
 
162
  mime_type, _ = mimetypes.guess_type(image_path)
163
  base64_image = encode_image(image_path)
164
-
165
  payload = {
166
- "model": "gpt-4o",
167
  "messages": [
168
  {
169
  "role": "user",
170
  "content": [
171
- {"type": "text", "text": question},
172
  {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
173
  ],
174
  }
175
  ],
176
  "max_tokens": 1000,
177
  }
178
- response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
179
  try:
180
  output = response.json()["choices"][0]["message"]["content"]
181
  except Exception:
@@ -184,4 +183,6 @@ def visualizer(image_path: str, question: Optional[str] = None) -> str:
184
  if add_note:
185
  output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
186
 
 
 
187
  return output
 
118
  "question": {"description": "the question to answer", "type": "string", "nullable": True},
119
  }
120
  output_type = "string"
121
+ # try to use the same model with two different endpoints
122
+ client = InferenceClient("google/gemma-3-27b-it")
123
 
124
  def forward(self, image_path: str, question: Optional[str] = None) -> str:
125
  output = ""
 
151
  image_path: The path to the image on which to answer the question. This should be a local path to downloaded image.
152
  question: The question to answer.
153
  """
 
154
  add_note = False
155
  if not question:
156
  add_note = True
 
160
 
161
  mime_type, _ = mimetypes.guess_type(image_path)
162
  base64_image = encode_image(image_path)
163
+ # we try to use the same model with two different endpoints; here OpenRouter
164
  payload = {
165
+ "model": "google/gemma-3-27b-it:free",
166
  "messages": [
167
  {
168
  "role": "user",
169
  "content": [
170
+ {"type": "text", "text": "what is in this image" + question},
171
  {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
172
  ],
173
  }
174
  ],
175
  "max_tokens": 1000,
176
  }
177
+ response = requests.post("https://openrouter.ai/api/v1", headers=headers, json=payload)
178
  try:
179
  output = response.json()["choices"][0]["message"]["content"]
180
  except Exception:
 
183
  if add_note:
184
  output = f"You did not provide a particular question, so here is a detailed caption for the image: {output}"
185
 
186
+ # TODO: write to YAML or ChromaDB -> HF Dataset in due course...
187
+
188
  return output