Agent_Course_Final_Assignment

Sleeping

App Files Files Community

abliznyuk committed on Jun 30, 2025

Commit

a73f583

1 Parent(s): e9204df

switch to 4o-mini, add VQA tool

Browse files

Files changed (2) hide show

agent.py +62 -4
requirements.txt +2 -1

agent.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from smolagents import CodeAgent, OpenAIServerModel
 from smolagents import WikipediaSearchTool, GoogleSearchTool, VisitWebpageTool, PythonInterpreterTool
@@ -7,6 +7,63 @@ def get_prompt():
         return f.read()
 class GAIAAgent:
     def __init__(self):
         self.agent = CodeAgent(
@@ -15,10 +72,12 @@ class GAIAAgent:
                 VisitWebpageTool(),
                 WikipediaSearchTool(),
                 PythonInterpreterTool(),
             ],
-            model=OpenAIServerModel(model_id='gpt-4.1', max_tokens=4096, temperature=0),
             add_base_tools=False,
-            max_steps=15,
         )
         self.prompt = get_prompt()
@@ -28,4 +87,3 @@ class GAIAAgent:
 if __name__ == '__main__':
     agent = GAIAAgent()
-    agent("What is the meaning of life?")

+from smolagents import CodeAgent, OpenAIServerModel, tool, Tool
 from smolagents import WikipediaSearchTool, GoogleSearchTool, VisitWebpageTool, PythonInterpreterTool
         return f.read()
+@tool
+def visual_qa(image_url: str, question: str) -> str:
+    """
+    Provides functionality to perform visual question answering (VQA) by processing an image and a natural language question.
+    Args:
+        image_url: str
+            A URL pointing to the location of the image to be analyzed. The URL
+            should be accessible and point to a valid image file.
+        question: str
+            A natural language string containing the question to be answered
+            based on the provided image.
+    Returns:
+        str
+            The model-generated answer to the provided question based on the
+            analysis of the image.
+    Raises:
+        Exception
+            If there is any issue with the API request, such as connection
+            errors or invalid inputs.
+    """
+    from openai import OpenAI
+    client = OpenAI()
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": question},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url,
+                        "detail": "low"
+                    },
+                },
+            ],
+        }],
+    )
+    return response.choices[0].message.content
+class FinalAnswerTool(Tool):
+    name = "final_answer"
+    description = "Provides a final answer to the given problem."
+    inputs = {"answer": {"type": "any", "description": "The final answer to the problem"}}
+    output_type = "any"
+    def forward(self, answer: str) -> str:
+        if "final answer:" in answer.lower():
+            return answer.lower().split("final answer:")[1].strip()
+        return answer
 class GAIAAgent:
     def __init__(self):
         self.agent = CodeAgent(
                 VisitWebpageTool(),
                 WikipediaSearchTool(),
                 PythonInterpreterTool(),
+                FinalAnswerTool(),
+                visual_qa,
             ],
+            model=OpenAIServerModel(model_id='gpt-4o-mini', max_tokens=4096, temperature=0),
             add_base_tools=False,
+            max_steps=10,
         )
         self.prompt = get_prompt()
 if __name__ == '__main__':
     agent = GAIAAgent()

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ smolagents[openai]==1.19.0
 wikipedia-api==0.8.1
 duckduckgo-search==8.0.4
 gradio==5.35.0
-requests

 wikipedia-api==0.8.1
 duckduckgo-search==8.0.4
 gradio==5.35.0
+requests
+markdownify