"""GAIA question-answering agent built on smolagents with OpenAI-backed tools."""

import requests
from openai import OpenAI
from smolagents import CodeAgent, OpenAIServerModel, tool, Tool, FinalAnswerTool
from smolagents import WikipediaSearchTool, GoogleSearchTool, VisitWebpageTool, PythonInterpreterTool


def get_prompt() -> str:
    """Return the contents of ``prompt.txt`` (resolved against the current working directory)."""
    # Explicit encoding so the prompt decodes the same way on every platform
    # instead of depending on the locale default.
    with open("prompt.txt", "r", encoding="utf-8") as f:
        return f.read()


# NOTE: the docstrings of the @tool functions below are parsed by smolagents
# and shown to the model as the tool description, so their wording is part of
# the program's behaviour.
@tool
def visual_qa(image_url: str, question: str) -> str:
    """
    Provides functionality to perform visual question answering (VQA) by processing an image and a natural language question.

    Args:
        image_url (str): A URL pointing to the location of the image to be analyzed. The URL should be accessible and point to a valid image file.
        question (str): A natural language string containing the question to be answered based on the provided image.

    Returns:
        str: The model-generated answer to the provided question based on the analysis of the image.
    """
    # Imported locally so the tool function is self-contained.
    from openai import OpenAI

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": image_url,
                        "detail": "low",
                    },
                },
            ],
        }],
    )
    return response.choices[0].message.content


@tool
def transcribe_audio(audio_url: str) -> str:
    """
    Provides functionality to perform audio transcription.

    Args:
        audio_url (str): A URL pointing to the location of the audio to be analyzed.

    Returns:
        str: Audio transcription.
    """
    # Imported locally so the tool function is self-contained, mirroring visual_qa.
    import os
    from urllib.parse import urlparse

    import requests
    from openai import OpenAI

    # Bound the download so an unresponsive host cannot hang the agent, and
    # fail loudly on HTTP errors (requests.HTTPError) instead of sending an
    # error page to the transcription endpoint.
    download = requests.get(audio_url, timeout=60)
    download.raise_for_status()

    # Upload as a (filename, bytes) pair: bare bytes carry no name, so the API
    # would have no file extension to recognise the audio format from.
    filename = os.path.basename(urlparse(audio_url).path) or "audio"

    client = OpenAI()
    result = client.audio.transcriptions.create(
        model="gpt-4o-mini-transcribe",
        file=(filename, download.content),
        response_format="text",
    )
    # With response_format="text" the SDK returns a plain string; the
    # structured formats return an object exposing ``.text``. Accept both so
    # this does not raise AttributeError on the string case.
    return result if isinstance(result, str) else result.text


class GAIAAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` configured with search, web, Python and media tools."""

    def __init__(self) -> None:
        """Build the underlying agent and load the prompt text via ``get_prompt``."""
        self.agent = CodeAgent(
            tools=[
                GoogleSearchTool(provider="serper"),
                VisitWebpageTool(),
                WikipediaSearchTool(),
                PythonInterpreterTool(),
                FinalAnswerTool(),
                visual_qa,
                transcribe_audio,
            ],
            model=OpenAIServerModel(model_id='gpt-4.1-mini', max_tokens=4096, temperature=0),
            add_base_tools=False,
            max_steps=15,
            additional_authorized_imports=["pandas"],
        )
        self.prompt = get_prompt()

    def __call__(self, question: str) -> str:
        """Run the agent on the stored prompt, passing ``question`` as an additional argument.

        Args:
            question: The question text, forwarded to the agent under the
                ``"question"`` key of ``additional_args``.

        Returns:
            Whatever ``CodeAgent.run`` returns for this run.
        """
        args = {"question": question}
        return self.agent.run(self.prompt, additional_args=args)


if __name__ == '__main__':
    agent = GAIAAgent()