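# NOTE(review): the next three lines are residue from the web page this file
# was copied from (presumably author avatar text, commit message, commit id).
# abliznyuk's picture
# fixes
# 5e11dec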
import requests
from openai import OpenAI
from smolagents import CodeAgent, OpenAIServerModel, tool, Tool, FinalAnswerTool
from smolagents import WikipediaSearchTool, GoogleSearchTool, VisitWebpageTool, PythonInterpreterTool
def get_prompt() -> str:
    """Return the full contents of ``prompt.txt``.

    The path is relative, so the file is looked up in the current working
    directory of the process.

    Returns:
        The text of the file, unmodified.

    Raises:
        OSError: If ``prompt.txt`` does not exist or cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without an encoding argument, open() uses
    # the platform's locale encoding, so the same file could be read
    # differently (or fail) from one machine to another.
    with open("prompt.txt", "r", encoding="utf-8") as f:
        return f.read()
@tool
def visual_qa(image_url: str, question: str) -> str:
    """
    Provides functionality to perform visual question answering (VQA) by processing an image and a natural language question.

    Args:
        image_url (str): A URL pointing to the location of the image to be analyzed. The URL should be accessible and point to a valid image file.
        question (str): A natural language string containing the question to be answered based on the provided image.

    Returns:
        str: The model-generated answer to the provided question based on the analysis of the image.
    """
    # Imported inside the function so the tool body does not depend on
    # module-level names.
    from openai import OpenAI

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": image_url,
                        "detail": "low",
                    },
                },
            ],
        }],
    )
    answer = response.choices[0].message.content
    # `content` may be None (e.g. when the model returns no text); keep the
    # declared `str` return type by falling back to an empty string.
    return answer if answer is not None else ""
@tool
def transcribe_audio(audio_url: str) -> str:
    """
    Provides functionality to perform audio transcription.

    Args:
        audio_url (str): A URL pointing to the location of the audio to be analyzed.

    Returns:
        str: Audio transcription.
    """
    # Imports are local so the tool body is self-contained, mirroring
    # `visual_qa`.
    import mimetypes
    import posixpath
    from urllib.parse import urlparse

    import requests
    from openai import OpenAI

    # Fail fast on hung connections and HTTP errors instead of sending an
    # error page to the transcription endpoint.
    # Raises requests.RequestException (including HTTPError) on failure.
    download = requests.get(audio_url, timeout=60)
    download.raise_for_status()

    # The upload needs a file name with an extension so the audio format can
    # be recognised; bare bytes carry no name. Prefer the name in the URL path,
    # then the Content-Type header, and finally assume MP3.
    filename = posixpath.basename(urlparse(audio_url).path)
    if not posixpath.splitext(filename)[1]:
        content_type = download.headers.get("Content-Type", "").split(";")[0].strip()
        extension = mimetypes.guess_extension(content_type) if content_type else None
        filename = (filename or "audio") + (extension or ".mp3")

    client = OpenAI()
    r = client.audio.transcriptions.create(
        model="gpt-4o-mini-transcribe",
        file=(filename, download.content),
        response_format="text",
    )
    # With response_format="text" the SDK returns the transcript as a plain
    # string, which has no `.text` attribute; accept either shape.
    return r if isinstance(r, str) else r.text
class GAIAAgent:
    """Callable wrapper around a pre-configured smolagents ``CodeAgent``.

    Construction builds the agent with a fixed tool set and model, then loads
    the prompt via ``get_prompt()``. Calling the instance runs the agent with
    that prompt as the task and the question supplied as an additional
    argument.
    """

    def __init__(self):
        """Build the tools, the model and the agent, then load the prompt."""
        # Tools are instantiated first, in the order they are listed.
        toolbox = [
            GoogleSearchTool(provider="serper"),
            VisitWebpageTool(),
            WikipediaSearchTool(),
            PythonInterpreterTool(),
            FinalAnswerTool(),
            visual_qa,
            transcribe_audio,
        ]
        llm = OpenAIServerModel(
            model_id='gpt-4.1-mini',
            max_tokens=4096,
            temperature=0,
        )
        # add_base_tools=False: only the tools listed above are registered.
        self.agent = CodeAgent(
            tools=toolbox,
            model=llm,
            add_base_tools=False,
            max_steps=15,
            additional_authorized_imports=["pandas"],
        )
        self.prompt = get_prompt()

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return whatever ``agent.run`` returns.

        Args:
            question: The question text, forwarded to the agent under the
                ``"question"`` key of ``additional_args``.
        """
        extra = {"question": question}
        result = self.agent.run(self.prompt, additional_args=extra)
        return result
# Script entry point: when executed directly (not imported), build the agent.
if __name__ == "__main__":
    agent = GAIAAgent()