GaiaAgent_Final_Assignment

Running

App Files Files Community

GaiaAgent_Final_Assignment / agent.py

Francesco-A

FIX: removed LocalAgent

6cc7969 about 9 hours ago

raw

history blame contribute delete

8.03 kB


	# Generic agent
	import os
	from typing import Optional
	import pandas as pd

	# Smolagents imports
	from smolagents import (
	CodeAgent,
	InferenceClientModel,
	TransformersModel,
	LiteLLMModel,
	Tool,
	tool,
	DuckDuckGoSearchTool,
	VisitWebpageTool,
	WikipediaSearchTool,
	PythonInterpreterTool,
	FinalAnswerTool,
	)

	# Import your custom tools (to be used in app, not in local notebook)
	from tools.download_file import download_file_from_url
	from tools.files_to_text import image_to_text, pdf_to_text, text_file_to_string
	from tools.audio_tools import youtube_to_text, transcribe_audio

	# Tools handed to the agents defined in this file.
	AGENT_TOOLS = [
		# smolagents built-in tools
		DuckDuckGoSearchTool(),   # internet search
		VisitWebpageTool(),       # retrieve the content of a web page
		PythonInterpreterTool(),  # execute Python code generated by the agent
		FinalAnswerTool(),        # end the reasoning and return the final answer
	] + [
		# custom tools imported from the local `tools` package
		download_file_from_url,  # file downloader
		text_file_to_string,     # .txt, .md, .json, etc.
		pdf_to_text,             # PDF parser (PyMuPDF-based)
		image_to_text,           # OCR for images
		youtube_to_text,         # YouTube audio to text
		transcribe_audio,        # audio file to text
	]

	# System prompt that the agents in this file prepend to every question.
	# The markdown table uses plain `|` separators: a backslash before `|` is not a
	# valid Python escape sequence and would leak literal backslashes into the prompt.
	SYSTEM_PROMPT = """
	You are an expert General AI Assistant and Python Programmer tasked with solving complex GAIA benchmark problems.

	### 1. Reason-Act-Observe
	Follow a PLAN → ACT → OBSERVE loop:
	- PLAN: Break the task into 1–3 logical steps. Identify tools for each step.
	- ACT: Write and run one self-contained Python block per step.
	- OBSERVE: Examine outputs or errors before proceeding.

	### 2. File Handling
	- When a tool like `download_file_from_url` returns a local file path (e.g., `/tmp/data.csv`), you MUST save this path to a descriptive variable (e.g., `filepath`) and immediately use that variable as the argument for the next file-reading tool.

	You must select the reading or transcription method strictly based on the file type or source, following the rules below.

	| File Type / Source | Tool / Method to Use |
	| :--- | :--- |
	| `.csv` | `pd.read_csv(filepath)` |
	| `.xlsx`, `.xls` | `pd.read_excel(filepath)` |
	| `.pdf` | `pdf_to_text(filepath)` |
	| `.txt`, `.md`, `.json` | `text_file_to_string(filepath)` |
	| `.png`, `.jpg`, `.jpeg` | `image_to_text(filepath)` |
	| YouTube URL | `youtube_to_text(url)` |
	| `.mp3`, `.wav`, `.m4a`, `.flac`, `.ogg` | `transcribe_audio(filepath)` |

	Important rules:
	- When a tool returns a local file path, you must store it in a variable (e.g. `filepath`) and pass that variable directly to the next tool.
	- You must not mix methods across file types (e.g. do not use Whisper for CSVs or pandas for audio).
	- For YouTube links, always attempt `youtube_to_text` first; it will automatically fall back to Whisper if captions are unavailable.

	### 3. Data Analysis & Answer
	- Inspect loaded datasets first (`.head()`, `.info()`, `.describe()`) before analysis.
	- Write clean, idiomatic Python code. Before that, check if there is any pre-made tool that would work for the task.
	- Use `FinalAnswerTool` only once the problem is fully solved to give a concise final answer.

	### 4. Additional instructions for the following tasks provided by GAIA team
	- You are a general AI assistant. I will ask you a question. Do not reveal your internal reasoning. Only the content inside FinalAnswerTool will be evaluated.
	- Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

	### 5. To provide the final answer, you MUST call the final_answer tool inside a <code> block.

	- Example of how to end the task:

	Thought: I have found the answer. I will now provide it.
	<code>
	final_answer("FINAL ANSWER: The capital of France is Paris")
	</code>

	\n\n
	"""

	class BasicAgent:
		"""Question-answering agent: a smolagents CodeAgent on an InferenceClientModel.

		Calling an instance builds a prompt from the system prompt, the question and
		an optional file reference, runs the agent on it and returns the run result.
		"""

		def __init__(self):
			"""Create the model and the CodeAgent, then print a confirmation line."""
			self.system_prompt = SYSTEM_PROMPT
			self.tools = AGENT_TOOLS
			self.model = InferenceClientModel(
				model_id="Qwen/Qwen3-Next-80B-A3B-Thinking",
				temperature=0.0,
				top_p=1.0,
				max_tokens=8196,
			)

			# Modules the agent-generated code is allowed to import.
			allowed_imports = [
				'numpy', 'subprocess', 're', 'pandas',
				'json', 'os', 'datetime', 'tempfile',
			]
			self.basic_agent = CodeAgent(
				name="basic_agent",
				description="Basic smolagents CodeAgent",
				model=self.model,
				tools=self.tools,
				add_base_tools=True,  # probably redundant with AGENT_TOOLS, kept as in the original
				max_steps=5,
				additional_authorized_imports=allowed_imports,
				verbosity_level=1,
				max_print_outputs_length=1_000_000,
			)

			print("✅ Basic agent initialized")

		def __call__(self, question: str, file_path: Optional[str] = None) -> str:
			"""Run the agent on `question` and return the result of the run.

			Args:
				question: Text of the question to answer.
				file_path: Optional path of a file associated with the question; when
					truthy, the prompt tells the agent where the file is and to read it.
			"""
			# The system prompt is injected into the task text itself.
			sections = [
				f"{self.system_prompt}\n\n",
				f"Question: {question}\n\n",
			]
			if file_path:
				sections.append(f"There is an associated file at path: {file_path}.\n")
				sections.append(
					"Use the appropriate tool to download it (if necessary) and read it before answering"
				)

			return self.basic_agent.run("".join(sections))

	class GeminiAgent:
		"""Question-answering agent: a smolagents CodeAgent on a Gemini model via LiteLLM.

		Calling an instance builds a prompt from the system prompt, the question and
		an optional file reference, runs the agent on it and returns the run result.
		"""

		def __init__(self):
			"""Create the Gemini model and the CodeAgent, then print a confirmation line.

			Raises:
				RuntimeError: If the GOOGLE_API_KEY environment variable is missing or empty.
			"""
			self.system_prompt = SYSTEM_PROMPT

			api_key = os.environ.get("GOOGLE_API_KEY")
			if not api_key:
				raise RuntimeError("GOOGLE_API_KEY not found.")

			self.model = LiteLLMModel(
				model_id="gemini/gemini-2.0-flash",
				api_key=api_key,
				temperature=0.0,
				top_p=1.0,
				max_tokens=8196,
			)
			self.tools = AGENT_TOOLS

			# Modules the agent-generated code is allowed to import.
			allowed_imports = [
				'numpy', 'subprocess', 're', 'pandas',
				'json', 'os', 'datetime', 'tempfile',
			]
			self.gemini_agent = CodeAgent(
				name="gemini_agent",
				description="Gemini CodeAgent",
				model=self.model,
				tools=self.tools,
				add_base_tools=True,  # probably redundant with AGENT_TOOLS, kept as in the original
				max_steps=5,
				additional_authorized_imports=allowed_imports,
				verbosity_level=1,
				max_print_outputs_length=1_000_000,
			)

			print("✅ Gemini agent initialized")

		def __call__(self, question: str, file_path: Optional[str] = None) -> str:
			"""Run the agent on `question` and return the result of the run.

			Args:
				question: Text of the question to answer.
				file_path: Optional path of a file associated with the question; when
					truthy, the prompt tells the agent where the file is and to read it.
			"""
			# The system prompt is injected into the task text itself.
			sections = [
				f"{self.system_prompt}\n\n",
				f"Question: {question}\n\n",
			]
			if file_path:
				sections.append(f"There is an associated file at path: {file_path}.\n")
				sections.append(
					"Use the appropriate tool to download it (if necessary) and read it before answering"
				)

			return self.gemini_agent.run("".join(sections))