HF_Agents_Course_GAIA_Agent

Sleeping

App Files Files Community

HF_Agents_Course_GAIA_Agent / src /agents /smolagents_agent.py

agercas

refactor code

b6088cd 8 months ago

raw

history blame contribute delete

9.06 kB

	"""
	Multi-Agent System for GAIA Benchmark using smolagents
	Architecture: Coordinator -> Specialized Agents
	"""

	from typing import Any

	from smolagents import CodeAgent, HfApiModel

	from src.tools import all_tools


	class GAIAMultiAgentSystem:
	"""
	Multi-agent system designed for GAIA benchmark tasks.
	Uses a coordinator agent that delegates to specialized agents.
	"""

	def __init__(self, model_config: dict[str, Any] \| None = None):
	"""
	Initialize the multi-agent system.

	Args:
	model_config: Configuration for the language model
	e.g., {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", "provider": "together"}
	"""
	model_config = model_config or {}
	self.model = HfApiModel(**model_config)
	# self.model = InferenceClientModel(**model_config)
	self.agents = {}
	self._setup_agents()
	self._setup_coordinator()

	def _setup_agents(self):
	"""Set up all specialized agents with their respective tools."""

	# Search Agent - Information retrieval
	search_tools = [
	# Assuming these are your actual tool instances
	# Replace with actual tool references from all_tools
	"wikipedia_search",
	"wikipedia_search_tool",
	"duckduckgo_search",
	"web_search_duckduckgo",
	"arxiv_search",
	"fetch_webpage_content",
	]

	self.agents["search_agent"] = CodeAgent(
	model=self.model,
	tools=[tool for tool in all_tools if tool.name in search_tools],
	name="search_agent",
	description="Retrieves factual information and background data from various sources including Wikipedia, web search, and academic papers",
	verbosity_level=1,
	max_steps=10,
	)

	# Document Agent - Document processing
	document_tools = ["load_csv_file", "load_excel_file", "read_text_file", "transcribe_audio_file"]

	self.agents["document_agent"] = CodeAgent(
	model=self.model,
	tools=[tool for tool in all_tools if tool.name in document_tools],
	name="document_agent",
	description="Loads and processes structured and unstructured documents including CSV, Excel, text files, and audio transcriptions",
	verbosity_level=1,
	max_steps=8,
	)

	# Vision Agent - Image processing
	vision_tools = ["ocr_tool", "image_captioning_tool", "visual_qa_tool"]

	self.agents["vision_agent"] = CodeAgent(
	model=self.model,
	tools=[tool for tool in all_tools if tool.name in vision_tools],
	name="vision_agent",
	description="Extracts text and meaning from images using OCR, captioning, and visual question answering",
	verbosity_level=1,
	max_steps=6,
	)

	# Reasoning Agent - Logic and analysis
	reasoning_tools = ["analyze_chess_position", "analyze_table_commutativity", "count_items_in_list"]

	self.agents["reasoning_agent"] = CodeAgent(
	model=self.model,
	tools=[tool for tool in all_tools if tool.name in reasoning_tools],
	name="reasoning_agent",
	description="Performs symbolic reasoning, logical pattern recognition, and analytical tasks",
	verbosity_level=1,
	max_steps=8,
	)

	# Language Agent - Text processing
	language_tools = ["reverse_string", "reverse_words_in_string"]

	# Note: Language agent might need additional string manipulation tools
	self.agents["language_agent"] = CodeAgent(
	model=self.model,
	tools=[tool for tool in all_tools if tool.name in language_tools],
	name="language_agent",
	description="Handles low-level text transformations and string manipulations",
	verbosity_level=1,
	max_steps=5,
	)

	# Coding Agent - Python execution and logic
	self.agents["coding_agent"] = CodeAgent(
	model=self.model,
	tools=[], # Uses implicit code execution capabilities
	name="coding_agent",
	description="Executes Python code and performs computational logic through code interpretation",
	additional_authorized_imports=[
	"pandas",
	"numpy",
	"matplotlib",
	"json",
	"re",
	"datetime",
	"math",
	"statistics",
	"itertools",
	],
	verbosity_level=1,
	max_steps=10,
	)

	def _setup_coordinator(self):
	"""Set up the coordinator agent that manages other agents."""

	# Collect all managed agents
	managed_agents = list(self.agents.values())

	self.coordinator = CodeAgent(
	model=self.model,
	tools=[], # Coordinator has no direct tools
	managed_agents=managed_agents,
	name="coordinator",
	description="Coordinates and delegates tasks to specialized agents based on task requirements",
	planning_interval=3, # Plan every 3 steps
	verbosity_level=2,
	max_steps=20,
	)

	def analyze_task(self, task: str) -> dict[str, Any]:
	"""
	Analyze a GAIA task to determine which agents might be needed.

	Args:
	task: The task description

	Returns:
	Dictionary with task analysis
	"""
	analysis_prompt = f"""
	Analyze this GAIA benchmark task and determine which types of agents would be most useful:

	Task: {task}

	Available agent types:
	- search_agent: For finding factual information online
	- document_agent: For processing files (CSV, Excel, text, audio)
	- vision_agent: For analyzing images
	- reasoning_agent: For logical analysis and pattern recognition
	- language_agent: For text transformations
	- coding_agent: For computational tasks and data processing

	Provide a brief analysis of what agents would be needed and why.
	"""

	# Use the coordinator's model for analysis
	response = self.model([{"role": "user", "content": analysis_prompt}])
	return {"analysis": response.content, "task": task}

	def solve_task(self, task: str, context: str \| None = None) -> Any:
	"""
	Solve a GAIA benchmark task using the multi-agent system.

	Args:
	task: The task to solve
	context: Optional additional context

	Returns:
	The result from the coordinator agent
	"""

	# Prepare the enhanced prompt for the coordinator
	enhanced_task = f"""
	You are coordinating a team of specialized agents to solve this GAIA benchmark task.

	TASK: {task}

	{f"CONTEXT: {context}" if context else ""}

	Available agents and their capabilities:
	- search_agent: Retrieves information from Wikipedia, web search, ArXiv
	- document_agent: Processes CSV, Excel, text files, and audio transcriptions
	- vision_agent: Analyzes images with OCR, captioning, and visual QA
	- reasoning_agent: Performs logical analysis and pattern recognition
	- language_agent: Handles text transformations and string operations
	- coding_agent: Executes Python code for computational tasks

	Strategy:
	1. Analyze what type of information or processing is needed
	2. Delegate to appropriate specialized agents
	3. Combine results from multiple agents if needed
	4. Provide a final comprehensive answer

	Be systematic and thorough. Use multiple agents when the task requires different types of expertise.
	"""

	return self.coordinator.run(enhanced_task)

	def get_agent_info(self) -> dict[str, dict]:
	"""Get information about all agents in the system."""
	info = {}
	for name, agent in self.agents.items():
	info[name] = {
	"description": agent.description,
	"tools": [tool.name for tool in agent.tools] if hasattr(agent, "tools") else [],
	"max_steps": agent.max_steps,
	}

	info["coordinator"] = {
	"description": self.coordinator.description,
	"managed_agents": [agent.name for agent in self.coordinator.managed_agents],
	"max_steps": self.coordinator.max_steps,
	}

	return info

	def visualize_system(self):
	"""Visualize the multi-agent system structure."""
	if hasattr(self.coordinator, "visualize"):
	return self.coordinator.visualize()
	else:
	print("System Structure:")
	print("Coordinator")
	for agent_name in self.agents.keys():
	print(f" └── {agent_name}")