Spaces:

Paper2Agent
/

alphagenome_agent

Running

App Files Files Community

alphagenome_agent / managers /tools /tool_selector.py

Paper2Agent

Upload 56 files

8b54db1 verified 4 months ago

raw

history blame contribute delete

4.28 kB

	"""
	Tool Selector for CodeAct Agent.
	Pure LLM-based tool selection mechanism similar to Biomni's prompt_based_retrieval.
	"""

	import re
	from typing import Dict, List, Optional
	from langchain_core.messages import HumanMessage
	from langchain_core.language_models.chat_models import BaseChatModel


	class ToolSelector:
	    """
	    LLM-based tool selection system inspired by Biomni's approach.

	    Asks a chat model to pick, from a catalogue of available tools, the ones
	    relevant to a user query, then validates the model's answer against the
	    catalogue so that only known tool names are ever returned.
	    """

	    # Original clean-up: a leading "1. " style enumeration prefix.
	    _NUMBER_PREFIX_RE = re.compile(r'^\d+\.\s*')
	    # A "Selected tools:" label echoed back from the end of the prompt.
	    _LABEL_RE = re.compile(r'^selected tools\s*:\s*', re.IGNORECASE)
	    # List markers: "1." / "1)" enumerations and "-" / bullet characters.
	    _LIST_MARKER_RE = re.compile(r'^(?:\d+[.)]|[-\u2022])\s*')
	    # Decoration LLMs commonly wrap names in (quotes, backticks, bold, brackets).
	    _WRAPPERS = " \t\r\n`'\"*[](){}"

	    def __init__(self, model: "BaseChatModel"):
	        """
	        Initialize the ToolSelector.

	        Args:
	            model: The language model to use for tool selection. The
	                annotation is quoted so it is not evaluated at definition time.
	        """
	        self.model = model

	    def select_tools_for_task(self, query: str, available_tools: Dict[str, Dict], max_tools: int = 15) -> List[str]:
	        """
	        Use LLM-based selection to choose the most relevant tools for a query.
	        Inspired by Biomni's prompt_based_retrieval mechanism.

	        Args:
	            query: The user's query/task description
	            available_tools: Dictionary of {tool_name: tool_info} available
	            max_tools: Maximum number of tools to select

	        Returns:
	            List of selected tool names (at most ``max_tools``), spelled exactly
	            as they appear in ``available_tools``. Empty when there are no
	            tools or ``max_tools`` is not positive. If the model call or the
	            parsing raises, the first ``max_tools`` available tool names are
	            returned instead (best-effort fallback).
	        """
	        # A non-positive limit can never yield a selection; bail out before
	        # spending an LLM call (and before a negative slice misbehaves).
	        if not available_tools or max_tools <= 0:
	            return []

	        # Format tools for LLM prompt
	        tools_list = self._format_tools_for_prompt(available_tools)

	        # Selection prompt (similar to Biomni's approach). Built from explicit
	        # per-line literals so its text does not depend on source indentation.
	        selection_prompt = (
	            "You are an expert biomedical research assistant. Your task is to select the most relevant tools to help answer a user's query.\n"
	            "\n"
	            f"USER QUERY: {query}\n"
	            "\n"
	            "Below are the available tools. Select items that are directly or indirectly relevant to answering the query.\n"
	            "Be generous in your selection - include tools that might be useful for the task, even if they're not explicitly mentioned in the query.\n"
	            "It's better to include slightly more tools than to miss potentially useful ones.\n"
	            "\n"
	            "AVAILABLE TOOLS:\n"
	            f"{tools_list}\n"
	            "\n"
	            f"Select up to {max_tools} tools that would be most helpful for this task.\n"
	            "\n"
	            "Respond with ONLY a comma-separated list of the exact tool names, like this:\n"
	            "tool_name_1, tool_name_2, tool_name_3\n"
	            "\n"
	            "Selected tools:"
	        )

	        try:
	            # Get LLM response
	            response = self.model.invoke([HumanMessage(content=selection_prompt)])
	            response_content = self._response_text(response.content)

	            # Parse the response to extract tool names
	            selected_tools = self._parse_tool_selection_response(response_content, available_tools)

	            # Ensure we don't exceed max_tools
	            return selected_tools[:max_tools]

	        except Exception as e:
	            # Deliberate best-effort boundary: any failure degrades to
	            # "offer the first max_tools tools" rather than crashing the agent.
	            print(f"Error in LLM-based tool selection: {e}")
	            # Return all tools if LLM fails (no keyword fallback)
	            return list(available_tools)[:max_tools]

	    @staticmethod
	    def _response_text(content) -> str:
	        """
	        Flatten a chat-message ``content`` payload into stripped plain text.

	        Args:
	            content: Either a string, or a list whose items are strings or
	                ``{"type": "text", "text": ...}`` dicts. Other list items are
	                ignored. (The list form is what LangChain documents for
	                multi-part message content -- confirm against the model in use.)

	        Returns:
	            The concatenated text with surrounding whitespace removed.

	        Raises:
	            TypeError: If ``content`` is neither a string nor a list/tuple, so
	                the caller's fallback path is taken for unexpected payloads.
	        """
	        if isinstance(content, str):
	            return content.strip()
	        if isinstance(content, (list, tuple)):
	            parts = []
	            for block in content:
	                if isinstance(block, str):
	                    parts.append(block)
	                elif (
	                    isinstance(block, dict)
	                    and block.get("type") == "text"
	                    and isinstance(block.get("text"), str)
	                ):
	                    parts.append(block["text"])
	            return "".join(parts).strip()
	        raise TypeError(f"Unsupported response content type: {type(content).__name__}")

	    def _format_tools_for_prompt(self, tools: Dict[str, Dict]) -> str:
	        """
	        Format tools for the LLM prompt.

	        Args:
	            tools: Dictionary of {tool_name: tool_info}. ``tool_info`` may
	                provide 'description' and 'source'; missing keys (or a
	                ``None`` info value) fall back to placeholder text.

	        Returns:
	            One line per tool, numbered from 1, joined with newlines:
	            ``"<i>. <name> (<source>): <description>"``.
	        """
	        formatted = []
	        for i, (tool_name, tool_info) in enumerate(tools.items(), 1):
	            info = tool_info or {}  # tolerate None instead of crashing on .get
	            description = info.get('description', 'No description available')
	            source = info.get('source', 'unknown')
	            formatted.append(f"{i}. {tool_name} ({source}): {description}")
	        return "\n".join(formatted)

	    def _parse_tool_selection_response(self, response: str, available_tools: Dict[str, Dict]) -> List[str]:
	        """
	        Parse the LLM response to extract valid tool names.

	        The response is split on commas and newlines. Each piece is matched
	        case-insensitively against ``available_tools``, first after removing
	        only a leading "N. " prefix, then after also removing an echoed
	        "Selected tools:" label, list markers, wrapping quotes/backticks/
	        asterisks/brackets and a trailing period.

	        Args:
	            response: Plain-text model answer
	            available_tools: Dictionary of {tool_name: tool_info} available

	        Returns:
	            Matching tool names in order of first appearance, without
	            duplicates, spelled as in ``available_tools``. Unknown names are
	            dropped.
	        """
	        # Case-insensitive lookup built once; setdefault keeps the first tool
	        # when two names differ only by case.
	        by_lower = {}
	        for tool_name in available_tools:
	            by_lower.setdefault(tool_name.lower(), tool_name)

	        selected_tools = []
	        seen = set()
	        for candidate in re.split(r'[,\n]', response):
	            for form in self._candidate_forms(candidate):
	                tool_name = by_lower.get(form.lower())
	                if tool_name is None:
	                    continue
	                if tool_name not in seen:  # Avoid duplicates
	                    seen.add(tool_name)
	                    selected_tools.append(tool_name)
	                break

	        return selected_tools

	    @classmethod
	    def _candidate_forms(cls, candidate: str) -> List[str]:
	        """Return the spellings of one response fragment to try, strictest first."""
	        raw = candidate.strip()
	        # Strict form: only the "1. " prefix is removed.
	        strict = cls._NUMBER_PREFIX_RE.sub('', raw).strip()
	        # Lenient form: also drop the echoed label, list markers and decoration.
	        lenient = cls._LABEL_RE.sub('', raw).lstrip(cls._WRAPPERS)
	        lenient = cls._LIST_MARKER_RE.sub('', lenient)
	        lenient = lenient.strip(cls._WRAPPERS).rstrip(cls._WRAPPERS + ".")
	        return [strict] if lenient == strict else [strict, lenient]