|
|
""" |
|
|
Tool Selector for CodeAct Agent. |
|
|
Pure LLM-based tool selection mechanism similar to Biomni's prompt_based_retrieval. |
|
|
""" |
|
|
|
|
|
import re |
|
|
from typing import Dict, List, Optional |
|
|
from langchain_core.messages import HumanMessage |
|
|
from langchain_core.language_models.chat_models import BaseChatModel |
|
|
|
|
|
|
|
|
class ToolSelector:
    """
    LLM-based tool selection system inspired by Biomni's approach.

    Uses an LLM to intelligently select the most relevant tools for a given task.
    The model is shown a numbered catalogue of tools and asked to answer with a
    comma-separated list of names; the answer is then mapped back onto the
    catalogue, tolerating common formatting deviations.
    """

    # Separators between tool names: commas (as requested in the prompt) and
    # newlines (models frequently answer with one name per line instead).
    _SEPARATOR_RE = re.compile(r'[,\n]')
    # Leading "1. " numbering, the only decoration removed by the strict pass.
    _NUMBERING_RE = re.compile(r'^\d+\.\s*')
    # The prompt ends with "Selected tools:", which models sometimes echo back.
    _ANSWER_PREFIX_RE = re.compile(r'^\s*selected tools\s*:\s*', re.IGNORECASE)
    # "1.", "1)" numbering or "-", "*", bullet list markers.
    _LIST_MARKER_RE = re.compile(r'^(?:\d+[.)]\s*|[-*\u2022]\s+)')
    # Trailing "(source)" annotation copied from the catalogue line.
    _TRAILING_PAREN_RE = re.compile(r'\s*\([^()]*\)$')
    # Characters models commonly wrap names in (markdown code/bold, quotes, brackets).
    _WRAPPER_CHARS = ' \t\r`\'"*[]'

    def __init__(self, model: "BaseChatModel"):
        """
        Initialize the ToolSelector.

        Args:
            model: The language model to use for tool selection
        """
        self.model = model

    def select_tools_for_task(self, query: str, available_tools: Dict[str, Dict], max_tools: int = 15) -> List[str]:
        """
        Use LLM-based selection to choose the most relevant tools for a query.
        Inspired by Biomni's prompt_based_retrieval mechanism.

        Args:
            query: The user's query/task description
            available_tools: Dictionary of {tool_name: tool_info} available
            max_tools: Maximum number of tools to select

        Returns:
            List of selected tool names (keys of ``available_tools``), at most
            ``max_tools`` long. If anything fails while querying the model or
            reading its answer, the first ``max_tools`` tool names are returned
            instead (best-effort fallback).
        """
        if not available_tools:
            return []

        tools_list = self._format_tools_for_prompt(available_tools)
        selection_prompt = self._build_selection_prompt(query, tools_list, max_tools)

        try:
            response = self.model.invoke([HumanMessage(content=selection_prompt)])
            response_content = self._response_text(response.content).strip()
            selected_tools = self._parse_tool_selection_response(response_content, available_tools)
            return selected_tools[:max_tools]
        except Exception as e:
            # Deliberate best-effort: tool selection must never abort the caller,
            # so any failure degrades to "first N tools in catalogue order".
            print(f"Error in LLM-based tool selection: {e}")
            return list(available_tools.keys())[:max_tools]

    @staticmethod
    def _build_selection_prompt(query: str, tools_list: str, max_tools: int) -> str:
        """Assemble the selection prompt sent to the model."""
        return (
            "You are an expert biomedical research assistant. Your task is to select "
            "the most relevant tools to help answer a user's query.\n"
            "\n"
            f"USER QUERY: {query}\n"
            "\n"
            "Below are the available tools. Select items that are directly or "
            "indirectly relevant to answering the query.\n"
            "Be generous in your selection - include tools that might be useful for "
            "the task, even if they're not explicitly mentioned in the query.\n"
            "It's better to include slightly more tools than to miss potentially useful ones.\n"
            "\n"
            "AVAILABLE TOOLS:\n"
            f"{tools_list}\n"
            "\n"
            f"Select up to {max_tools} tools that would be most helpful for this task.\n"
            "\n"
            "Respond with ONLY a comma-separated list of the exact tool names, like this:\n"
            "tool_name_1, tool_name_2, tool_name_3\n"
            "\n"
            "Selected tools:"
        )

    @staticmethod
    def _response_text(content) -> str:
        """
        Flatten a message ``content`` value into plain text.

        A plain string is returned unchanged. A list is treated as a sequence of
        content parts: string items and the ``"text"`` value of dict items are
        concatenated, other items are ignored.

        Raises:
            TypeError: If ``content`` is neither a string nor a list, so the
                caller's fallback path is taken just as it was before.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get('text'), str):
                    pieces.append(part['text'])
            return "".join(pieces)
        raise TypeError(f"Unsupported response content type: {type(content).__name__}")

    def _format_tools_for_prompt(self, tools: Dict[str, Dict]) -> str:
        """
        Format tools for the LLM prompt.

        Produces one line per tool, numbered from 1, in the form
        ``"<n>. <name> (<source>): <description>"``. Missing ``description`` or
        ``source`` entries fall back to placeholder text.
        """
        formatted = []
        for i, (tool_name, tool_info) in enumerate(tools.items(), 1):
            # Tolerate a missing/None info entry instead of crashing on .get().
            info = tool_info or {}
            description = info.get('description', 'No description available')
            source = info.get('source', 'unknown')
            formatted.append(f"{i}. {tool_name} ({source}): {description}")
        return "\n".join(formatted)

    def _candidate_forms(self, raw: str) -> List[str]:
        """
        Return progressively more lenient spellings of one response fragment.

        The first form applies only the original clean-up (strip whitespace and
        ``"N. "`` numbering), so anything that matched before still matches the
        same way. Later forms additionally drop an echoed ``"Selected tools:"``
        prefix, list markers, wrapping quotes/backticks/brackets, a trailing
        period, and finally a trailing ``"(...)"`` annotation.
        """
        stripped = raw.strip()
        strict = self._NUMBERING_RE.sub('', stripped).strip()

        lenient = self._ANSWER_PREFIX_RE.sub('', stripped)
        lenient = self._LIST_MARKER_RE.sub('', lenient)
        lenient = lenient.strip(self._WRAPPER_CHARS).rstrip('.').strip(self._WRAPPER_CHARS)

        without_annotation = self._TRAILING_PAREN_RE.sub('', lenient).strip(self._WRAPPER_CHARS)
        return [strict, lenient, without_annotation]

    def _parse_tool_selection_response(self, response: str, available_tools: Dict[str, Dict]) -> List[str]:
        """
        Parse the LLM response to extract valid tool names.

        The response is split on commas and newlines; each fragment is matched
        case-insensitively against the keys of ``available_tools``. Fragments
        that match no tool are ignored.

        Args:
            response: Raw text returned by the model
            available_tools: Dictionary of {tool_name: tool_info} available

        Returns:
            Matching tool names, spelled as in ``available_tools``, in the order
            they first appear in the response and without duplicates.
        """
        if not response:
            return []

        # Case-insensitive index; setdefault keeps the first tool when two names
        # differ only by case, matching the original first-match-wins scan.
        lookup = {}
        for tool_name in available_tools:
            lookup.setdefault(tool_name.lower(), tool_name)

        selected_tools = []
        seen = set()
        for fragment in self._SEPARATOR_RE.split(response):
            for form in self._candidate_forms(fragment):
                tool_name = lookup.get(form.lower())
                if tool_name is None:
                    continue
                if tool_name not in seen:
                    seen.add(tool_name)
                    selected_tools.append(tool_name)
                break

        return selected_tools