""" Tool Selector for CodeAct Agent. Pure LLM-based tool selection mechanism similar to Biomni's prompt_based_retrieval. """ import re from typing import Dict, List, Optional from langchain_core.messages import HumanMessage from langchain_core.language_models.chat_models import BaseChatModel class ToolSelector: """ LLM-based tool selection system inspired by Biomni's approach. Uses an LLM to intelligently select the most relevant tools for a given task. """ def __init__(self, model: BaseChatModel): """ Initialize the ToolSelector. Args: model: The language model to use for tool selection """ self.model = model def select_tools_for_task(self, query: str, available_tools: Dict[str, Dict], max_tools: int = 15) -> List[str]: """ Use LLM-based selection to choose the most relevant tools for a query. Inspired by Biomni's prompt_based_retrieval mechanism. Args: query: The user's query/task description available_tools: Dictionary of {tool_name: tool_info} available max_tools: Maximum number of tools to select Returns: List of selected tool names """ if not available_tools: return [] # Format tools for LLM prompt tools_list = self._format_tools_for_prompt(available_tools) # Create selection prompt (similar to Biomni's approach) selection_prompt = f"""You are an expert biomedical research assistant. Your task is to select the most relevant tools to help answer a user's query. USER QUERY: {query} Below are the available tools. Select items that are directly or indirectly relevant to answering the query. Be generous in your selection - include tools that might be useful for the task, even if they're not explicitly mentioned in the query. It's better to include slightly more tools than to miss potentially useful ones. AVAILABLE TOOLS: {tools_list} Select up to {max_tools} tools that would be most helpful for this task. Respond with ONLY a comma-separated list of the exact tool names, like this: tool_name_1, tool_name_2, tool_name_3 Selected tools:""" try: # Get LLM response response = self.model.invoke([HumanMessage(content=selection_prompt)]) response_content = response.content.strip() # Parse the response to extract tool names selected_tools = self._parse_tool_selection_response(response_content, available_tools) # Ensure we don't exceed max_tools return selected_tools[:max_tools] except Exception as e: print(f"Error in LLM-based tool selection: {e}") # Return all tools if LLM fails (no keyword fallback) return list(available_tools.keys())[:max_tools] def _format_tools_for_prompt(self, tools: Dict[str, Dict]) -> str: """Format tools for the LLM prompt.""" formatted = [] for i, (tool_name, tool_info) in enumerate(tools.items(), 1): description = tool_info.get('description', 'No description available') source = tool_info.get('source', 'unknown') formatted.append(f"{i}. {tool_name} ({source}): {description}") return "\n".join(formatted) def _parse_tool_selection_response(self, response: str, available_tools: Dict[str, Dict]) -> List[str]: """Parse the LLM response to extract valid tool names.""" selected_tools = [] # Split by commas and clean up tool_candidates = [name.strip() for name in response.split(',')] for candidate in tool_candidates: # Remove any extra characters, numbers, or formatting clean_candidate = re.sub(r'^\d+\.\s*', '', candidate) # Remove "1. " prefixes clean_candidate = clean_candidate.strip() # Check if this matches any available tool (case-insensitive) for tool_name in available_tools.keys(): if clean_candidate.lower() == tool_name.lower(): if tool_name not in selected_tools: # Avoid duplicates selected_tools.append(tool_name) break return selected_tools