import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Tuple

logger = logging.getLogger(__name__)

# Tools the LLM planner may schedule. "llm" is excluded on purpose: the
# synthesis step is always appended by the selector itself, and anything else
# (notably "admin") must never be reachable through model output.
_PLANNABLE_TOOLS = ("rag", "web")

# All heuristics are anchored at a word boundary so that short tokens do not
# fire inside unrelated words ("now" in "know", "our " in "four ", ...).

# Internal knowledge, company-specific, documentation.
_RAG_RE = re.compile(
    r"\b(?:company|internal|documentation|our |your |knowledge base"
    r"|private|internal docs|corporate)"
)

# Fact lookup / definition.
_FACT_RE = re.compile(
    r"\b(?:what is |who is |where is |tell me about |define |explain "
    r"|history of |information about|details about)"
)

# Freshness signals. "now" also gets a trailing boundary ("nowhere").
_FRESHNESS_RE = re.compile(
    r"\b(?:latest|today|news|current|recent|now\b|updates|breaking|trending)"
)

# Queries that likely need several sources. "vs" must be a whole word.
_COMPLEX_RE = re.compile(
    r"\b(?:compare|difference between|versus|vs\b|both|and also|as well as"
    r"|in addition)"
)

# Planner prompt; filled in with str.format, hence the doubled braces.
_PLAN_PROMPT = """
You are an enterprise MCP agent. You can select MULTIPLE tools in sequence to provide comprehensive answers.

TOOLS:
- rag → private knowledge retrieval (use for internal/company docs)
- web → online factual lookup (use for public facts, current info)
- llm → final reasoning and synthesis (always include at end)

Current context:
- RAG available: {rag_available}
- User message: "{text}"
- Tool scores: {tool_scores}

Determine which tools are needed. You can select:
- Just LLM (simple questions)
- RAG + LLM (internal knowledge questions)
- Web + LLM (public fact questions)
- RAG + Web + LLM (comprehensive questions needing both sources)

Return a JSON list describing the steps, e.g.:
[
  {{"tool": "rag", "reason": "Need internal documentation"}},
  {{"tool": "web", "reason": "Need current public information"}},
  {{"tool": "llm", "reason": "Synthesize all information"}}
]

Only return the JSON array. Do not include markdown formatting.
"""


@dataclass
class ToolSelector:
    """Choose the sequence of tools (rag / web / llm) used to answer a message.

    Attributes:
        llm_client: Optional client exposing an awaitable
            ``simple_call(prompt)`` that returns the model's text reply.
            When falsy, only the keyword/score heuristics are used.
        score_threshold: Minimum ``rag_fitness`` / ``web_fitness`` score in
            ``ctx["tool_scores"]`` that forces the matching tool.
    """

    llm_client: Any = None
    score_threshold: float = 0.55

    async def select(self, intent: str, text: str, ctx):
        """Return the multi-step decision for *text*.

        Args:
            intent: Detected intent; ``"admin"`` short-circuits every
                heuristic and yields the fixed ``admin → llm`` plan.
            text: Raw user message.
            ctx: Mapping that may carry ``"tool_scores"`` (dict of floats)
                and ``"rag_results"`` (pre-fetched retrieval hits).

        Returns:
            Whatever ``_multi_step`` builds from the planned steps.
        """
        # 1. Admin rule first: it must not depend on scores, keywords or the
        #    LLM planner.
        if intent == "admin":
            return _multi_step(
                [step("admin", {"query": text}), step("llm", {"query": text})],
                "admin safety rule triggered → llm",
            )

        steps, reason = await self._plan(text, ctx)
        return _multi_step(steps, reason)

    async def _plan(self, text: str, ctx) -> Tuple[List[Dict[str, Any]], str]:
        """Compute ``(steps, reason)`` for a non-admin message."""
        ctx = ctx or {}
        msg = text.lower().strip()
        # `or` (not a .get default) so an explicit None is tolerated too.
        tool_scores = ctx.get("tool_scores") or {}
        rag_results = ctx.get("rag_results") or []
        rag_has_data = bool(rag_results)
        rag_score = tool_scores.get("rag_fitness") or 0.0
        web_score = tool_scores.get("web_fitness") or 0.0

        # 2. RAG: pre-fetched hits, a high fitness score, or internal wording.
        needs_rag = (
            rag_has_data
            or rag_score >= self.score_threshold
            or _RAG_RE.search(msg) is not None
        )
        # 3./4. Web: a high fitness score, a fact lookup, or freshness wording.
        needs_web = (
            web_score >= self.score_threshold
            or _FACT_RE.search(msg) is not None
            or _FRESHNESS_RE.search(msg) is not None
        )
        # 5. Wording that suggests several sources are needed.
        needs_multiple = _COMPLEX_RE.search(msg) is not None

        steps = []
        if needs_rag:
            steps.append(step("rag", {"query": text}))
        if needs_web:
            steps.append(step("web", {"query": text}))

        # 6. Let the LLM refine the plan when the heuristics are ambiguous
        #    (both sources, complex wording) or found nothing at all.
        if self.llm_client and (
            needs_multiple or (needs_rag and needs_web) or not steps
        ):
            try:
                prompt = _PLAN_PROMPT.format(
                    rag_available=rag_has_data,
                    text=text,
                    tool_scores=json.dumps(tool_scores),
                )
                raw = await self.llm_client.simple_call(prompt)
                # An empty result is a valid answer: it means "LLM only".
                steps = self._parse_plan(raw, text)
            except Exception:
                # Planning is best-effort: the client is opaque and may fail
                # in any way, so keep the heuristic steps and leave a trace.
                logger.warning(
                    "LLM tool planning failed; keeping heuristic plan",
                    exc_info=True,
                )

        # 7. Always end with LLM synthesis. Neither the heuristics nor
        #    _parse_plan ever emit an "llm" step, so this cannot duplicate.
        steps.append(step("llm", {
            "rag_data": rag_results if rag_has_data else None,
            "query": text,
        }))

        tool_names = [s["tool"] for s in steps]
        reason = (
            f"multi-tool plan: {' → '.join(tool_names)} | scores={tool_scores}"
        )
        return steps, reason

    @staticmethod
    def _parse_plan(raw: str, text: str) -> List[Dict[str, Any]]:
        """Turn the planner's reply into validated rag/web steps.

        Strips an optional markdown code fence, parses the JSON array, drops
        the ``llm`` entry (appended later by the caller), ignores tools that
        are not plannable, and removes duplicates while preserving order.

        Raises:
            ValueError: If the reply is not valid JSON or not a list of
                objects (``json.JSONDecodeError`` is a ``ValueError``).
            AttributeError: If *raw* is not a string.
        """
        cleaned = raw.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[len("```json"):]
        elif cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]

        plan = json.loads(cleaned.strip())
        if not isinstance(plan, list):
            raise ValueError("LLM plan is not a JSON list")

        planned = []
        seen = set()
        for entry in plan:
            if not isinstance(entry, dict):
                raise ValueError("LLM plan entry is not a JSON object")
            tool = entry.get("tool")
            if tool == "llm":
                continue
            if not isinstance(tool, str) or tool not in _PLANNABLE_TOOLS:
                logger.warning("Ignoring unknown tool %r in LLM plan", tool)
                continue
            if tool in seen:
                continue
            seen.add(tool)
            planned.append(step(tool, {"query": text}))
        return planned


def step(tool, input_data):
    """Build one plan step: ``{"tool": tool, "input": input_data}``."""
    return {"tool": tool, "input": input_data}


def _multi_step(steps, reason):
    """Wrap *steps* and *reason* in a ``multi_step`` ``AgentDecision``."""
    # Imported at call time rather than module level — presumably to avoid a
    # circular import with the models package; confirm before hoisting.
    from ..models.agent import AgentDecision

    return AgentDecision(
        action="multi_step",
        tool=None,
        tool_input={"steps": steps},
        reason=reason,
    )