"""
Subagent System Test
Tests the specialized subagent orchestration system with tool filtering and routing.
Usage:
# Test subagent system (uses .env NUTHATCH_USE_STDIO setting)
python tests/test_subagents.py
# Test individual specialists
python tests/test_subagents.py --specialist image_identifier
python tests/test_subagents.py --specialist species_explorer
python tests/test_subagents.py --specialist taxonomy_specialist
Configuration (.env):
# Ensure STDIO mode for testing
NUTHATCH_USE_STDIO=true
NUTHATCH_API_KEY=<your-key>
# Required for Modal classifier
MODAL_MCP_URL=<your-modal-url>
BIRD_CLASSIFIER_API_KEY=<your-modal-key>
# Required for LLM
OPENAI_API_KEY=<your-key>
"""
import asyncio
import sys
from pathlib import Path

from dotenv import load_dotenv

# Add parent directory to sys.path so the langgraph_agent package resolves
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))

from langgraph_agent import AgentFactory
from langgraph_agent.config import AgentConfig
from langgraph_agent.subagent_config import SubAgentConfig
from langgraph_agent.subagent_factory import SubAgentFactory
from langgraph_agent.mcp_clients import MCPClientManager
from langchain_openai import ChatOpenAI

load_dotenv()


def validate_config():
    """Validate required configuration."""
    errors = []

    if not AgentConfig.MODAL_MCP_URL:
        errors.append("MODAL_MCP_URL not set")
    if not AgentConfig.BIRD_CLASSIFIER_API_KEY:
        errors.append("BIRD_CLASSIFIER_API_KEY not set")
    if not AgentConfig.NUTHATCH_API_KEY:
        errors.append("NUTHATCH_API_KEY not set")
    if not AgentConfig.OPENAI_API_KEY:
        errors.append("OPENAI_API_KEY not set")

    if not AgentConfig.NUTHATCH_USE_STDIO:
        print("\n⚠️ WARNING: NUTHATCH_USE_STDIO is False")
        print("   For this test, STDIO mode is recommended")
        print("   Set NUTHATCH_USE_STDIO=true in .env\n")

    if errors:
        print("\n❌ Configuration Errors:")
        for error in errors:
            print(f"   • {error}")
        print("\n💡 Check your .env file")
        return False

    return True


async def test_tool_filtering():
    """Test that each subagent gets the correct filtered tool set."""
    print("\n" + "="*70)
    print("TEST 1: Tool Filtering")
    print("="*70)

    # Get all tools
    client = await MCPClientManager.create_multi_server_client()
    all_tools = await MCPClientManager.get_tools(client)
    print(f"\n[ALL TOOLS]: {len(all_tools)} total tools available")
    for tool in all_tools:
        print(f"   • {tool.name}")

    # Test each subagent's tool filtering
    definitions = SubAgentConfig.get_subagent_definitions()
    for subagent_name, config in definitions.items():
        print(f"\n[{subagent_name.upper()}]:")
        print(f"   Name: {config['name']}")
        print(f"   Expected tools: {len(config['tools'])}")
        print(f"   Tools: {', '.join(config['tools'])}")

        # Filter tools by the allowed names
        allowed_tool_names = set(config['tools'])
        filtered_tools = [t for t in all_tools if t.name in allowed_tool_names]
        print(f"   ✅ Filtered to: {len(filtered_tools)} tools")

        if len(filtered_tools) != len(config['tools']):
            print(f"   ⚠️ Warning: Expected {len(config['tools'])} but got {len(filtered_tools)}")
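

# For reference, each definition consumed above is assumed to look roughly
# like the sketch below. The authoritative schema lives in subagent_config.py;
# the tool names and values here are purely illustrative:
#
#   "species_explorer": {
#       "name": "Species Explorer",
#       "description": "Looks up species facts, media, and regional lists",
#       "tools": ["search_species", "get_species_media"],  # hypothetical names
#       "temperature": 0.2,
#   }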


async def test_individual_subagent(subagent_name: str):
    """Test a specific subagent with a sample query."""
    print("\n" + "="*70)
    print(f"TEST 2: Individual Subagent - {subagent_name}")
    print("="*70)

    # Get configuration
    definitions = SubAgentConfig.get_subagent_definitions()
    if subagent_name not in definitions:
        print(f"\n❌ Unknown subagent: {subagent_name}")
        print(f"Available: {', '.join(definitions.keys())}")
        return

    config = definitions[subagent_name]
    print("\n[CONFIG]:")
    print(f"   Name: {config['name']}")
    print(f"   Description: {config['description']}")
    print(f"   Tools: {', '.join(config['tools'])}")

    # Create LLM
    llm = ChatOpenAI(
        model=AgentConfig.DEFAULT_OPENAI_MODEL,
        temperature=config['temperature'],
        streaming=True
    )

    # Get tools and create subagent
    client = await MCPClientManager.create_multi_server_client()
    all_tools = await MCPClientManager.get_tools(client)

    print("\n[CREATING SUBAGENT]...")
    subagent = await SubAgentFactory.create_subagent(
        subagent_name, all_tools, llm
    )
    print("✅ Subagent created successfully")

    # Test queries for each specialist
    test_queries = {
        "image_identifier": "What bird is in this image?",
        "species_explorer": "Tell me about Northern Cardinals",
        "taxonomy_specialist": "What birds are in the Cardinalidae family?"
    }

    query = test_queries.get(subagent_name, "Help me identify birds")
    print(f"\n[TEST QUERY]: {query}")
    print("[RESPONSE]:")
    print("-" * 70)

    try:
        # Note: this will fail without an actual image for image_identifier,
        # but it still shows the subagent is wired up correctly
        result = await subagent.ainvoke({
            "messages": [{"role": "user", "content": query}]
        })
        if result and "messages" in result:
            for msg in result["messages"]:
                if hasattr(msg, 'content'):
                    print(msg.content)
        else:
            print(result)
    except Exception as e:
        print(f"⚠️ Test query failed (expected for image_identifier without image): {e}")
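

# To exercise image_identifier with a real photo, the plain-text query above
# could be swapped for a multimodal message. A minimal sketch, assuming the
# subagent accepts OpenAI-style content parts (the path and payload shape are
# illustrative, not verified against SubAgentFactory):
#
#   import base64
#   with open("cardinal.jpg", "rb") as f:  # hypothetical image path
#       b64 = base64.b64encode(f.read()).decode()
#   result = await subagent.ainvoke({"messages": [{
#       "role": "user",
#       "content": [
#           {"type": "text", "text": "What bird is in this image?"},
#           {"type": "image_url",
#            "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
#       ],
#   }]})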


async def test_router():
    """Test the routing logic."""
    print("\n" + "="*70)
    print("TEST 3: Router Logic")
    print("="*70)

    test_cases = [
        ("What bird is this?", "image_identifier"),
        ("Identify this photo", "image_identifier"),
        ("Tell me about cardinals", "species_explorer"),
        ("Find birds with red feathers", "species_explorer"),
        ("Show me audio of a robin", "species_explorer"),
        ("What families exist?", "taxonomy_specialist"),
        ("Show me endangered birds", "taxonomy_specialist"),
    ]

    print("\n[ROUTING TESTS]:")
    print(f"Testing {len(test_cases)} queries...")

    for query, expected_route in test_cases:
        # Simulate routing logic from subagent_router.py
        content = query.lower()
        if any(word in content for word in ["identify", "what bird", "classify", "image", "photo"]):
            route = "image_identifier"
        elif any(word in content for word in ["audio", "sound", "call", "song", "find", "search"]):
            route = "species_explorer"
        elif any(word in content for word in ["family", "families", "conservation", "endangered", "taxonomy"]):
            route = "taxonomy_specialist"
        else:
            route = "species_explorer"

        status = "✅" if route == expected_route else "❌"
        print(f"\n   {status} Query: '{query}'")
        print(f"      Expected: {expected_route}")
        print(f"      Got: {route}")
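

# Note: the simulation above mirrors the keyword matching in
# subagent_router.py, and match order matters -- image keywords are checked
# first, so "find a photo of a robin" would route to image_identifier even
# though it contains "find". If the real router's keyword lists change, this
# copy must be kept in sync by hand.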


async def test_full_orchestrator():
    """Test the full subagent orchestrator."""
    print("\n" + "="*70)
    print("TEST 4: Full Orchestrator")
    print("="*70)

    # Enable subagents
    SubAgentConfig.USE_SUBAGENTS = True

    print("\n[CONFIG]:")
    print(f"   Subagents enabled: {SubAgentConfig.USE_SUBAGENTS}")
    print(f"   OpenAI model: {AgentConfig.DEFAULT_OPENAI_MODEL}")
    print(f"   Temperature: {AgentConfig.OPENAI_TEMPERATURE}")

    print("\n[CREATING ORCHESTRATOR]...")
    try:
        orchestrator = await AgentFactory.create_subagent_orchestrator(
            model=AgentConfig.DEFAULT_OPENAI_MODEL,
            api_key=AgentConfig.OPENAI_API_KEY,
            provider="openai",
            mode="Specialized Subagents (3 Specialists)"
        )
        print("✅ Orchestrator created successfully")
        print(f"   Type: {type(orchestrator)}")
    except Exception as e:
        print(f"❌ Orchestrator creation failed: {e}")
        import traceback
        traceback.print_exc()
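

# Once created, the orchestrator is assumed to accept the same message shape
# as the individual subagents above. A minimal usage sketch (not executed
# here, to keep this test from spending tokens on a full round trip):
#
#   result = await orchestrator.ainvoke({
#       "messages": [{"role": "user", "content": "Tell me about Blue Jays"}]
#   })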


async def run_all_tests():
    """Run all subagent tests."""
    print("\n" + "="*70)
    print("SUBAGENT SYSTEM TEST SUITE")
    print("="*70)

    if not validate_config():
        print("\n❌ Test suite aborted due to configuration errors")
        return

    try:
        # Test 1: Tool filtering
        await test_tool_filtering()

        # Test 2: Individual subagents
        for subagent_name in ["image_identifier", "species_explorer", "taxonomy_specialist"]:
            await test_individual_subagent(subagent_name)

        # Test 3: Router logic
        await test_router()

        # Test 4: Full orchestrator
        await test_full_orchestrator()

        print("\n" + "="*70)
        print("✅ ALL TESTS COMPLETED")
        print("="*70)
    except Exception as e:
        print(f"\n❌ Test suite failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    # sys is already imported at module level
    if len(sys.argv) > 1:
        if sys.argv[1] == "--specialist" and len(sys.argv) > 2:
            # Test a single specialist
            specialist_name = sys.argv[2]
            asyncio.run(test_individual_subagent(specialist_name))
        elif sys.argv[1] == "--router":
            # Test router only
            asyncio.run(test_router())
        elif sys.argv[1] == "--tools":
            # Test tool filtering only
            asyncio.run(test_tool_filtering())
        else:
            print("Usage:")
            print("   python tests/test_subagents.py                       # Run all tests")
            print("   python tests/test_subagents.py --specialist <name>   # Test one specialist")
            print("   python tests/test_subagents.py --router              # Test routing only")
            print("   python tests/test_subagents.py --tools               # Test tool filtering")
    else:
        # Run all tests
        asyncio.run(run_all_tests())