"""
Subagent System Test

Tests the specialized subagent orchestration system with tool filtering and routing.

Usage:
    # Test subagent system (uses .env NUTHATCH_USE_STDIO setting)
    python tests/test_subagents.py

    # Test individual specialists
    python tests/test_subagents.py --specialist image_identifier
    python tests/test_subagents.py --specialist species_explorer
    python tests/test_subagents.py --specialist taxonomy_specialist

Configuration (.env):
    # Ensure STDIO mode for testing
    NUTHATCH_USE_STDIO=true
    NUTHATCH_API_KEY=<your-key>

    # Required for Modal classifier
    MODAL_MCP_URL=<your-modal-url>
    BIRD_CLASSIFIER_API_KEY=<your-modal-key>

    # Required for LLM
    OPENAI_API_KEY=<your-key>
"""
import asyncio
import sys
from pathlib import Path

from dotenv import load_dotenv

# Add the project root to sys.path so the langgraph_agent package
# resolves when this script is run directly from tests/
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))

from langgraph_agent import AgentFactory
from langgraph_agent.config import AgentConfig
from langgraph_agent.subagent_config import SubAgentConfig
from langgraph_agent.subagent_factory import SubAgentFactory
from langgraph_agent.mcp_clients import MCPClientManager
from langchain_openai import ChatOpenAI

load_dotenv()


def validate_config():
    """Validate required configuration."""
    errors = []

    if not AgentConfig.MODAL_MCP_URL:
        errors.append("MODAL_MCP_URL not set")
    if not AgentConfig.BIRD_CLASSIFIER_API_KEY:
        errors.append("BIRD_CLASSIFIER_API_KEY not set")
    if not AgentConfig.NUTHATCH_API_KEY:
        errors.append("NUTHATCH_API_KEY not set")
    if not AgentConfig.OPENAI_API_KEY:
        errors.append("OPENAI_API_KEY not set")

    if not AgentConfig.NUTHATCH_USE_STDIO:
        print("\n⚠️  WARNING: NUTHATCH_USE_STDIO is False")
        print("   For this test, STDIO mode is recommended")
        print("   Set NUTHATCH_USE_STDIO=true in .env\n")

    if errors:
        print("\n❌ Configuration Errors:")
        for error in errors:
            print(f"  • {error}")
        print("\n💡 Check your .env file")
        return False

    return True


async def test_tool_filtering():
    """Test that each subagent gets the correct filtered tool set."""
    print("\n" + "="*70)
    print("TEST 1: Tool Filtering")
    print("="*70)

    # Get all tools
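    # create_multi_server_client aggregates tools from every configured MCP
    # server; with the .env settings above that means the Nuthatch API and
    # the Modal classifier.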
    client = await MCPClientManager.create_multi_server_client()
    all_tools = await MCPClientManager.get_tools(client)

    print(f"\n[ALL TOOLS]: {len(all_tools)} total tools available")
    for tool in all_tools:
        print(f"  • {tool.name}")

    # Test each subagent's tool filtering
    definitions = SubAgentConfig.get_subagent_definitions()

    for subagent_name, config in definitions.items():
        print(f"\n[{subagent_name.upper()}]:")
        print(f"  Name: {config['name']}")
        print(f"  Expected tools: {len(config['tools'])}")
        print(f"  Tools: {', '.join(config['tools'])}")

        # Filter the full tool list down to this subagent's allow-list
        allowed_tool_names = set(config['tools'])
        filtered_tools = [t for t in all_tools if t.name in allowed_tool_names]
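        # A count mismatch below usually means a tool name in the subagent
        # config has drifted from what the MCP servers actually expose.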

        print(f"  ✅ Filtered to: {len(filtered_tools)} tools")

        if len(filtered_tools) != len(config['tools']):
            print(f"  ⚠️  Warning: Expected {len(config['tools'])} but got {len(filtered_tools)}")


async def test_individual_subagent(subagent_name: str):
    """Test a specific subagent with a sample query."""
    print("\n" + "="*70)
    print(f"TEST 2: Individual Subagent - {subagent_name}")
    print("="*70)

    # Get configuration
    definitions = SubAgentConfig.get_subagent_definitions()

    if subagent_name not in definitions:
        print(f"\n❌ Unknown subagent: {subagent_name}")
        print(f"Available: {', '.join(definitions.keys())}")
        return

    config = definitions[subagent_name]
    print(f"\n[CONFIG]:")
    print(f"  Name: {config['name']}")
    print(f"  Description: {config['description']}")
    print(f"  Tools: {', '.join(config['tools'])}")

    # Create the LLM with this specialist's configured temperature;
    # streaming=True lets token output surface incrementally
    llm = ChatOpenAI(
        model=AgentConfig.DEFAULT_OPENAI_MODEL,
        temperature=config['temperature'],
        streaming=True
    )

    # Get tools and create subagent
    client = await MCPClientManager.create_multi_server_client()
    all_tools = await MCPClientManager.get_tools(client)

    print(f"\n[CREATING SUBAGENT]...")
    subagent = await SubAgentFactory.create_subagent(
        subagent_name, all_tools, llm
    )
    print(f"✅ Subagent created successfully")

    # One representative query per specialist; unknown names fall back to a
    # generic prompt via .get() below
    test_queries = {
        "image_identifier": "What bird is in this image?",
        "species_explorer": "Tell me about Northern Cardinals",
        "taxonomy_specialist": "What birds are in the Cardinalidae family?"
    }

    query = test_queries.get(subagent_name, "Help me identify birds")

    print(f"\n[TEST QUERY]: {query}")
    print(f"[RESPONSE]:")
    print("-" * 70)

    try:
        # Note: This will fail without an actual image for image_identifier
        # but shows the subagent is working
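        # LangGraph agents take and return a state dict whose "messages" list
        # holds the conversation, hence the shape checks on the result below.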
        result = await subagent.ainvoke({
            "messages": [{"role": "user", "content": query}]
        })

        if result and "messages" in result:
            for msg in result["messages"]:
                if hasattr(msg, 'content'):
                    print(msg.content)
        else:
            print(result)

    except Exception as e:
        print(f"⚠️  Test query failed (expected for image_identifier without image): {e}")


async def test_router():
    """Test the routing logic."""
    print("\n" + "="*70)
    print("TEST 3: Router Logic")
    print("="*70)

    test_cases = [
        ("What bird is this?", "image_identifier"),
        ("Identify this photo", "image_identifier"),
        ("Tell me about cardinals", "species_explorer"),
        ("Find birds with red feathers", "species_explorer"),
        ("Show me audio of a robin", "species_explorer"),
        ("What families exist?", "taxonomy_specialist"),
        ("Show me endangered birds", "taxonomy_specialist"),
    ]

    print("\n[ROUTING TESTS]:")
    print(f"Testing {len(test_cases)} queries...")

    for query, expected_route in test_cases:
        # Simulate routing logic from subagent_router.py
        content = query.lower()
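        # NOTE: `word in content` is substring matching, so a short keyword
        # like "call" also hits "recall" or "locally"; branch order gives
        # image_identifier priority when several keyword sets match.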

        if any(word in content for word in ["identify", "what bird", "classify", "image", "photo"]):
            route = "image_identifier"
        elif any(word in content for word in ["audio", "sound", "call", "song", "find", "search"]):
            route = "species_explorer"
        elif any(word in content for word in ["family", "families", "conservation", "endangered", "taxonomy"]):
            route = "taxonomy_specialist"
        else:
            route = "species_explorer"

        status = "✅" if route == expected_route else "❌"
        print(f"\n  {status} Query: '{query}'")
        print(f"     Expected: {expected_route}")
        print(f"     Got: {route}")


async def test_full_orchestrator():
    """Test the full subagent orchestrator."""
    print("\n" + "="*70)
    print("TEST 4: Full Orchestrator")
    print("="*70)

    # Flip the class-level flag so the orchestrator path is used; this
    # mutates module state for the rest of the process
    SubAgentConfig.USE_SUBAGENTS = True

    print(f"\n[CONFIG]:")
    print(f"  Subagents enabled: {SubAgentConfig.USE_SUBAGENTS}")
    print(f"  OpenAI model: {AgentConfig.DEFAULT_OPENAI_MODEL}")
    print(f"  Temperature: {AgentConfig.OPENAI_TEMPERATURE}")

    print(f"\n[CREATING ORCHESTRATOR]...")
    try:
        orchestrator = await AgentFactory.create_subagent_orchestrator(
            model=AgentConfig.DEFAULT_OPENAI_MODEL,
            api_key=AgentConfig.OPENAI_API_KEY,
            provider="openai",
            mode="Specialized Subagents (3 Specialists)"
        )
        print(f"✅ Orchestrator created successfully")
        print(f"   Type: {type(orchestrator)}")

    except Exception as e:
        print(f"❌ Orchestrator creation failed: {e}")
        import traceback
        traceback.print_exc()


async def run_all_tests():
    """Run all subagent tests."""
    print("\n" + "="*70)
    print("SUBAGENT SYSTEM TEST SUITE")
    print("="*70)

    if not validate_config():
        print("\n❌ Test suite aborted due to configuration errors")
        return

    try:
        # Test 1: Tool filtering
        await test_tool_filtering()

        # Test 2: Individual subagents
        for subagent_name in ["image_identifier", "species_explorer", "taxonomy_specialist"]:
            await test_individual_subagent(subagent_name)

        # Test 3: Router logic
        await test_router()

        # Test 4: Full orchestrator
        await test_full_orchestrator()

        print("\n" + "="*70)
        print("✅ ALL TESTS COMPLETED")
        print("="*70)

    except Exception as e:
        print(f"\n❌ Test suite failed: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":

    if len(sys.argv) > 1:
        if sys.argv[1] == "--specialist" and len(sys.argv) > 2:
            # Test individual specialist
            specialist_name = sys.argv[2]
            asyncio.run(test_individual_subagent(specialist_name))
        elif sys.argv[1] == "--router":
            # Test router only
            asyncio.run(test_router())
        elif sys.argv[1] == "--tools":
            # Test tool filtering only
            asyncio.run(test_tool_filtering())
        else:
            print("Usage:")
            print("  python tests/test_subagents.py                          # Run all tests")
            print("  python tests/test_subagents.py --specialist <name>      # Test one specialist")
            print("  python tests/test_subagents.py --router                 # Test routing only")
            print("  python tests/test_subagents.py --tools                  # Test tool filtering")
    else:
        # Run all tests
        asyncio.run(run_all_tests())