"""
Complete Integration Test for GAIA Agent System
Tests the full pipeline: Router -> Agents -> Tools -> Results
"""

import os
import sys
import time
import tempfile
from pathlib import Path

# Put this file's directory on sys.path before the project imports below, so
# that `agents` and `models` resolve when the file is run directly as a script.
sys.path.insert(0, str(Path(__file__).parent))

from agents.state import GAIAAgentState, QuestionType, AgentRole
from agents.router import RouterAgent
from agents.web_researcher import WebResearchAgent
from agents.file_processor_agent import FileProcessorAgent
from agents.reasoning_agent import ReasoningAgent
from models.qwen_client import QwenClient


def _run_routed_case(index, test_case, router, agents):
    """Route one question and run the agent the test case expects.

    Args:
        index: 1-based number of the case, used for the task id and output.
        test_case: Dict with "question", "description" and "expected_agent".
        router: Object whose ``route_question(state)`` returns the routed state.
        agents: Mapping from agent role to an object exposing ``process(state)``.

    Returns:
        ``(outcome, cost)``. ``outcome`` is the agent result's ``success``
        flag, ``False`` on any failure, or ``None`` when no agent for the
        expected role is wired into this test (the case is then skipped and
        not counted). ``cost`` is the processed state's ``total_cost``, or
        0.0 when no agent result was reported.
    """
    print(f"\n🧪 Test {index}: {test_case['description']}")
    print(f" Question: {test_case['question']}")

    expected = test_case["expected_agent"]
    try:
        state = GAIAAgentState()
        state.task_id = f"test_{index}"
        state.question = test_case["question"]

        routed_state = router.route_question(state)
        print(f" ✅ Router: {routed_state.question_type.value} -> {[a.value for a in routed_state.selected_agents]}")

        if expected not in routed_state.selected_agents:
            print(f" ⚠️ Expected agent {expected.value} not selected")
            return False, 0.0

        agent = agents.get(expected)
        if agent is None:
            print(f" ⚠️ Agent {expected.value} not implemented in test")
            return None, 0.0

        processed_state = agent.process(routed_state)
        if not processed_state.agent_results:
            print(" ❌ No agent results produced")
            return False, 0.0

        # Only the most recently added agent result is judged.
        agent_result = list(processed_state.agent_results.values())[-1]
        success = agent_result.success
        cost = processed_state.total_cost

        print(f" ✅ Agent: {agent_result.agent_role.value}")
        print(f" ✅ Result: {agent_result.result[:100]}...")
        print(f" 📊 Confidence: {agent_result.confidence:.2f}")
        print(f" 💰 Cost: ${cost:.4f}")
        print(f" ⏱️ Time: {processed_state.total_processing_time:.2f}s")
        print(f" 🎯 Overall: {'✅ PASS' if success else '❌ FAIL'}")
        return success, cost

    except Exception as e:
        # Broad on purpose: one failing case must not abort the whole run.
        print(f" ❌ Pipeline failed: {e}")
        return False, 0.0


def _run_csv_case(router, file_agent):
    """Run the file-processing case against a temporary CSV file.

    Writes a three-row sales CSV into a temporary directory, routes a question
    about it and hands the routed state to ``file_agent.process``.

    Returns:
        ``(outcome, cost)`` with the same meaning as in ``_run_routed_case``;
        this case is never skipped, so ``outcome`` is always a pass/fail value.
    """
    print("\n🧪 Test 4: File Processing with CSV")
    print(" Description: Complete file analysis pipeline")

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            csv_path = os.path.join(temp_dir, "sales_data.csv")
            # Explicit encoding so the fixture does not depend on the locale.
            with open(csv_path, "w", encoding="utf-8") as f:
                f.write("product,sales,price\nWidget A,100,25.50\nWidget B,150,30.00\nWidget C,80,22.75")

            state = GAIAAgentState()
            state.task_id = "test_file"
            state.question = "What is the total sales value across all products?"
            state.file_name = "sales_data.csv"
            state.file_path = csv_path

            routed_state = router.route_question(state)
            processed_state = file_agent.process(routed_state)

            if not processed_state.agent_results:
                print(" ❌ File processing failed")
                return False, 0.0

            agent_result = list(processed_state.agent_results.values())[-1]
            success = agent_result.success

            print(f" ✅ Router: {routed_state.question_type.value}")
            print(" ✅ Agent: File processor")
            print(f" ✅ Result: {agent_result.result[:100]}...")
            print(f" 💰 Cost: ${processed_state.total_cost:.4f}")
            print(f" 🎯 Overall: {'✅ PASS' if success else '❌ FAIL'}")
            # Returning once, after reporting, guarantees this case is counted
            # exactly once even if printing the result raises.
            return success, processed_state.total_cost

    except Exception as e:
        print(f" ❌ File test failed: {e}")
        return False, 0.0


def _print_summary(results: list, total_cost: float, total_time: float) -> bool:
    """Print the aggregate report and return the overall verdict.

    Args:
        results: One pass/fail flag per counted test case.
        total_cost: Sum of the costs reported by the counted cases.
        total_time: Wall-clock duration of the whole run, in seconds.

    Returns:
        True when at least 80% of the counted cases passed, else False.
    """
    monthly_budget = 0.10
    ideal_cost = 0.05

    total = len(results)
    passed = sum(results)
    # Guard every per-test ratio so an empty result list cannot divide by zero.
    pass_rate = (passed / total) * 100 if total else 0.0
    avg_cost = total_cost / total if total else 0.0
    avg_time = total_time / total if total else 0.0

    print("\n" + "=" * 50)
    print("📊 COMPLETE INTEGRATION RESULTS")
    print("=" * 50)
    print(f"🎯 Tests Passed: {passed}/{total} ({pass_rate:.1f}%)")
    print(f"💰 Total Cost: ${total_cost:.4f}")
    print(f"⏱️ Total Time: {total_time:.2f} seconds")
    print(f"📈 Average Cost per Test: ${avg_cost:.4f}")
    print(f"⚡ Average Time per Test: {avg_time:.2f}s")

    if total_cost <= monthly_budget:
        remaining_budget = monthly_budget - total_cost
        if avg_cost > 0:
            estimated_questions = int(remaining_budget / avg_cost)
            print(f"💰 Budget Status: ✅ ${remaining_budget:.4f} remaining (~{estimated_questions} more tests)")
        else:
            # Nothing was spent (e.g. every case failed before incurring a
            # cost), so there is no per-test cost to extrapolate from.
            print(f"💰 Budget Status: ✅ ${remaining_budget:.4f} remaining")
    else:
        print(f"💰 Budget Status: ⚠️ Over budget by ${total_cost - monthly_budget:.4f}")

    if pass_rate >= 80 and total_cost <= ideal_cost:
        print("\n🎉 INTEGRATION SUCCESS! System ready for GAIA benchmark!")
        return True
    if pass_rate >= 80:
        print("\n✅ FUNCTIONALITY SUCCESS! (Higher cost than ideal)")
        return True
    print("\n⚠️ INTEGRATION ISSUES! Check individual test failures")
    return False


def test_complete_pipeline():
    """Test the complete GAIA agent pipeline.

    Builds the LLM client, router and agents, runs three routed questions plus
    one CSV file-processing case, then prints a cost/time/pass-rate summary.

    Returns:
        False when the system cannot be initialized or fewer than 80% of the
        counted cases pass; True otherwise.
    """
    print("🚀 GAIA Complete Integration Test")
    print("=" * 50)

    try:
        llm_client = QwenClient()
        router = RouterAgent(llm_client)
        agents = {
            AgentRole.WEB_RESEARCHER: WebResearchAgent(llm_client),
            AgentRole.FILE_PROCESSOR: FileProcessorAgent(llm_client),
            AgentRole.REASONING_AGENT: ReasoningAgent(llm_client),
        }
    except Exception as e:
        # Any construction failure means nothing below can run.
        print(f"❌ Failed to initialize system: {e}")
        return False

    test_cases = [
        {
            "question": "What is the population of Paris?",
            "description": "Simple Wikipedia/web research question",
            "expected_agent": AgentRole.WEB_RESEARCHER,
        },
        {
            "question": "Calculate the area of a circle with radius 5 meters",
            "description": "Mathematical reasoning with unit conversion",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
        {
            "question": "What is the average of these numbers: 10, 20, 30, 40, 50?",
            "description": "Statistical calculation",
            "expected_agent": AgentRole.REASONING_AGENT,
        },
    ]

    results = []
    total_cost = 0.0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        outcome, cost = _run_routed_case(i, test_case, router, agents)
        if outcome is None:
            # Skipped case: excluded from the pass rate.
            continue
        total_cost += cost
        results.append(outcome)

    outcome, cost = _run_csv_case(router, agents[AgentRole.FILE_PROCESSOR])
    total_cost += cost
    results.append(outcome)

    total_time = time.time() - start_time
    return _print_summary(results, total_cost, total_time)


if __name__ == "__main__":
    # Exit status mirrors the verdict: 0 when the pipeline test passed, 1 otherwise.
    success = test_complete_pipeline()
    sys.exit(0 if success else 1)