Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Complete Integration Test for GAIA Agent System | |
| Tests the full pipeline: Router -> Agents -> Tools -> Results | |
| """ | |
| import os | |
| import sys | |
| import time | |
| import tempfile | |
| from pathlib import Path | |
| # Add src to path for imports | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| from agents.state import GAIAAgentState, QuestionType, AgentRole | |
| from agents.router import RouterAgent | |
| from agents.web_researcher import WebResearchAgent | |
| from agents.file_processor_agent import FileProcessorAgent | |
| from agents.reasoning_agent import ReasoningAgent | |
| from models.qwen_client import QwenClient | |
| def test_complete_pipeline(): | |
| """Test the complete GAIA agent pipeline""" | |
| print("π GAIA Complete Integration Test") | |
| print("=" * 50) | |
| # Initialize system | |
| try: | |
| llm_client = QwenClient() | |
| router = RouterAgent(llm_client) | |
| web_agent = WebResearchAgent(llm_client) | |
| file_agent = FileProcessorAgent(llm_client) | |
| reasoning_agent = ReasoningAgent(llm_client) | |
| except Exception as e: | |
| print(f"β Failed to initialize system: {e}") | |
| return False | |
| # End-to-end test cases | |
| test_cases = [ | |
| { | |
| "question": "What is the population of Paris?", | |
| "description": "Simple Wikipedia/web research question", | |
| "expected_agent": AgentRole.WEB_RESEARCHER | |
| }, | |
| { | |
| "question": "Calculate the area of a circle with radius 5 meters", | |
| "description": "Mathematical reasoning with unit conversion", | |
| "expected_agent": AgentRole.REASONING_AGENT | |
| }, | |
| { | |
| "question": "What is the average of these numbers: 10, 20, 30, 40, 50?", | |
| "description": "Statistical calculation", | |
| "expected_agent": AgentRole.REASONING_AGENT | |
| } | |
| ] | |
| results = [] | |
| total_cost = 0.0 | |
| start_time = time.time() | |
| for i, test_case in enumerate(test_cases, 1): | |
| print(f"\nπ§ͺ Test {i}: {test_case['description']}") | |
| print(f" Question: {test_case['question']}") | |
| try: | |
| # Step 1: Initialize state | |
| state = GAIAAgentState() | |
| state.task_id = f"test_{i}" | |
| state.question = test_case["question"] | |
| # Step 2: Route question | |
| routed_state = router.route_question(state) | |
| print(f" β Router: {routed_state.question_type.value} -> {[a.value for a in routed_state.selected_agents]}") | |
| # Step 3: Process with appropriate agent | |
| if test_case["expected_agent"] in routed_state.selected_agents: | |
| if test_case["expected_agent"] == AgentRole.WEB_RESEARCHER: | |
| processed_state = web_agent.process(routed_state) | |
| elif test_case["expected_agent"] == AgentRole.REASONING_AGENT: | |
| processed_state = reasoning_agent.process(routed_state) | |
| elif test_case["expected_agent"] == AgentRole.FILE_PROCESSOR: | |
| processed_state = file_agent.process(routed_state) | |
| else: | |
| print(f" β οΈ Agent {test_case['expected_agent'].value} not implemented in test") | |
| continue | |
| # Check results | |
| if processed_state.agent_results: | |
| agent_result = list(processed_state.agent_results.values())[-1] | |
| success = agent_result.success | |
| confidence = agent_result.confidence | |
| cost = processed_state.total_cost | |
| processing_time = processed_state.total_processing_time | |
| print(f" β Agent: {agent_result.agent_role.value}") | |
| print(f" β Result: {agent_result.result[:100]}...") | |
| print(f" π Confidence: {confidence:.2f}") | |
| print(f" π° Cost: ${cost:.4f}") | |
| print(f" β±οΈ Time: {processing_time:.2f}s") | |
| total_cost += cost | |
| results.append(success) | |
| print(f" π― Overall: {'β PASS' if success else 'β FAIL'}") | |
| else: | |
| print(f" β No agent results produced") | |
| results.append(False) | |
| else: | |
| print(f" β οΈ Expected agent {test_case['expected_agent'].value} not selected") | |
| results.append(False) | |
| except Exception as e: | |
| print(f" β Pipeline failed: {e}") | |
| results.append(False) | |
| # File processing test with actual file | |
| print(f"\nπ§ͺ Test 4: File Processing with CSV") | |
| print(f" Description: Complete file analysis pipeline") | |
| try: | |
| with tempfile.TemporaryDirectory() as temp_dir: | |
| # Create test CSV | |
| csv_path = os.path.join(temp_dir, "sales_data.csv") | |
| with open(csv_path, 'w') as f: | |
| f.write("product,sales,price\nWidget A,100,25.50\nWidget B,150,30.00\nWidget C,80,22.75") | |
| # Initialize state with file | |
| state = GAIAAgentState() | |
| state.task_id = "test_file" | |
| state.question = "What is the total sales value across all products?" | |
| state.file_name = "sales_data.csv" | |
| state.file_path = csv_path | |
| # Route and process | |
| routed_state = router.route_question(state) | |
| processed_state = file_agent.process(routed_state) | |
| if processed_state.agent_results: | |
| agent_result = list(processed_state.agent_results.values())[-1] | |
| success = agent_result.success | |
| total_cost += processed_state.total_cost | |
| results.append(success) | |
| print(f" β Router: {routed_state.question_type.value}") | |
| print(f" β Agent: File processor") | |
| print(f" β Result: {agent_result.result[:100]}...") | |
| print(f" π° Cost: ${processed_state.total_cost:.4f}") | |
| print(f" π― Overall: {'β PASS' if success else 'β FAIL'}") | |
| else: | |
| print(f" β File processing failed") | |
| results.append(False) | |
| except Exception as e: | |
| print(f" β File test failed: {e}") | |
| results.append(False) | |
| # Final summary | |
| total_time = time.time() - start_time | |
| passed = sum(results) | |
| total = len(results) | |
| pass_rate = (passed / total) * 100 | |
| print("\n" + "=" * 50) | |
| print("π COMPLETE INTEGRATION RESULTS") | |
| print("=" * 50) | |
| print(f"π― Tests Passed: {passed}/{total} ({pass_rate:.1f}%)") | |
| print(f"π° Total Cost: ${total_cost:.4f}") | |
| print(f"β±οΈ Total Time: {total_time:.2f} seconds") | |
| print(f"π Average Cost per Test: ${total_cost/total:.4f}") | |
| print(f"β‘ Average Time per Test: {total_time/total:.2f}s") | |
| # Budget analysis | |
| monthly_budget = 0.10 # $0.10/month | |
| if total_cost <= monthly_budget: | |
| remaining_budget = monthly_budget - total_cost | |
| estimated_questions = int(remaining_budget / (total_cost / total)) | |
| print(f"π° Budget Status: β ${remaining_budget:.4f} remaining (~{estimated_questions} more tests)") | |
| else: | |
| print(f"π° Budget Status: β οΈ Over budget by ${total_cost - monthly_budget:.4f}") | |
| # Success criteria | |
| if pass_rate >= 80 and total_cost <= 0.05: # 80% success, reasonable cost | |
| print("\nπ INTEGRATION SUCCESS! System ready for GAIA benchmark!") | |
| return True | |
| elif pass_rate >= 80: | |
| print("\nβ FUNCTIONALITY SUCCESS! (Higher cost than ideal)") | |
| return True | |
| else: | |
| print("\nβ οΈ INTEGRATION ISSUES! Check individual test failures") | |
| return False | |
| if __name__ == "__main__": | |
| success = test_complete_pipeline() | |
| sys.exit(0 if success else 1) |