File size: 8,043 Bytes
225a75e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#!/usr/bin/env python3
"""
Complete Integration Test for GAIA Agent System
Tests the full pipeline: Router -> Agents -> Tools -> Results
"""

import os
import sys
import time
import tempfile
from pathlib import Path

# Add src to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from agents.state import GAIAAgentState, QuestionType, AgentRole
from agents.router import RouterAgent
from agents.web_researcher import WebResearchAgent
from agents.file_processor_agent import FileProcessorAgent
from agents.reasoning_agent import ReasoningAgent
from models.qwen_client import QwenClient

def test_complete_pipeline():
    """Run the end-to-end GAIA pipeline checks and print a summary report.

    Three question-only cases are routed with ``RouterAgent`` and handed to
    the agent each case expects. A fourth case writes a small CSV into a
    temporary directory and sends it through ``FileProcessorAgent``. Per-test
    details and an aggregate pass-rate / cost / timing summary are printed to
    stdout.

    Returns:
        bool: ``True`` when at least 80% of the recorded tests succeeded,
        ``False`` otherwise, including when the client or any agent cannot
        be constructed.
    """

    print("🚀 GAIA Complete Integration Test")
    print("=" * 50)

    # Initialize system
    try:
        llm_client = QwenClient()
        router = RouterAgent(llm_client)
        web_agent = WebResearchAgent(llm_client)
        file_agent = FileProcessorAgent(llm_client)
        reasoning_agent = ReasoningAgent(llm_client)
    except Exception as e:
        print(f"❌ Failed to initialize system: {e}")
        return False

    # End-to-end test cases
    test_cases = [
        {
            "question": "What is the population of Paris?",
            "description": "Simple Wikipedia/web research question",
            "expected_agent": AgentRole.WEB_RESEARCHER
        },
        {
            "question": "Calculate the area of a circle with radius 5 meters",
            "description": "Mathematical reasoning with unit conversion",
            "expected_agent": AgentRole.REASONING_AGENT
        },
        {
            "question": "What is the average of these numbers: 10, 20, 30, 40, 50?",
            "description": "Statistical calculation",
            "expected_agent": AgentRole.REASONING_AGENT
        }
    ]

    results = []
    total_cost = 0.0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n🧪 Test {i}: {test_case['description']}")
        print(f"   Question: {test_case['question']}")

        try:
            # Step 1: Initialize state
            state = GAIAAgentState()
            state.task_id = f"test_{i}"
            state.question = test_case["question"]

            # Step 2: Route question
            routed_state = router.route_question(state)
            print(f"   ✅ Router: {routed_state.question_type.value} -> {[a.value for a in routed_state.selected_agents]}")

            # Step 3: Process with appropriate agent
            if test_case["expected_agent"] in routed_state.selected_agents:
                if test_case["expected_agent"] == AgentRole.WEB_RESEARCHER:
                    processed_state = web_agent.process(routed_state)
                elif test_case["expected_agent"] == AgentRole.REASONING_AGENT:
                    processed_state = reasoning_agent.process(routed_state)
                elif test_case["expected_agent"] == AgentRole.FILE_PROCESSOR:
                    processed_state = file_agent.process(routed_state)
                else:
                    # No result is recorded for this case, so it is left out
                    # of the pass-rate denominator.
                    print(f"   ⚠️  Agent {test_case['expected_agent'].value} not implemented in test")
                    continue

                # Check results
                if processed_state.agent_results:
                    # Judge the run by the last entry in agent_results.
                    agent_result = list(processed_state.agent_results.values())[-1]
                    success = agent_result.success
                    confidence = agent_result.confidence
                    cost = processed_state.total_cost
                    processing_time = processed_state.total_processing_time

                    print(f"   ✅ Agent: {agent_result.agent_role.value}")
                    print(f"   ✅ Result: {agent_result.result[:100]}...")
                    print(f"   📊 Confidence: {confidence:.2f}")
                    print(f"   💰 Cost: ${cost:.4f}")
                    print(f"   ⏱️  Time: {processing_time:.2f}s")

                    total_cost += cost
                    results.append(success)

                    print(f"   🎯 Overall: {'✅ PASS' if success else '❌ FAIL'}")
                else:
                    print("   ❌ No agent results produced")
                    results.append(False)
            else:
                print(f"   ⚠️  Expected agent {test_case['expected_agent'].value} not selected")
                results.append(False)

        except Exception as e:
            # Any failure inside one case is recorded as a failed test so the
            # remaining cases still run.
            print(f"   ❌ Pipeline failed: {e}")
            results.append(False)

    # File processing test with actual file
    print("\n🧪 Test 4: File Processing with CSV")
    print("   Description: Complete file analysis pipeline")

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create test CSV (explicit encoding so the fixture does not
            # depend on the platform's locale default)
            csv_path = os.path.join(temp_dir, "sales_data.csv")
            with open(csv_path, 'w', encoding="utf-8") as f:
                f.write("product,sales,price\nWidget A,100,25.50\nWidget B,150,30.00\nWidget C,80,22.75")

            # Initialize state with file
            state = GAIAAgentState()
            state.task_id = "test_file"
            state.question = "What is the total sales value across all products?"
            state.file_name = "sales_data.csv"
            state.file_path = csv_path

            # Route and process; the file agent is invoked regardless of
            # which agents the router selected.
            routed_state = router.route_question(state)
            processed_state = file_agent.process(routed_state)

            if processed_state.agent_results:
                agent_result = list(processed_state.agent_results.values())[-1]
                success = agent_result.success
                total_cost += processed_state.total_cost
                results.append(success)

                print(f"   ✅ Router: {routed_state.question_type.value}")
                print("   ✅ Agent: File processor")
                print(f"   ✅ Result: {agent_result.result[:100]}...")
                print(f"   💰 Cost: ${processed_state.total_cost:.4f}")
                print(f"   🎯 Overall: {'✅ PASS' if success else '❌ FAIL'}")
            else:
                print("   ❌ File processing failed")
                results.append(False)

    except Exception as e:
        print(f"   ❌ File test failed: {e}")
        results.append(False)

    # Final summary. The file test above appends a result on every path, so
    # `total` is at least 1 here.
    total_time = time.time() - start_time
    passed = sum(results)
    total = len(results)
    pass_rate = (passed / total) * 100
    average_cost = total_cost / total

    print("\n" + "=" * 50)
    print("📊 COMPLETE INTEGRATION RESULTS")
    print("=" * 50)
    print(f"🎯 Tests Passed: {passed}/{total} ({pass_rate:.1f}%)")
    print(f"💰 Total Cost: ${total_cost:.4f}")
    print(f"⏱️  Total Time: {total_time:.2f} seconds")
    print(f"📈 Average Cost per Test: ${average_cost:.4f}")
    print(f"⚡ Average Time per Test: {total_time/total:.2f}s")

    # Budget analysis
    monthly_budget = 0.10  # $0.10/month
    if total_cost <= monthly_budget:
        remaining_budget = monthly_budget - total_cost
        if average_cost > 0:
            estimated_questions = int(remaining_budget / average_cost)
            print(f"💰 Budget Status: ✅ ${remaining_budget:.4f} remaining (~{estimated_questions} more tests)")
        else:
            # Nothing was spent (e.g. every test failed before incurring any
            # cost), so there is no per-test cost to extrapolate from and the
            # division would raise ZeroDivisionError.
            print(f"💰 Budget Status: ✅ ${remaining_budget:.4f} remaining")
    else:
        print(f"💰 Budget Status: ⚠️  Over budget by ${total_cost - monthly_budget:.4f}")

    # Success criteria
    if pass_rate >= 80 and total_cost <= 0.05:  # 80% success, reasonable cost
        print("\n🚀 INTEGRATION SUCCESS! System ready for GAIA benchmark!")
        return True
    elif pass_rate >= 80:
        print("\n✅ FUNCTIONALITY SUCCESS! (Higher cost than ideal)")
        return True
    else:
        print("\n⚠️  INTEGRATION ISSUES! Check individual test failures")
        return False

if __name__ == "__main__":
    # Propagate the outcome to the shell: exit status 0 on success, 1 on failure.
    exit_status = 0 if test_complete_pipeline() else 1
    sys.exit(exit_status)