File size: 7,595 Bytes
225a75e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python3
"""
Integration test for GAIA Agents
Tests Web Researcher, File Processor, and Reasoning agents
"""

import os
import sys
import time
import tempfile
from pathlib import Path

# Put this file's directory on sys.path so the local `agents` and `models` packages import
sys.path.insert(0, str(Path(__file__).parent))

from agents.state import GAIAAgentState, QuestionType
from agents.web_researcher import WebResearchAgent
from agents.file_processor_agent import FileProcessorAgent
from agents.reasoning_agent import ReasoningAgent
from models.qwen_client import QwenClient

# Pass rate (in percent) at or above which the overall run counts as a success;
# the same value separates "warning" from "failing" in the per-agent summary.
_PASS_RATE_THRESHOLD = 80


def _latest_agent_result(result_state):
    """Return the last entry of ``result_state.agent_results``, or None if empty.

    ``agent_results`` is treated as a mapping (``.values()`` is called on it);
    "last" means last in the mapping's iteration order.
    """
    agent_results = result_state.agent_results
    if not agent_results:
        return None
    return list(agent_results.values())[-1]


def _run_agent_cases(agent_label, agent, test_cases):
    """Run every test case through ``agent.process`` and report each outcome.

    Args:
        agent_label: Name used to group the outcomes in the summary.
        agent: Object exposing ``process(state)``.
        test_cases: Dicts with "question", "question_type" and "complexity"
            keys, plus an optional "file_path" key.

    Returns:
        A list of ``(agent_label, test_name, success, processing_time)``
        tuples, one per test case, in input order.
    """
    outcomes = []
    for i, test_case in enumerate(test_cases, 1):
        state = GAIAAgentState()
        state.question = test_case["question"]
        if "file_path" in test_case:
            state.file_path = test_case["file_path"]
        state.question_type = test_case["question_type"]
        state.complexity_assessment = test_case["complexity"]

        try:
            result_state = agent.process(state)
            # Look the latest result up once instead of rebuilding the list
            # of values for both the success flag and the timing.
            latest = _latest_agent_result(result_state)
            success = latest is not None and latest.success
            elapsed = latest.processing_time if latest is not None else 0
        except Exception as e:
            # A crashing agent must not abort the remaining cases; record the
            # failure and carry on.
            outcomes.append((agent_label, f'Test {i}', False, 0))
            print(f"   Test {i}: ❌ FAIL ({e})")
        else:
            outcomes.append((agent_label, f'Test {i}', success, elapsed))
            status = "✅ PASS" if success else "❌ FAIL"
            print(f"   Test {i}: {status}")
    return outcomes


def _report_results(results, total_time):
    """Print the per-agent and overall summary and return the overall verdict.

    Args:
        results: ``(agent, test_name, success, processing_time)`` tuples.
        total_time: Elapsed seconds for the whole run.

    Returns:
        True when the overall pass rate reaches ``_PASS_RATE_THRESHOLD``,
        False otherwise (including when ``results`` is empty).
    """
    total_tests = len(results)
    passed_tests = sum(1 for _, _, success, _ in results if success)

    print("\n" + "=" * 50)
    print("📊 AGENT TEST RESULTS")
    print("=" * 50)

    # Aggregate pass counts and processing time per agent, keeping the order
    # in which agents first appear.
    agents = {}
    for agent, _test, success, exec_time in results:
        stats = agents.setdefault(agent, {'passed': 0, 'total': 0, 'time': 0})
        stats['total'] += 1
        stats['time'] += exec_time
        if success:
            stats['passed'] += 1

    for agent, stats in agents.items():
        pass_rate = (stats['passed'] / stats['total']) * 100
        avg_time = stats['time'] / stats['total']
        if pass_rate == 100:
            status = "✅"
        elif pass_rate >= _PASS_RATE_THRESHOLD:
            status = "⚠️"
        else:
            status = "❌"
        print(f"{status} {agent:15}: {stats['passed']}/{stats['total']} ({pass_rate:5.1f}%) - Avg: {avg_time:.3f}s")

    # Guard the division so an empty run reports 0% instead of raising.
    overall_pass_rate = (passed_tests / total_tests) * 100 if total_tests else 0.0
    print(f"\n🎯 OVERALL: {passed_tests}/{total_tests} tests passed ({overall_pass_rate:.1f}%)")
    print(f"⏱️  TOTAL TIME: {total_time:.2f} seconds")

    if overall_pass_rate >= _PASS_RATE_THRESHOLD:
        print("🚀 AGENTS READY! Multi-agent system is working correctly!")
        return True
    print("⚠️  ISSUES FOUND! Check individual agent failures above")
    return False


def test_agents():
    """Exercise the web research, file processor and reasoning agents.

    Builds one shared LLM client, feeds a fixed set of questions to each
    agent, prints a per-test line plus a summary, and reports whether the
    overall pass rate reached the threshold.

    Returns:
        True when at least ``_PASS_RATE_THRESHOLD`` percent of the tests
        passed; False otherwise, or when the LLM client cannot be created.
    """

    print("🤖 GAIA Agents Integration Test")
    print("=" * 50)

    # Initialize LLM client; nothing else can run without it.
    try:
        llm_client = QwenClient()
    except Exception as e:
        print(f"❌ Failed to initialize LLM client: {e}")
        return False

    results = []
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during the run.
    start_time = time.perf_counter()

    # Test 1: Web Research Agent
    print("\n🌐 Testing Web Research Agent...")
    web_agent = WebResearchAgent(llm_client)
    web_test_cases = [
        {
            "question": "What is the capital of France?",
            "question_type": QuestionType.WIKIPEDIA,
            "complexity": "simple",
        },
        {
            "question": "Find information about Python programming language",
            "question_type": QuestionType.WEB_RESEARCH,
            "complexity": "medium",
        },
    ]
    results.extend(_run_agent_cases('Web Research', web_agent, web_test_cases))

    # Test 2: File Processor Agent
    print("\n📁 Testing File Processor Agent...")
    file_agent = FileProcessorAgent(llm_client)

    # The fixture files only need to live for the duration of these cases.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Create CSV test file (explicit encoding keeps it platform-independent)
        csv_path = os.path.join(temp_dir, "test.csv")
        with open(csv_path, 'w', encoding='utf-8') as f:
            f.write("name,age,salary\nAlice,25,50000\nBob,30,60000\nCharlie,35,70000")

        # Create Python test file
        py_path = os.path.join(temp_dir, "test.py")
        with open(py_path, 'w', encoding='utf-8') as f:
            f.write("def calculate_sum(a, b):\n    return a + b\n\nresult = calculate_sum(5, 3)")

        file_test_cases = [
            {
                "question": "What is the average salary in this data?",
                "file_path": csv_path,
                "question_type": QuestionType.FILE_PROCESSING,
                "complexity": "medium",
            },
            {
                "question": "What does this Python code do?",
                "file_path": py_path,
                "question_type": QuestionType.FILE_PROCESSING,
                "complexity": "simple",
            },
        ]
        results.extend(_run_agent_cases('File Processor', file_agent, file_test_cases))

    # Test 3: Reasoning Agent
    print("\n🧠 Testing Reasoning Agent...")
    reasoning_agent = ReasoningAgent(llm_client)
    reasoning_test_cases = [
        {
            "question": "Calculate 15% of 200",
            "question_type": QuestionType.REASONING,
            "complexity": "simple",
        },
        {
            "question": "Convert 100 celsius to fahrenheit",
            "question_type": QuestionType.REASONING,
            "complexity": "simple",
        },
        {
            "question": "What is the average of 10, 15, 20, 25, 30?",
            "question_type": QuestionType.REASONING,
            "complexity": "medium",
        },
    ]
    results.extend(_run_agent_cases('Reasoning', reasoning_agent, reasoning_test_cases))

    # Summary
    total_time = time.perf_counter() - start_time
    return _report_results(results, total_time)

if __name__ == "__main__":
    # Map the suite outcome onto a process exit status: 0 on success, 1 otherwise.
    exit_code = 0 if test_agents() else 1
    sys.exit(exit_code)