File size: 17,174 Bytes
8a682b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
"""
Integration tests for Chain of Thought system
Comprehensive testing of all CoT components working together
"""

import pytest
import asyncio
import time
from typing import Dict, List, Any
from unittest.mock import Mock, patch

# Import the CoT system components
from src.core.optimized_chain_of_thought import (
    OptimizedChainOfThought,
    ComplexityAnalyzer,
    TemplateLibrary,
    ReasoningCache,
    MultiPathReasoning,
    MetacognitiveLayer,
    ReasoningPath,
    ReasoningStep,
    ReasoningType
)

# Import existing tools for integration testing
from src.agents.advanced_hybrid_architecture import AdvancedHybridAgent
from src.utils.semantic_search_tool import semantic_search_tool
from src.utils.python_interpreter import python_interpreter


class TestCoTIntegration:
    """Integration tests for Chain of Thought system.

    Every test drives an ``OptimizedChainOfThought`` instance (supplied by
    one of the fixtures below) through its ``reason`` coroutine or through
    one of its component attributes, then checks the result.
    """

    # NOTE(review): the fixtures and tests in this class are coroutines that
    # carry no asyncio marker, so they presumably rely on an async pytest
    # plugin configured elsewhere (e.g. pytest-asyncio in auto mode) --
    # confirm against the project's pytest configuration.

    @pytest.fixture
    async def cot_system(self):
        """Create a configured CoT system for testing"""
        return OptimizedChainOfThought(
            "test_cot",
            config={
                'max_paths': 3,
                'cache_size': 100,
                'cache_ttl': 1,
                'parallel_threshold': 0.5,
                'confidence_threshold': 0.7,
                'complexity_depth_multiplier': 10
            }
        )

    @pytest.fixture
    async def complex_cot_system(self):
        """Create a more complex CoT system for advanced testing"""
        return OptimizedChainOfThought(
            "complex_test_cot",
            config={
                'max_paths': 5,
                'cache_size': 500,
                'cache_ttl': 24,
                'parallel_threshold': 0.3,
                'confidence_threshold': 0.8,
                'complexity_depth_multiplier': 15
            }
        )

    async def test_component_initialization(self, cot_system):
        """Test that all components initialize correctly"""
        # Attribute name -> class the attribute is expected to be an instance of.
        expected_components = {
            'complexity_analyzer': ComplexityAnalyzer,
            'template_library': TemplateLibrary,
            'reasoning_cache': ReasoningCache,
            'multi_path_engine': MultiPathReasoning,
            'metacognitive_layer': MetacognitiveLayer,
        }

        for attribute, expected_type in expected_components.items():
            component = getattr(cot_system, attribute)
            assert component is not None, f"{attribute} was not initialised"
            assert isinstance(component, expected_type), (
                f"{attribute} is not a {expected_type.__name__}"
            )

    async def test_end_to_end_reasoning(self, cot_system):
        """Test complete reasoning flow"""
        query = "Explain the concept of recursion in programming"
        result = await cot_system.reason(query)

        assert result is not None
        assert isinstance(result, ReasoningPath)
        assert result.total_confidence > 0
        assert len(result.steps) > 0
        assert result.template_used in cot_system.template_library.templates
        assert result.execution_time > 0

        # Verify step structure
        for step in result.steps:
            assert isinstance(step, ReasoningStep)
            assert step.step_id > 0
            assert 0 <= step.confidence <= 1
            assert len(step.thought) > 0

    async def test_cache_integration(self, cot_system):
        """Test cache integration with main system"""
        query = "What is machine learning?"

        # First call - should miss cache
        result1 = await cot_system.reason(query)

        # Second call with the identical query - should hit cache
        result2 = await cot_system.reason(query)

        assert cot_system.performance_metrics['cache_hits'] > 0
        assert result2.execution_time < result1.execution_time

        # Verify cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()
        assert cache_stats['hits'] > 0
        assert cache_stats['misses'] > 0

    async def test_complexity_analysis_integration(self, cot_system):
        """Test complexity analysis integration"""
        simple_query = "What is 2+2?"
        complex_query = "Analyze the time complexity of quicksort algorithm and compare it with merge sort in terms of space complexity and stability"

        # Analyze complexity; analyze() returns a (score, features) pair
        analyzer = cot_system.complexity_analyzer
        simple_complexity, simple_features = analyzer.analyze(simple_query)
        complex_complexity, complex_features = analyzer.analyze(complex_query)

        assert simple_complexity < complex_complexity
        assert simple_features['length'] < complex_features['length']
        assert simple_features['vocabulary_complexity'] < complex_features['vocabulary_complexity']

        # Test that complexity affects reasoning depth
        simple_result = await cot_system.reason(simple_query)
        complex_result = await cot_system.reason(complex_query)

        # Complex queries should generally have more steps
        assert len(complex_result.steps) >= len(simple_result.steps)

    async def test_template_selection_integration(self, cot_system):
        """Test template selection integration"""
        mathematical_query = "Calculate the derivative of x^2 + 3x + 1"
        analytical_query = "Compare the benefits and drawbacks of cloud computing"

        # Only the feature dicts are needed here; the scores are discarded.
        analyzer = cot_system.complexity_analyzer
        _, math_features = analyzer.analyze(mathematical_query)
        _, analysis_features = analyzer.analyze(analytical_query)

        # Test template selection
        library = cot_system.template_library
        math_template = library.select_template(mathematical_query, math_features)
        analysis_template = library.select_template(analytical_query, analysis_features)

        assert math_template is not None
        assert analysis_template is not None
        assert math_template.name != analysis_template.name

        # Test template applicability
        math_applicability = math_template.is_applicable(mathematical_query, math_features)
        analysis_applicability = analysis_template.is_applicable(analytical_query, analysis_features)

        assert math_applicability > 0
        assert analysis_applicability > 0

    async def test_multi_path_reasoning_integration(self, complex_cot_system):
        """Test multi-path reasoning integration"""
        complex_query = "Analyze the impact of artificial intelligence on job markets"

        # This is intended to trigger multi-path reasoning due to complexity
        result = await complex_cot_system.reason(complex_query)

        assert result is not None
        assert len(result.steps) > 0

        # Collect the distinct reasoning types that appear across the steps
        unique_types = {step.reasoning_type for step in result.steps}

        # Should have used multiple reasoning approaches
        assert len(unique_types) > 1

    async def test_metacognitive_layer_integration(self, cot_system):
        """Test metacognitive layer integration"""
        query = "Explain the concept of object-oriented programming"

        # Enable metacognitive reflection
        cot_system.config['enable_metacognition'] = True

        result = await cot_system.reason(query)

        assert result is not None
        assert result.total_confidence > 0

        # Metacognitive layer should improve confidence
        # (This is a basic test - in practice, metacognition should enhance reasoning)
        assert result.total_confidence >= 0.5

    async def test_error_handling_integration(self, cot_system):
        """Test error handling integration"""
        # An empty query is expected to be rejected with ValueError
        invalid_query = ""

        with pytest.raises(ValueError):
            await cot_system.reason(invalid_query)

        # Test with very long query
        long_query = "x" * 10000

        # Should handle gracefully
        result = await cot_system.reason(long_query)
        assert result is not None

    async def test_performance_metrics_integration(self, cot_system):
        """Test performance metrics integration"""
        queries = [
            "What is Python?",
            "Explain machine learning",
            "How does recursion work?"
        ]

        # Run multiple queries
        for query in queries:
            await cot_system.reason(query)

        # Check performance metrics
        metrics = cot_system.performance_metrics

        assert metrics['total_queries'] >= len(queries)
        assert metrics['average_execution_time'] > 0
        assert metrics['average_confidence'] > 0
        assert 'cache_hits' in metrics
        assert 'cache_misses' in metrics

    async def test_concurrent_reasoning(self, cot_system):
        """Test concurrent reasoning capabilities"""
        queries = [
            "What is artificial intelligence?",
            "Explain blockchain technology",
            "How do neural networks work?",
            "What is quantum computing?",
            "Explain the concept of APIs"
        ]

        # Run queries concurrently
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)

        # Verify all results
        for result in results:
            assert result is not None
            assert result.total_confidence > 0
            assert len(result.steps) > 0

    async def test_memory_management(self, cot_system):
        """Test memory management and cache eviction"""
        # Issue more distinct queries than the configured cache can hold.
        # The count is derived from the config so the test keeps overflowing
        # the cache if the fixture's cache_size is ever changed.
        cache_size = cot_system.config['cache_size']
        for i in range(cache_size + 50):
            query = f"Test query number {i}"
            await cot_system.reason(query)

        # Check cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()

        # Should have evicted some entries
        assert cache_stats['evictions'] > 0
        assert cache_stats['size'] <= cache_size


class TestCoTWithTools:
    """Checks that CoT reasoning works through the hybrid agent and its tools."""

    @pytest.fixture
    async def hybrid_agent_with_cot(self):
        """Build an ``AdvancedHybridAgent`` with both tools and a CoT config section."""
        cot_settings = {
            'max_paths': 3,
            'cache_size': 500,
            'enable_metacognition': True
        }
        return AdvancedHybridAgent(
            "test_agent",
            config={'cot': cot_settings},
            tools=[semantic_search_tool, python_interpreter]
        )

    async def test_cot_with_hybrid_agent(self, hybrid_agent_with_cot):
        """Process a query through the agent and inspect its reasoning history."""
        agent = hybrid_agent_with_cot
        outcome = await agent.process_query(
            "Analyze the time complexity of quicksort algorithm"
        )

        assert outcome is not None
        assert 'response' in outcome or 'answer' in outcome

        # The history checks only apply when the agent exposes the attribute.
        if not hasattr(agent, 'reasoning_history'):
            return

        assert len(agent.reasoning_history) > 0
        newest = agent.reasoning_history[-1]
        assert 'mode' in newest
        assert newest['mode'] in ['cot', 'hybrid', 'tool']

    async def test_cot_with_semantic_search(self, hybrid_agent_with_cot):
        """Process a search-style query and check that some tool usage was recorded."""
        agent = hybrid_agent_with_cot

        # Intended to exercise semantic search together with CoT reasoning.
        outcome = await agent.process_query(
            "What are the latest developments in quantum computing?"
        )

        assert outcome is not None

        # Tool usage is only checked when the agent tracks it.
        if not hasattr(agent, 'tool_usage_history'):
            return
        assert len(agent.tool_usage_history) > 0

    async def test_cot_with_python_interpreter(self, hybrid_agent_with_cot):
        """Process a coding query and look for the Python interpreter in tool usage."""
        agent = hybrid_agent_with_cot
        outcome = await agent.process_query(
            "Write a Python function to calculate fibonacci numbers and test it"
        )

        assert outcome is not None

        if not hasattr(agent, 'tool_usage_history'):
            return

        # Stop at the first history entry whose string form names the tool.
        python_tool_used = False
        for entry in agent.tool_usage_history:
            if 'python_interpreter' in str(entry):
                python_tool_used = True
                break
        assert python_tool_used


class TestCoTCompatibility:
    """Test compatibility with different configurations and versions"""

    @staticmethod
    def _parse_version(version):
        """Return the leading numeric release of *version* as a 3-int tuple.

        Only the leading dotted-number prefix is considered, so suffixes such
        as ``rc1`` or ``+cpu`` are ignored. Missing components are padded with
        zeros (``'1.21'`` -> ``(1, 21, 0)``) and components beyond the third
        are dropped.

        Returns:
            A ``(major, minor, patch)`` tuple, or ``None`` when *version* does
            not start with a digit (e.g. ``'unknown'``).
        """
        import re

        match = re.match(r'\d+(?:\.\d+)*', str(version))
        if match is None:
            return None
        parts = [int(piece) for piece in match.group().split('.')[:3]]
        parts += [0] * (3 - len(parts))
        return tuple(parts)

    def test_dependency_versions(self):
        """Ensure all dependencies are compatible.

        Each importable package is reported on stdout. A package that exposes
        a parseable ``__version__`` must be at least the required minimum;
        packages that are missing or expose no version are only reported.
        """
        import importlib

        required_versions = {
            'numpy': '1.21.0',
            'asyncio': '3.4.3',
        }

        too_old = []
        for package, min_version in required_versions.items():
            # Keep the try body to the one call that can raise ImportError.
            try:
                module = importlib.import_module(package)
            except ImportError:
                # Best-effort: a missing package is reported, not failed.
                print(f"{package}: NOT INSTALLED (required: >={min_version})")
                continue

            version = getattr(module, '__version__', 'unknown')
            print(f"{package}: {version} (required: >={min_version})")

            # Modules without a usable __version__ yield None and are skipped.
            installed = self._parse_version(version)
            if installed is not None and installed < self._parse_version(min_version):
                too_old.append(f"{package} {version} < {min_version}")

        assert not too_old, "Outdated dependencies: " + ", ".join(too_old)

    async def test_reasoning_path_contract(self):
        """Ensure ReasoningPath maintains expected structure"""
        # Build a path from only these four arguments; the remaining
        # attributes checked below must still be present on the instance.
        path = ReasoningPath(
            path_id="test",
            query="test query",
            steps=[],
            total_confidence=0.8
        )

        # These attributes should exist for backward compatibility
        expected_attributes = (
            'path_id',
            'query',
            'steps',
            'total_confidence',
            'execution_time',
            'template_used',
            'complexity_score',
        )
        for attribute in expected_attributes:
            assert hasattr(path, attribute), f"ReasoningPath lacks {attribute!r}"

    async def test_configuration_compatibility(self):
        """Test compatibility with different configurations"""
        configs = [
            {'max_paths': 1, 'cache_size': 50},
            {'max_paths': 5, 'cache_size': 1000},
            {'parallel_threshold': 0.1, 'confidence_threshold': 0.9},
            {'parallel_threshold': 0.9, 'confidence_threshold': 0.5}
        ]

        for config in configs:
            cot_system = OptimizedChainOfThought("compat_test", config)

            # Should initialize without errors
            assert cot_system is not None

            # Should be able to reason
            result = await cot_system.reason("Test query")
            assert result is not None


# Performance and stress testing
class TestCoTPerformance:
    """Performance and stress testing for CoT system.

    Durations are measured with ``time.perf_counter()``, a monotonic clock,
    so a system clock adjustment during a run cannot distort the elapsed
    time that the limits below are checked against.
    """

    async def test_large_query_handling(self):
        """Test handling of large queries"""
        cot_system = OptimizedChainOfThought("perf_test", {'max_paths': 3})

        # Large query
        large_query = "Explain in detail the complete process of how machine learning algorithms work, including data preprocessing, feature engineering, model selection, training, validation, testing, and deployment, with specific examples of different types of algorithms like supervised learning, unsupervised learning, and reinforcement learning, and discuss the challenges and best practices in each step"

        start_time = time.perf_counter()
        result = await cot_system.reason(large_query)
        elapsed = time.perf_counter() - start_time

        assert result is not None
        assert result.total_confidence > 0
        assert elapsed < 30  # Should complete within 30 seconds

    async def test_concurrent_load(self):
        """Test concurrent load handling"""
        cot_system = OptimizedChainOfThought("load_test", {'max_paths': 2})

        # Create many concurrent queries
        queries = [f"Query {i}: Explain concept {i}" for i in range(20)]

        start_time = time.perf_counter()
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)
        elapsed = time.perf_counter() - start_time

        # All should complete successfully
        assert len(results) == len(queries)
        for result in results:
            assert result is not None
            assert result.total_confidence > 0

        # Should complete in reasonable time
        assert elapsed < 60  # Within 60 seconds


if __name__ == "__main__":
    # Run integration tests. pytest.main() returns the run's exit code;
    # propagate it so running this file as a script exits non-zero on failure.
    raise SystemExit(pytest.main([__file__, "-v"]))