RobotPai

Build error

File size: 17,174 Bytes

8a682b5

"""
Integration tests for Chain of Thought system
Comprehensive testing of all CoT components working together
"""

import pytest
import asyncio
import time
from typing import Dict, List, Any
from unittest.mock import Mock, patch

# Import the CoT system components
from src.core.optimized_chain_of_thought import (
    OptimizedChainOfThought,
    ComplexityAnalyzer,
    TemplateLibrary,
    ReasoningCache,
    MultiPathReasoning,
    MetacognitiveLayer,
    ReasoningPath,
    ReasoningStep,
    ReasoningType
)

# Import existing tools for integration testing
from src.agents.advanced_hybrid_architecture import AdvancedHybridAgent
from src.utils.semantic_search_tool import semantic_search_tool
from src.utils.python_interpreter import python_interpreter


class TestCoTIntegration:
    """End-to-end checks that the Chain of Thought components cooperate.

    Each test drives an ``OptimizedChainOfThought`` instance supplied by one
    of the two fixtures defined on this class.
    """

    @pytest.fixture
    async def cot_system(self):
        """Provide the baseline CoT system (3 paths, cache_size 100)."""
        settings = {
            'max_paths': 3,
            'cache_size': 100,
            'cache_ttl': 1,
            'parallel_threshold': 0.5,
            'confidence_threshold': 0.7,
            'complexity_depth_multiplier': 10
        }
        return OptimizedChainOfThought("test_cot", config=settings)

    @pytest.fixture
    async def complex_cot_system(self):
        """Provide a larger CoT system (5 paths, cache_size 500)."""
        settings = {
            'max_paths': 5,
            'cache_size': 500,
            'cache_ttl': 24,
            'parallel_threshold': 0.3,
            'confidence_threshold': 0.8,
            'complexity_depth_multiplier': 15
        }
        return OptimizedChainOfThought("complex_test_cot", config=settings)

    async def test_component_initialization(self, cot_system):
        """Every sub-component must be present and of the expected class."""
        expected_components = (
            ('complexity_analyzer', ComplexityAnalyzer),
            ('template_library', TemplateLibrary),
            ('reasoning_cache', ReasoningCache),
            ('multi_path_engine', MultiPathReasoning),
            ('metacognitive_layer', MetacognitiveLayer),
        )

        # Presence is checked for all components before any type check.
        for attr_name, _ in expected_components:
            assert getattr(cot_system, attr_name) is not None

        for attr_name, component_cls in expected_components:
            assert isinstance(getattr(cot_system, attr_name), component_cls)

    async def test_end_to_end_reasoning(self, cot_system):
        """A single query must yield a well-formed ``ReasoningPath``."""
        path = await cot_system.reason(
            "Explain the concept of recursion in programming"
        )

        assert path is not None
        assert isinstance(path, ReasoningPath)
        assert path.total_confidence > 0
        assert len(path.steps) > 0
        known_templates = cot_system.template_library.templates
        assert path.template_used in known_templates
        assert path.execution_time > 0

        # Each individual step must be structurally valid.
        for current_step in path.steps:
            assert isinstance(current_step, ReasoningStep)
            assert current_step.step_id > 0
            assert 0 <= current_step.confidence <= 1
            assert len(current_step.thought) > 0

    async def test_cache_integration(self, cot_system):
        """Repeating a query must register a cache hit and run faster."""
        question = "What is machine learning?"

        # First pass: nothing cached yet. The miss counter is read but not
        # asserted on; the lookup is kept so a missing key still raises.
        first_path = await cot_system.reason(question)
        misses_after_first = cot_system.performance_metrics['cache_misses']

        # Second pass: the identical query is expected to come from cache.
        second_path = await cot_system.reason(question)
        hits_after_second = cot_system.performance_metrics['cache_hits']

        assert hits_after_second > 0
        assert second_path.execution_time < first_path.execution_time

        # The cache's own statistics must show both a hit and a miss.
        stats = cot_system.reasoning_cache.get_stats()
        assert stats['hits'] > 0
        assert stats['misses'] > 0

    async def test_complexity_analysis_integration(self, cot_system):
        """A harder query must score higher and not reason in fewer steps."""
        analyzer = cot_system.complexity_analyzer
        easy_query = "What is 2+2?"
        hard_query = "Analyze the time complexity of quicksort algorithm and compare it with merge sort in terms of space complexity and stability"

        easy_score, easy_features = analyzer.analyze(easy_query)
        hard_score, hard_features = analyzer.analyze(hard_query)

        assert easy_score < hard_score
        for feature_name in ('length', 'vocabulary_complexity'):
            assert easy_features[feature_name] < hard_features[feature_name]

        # The complexity score should carry through to reasoning depth.
        easy_path = await cot_system.reason(easy_query)
        hard_path = await cot_system.reason(hard_query)

        assert len(hard_path.steps) >= len(easy_path.steps)

    async def test_template_selection_integration(self, cot_system):
        """Different kinds of query must select different templates."""
        analyzer = cot_system.complexity_analyzer
        library = cot_system.template_library
        math_query = "Calculate the derivative of x^2 + 3x + 1"
        compare_query = "Compare the benefits and drawbacks of cloud computing"

        # Only the feature dictionaries are needed for template selection.
        _, math_features = analyzer.analyze(math_query)
        _, compare_features = analyzer.analyze(compare_query)

        math_template = library.select_template(math_query, math_features)
        compare_template = library.select_template(compare_query, compare_features)

        assert math_template is not None
        assert compare_template is not None
        assert math_template.name != compare_template.name

        # Each chosen template must consider itself applicable to its query.
        math_score = math_template.is_applicable(math_query, math_features)
        compare_score = compare_template.is_applicable(compare_query, compare_features)

        assert math_score > 0
        assert compare_score > 0

    async def test_multi_path_reasoning_integration(self, complex_cot_system):
        """A complex query must produce steps of more than one reasoning type."""
        outcome = await complex_cot_system.reason(
            "Analyze the impact of artificial intelligence on job markets"
        )

        assert outcome is not None
        assert len(outcome.steps) > 0

        # Collapse the per-step reasoning types to the distinct ones used.
        distinct_types = {
            current_step.reasoning_type for current_step in outcome.steps
        }
        assert len(distinct_types) > 1

    async def test_metacognitive_layer_integration(self, cot_system):
        """With metacognition switched on, confidence must reach at least 0.5."""
        cot_system.config['enable_metacognition'] = True

        outcome = await cot_system.reason(
            "Explain the concept of object-oriented programming"
        )

        assert outcome is not None
        assert outcome.total_confidence > 0

        # Basic floor only; this does not compare against a run without
        # metacognition.
        assert outcome.total_confidence >= 0.5

    async def test_error_handling_integration(self, cot_system):
        """An empty query raises ``ValueError``; a huge one still yields a result."""
        with pytest.raises(ValueError):
            await cot_system.reason("")

        # A 10,000-character query must be handled without raising.
        oversized_query = "x" * 10000
        outcome = await cot_system.reason(oversized_query)
        assert outcome is not None

    async def test_performance_metrics_integration(self, cot_system):
        """Metrics must reflect the queries that were run."""
        prompts = (
            "What is Python?",
            "Explain machine learning",
            "How does recursion work?",
        )

        for prompt in prompts:
            await cot_system.reason(prompt)

        metrics = cot_system.performance_metrics

        assert metrics['total_queries'] >= len(prompts)
        assert metrics['average_execution_time'] > 0
        assert metrics['average_confidence'] > 0
        for counter_name in ('cache_hits', 'cache_misses'):
            assert counter_name in metrics

    async def test_concurrent_reasoning(self, cot_system):
        """Five queries gathered concurrently must all produce valid paths."""
        prompts = (
            "What is artificial intelligence?",
            "Explain blockchain technology",
            "How do neural networks work?",
            "What is quantum computing?",
            "Explain the concept of APIs",
        )

        # All coroutines are created up front and awaited together.
        outcomes = await asyncio.gather(
            *(cot_system.reason(prompt) for prompt in prompts)
        )

        for outcome in outcomes:
            assert outcome is not None
            assert outcome.total_confidence > 0
            assert len(outcome.steps) > 0

    async def test_memory_management(self, cot_system):
        """Issuing more distinct queries than ``cache_size`` must evict entries."""
        # 150 distinct queries against the fixture's cache_size of 100.
        for index in range(150):
            await cot_system.reason(f"Test query number {index}")

        stats = cot_system.reasoning_cache.get_stats()

        assert stats['evictions'] > 0
        assert stats['size'] <= cot_system.config['cache_size']


class TestCoTWithTools:
    """Checks for the CoT system when used through ``AdvancedHybridAgent``.

    The agent is built with the semantic search and Python interpreter tools.
    History-based assertions run only when the agent exposes the relevant
    history attribute.
    """

    @pytest.fixture
    async def hybrid_agent_with_cot(self):
        """Provide a hybrid agent configured with CoT settings and both tools."""
        return AdvancedHybridAgent(
            "test_agent",
            config={
                'cot': {
                    'max_paths': 3,
                    'cache_size': 500,
                    'enable_metacognition': True
                }
            },
            tools=[semantic_search_tool, python_interpreter]
        )

    async def test_cot_with_hybrid_agent(self, hybrid_agent_with_cot):
        """The agent must answer and, if it keeps history, record the mode used."""
        agent = hybrid_agent_with_cot
        outcome = await agent.process_query(
            "Analyze the time complexity of quicksort algorithm"
        )

        assert outcome is not None
        assert any(key in outcome for key in ('response', 'answer'))

        # Reasoning history is optional on the agent; nothing more to check
        # when it is absent.
        if not hasattr(agent, 'reasoning_history'):
            return

        history = agent.reasoning_history
        assert len(history) > 0
        newest_entry = history[-1]
        assert 'mode' in newest_entry
        assert newest_entry['mode'] in ('cot', 'hybrid', 'tool')

    async def test_cot_with_semantic_search(self, hybrid_agent_with_cot):
        """A current-events style query must yield a result and record tool use."""
        agent = hybrid_agent_with_cot

        # Intended to exercise semantic search together with CoT reasoning.
        outcome = await agent.process_query(
            "What are the latest developments in quantum computing?"
        )

        assert outcome is not None

        # Tool usage history is optional on the agent.
        if not hasattr(agent, 'tool_usage_history'):
            return
        assert len(agent.tool_usage_history) > 0

    async def test_cot_with_python_interpreter(self, hybrid_agent_with_cot):
        """A coding query must yield a result and record the Python interpreter."""
        agent = hybrid_agent_with_cot
        outcome = await agent.process_query(
            "Write a Python function to calculate fibonacci numbers and test it"
        )

        assert outcome is not None

        # Tool usage history is optional on the agent.
        if not hasattr(agent, 'tool_usage_history'):
            return

        # An entry counts when its string form mentions the tool's name.
        interpreter_seen = False
        for usage_entry in agent.tool_usage_history:
            if 'python_interpreter' in str(usage_entry):
                interpreter_seen = True
                break
        assert interpreter_seen


class TestCoTCompatibility:
    """Test compatibility with different configurations and versions"""

    @staticmethod
    def _version_tuple(version):
        """Parse the leading numeric components of a dotted version string.

        Args:
            version: Version text such as ``"1.21.0"`` or ``"2.0.0rc1"``.

        Returns:
            A tuple of ints (``(1, 21, 0)``, ``(2, 0, 0)``), or ``None`` when
            the text does not start with a number (for example ``"unknown"``).
        """
        components = []
        for piece in str(version).split('.'):
            digits = ''
            for char in piece:
                if not '0' <= char <= '9':
                    break
                digits += char
            if not digits:
                break
            components.append(int(digits))
            # A suffix such as "rc1" or "+local" ends the numeric part.
            if digits != piece:
                break
        return tuple(components) or None

    @classmethod
    def _check_min_version(cls, package, min_version):
        """Report a package's version and assert it meets ``min_version``.

        The comparison is made only when both versions are numerically
        parseable. A package that cannot be imported, or one without a
        parseable ``__version__``, is reported but never fails the check.

        Args:
            package: Importable module name.
            min_version: Minimum acceptable dotted version string.

        Returns:
            The module's ``__version__`` (``'unknown'`` when it has none), or
            ``None`` when the package is not installed.

        Raises:
            AssertionError: If the installed version is below ``min_version``.
        """
        import importlib

        try:
            module = importlib.import_module(package)
        except ImportError:
            print(f"{package}: NOT INSTALLED (required: >={min_version})")
            return None

        version = getattr(module, '__version__', 'unknown')
        print(f"{package}: {version} (required: >={min_version})")

        installed = cls._version_tuple(version)
        required = cls._version_tuple(min_version)
        if installed is not None and required is not None:
            # Pad to equal length so that "1.21" compares equal to "1.21.0".
            width = max(len(installed), len(required))
            installed += (0,) * (width - len(installed))
            required += (0,) * (width - len(required))
            assert installed >= required, (
                f"{package} {version} is older than required {min_version}"
            )
        return version

    def test_dependency_versions(self):
        """Ensure all dependencies are compatible

        Each listed package is imported and its version reported. The test
        fails when an installed package reports a version below the minimum.
        Packages that are missing or expose no usable ``__version__`` are
        only reported.
        """
        required_versions = {
            'numpy': '1.21.0',
            'asyncio': '3.4.3',
        }

        for package, min_version in required_versions.items():
            self._check_min_version(package, min_version)

    async def test_reasoning_path_contract(self):
        """Ensure ReasoningPath maintains expected structure"""
        # Construct with only the four arguments passed here; the remaining
        # attributes checked below are not supplied by this call.
        path = ReasoningPath(
            path_id="test",
            query="test query",
            steps=[],
            total_confidence=0.8
        )

        # These attributes should exist for backward compatibility
        expected_attributes = (
            'path_id',
            'query',
            'steps',
            'total_confidence',
            'execution_time',
            'template_used',
            'complexity_score',
        )
        for attribute in expected_attributes:
            assert hasattr(path, attribute), f"ReasoningPath lacks '{attribute}'"

    async def test_configuration_compatibility(self):
        """Test compatibility with different configurations"""
        # Partial configs: each one sets only some keys.
        configs = [
            {'max_paths': 1, 'cache_size': 50},
            {'max_paths': 5, 'cache_size': 1000},
            {'parallel_threshold': 0.1, 'confidence_threshold': 0.9},
            {'parallel_threshold': 0.9, 'confidence_threshold': 0.5}
        ]

        for config in configs:
            cot_system = OptimizedChainOfThought("compat_test", config)

            # Should initialize without errors
            assert cot_system is not None

            # Should be able to reason
            result = await cot_system.reason("Test query")
            assert result is not None


# Performance and stress testing
class TestCoTPerformance:
    """Performance and stress testing for CoT system

    Durations are measured with ``time.perf_counter()``, a monotonic clock,
    so the time budgets are not affected by system clock adjustments during
    the run.
    """

    async def test_large_query_handling(self):
        """Test handling of large queries

        A single long query must produce a result with positive confidence
        in under 30 seconds.
        """
        cot_system = OptimizedChainOfThought("perf_test", {'max_paths': 3})

        # Large query
        large_query = "Explain in detail the complete process of how machine learning algorithms work, including data preprocessing, feature engineering, model selection, training, validation, testing, and deployment, with specific examples of different types of algorithms like supervised learning, unsupervised learning, and reinforcement learning, and discuss the challenges and best practices in each step"

        started = time.perf_counter()
        result = await cot_system.reason(large_query)
        elapsed = time.perf_counter() - started

        assert result is not None
        assert result.total_confidence > 0
        # Should complete within 30 seconds
        assert elapsed < 30, f"large query took {elapsed:.2f}s (limit 30s)"

    async def test_concurrent_load(self):
        """Test concurrent load handling

        Twenty queries gathered concurrently must all produce a result with
        positive confidence, in under 60 seconds overall.
        """
        cot_system = OptimizedChainOfThought("load_test", {'max_paths': 2})

        # Create many concurrent queries
        queries = [f"Query {i}: Explain concept {i}" for i in range(20)]

        started = time.perf_counter()
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)
        elapsed = time.perf_counter() - started

        # All should complete successfully
        assert len(results) == len(queries)
        for result in results:
            assert result is not None
            assert result.total_confidence > 0

        # Should complete in reasonable time (within 60 seconds)
        assert elapsed < 60, f"concurrent load took {elapsed:.2f}s (limit 60s)"


if __name__ == "__main__":
    # Run integration tests when this file is executed directly. The exit
    # code returned by pytest.main() is passed on so that a failing run is
    # reported to the calling shell instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))