"""
Integration tests for Chain of Thought system
Comprehensive testing of all CoT components working together
"""
import pytest
import asyncio
import time
from typing import Dict, List, Any
from unittest.mock import Mock, patch

# Import the CoT system components
from src.core.optimized_chain_of_thought import (
    OptimizedChainOfThought,
    ComplexityAnalyzer,
    TemplateLibrary,
    ReasoningCache,
    MultiPathReasoning,
    MetacognitiveLayer,
    ReasoningPath,
    ReasoningStep,
    ReasoningType
)

# Import existing tools for integration testing
from src.agents.advanced_hybrid_architecture import AdvancedHybridAgent
from src.utils.semantic_search_tool import semantic_search_tool
from src.utils.python_interpreter import python_interpreter
class TestCoTIntegration:
    """Integration tests for the Chain of Thought system.

    Exercises the full pipeline end to end: component wiring, reasoning,
    caching, complexity analysis, template selection, multi-path reasoning,
    metacognition, error handling, metrics, concurrency, and cache eviction.
    """

    # All async tests in this class run under pytest-asyncio.
    pytestmark = pytest.mark.asyncio

    @pytest.fixture
    def cot_system(self):
        """A small, fast CoT system used by most tests.

        NOTE(review): this was a bare ``async def`` method — without the
        fixture decorator pytest cannot inject it and every test requesting
        it fails with "fixture not found". Construction does not await
        anything, so a synchronous fixture is sufficient.
        """
        return OptimizedChainOfThought(
            "test_cot",
            config={
                'max_paths': 3,
                'cache_size': 100,
                'cache_ttl': 1,
                'parallel_threshold': 0.5,
                'confidence_threshold': 0.7,
                'complexity_depth_multiplier': 10
            }
        )

    @pytest.fixture
    def complex_cot_system(self):
        """A larger CoT system (more paths, lower parallel threshold) for
        advanced multi-path testing."""
        return OptimizedChainOfThought(
            "complex_test_cot",
            config={
                'max_paths': 5,
                'cache_size': 500,
                'cache_ttl': 24,
                'parallel_threshold': 0.3,
                'confidence_threshold': 0.8,
                'complexity_depth_multiplier': 15
            }
        )

    async def test_component_initialization(self, cot_system):
        """All sub-components initialize and have the expected types."""
        assert cot_system.complexity_analyzer is not None
        assert cot_system.template_library is not None
        assert cot_system.reasoning_cache is not None
        assert cot_system.multi_path_engine is not None
        assert cot_system.metacognitive_layer is not None
        # Verify component types
        assert isinstance(cot_system.complexity_analyzer, ComplexityAnalyzer)
        assert isinstance(cot_system.template_library, TemplateLibrary)
        assert isinstance(cot_system.reasoning_cache, ReasoningCache)
        assert isinstance(cot_system.multi_path_engine, MultiPathReasoning)
        assert isinstance(cot_system.metacognitive_layer, MetacognitiveLayer)

    async def test_end_to_end_reasoning(self, cot_system):
        """Complete reasoning flow returns a well-formed ReasoningPath."""
        query = "Explain the concept of recursion in programming"
        result = await cot_system.reason(query)

        assert result is not None
        assert isinstance(result, ReasoningPath)
        assert result.total_confidence > 0
        assert len(result.steps) > 0
        assert result.template_used in cot_system.template_library.templates
        assert result.execution_time > 0
        # Every step must be well-formed
        for step in result.steps:
            assert isinstance(step, ReasoningStep)
            assert step.step_id > 0
            assert 0 <= step.confidence <= 1
            assert len(step.thought) > 0

    async def test_cache_integration(self, cot_system):
        """The second identical query is served (at least partly) from cache."""
        query = "What is machine learning?"

        # First call - should miss cache
        result1 = await cot_system.reason(query)
        metrics1 = cot_system.performance_metrics
        cache_misses_1 = metrics1['cache_misses']

        # Second call - should hit cache
        result2 = await cot_system.reason(query)
        metrics2 = cot_system.performance_metrics
        cache_hits_2 = metrics2['cache_hits']

        assert cache_hits_2 > 0
        # NOTE(review): timing comparison can be flaky on loaded machines —
        # a cached hit is expected, but not guaranteed, to be faster.
        assert result2.execution_time < result1.execution_time

        # Verify cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()
        assert cache_stats['hits'] > 0
        assert cache_stats['misses'] > 0

    async def test_complexity_analysis_integration(self, cot_system):
        """Complexity scores order simple vs. complex queries correctly."""
        simple_query = "What is 2+2?"
        complex_query = (
            "Analyze the time complexity of quicksort algorithm and compare "
            "it with merge sort in terms of space complexity and stability"
        )

        # Analyze complexity
        simple_complexity, simple_features = cot_system.complexity_analyzer.analyze(simple_query)
        complex_complexity, complex_features = cot_system.complexity_analyzer.analyze(complex_query)

        assert simple_complexity < complex_complexity
        assert simple_features['length'] < complex_features['length']
        assert simple_features['vocabulary_complexity'] < complex_features['vocabulary_complexity']

        # Test that complexity affects reasoning depth
        simple_result = await cot_system.reason(simple_query)
        complex_result = await cot_system.reason(complex_query)

        # Complex queries should generally have more steps
        assert len(complex_result.steps) >= len(simple_result.steps)

    async def test_template_selection_integration(self, cot_system):
        """Different query styles select different, applicable templates."""
        mathematical_query = "Calculate the derivative of x^2 + 3x + 1"
        analytical_query = "Compare the benefits and drawbacks of cloud computing"

        # Get complexity analysis
        math_complexity, math_features = cot_system.complexity_analyzer.analyze(mathematical_query)
        analysis_complexity, analysis_features = cot_system.complexity_analyzer.analyze(analytical_query)

        # Test template selection
        math_template = cot_system.template_library.select_template(mathematical_query, math_features)
        analysis_template = cot_system.template_library.select_template(analytical_query, analysis_features)

        assert math_template is not None
        assert analysis_template is not None
        assert math_template.name != analysis_template.name

        # Test template applicability
        math_applicability = math_template.is_applicable(mathematical_query, math_features)
        analysis_applicability = analysis_template.is_applicable(analytical_query, analysis_features)

        assert math_applicability > 0
        assert analysis_applicability > 0

    async def test_multi_path_reasoning_integration(self, complex_cot_system):
        """A complex query triggers multiple reasoning approaches."""
        complex_query = "Analyze the impact of artificial intelligence on job markets"

        # This should trigger multi-path reasoning due to complexity
        result = await complex_cot_system.reason(complex_query)

        assert result is not None
        assert len(result.steps) > 0
        # Verify that multiple reasoning types were used
        reasoning_types = [step.reasoning_type for step in result.steps]
        unique_types = set(reasoning_types)
        # Should have used multiple reasoning approaches
        assert len(unique_types) > 1

    async def test_metacognitive_layer_integration(self, cot_system):
        """Metacognitive reflection keeps confidence in a sane range."""
        query = "Explain the concept of object-oriented programming"

        # Enable metacognitive reflection
        cot_system.config['enable_metacognition'] = True
        result = await cot_system.reason(query)

        assert result is not None
        assert result.total_confidence > 0
        # Metacognitive layer should improve confidence
        # (This is a basic test - in practice, metacognition should enhance reasoning)
        assert result.total_confidence >= 0.5

    async def test_error_handling_integration(self, cot_system):
        """Empty queries are rejected; very long queries are tolerated."""
        # Test with invalid query
        invalid_query = ""
        with pytest.raises(ValueError):
            await cot_system.reason(invalid_query)

        # Test with very long query - should handle gracefully
        long_query = "x" * 10000
        result = await cot_system.reason(long_query)
        assert result is not None

    async def test_performance_metrics_integration(self, cot_system):
        """Performance metrics accumulate across queries."""
        queries = [
            "What is Python?",
            "Explain machine learning",
            "How does recursion work?"
        ]

        # Run multiple queries
        for query in queries:
            await cot_system.reason(query)

        # Check performance metrics
        metrics = cot_system.performance_metrics
        assert metrics['total_queries'] >= len(queries)
        assert metrics['average_execution_time'] > 0
        assert metrics['average_confidence'] > 0
        assert 'cache_hits' in metrics
        assert 'cache_misses' in metrics

    async def test_concurrent_reasoning(self, cot_system):
        """Several queries can be reasoned about concurrently."""
        queries = [
            "What is artificial intelligence?",
            "Explain blockchain technology",
            "How do neural networks work?",
            "What is quantum computing?",
            "Explain the concept of APIs"
        ]

        # Run queries concurrently
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)

        # Verify all results
        for result in results:
            assert result is not None
            assert result.total_confidence > 0
            assert len(result.steps) > 0

    async def test_memory_management(self, cot_system):
        """Overfilling the cache triggers eviction and respects the size cap."""
        # Fill cache with more queries than the configured cache size (100)
        for i in range(150):
            query = f"Test query number {i}"
            await cot_system.reason(query)

        # Check cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()
        # Should have evicted some entries
        assert cache_stats['evictions'] > 0
        assert cache_stats['size'] <= cot_system.config['cache_size']
class TestCoTWithTools:
    """Test CoT integration with existing tools (semantic search, Python
    interpreter) through the hybrid agent."""

    # All tests in this class are async and run under pytest-asyncio.
    pytestmark = pytest.mark.asyncio

    @pytest.fixture
    def hybrid_agent_with_cot(self):
        """Hybrid agent wired with CoT plus the semantic-search and
        Python-interpreter tools.

        NOTE(review): this was a bare ``async def`` method — the fixture
        decorator is required for pytest to inject it into the tests below;
        construction is synchronous.
        """
        tools = [semantic_search_tool, python_interpreter]
        return AdvancedHybridAgent(
            "test_agent",
            config={
                'cot': {
                    'max_paths': 3,
                    'cache_size': 500,
                    'enable_metacognition': True
                }
            },
            tools=tools
        )

    async def test_cot_with_hybrid_agent(self, hybrid_agent_with_cot):
        """A reasoning-heavy query goes through CoT and records history."""
        query = "Analyze the time complexity of quicksort algorithm"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        assert 'response' in result or 'answer' in result

        # Check reasoning history (attribute presence is implementation-defined)
        if hasattr(hybrid_agent_with_cot, 'reasoning_history'):
            assert len(hybrid_agent_with_cot.reasoning_history) > 0
            latest_entry = hybrid_agent_with_cot.reasoning_history[-1]
            assert 'mode' in latest_entry
            assert latest_entry['mode'] in ['cot', 'hybrid', 'tool']

    async def test_cot_with_semantic_search(self, hybrid_agent_with_cot):
        """A knowledge query should engage semantic search alongside CoT."""
        query = "What are the latest developments in quantum computing?"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        # Verify that tools were used, if the agent tracks tool usage
        if hasattr(hybrid_agent_with_cot, 'tool_usage_history'):
            assert len(hybrid_agent_with_cot.tool_usage_history) > 0

    async def test_cot_with_python_interpreter(self, hybrid_agent_with_cot):
        """A coding query should route through the Python interpreter tool."""
        query = "Write a Python function to calculate fibonacci numbers and test it"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        # Should have used the Python interpreter, if usage is tracked
        if hasattr(hybrid_agent_with_cot, 'tool_usage_history'):
            python_tool_used = any(
                'python_interpreter' in str(tool)
                for tool in hybrid_agent_with_cot.tool_usage_history
            )
            assert python_tool_used
class TestCoTCompatibility:
    """Test compatibility with different configurations and versions."""

    def test_dependency_versions(self):
        """Report installed dependency versions against minimum requirements.

        NOTE(review): this test only prints and never asserts — it is a
        diagnostic, not a gate. Tighten with version comparisons if strict
        enforcement is desired.
        """
        import importlib

        required_versions = {
            'numpy': '1.21.0',
            'asyncio': '3.4.3',
        }

        for package, min_version in required_versions.items():
            try:
                module = importlib.import_module(package)
                # Not all modules expose __version__ (e.g. asyncio does not)
                version = getattr(module, '__version__', 'unknown')
                print(f"{package}: {version} (required: >={min_version})")
            except ImportError:
                print(f"{package}: NOT INSTALLED (required: >={min_version})")

    @pytest.mark.asyncio
    async def test_reasoning_path_contract(self):
        """Ensure ReasoningPath maintains its expected public structure."""
        # Test that existing code expecting old structure still works
        path = ReasoningPath(
            path_id="test",
            query="test query",
            steps=[],
            total_confidence=0.8
        )

        # These attributes should exist for backward compatibility
        assert hasattr(path, 'path_id')
        assert hasattr(path, 'query')
        assert hasattr(path, 'steps')
        assert hasattr(path, 'total_confidence')
        assert hasattr(path, 'execution_time')
        assert hasattr(path, 'template_used')
        assert hasattr(path, 'complexity_score')

    @pytest.mark.asyncio
    async def test_configuration_compatibility(self):
        """The system initializes and reasons under varied configurations."""
        configs = [
            {'max_paths': 1, 'cache_size': 50},
            {'max_paths': 5, 'cache_size': 1000},
            {'parallel_threshold': 0.1, 'confidence_threshold': 0.9},
            {'parallel_threshold': 0.9, 'confidence_threshold': 0.5}
        ]

        for config in configs:
            cot_system = OptimizedChainOfThought("compat_test", config)
            # Should initialize without errors
            assert cot_system is not None
            # Should be able to reason
            result = await cot_system.reason("Test query")
            assert result is not None
# Performance and stress testing
class TestCoTPerformance:
    """Performance and stress testing for the CoT system."""

    # All tests in this class are async and run under pytest-asyncio.
    pytestmark = pytest.mark.asyncio

    async def test_large_query_handling(self):
        """A very long query completes within a generous time budget."""
        cot_system = OptimizedChainOfThought("perf_test", {'max_paths': 3})

        large_query = (
            "Explain in detail the complete process of how machine learning "
            "algorithms work, including data preprocessing, feature "
            "engineering, model selection, training, validation, testing, "
            "and deployment, with specific examples of different types of "
            "algorithms like supervised learning, unsupervised learning, "
            "and reinforcement learning, and discuss the challenges and "
            "best practices in each step"
        )

        start_time = time.time()
        result = await cot_system.reason(large_query)
        end_time = time.time()

        assert result is not None
        assert result.total_confidence > 0
        assert end_time - start_time < 30  # Should complete within 30 seconds

    async def test_concurrent_load(self):
        """Twenty concurrent queries all succeed within the time budget."""
        cot_system = OptimizedChainOfThought("load_test", {'max_paths': 2})

        # Create many concurrent queries
        queries = [f"Query {i}: Explain concept {i}" for i in range(20)]

        start_time = time.time()
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)
        end_time = time.time()

        # All should complete successfully
        assert len(results) == len(queries)
        for result in results:
            assert result is not None
            assert result.total_confidence > 0

        # Should complete in reasonable time
        assert end_time - start_time < 60  # Within 60 seconds
if __name__ == "__main__":
    # Allow running this module directly: execute the integration tests verbosely.
    pytest.main([__file__, "-v"])