# Scraped page header: Spaces - Build error - File size: 17,174 Bytes - revision 8a682b5
"""
Integration tests for Chain of Thought system
Comprehensive testing of all CoT components working together
"""
import pytest
import asyncio
import time
from typing import Dict, List, Any
from unittest.mock import Mock, patch
# Import the CoT system components
from src.core.optimized_chain_of_thought import (
OptimizedChainOfThought,
ComplexityAnalyzer,
TemplateLibrary,
ReasoningCache,
MultiPathReasoning,
MetacognitiveLayer,
ReasoningPath,
ReasoningStep,
ReasoningType
)
# Import existing tools for integration testing
from src.agents.advanced_hybrid_architecture import AdvancedHybridAgent
from src.utils.semantic_search_tool import semantic_search_tool
from src.utils.python_interpreter import python_interpreter
@pytest.mark.asyncio
class TestCoTIntegration:
    """Integration tests for Chain of Thought system.

    Each test drives an ``OptimizedChainOfThought`` instance supplied by one
    of the fixtures below. The class-level ``asyncio`` marker lets the
    coroutine tests run when pytest-asyncio is in strict mode.
    """

    @pytest.fixture
    def cot_system(self):
        """Create a configured CoT system for testing."""
        # Plain (non-async) fixture: nothing here is awaited, and an
        # ``async def`` fixture under ``@pytest.fixture`` hands the test an
        # un-awaited coroutine when pytest-asyncio runs in strict mode.
        # NOTE(review): assumes the constructor does not need a running
        # event loop - confirm.
        return OptimizedChainOfThought(
            "test_cot",
            config={
                'max_paths': 3,
                'cache_size': 100,
                'cache_ttl': 1,
                'parallel_threshold': 0.5,
                'confidence_threshold': 0.7,
                'complexity_depth_multiplier': 10,
            },
        )

    @pytest.fixture
    def complex_cot_system(self):
        """Create a more complex CoT system for advanced testing."""
        # Sync for the same reason as ``cot_system``.
        return OptimizedChainOfThought(
            "complex_test_cot",
            config={
                'max_paths': 5,
                'cache_size': 500,
                'cache_ttl': 24,
                'parallel_threshold': 0.3,
                'confidence_threshold': 0.8,
                'complexity_depth_multiplier': 15,
            },
        )

    async def test_component_initialization(self, cot_system):
        """Test that all components initialize correctly."""
        assert cot_system.complexity_analyzer is not None
        assert cot_system.template_library is not None
        assert cot_system.reasoning_cache is not None
        assert cot_system.multi_path_engine is not None
        assert cot_system.metacognitive_layer is not None

        # Verify component types
        assert isinstance(cot_system.complexity_analyzer, ComplexityAnalyzer)
        assert isinstance(cot_system.template_library, TemplateLibrary)
        assert isinstance(cot_system.reasoning_cache, ReasoningCache)
        assert isinstance(cot_system.multi_path_engine, MultiPathReasoning)
        assert isinstance(cot_system.metacognitive_layer, MetacognitiveLayer)

    async def test_end_to_end_reasoning(self, cot_system):
        """Test complete reasoning flow."""
        query = "Explain the concept of recursion in programming"
        result = await cot_system.reason(query)

        assert result is not None
        assert isinstance(result, ReasoningPath)
        assert result.total_confidence > 0
        assert len(result.steps) > 0
        assert result.template_used in cot_system.template_library.templates
        assert result.execution_time > 0

        # Verify step structure
        for step in result.steps:
            assert isinstance(step, ReasoningStep)
            assert step.step_id > 0
            assert 0 <= step.confidence <= 1
            assert len(step.thought) > 0

    async def test_cache_integration(self, cot_system):
        """Test cache integration with main system."""
        query = "What is machine learning?"

        # First call - should miss cache
        result1 = await cot_system.reason(query)
        # Snapshot the counters as plain values right away: if
        # ``performance_metrics`` is a live mapping, holding a reference to
        # it would show the post-second-call numbers as well.
        hits_after_first = cot_system.performance_metrics['cache_hits']
        misses_after_first = cot_system.performance_metrics['cache_misses']
        assert misses_after_first > 0

        # Second call - should hit cache
        result2 = await cot_system.reason(query)
        hits_after_second = cot_system.performance_metrics['cache_hits']
        assert hits_after_second > 0
        # The hit must come from the repeated query, not from earlier work.
        assert hits_after_second > hits_after_first

        # NOTE(review): assumes a cached answer reports its own, shorter
        # execution_time rather than replaying the first result's - confirm.
        assert result2.execution_time < result1.execution_time

        # Verify cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()
        assert cache_stats['hits'] > 0
        assert cache_stats['misses'] > 0

    async def test_complexity_analysis_integration(self, cot_system):
        """Test complexity analysis integration."""
        simple_query = "What is 2+2?"
        complex_query = "Analyze the time complexity of quicksort algorithm and compare it with merge sort in terms of space complexity and stability"

        # Analyze complexity
        analyzer = cot_system.complexity_analyzer
        simple_complexity, simple_features = analyzer.analyze(simple_query)
        complex_complexity, complex_features = analyzer.analyze(complex_query)

        assert simple_complexity < complex_complexity
        assert simple_features['length'] < complex_features['length']
        assert simple_features['vocabulary_complexity'] < complex_features['vocabulary_complexity']

        # Test that complexity affects reasoning depth
        simple_result = await cot_system.reason(simple_query)
        complex_result = await cot_system.reason(complex_query)

        # Complex queries should generally have more steps
        assert len(complex_result.steps) >= len(simple_result.steps)

    async def test_template_selection_integration(self, cot_system):
        """Test template selection integration."""
        mathematical_query = "Calculate the derivative of x^2 + 3x + 1"
        analytical_query = "Compare the benefits and drawbacks of cloud computing"

        # Get complexity analysis (only the feature dicts are needed below)
        _, math_features = cot_system.complexity_analyzer.analyze(mathematical_query)
        _, analysis_features = cot_system.complexity_analyzer.analyze(analytical_query)

        # Test template selection
        library = cot_system.template_library
        math_template = library.select_template(mathematical_query, math_features)
        analysis_template = library.select_template(analytical_query, analysis_features)

        assert math_template is not None
        assert analysis_template is not None
        assert math_template.name != analysis_template.name

        # Test template applicability
        math_applicability = math_template.is_applicable(mathematical_query, math_features)
        analysis_applicability = analysis_template.is_applicable(analytical_query, analysis_features)
        assert math_applicability > 0
        assert analysis_applicability > 0

    async def test_multi_path_reasoning_integration(self, complex_cot_system):
        """Test multi-path reasoning integration."""
        complex_query = "Analyze the impact of artificial intelligence on job markets"

        # This should trigger multi-path reasoning due to complexity
        result = await complex_cot_system.reason(complex_query)

        assert result is not None
        assert len(result.steps) > 0

        # Verify that multiple reasoning types were used
        unique_types = {step.reasoning_type for step in result.steps}

        # Should have used multiple reasoning approaches
        assert len(unique_types) > 1

    async def test_metacognitive_layer_integration(self, cot_system):
        """Test metacognitive layer integration."""
        query = "Explain the concept of object-oriented programming"

        # Enable metacognitive reflection
        cot_system.config['enable_metacognition'] = True
        result = await cot_system.reason(query)

        assert result is not None
        assert result.total_confidence > 0
        # Metacognitive layer should improve confidence
        # (This is a basic test - in practice, metacognition should enhance reasoning)
        assert result.total_confidence >= 0.5

    async def test_error_handling_integration(self, cot_system):
        """Test error handling integration."""
        # Test with invalid query
        invalid_query = ""
        with pytest.raises(ValueError):
            await cot_system.reason(invalid_query)

        # Test with very long query
        long_query = "x" * 10000
        # Should handle gracefully
        result = await cot_system.reason(long_query)
        assert result is not None

    async def test_performance_metrics_integration(self, cot_system):
        """Test performance metrics integration."""
        queries = [
            "What is Python?",
            "Explain machine learning",
            "How does recursion work?",
        ]

        # Run multiple queries
        for query in queries:
            await cot_system.reason(query)

        # Check performance metrics
        metrics = cot_system.performance_metrics
        assert metrics['total_queries'] >= len(queries)
        assert metrics['average_execution_time'] > 0
        assert metrics['average_confidence'] > 0
        assert 'cache_hits' in metrics
        assert 'cache_misses' in metrics

    async def test_concurrent_reasoning(self, cot_system):
        """Test concurrent reasoning capabilities."""
        queries = [
            "What is artificial intelligence?",
            "Explain blockchain technology",
            "How do neural networks work?",
            "What is quantum computing?",
            "Explain the concept of APIs",
        ]

        # Run queries concurrently
        results = await asyncio.gather(
            *(cot_system.reason(query) for query in queries)
        )

        # Verify all results
        for result in results:
            assert result is not None
            assert result.total_confidence > 0
            assert len(result.steps) > 0

    async def test_memory_management(self, cot_system):
        """Test memory management and cache eviction."""
        cache_size = cot_system.config['cache_size']

        # Fill cache with more distinct queries than it can hold. The count
        # is derived from the configured size so the test keeps overflowing
        # the cache if the fixture's ``cache_size`` changes.
        for i in range(cache_size + 50):
            query = f"Test query number {i}"
            await cot_system.reason(query)

        # Check cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()

        # Should have evicted some entries
        assert cache_stats['evictions'] > 0
        assert cache_stats['size'] <= cache_size
@pytest.mark.asyncio
class TestCoTWithTools:
    """Test CoT integration with existing tools.

    The class-level ``asyncio`` marker lets the coroutine tests run when
    pytest-asyncio is in strict mode.
    """

    @pytest.fixture
    def hybrid_agent_with_cot(self):
        """Create hybrid agent with CoT integration."""
        # Plain (non-async) fixture: nothing here is awaited, and an
        # ``async def`` fixture under ``@pytest.fixture`` hands the test an
        # un-awaited coroutine when pytest-asyncio runs in strict mode.
        # NOTE(review): assumes the agent constructor does not need a
        # running event loop - confirm.
        tools = [semantic_search_tool, python_interpreter]
        return AdvancedHybridAgent(
            "test_agent",
            config={
                'cot': {
                    'max_paths': 3,
                    'cache_size': 500,
                    'enable_metacognition': True,
                }
            },
            tools=tools,
        )

    async def test_cot_with_hybrid_agent(self, hybrid_agent_with_cot):
        """Test CoT integration with hybrid agent."""
        # Test query that uses CoT
        query = "Analyze the time complexity of quicksort algorithm"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        assert 'response' in result or 'answer' in result

        # Check reasoning history (only when the agent exposes one)
        if hasattr(hybrid_agent_with_cot, 'reasoning_history'):
            history = hybrid_agent_with_cot.reasoning_history
            assert len(history) > 0
            latest_entry = history[-1]
            assert 'mode' in latest_entry
            assert latest_entry['mode'] in ['cot', 'hybrid', 'tool']

    async def test_cot_with_semantic_search(self, hybrid_agent_with_cot):
        """Test CoT with semantic search tool."""
        query = "What are the latest developments in quantum computing?"

        # This should use semantic search and CoT reasoning
        result = await hybrid_agent_with_cot.process_query(query)
        assert result is not None

        # Verify that tools were used (only when the agent records usage)
        if hasattr(hybrid_agent_with_cot, 'tool_usage_history'):
            assert len(hybrid_agent_with_cot.tool_usage_history) > 0

    async def test_cot_with_python_interpreter(self, hybrid_agent_with_cot):
        """Test CoT with Python interpreter tool."""
        query = "Write a Python function to calculate fibonacci numbers and test it"
        result = await hybrid_agent_with_cot.process_query(query)
        assert result is not None

        # Should have used Python interpreter (only when usage is recorded)
        if hasattr(hybrid_agent_with_cot, 'tool_usage_history'):
            python_tool_used = any(
                'python_interpreter' in str(tool)
                for tool in hybrid_agent_with_cot.tool_usage_history
            )
            assert python_tool_used
class TestCoTCompatibility:
    """Test compatibility with different configurations and versions."""

    def test_dependency_versions(self):
        """Ensure all dependencies are compatible.

        Each importable package that exposes ``__version__`` is compared
        against its minimum; a package that is too old fails the test.
        Missing packages and packages without ``__version__`` are only
        reported, as before.
        """
        import importlib
        import re

        required_versions = {
            'numpy': '1.21.0',
            'asyncio': '3.4.3',
        }

        def release_tuple(version):
            """Return the leading dotted-number part of *version* as ints."""
            match = re.match(r'\d+(?:\.\d+)*', version)
            parts = [int(p) for p in match.group().split('.')] if match else []
            # Pad so that '1.21' and '1.21.0' compare equal.
            parts += [0] * (3 - len(parts))
            return tuple(parts)

        too_old = []
        for package, min_version in required_versions.items():
            try:
                module = importlib.import_module(package)
            except ImportError:
                print(f"{package}: NOT INSTALLED (required: >={min_version})")
                continue

            version = getattr(module, '__version__', 'unknown')
            print(f"{package}: {version} (required: >={min_version})")

            # Modules without __version__ cannot be compared; report only.
            if version == 'unknown':
                continue
            if release_tuple(str(version)) < release_tuple(min_version):
                too_old.append(f"{package} {version} < {min_version}")

        assert not too_old, "Outdated dependencies: " + ", ".join(too_old)

    def test_reasoning_path_contract(self):
        """Ensure ReasoningPath maintains expected structure."""
        # Synchronous on purpose: nothing here is awaited, so the test does
        # not depend on an asyncio plugin being active.
        # Test that existing code expecting old structure still works
        path = ReasoningPath(
            path_id="test",
            query="test query",
            steps=[],
            total_confidence=0.8,
        )

        # These attributes should exist for backward compatibility
        expected_attributes = (
            'path_id',
            'query',
            'steps',
            'total_confidence',
            'execution_time',
            'template_used',
            'complexity_score',
        )
        for attribute in expected_attributes:
            assert hasattr(path, attribute), f"ReasoningPath lacks '{attribute}'"

    @pytest.mark.asyncio
    async def test_configuration_compatibility(self):
        """Test compatibility with different configurations."""
        configs = [
            {'max_paths': 1, 'cache_size': 50},
            {'max_paths': 5, 'cache_size': 1000},
            {'parallel_threshold': 0.1, 'confidence_threshold': 0.9},
            {'parallel_threshold': 0.9, 'confidence_threshold': 0.5},
        ]

        for config in configs:
            cot_system = OptimizedChainOfThought("compat_test", config)

            # Should initialize without errors
            assert cot_system is not None

            # Should be able to reason
            result = await cot_system.reason("Test query")
            assert result is not None
# Performance and stress testing
@pytest.mark.asyncio
class TestCoTPerformance:
    """Performance and stress testing for CoT system.

    Elapsed time is measured with ``time.perf_counter()``, a monotonic
    clock, so a wall-clock adjustment during a run cannot distort the
    duration checks. The class-level ``asyncio`` marker lets the coroutine
    tests run when pytest-asyncio is in strict mode.
    """

    async def test_large_query_handling(self):
        """Test handling of large queries."""
        cot_system = OptimizedChainOfThought("perf_test", {'max_paths': 3})

        # Large query
        large_query = "Explain in detail the complete process of how machine learning algorithms work, including data preprocessing, feature engineering, model selection, training, validation, testing, and deployment, with specific examples of different types of algorithms like supervised learning, unsupervised learning, and reinforcement learning, and discuss the challenges and best practices in each step"

        started = time.perf_counter()
        result = await cot_system.reason(large_query)
        elapsed = time.perf_counter() - started

        assert result is not None
        assert result.total_confidence > 0
        assert elapsed < 30  # Should complete within 30 seconds

    async def test_concurrent_load(self):
        """Test concurrent load handling."""
        cot_system = OptimizedChainOfThought("load_test", {'max_paths': 2})

        # Create many concurrent queries
        queries = [f"Query {i}: Explain concept {i}" for i in range(20)]

        started = time.perf_counter()
        results = await asyncio.gather(
            *(cot_system.reason(query) for query in queries)
        )
        elapsed = time.perf_counter() - started

        # All should complete successfully
        assert len(results) == len(queries)
        for result in results:
            assert result is not None
            assert result.total_confidence > 0

        # Should complete in reasonable time
        assert elapsed < 60  # Within 60 seconds
if __name__ == "__main__":
    # Run integration tests and pass pytest's exit code back to the caller,
    # so a failing run is visible to the shell or CI.
    raise SystemExit(pytest.main([__file__, "-v"]))