"""
Integration tests for Chain of Thought system
Comprehensive testing of all CoT components working together
"""
import pytest
import asyncio
import time
from typing import Dict, List, Any
from unittest.mock import Mock, patch

# Import the CoT system components
from src.core.optimized_chain_of_thought import (
    OptimizedChainOfThought,
    ComplexityAnalyzer,
    TemplateLibrary,
    ReasoningCache,
    MultiPathReasoning,
    MetacognitiveLayer,
    ReasoningPath,
    ReasoningStep,
    ReasoningType
)

# Import existing tools for integration testing
from src.agents.advanced_hybrid_architecture import AdvancedHybridAgent
from src.utils.semantic_search_tool import semantic_search_tool
from src.utils.python_interpreter import python_interpreter
class TestCoTIntegration:
    """Integration tests for the Chain of Thought system.

    Exercises the full pipeline end to end: component wiring, reasoning,
    caching, complexity analysis, template selection, multi-path reasoning,
    metacognition, error handling, metrics, concurrency, and cache eviction.
    """

    # All async tests in this class run under pytest-asyncio.
    pytestmark = pytest.mark.asyncio

    @pytest.fixture
    def cot_system(self):
        """A small, fast CoT system used by most tests.

        NOTE(review): this was a bare ``async def`` method — without the
        fixture decorator pytest cannot inject it and every test requesting
        it fails with "fixture not found". Construction does not await
        anything, so a synchronous fixture is sufficient.
        """
        return OptimizedChainOfThought(
            "test_cot",
            config={
                'max_paths': 3,
                'cache_size': 100,
                'cache_ttl': 1,
                'parallel_threshold': 0.5,
                'confidence_threshold': 0.7,
                'complexity_depth_multiplier': 10
            }
        )

    @pytest.fixture
    def complex_cot_system(self):
        """A larger CoT system (more paths, lower parallel threshold) for
        advanced multi-path testing."""
        return OptimizedChainOfThought(
            "complex_test_cot",
            config={
                'max_paths': 5,
                'cache_size': 500,
                'cache_ttl': 24,
                'parallel_threshold': 0.3,
                'confidence_threshold': 0.8,
                'complexity_depth_multiplier': 15
            }
        )

    async def test_component_initialization(self, cot_system):
        """All sub-components initialize and have the expected types."""
        assert cot_system.complexity_analyzer is not None
        assert cot_system.template_library is not None
        assert cot_system.reasoning_cache is not None
        assert cot_system.multi_path_engine is not None
        assert cot_system.metacognitive_layer is not None
        # Verify component types
        assert isinstance(cot_system.complexity_analyzer, ComplexityAnalyzer)
        assert isinstance(cot_system.template_library, TemplateLibrary)
        assert isinstance(cot_system.reasoning_cache, ReasoningCache)
        assert isinstance(cot_system.multi_path_engine, MultiPathReasoning)
        assert isinstance(cot_system.metacognitive_layer, MetacognitiveLayer)

    async def test_end_to_end_reasoning(self, cot_system):
        """Complete reasoning flow returns a well-formed ReasoningPath."""
        query = "Explain the concept of recursion in programming"
        result = await cot_system.reason(query)

        assert result is not None
        assert isinstance(result, ReasoningPath)
        assert result.total_confidence > 0
        assert len(result.steps) > 0
        assert result.template_used in cot_system.template_library.templates
        assert result.execution_time > 0
        # Every step must be well-formed
        for step in result.steps:
            assert isinstance(step, ReasoningStep)
            assert step.step_id > 0
            assert 0 <= step.confidence <= 1
            assert len(step.thought) > 0

    async def test_cache_integration(self, cot_system):
        """The second identical query is served (at least partly) from cache."""
        query = "What is machine learning?"

        # First call - should miss cache
        result1 = await cot_system.reason(query)
        metrics1 = cot_system.performance_metrics
        cache_misses_1 = metrics1['cache_misses']

        # Second call - should hit cache
        result2 = await cot_system.reason(query)
        metrics2 = cot_system.performance_metrics
        cache_hits_2 = metrics2['cache_hits']

        assert cache_hits_2 > 0
        # NOTE(review): timing comparison can be flaky on loaded machines —
        # a cached hit is expected, but not guaranteed, to be faster.
        assert result2.execution_time < result1.execution_time

        # Verify cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()
        assert cache_stats['hits'] > 0
        assert cache_stats['misses'] > 0

    async def test_complexity_analysis_integration(self, cot_system):
        """Complexity scores order simple vs. complex queries correctly."""
        simple_query = "What is 2+2?"
        complex_query = (
            "Analyze the time complexity of quicksort algorithm and compare "
            "it with merge sort in terms of space complexity and stability"
        )

        # Analyze complexity
        simple_complexity, simple_features = cot_system.complexity_analyzer.analyze(simple_query)
        complex_complexity, complex_features = cot_system.complexity_analyzer.analyze(complex_query)

        assert simple_complexity < complex_complexity
        assert simple_features['length'] < complex_features['length']
        assert simple_features['vocabulary_complexity'] < complex_features['vocabulary_complexity']

        # Test that complexity affects reasoning depth
        simple_result = await cot_system.reason(simple_query)
        complex_result = await cot_system.reason(complex_query)

        # Complex queries should generally have more steps
        assert len(complex_result.steps) >= len(simple_result.steps)

    async def test_template_selection_integration(self, cot_system):
        """Different query styles select different, applicable templates."""
        mathematical_query = "Calculate the derivative of x^2 + 3x + 1"
        analytical_query = "Compare the benefits and drawbacks of cloud computing"

        # Get complexity analysis
        math_complexity, math_features = cot_system.complexity_analyzer.analyze(mathematical_query)
        analysis_complexity, analysis_features = cot_system.complexity_analyzer.analyze(analytical_query)

        # Test template selection
        math_template = cot_system.template_library.select_template(mathematical_query, math_features)
        analysis_template = cot_system.template_library.select_template(analytical_query, analysis_features)

        assert math_template is not None
        assert analysis_template is not None
        assert math_template.name != analysis_template.name

        # Test template applicability
        math_applicability = math_template.is_applicable(mathematical_query, math_features)
        analysis_applicability = analysis_template.is_applicable(analytical_query, analysis_features)

        assert math_applicability > 0
        assert analysis_applicability > 0

    async def test_multi_path_reasoning_integration(self, complex_cot_system):
        """A complex query triggers multiple reasoning approaches."""
        complex_query = "Analyze the impact of artificial intelligence on job markets"

        # This should trigger multi-path reasoning due to complexity
        result = await complex_cot_system.reason(complex_query)

        assert result is not None
        assert len(result.steps) > 0
        # Verify that multiple reasoning types were used
        reasoning_types = [step.reasoning_type for step in result.steps]
        unique_types = set(reasoning_types)
        # Should have used multiple reasoning approaches
        assert len(unique_types) > 1

    async def test_metacognitive_layer_integration(self, cot_system):
        """Metacognitive reflection keeps confidence in a sane range."""
        query = "Explain the concept of object-oriented programming"

        # Enable metacognitive reflection
        cot_system.config['enable_metacognition'] = True
        result = await cot_system.reason(query)

        assert result is not None
        assert result.total_confidence > 0
        # Metacognitive layer should improve confidence
        # (This is a basic test - in practice, metacognition should enhance reasoning)
        assert result.total_confidence >= 0.5

    async def test_error_handling_integration(self, cot_system):
        """Empty queries are rejected; very long queries are tolerated."""
        # Test with invalid query
        invalid_query = ""
        with pytest.raises(ValueError):
            await cot_system.reason(invalid_query)

        # Test with very long query - should handle gracefully
        long_query = "x" * 10000
        result = await cot_system.reason(long_query)
        assert result is not None

    async def test_performance_metrics_integration(self, cot_system):
        """Performance metrics accumulate across queries."""
        queries = [
            "What is Python?",
            "Explain machine learning",
            "How does recursion work?"
        ]

        # Run multiple queries
        for query in queries:
            await cot_system.reason(query)

        # Check performance metrics
        metrics = cot_system.performance_metrics
        assert metrics['total_queries'] >= len(queries)
        assert metrics['average_execution_time'] > 0
        assert metrics['average_confidence'] > 0
        assert 'cache_hits' in metrics
        assert 'cache_misses' in metrics

    async def test_concurrent_reasoning(self, cot_system):
        """Several queries can be reasoned about concurrently."""
        queries = [
            "What is artificial intelligence?",
            "Explain blockchain technology",
            "How do neural networks work?",
            "What is quantum computing?",
            "Explain the concept of APIs"
        ]

        # Run queries concurrently
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)

        # Verify all results
        for result in results:
            assert result is not None
            assert result.total_confidence > 0
            assert len(result.steps) > 0

    async def test_memory_management(self, cot_system):
        """Overfilling the cache triggers eviction and respects the size cap."""
        # Fill cache with more queries than the configured cache size (100)
        for i in range(150):
            query = f"Test query number {i}"
            await cot_system.reason(query)

        # Check cache stats
        cache_stats = cot_system.reasoning_cache.get_stats()
        # Should have evicted some entries
        assert cache_stats['evictions'] > 0
        assert cache_stats['size'] <= cot_system.config['cache_size']
class TestCoTWithTools:
    """Test CoT integration with existing tools (semantic search, Python
    interpreter) through the hybrid agent."""

    # All tests in this class are async and run under pytest-asyncio.
    pytestmark = pytest.mark.asyncio

    @pytest.fixture
    def hybrid_agent_with_cot(self):
        """Hybrid agent wired with CoT plus the semantic-search and
        Python-interpreter tools.

        NOTE(review): this was a bare ``async def`` method — the fixture
        decorator is required for pytest to inject it into the tests below;
        construction is synchronous.
        """
        tools = [semantic_search_tool, python_interpreter]
        return AdvancedHybridAgent(
            "test_agent",
            config={
                'cot': {
                    'max_paths': 3,
                    'cache_size': 500,
                    'enable_metacognition': True
                }
            },
            tools=tools
        )

    async def test_cot_with_hybrid_agent(self, hybrid_agent_with_cot):
        """A reasoning-heavy query goes through CoT and records history."""
        query = "Analyze the time complexity of quicksort algorithm"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        assert 'response' in result or 'answer' in result

        # Check reasoning history (attribute presence is implementation-defined)
        if hasattr(hybrid_agent_with_cot, 'reasoning_history'):
            assert len(hybrid_agent_with_cot.reasoning_history) > 0
            latest_entry = hybrid_agent_with_cot.reasoning_history[-1]
            assert 'mode' in latest_entry
            assert latest_entry['mode'] in ['cot', 'hybrid', 'tool']

    async def test_cot_with_semantic_search(self, hybrid_agent_with_cot):
        """A knowledge query should engage semantic search alongside CoT."""
        query = "What are the latest developments in quantum computing?"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        # Verify that tools were used, if the agent tracks tool usage
        if hasattr(hybrid_agent_with_cot, 'tool_usage_history'):
            assert len(hybrid_agent_with_cot.tool_usage_history) > 0

    async def test_cot_with_python_interpreter(self, hybrid_agent_with_cot):
        """A coding query should route through the Python interpreter tool."""
        query = "Write a Python function to calculate fibonacci numbers and test it"
        result = await hybrid_agent_with_cot.process_query(query)

        assert result is not None
        # Should have used the Python interpreter, if usage is tracked
        if hasattr(hybrid_agent_with_cot, 'tool_usage_history'):
            python_tool_used = any(
                'python_interpreter' in str(tool)
                for tool in hybrid_agent_with_cot.tool_usage_history
            )
            assert python_tool_used
class TestCoTCompatibility:
    """Test compatibility with different configurations and versions."""

    def test_dependency_versions(self):
        """Report installed dependency versions against minimum requirements.

        NOTE(review): this test only prints and never asserts — it is a
        diagnostic, not a gate. Tighten with version comparisons if strict
        enforcement is desired.
        """
        import importlib

        required_versions = {
            'numpy': '1.21.0',
            'asyncio': '3.4.3',
        }

        for package, min_version in required_versions.items():
            try:
                module = importlib.import_module(package)
                # Not all modules expose __version__ (e.g. asyncio does not)
                version = getattr(module, '__version__', 'unknown')
                print(f"{package}: {version} (required: >={min_version})")
            except ImportError:
                print(f"{package}: NOT INSTALLED (required: >={min_version})")

    @pytest.mark.asyncio
    async def test_reasoning_path_contract(self):
        """Ensure ReasoningPath maintains its expected public structure."""
        # Test that existing code expecting old structure still works
        path = ReasoningPath(
            path_id="test",
            query="test query",
            steps=[],
            total_confidence=0.8
        )

        # These attributes should exist for backward compatibility
        assert hasattr(path, 'path_id')
        assert hasattr(path, 'query')
        assert hasattr(path, 'steps')
        assert hasattr(path, 'total_confidence')
        assert hasattr(path, 'execution_time')
        assert hasattr(path, 'template_used')
        assert hasattr(path, 'complexity_score')

    @pytest.mark.asyncio
    async def test_configuration_compatibility(self):
        """The system initializes and reasons under varied configurations."""
        configs = [
            {'max_paths': 1, 'cache_size': 50},
            {'max_paths': 5, 'cache_size': 1000},
            {'parallel_threshold': 0.1, 'confidence_threshold': 0.9},
            {'parallel_threshold': 0.9, 'confidence_threshold': 0.5}
        ]

        for config in configs:
            cot_system = OptimizedChainOfThought("compat_test", config)
            # Should initialize without errors
            assert cot_system is not None
            # Should be able to reason
            result = await cot_system.reason("Test query")
            assert result is not None
# Performance and stress testing
class TestCoTPerformance:
    """Performance and stress testing for the CoT system."""

    # All tests in this class are async and run under pytest-asyncio.
    pytestmark = pytest.mark.asyncio

    async def test_large_query_handling(self):
        """A very long query completes within a generous time budget."""
        cot_system = OptimizedChainOfThought("perf_test", {'max_paths': 3})

        large_query = (
            "Explain in detail the complete process of how machine learning "
            "algorithms work, including data preprocessing, feature "
            "engineering, model selection, training, validation, testing, "
            "and deployment, with specific examples of different types of "
            "algorithms like supervised learning, unsupervised learning, "
            "and reinforcement learning, and discuss the challenges and "
            "best practices in each step"
        )

        start_time = time.time()
        result = await cot_system.reason(large_query)
        end_time = time.time()

        assert result is not None
        assert result.total_confidence > 0
        assert end_time - start_time < 30  # Should complete within 30 seconds

    async def test_concurrent_load(self):
        """Twenty concurrent queries all succeed within the time budget."""
        cot_system = OptimizedChainOfThought("load_test", {'max_paths': 2})

        # Create many concurrent queries
        queries = [f"Query {i}: Explain concept {i}" for i in range(20)]

        start_time = time.time()
        tasks = [cot_system.reason(query) for query in queries]
        results = await asyncio.gather(*tasks)
        end_time = time.time()

        # All should complete successfully
        assert len(results) == len(queries)
        for result in results:
            assert result is not None
            assert result.total_confidence > 0

        # Should complete in reasonable time
        assert end_time - start_time < 60  # Within 60 seconds
if __name__ == "__main__":
    # Allow running this module directly: execute the integration tests verbosely.
    pytest.main([__file__, "-v"])