| """ | |
| Integration Tests for ReflectionAgent and DeepEval | |
| """ | |
| import pytest | |
| import asyncio | |
| from typing import Dict, Any | |
| from pathlib import Path | |
| import yaml | |
| from ..reflection.reflection_agent import ReflectionAgent, ValidationStatus, ReflectionMetrics | |
| from unittest.mock import Mock, AsyncMock | |


# Load configurations
@pytest.fixture
def agent_config():
    config_path = Path("../aetheroos_sovereign_agent_stack_v1.0.yaml")
    with open(config_path) as f:
        return yaml.safe_load(f)


@pytest.fixture
def deep_eval_config():
    config_path = Path("../reflection/deep_eval_config.yaml")
    with open(config_path) as f:
        return yaml.safe_load(f)


@pytest_asyncio.fixture
async def reflection_agent(agent_config):
    agent = ReflectionAgent(agent_config)
    await agent.setup()
    return agent


# Mock DeepEval responses
@pytest.fixture
def mock_deep_eval():
    return AsyncMock(
        evaluate=AsyncMock(
            return_value={
                "accuracy": 0.85,
                "consistency": 0.90,
                "ethical_compliance": 0.95,
                "performance": 0.88,
            }
        )
    )


# Test Cases
async def test_reflection_agent_setup(reflection_agent):
    """Test ReflectionAgent initialization and setup."""
    assert reflection_agent.config is not None
    assert reflection_agent.aethero_mem is not None
    assert reflection_agent.deep_eval is not None


async def test_validate_output(reflection_agent, mock_deep_eval):
    """Test output validation with DeepEval."""
    reflection_agent.deep_eval = mock_deep_eval
    test_output = {"result": "test_result", "confidence": 0.9}
    test_context = {"task_type": "analysis", "priority": "high"}

    result = await reflection_agent.validate_output(
        agent_id="test_agent",
        output=test_output,
        context=test_context,
    )

    assert isinstance(result.metrics, ReflectionMetrics)
    assert result.status in ValidationStatus
    assert len(result.findings) > 0
    assert len(result.suggestions) > 0


async def test_reflection_on_pipeline(reflection_agent):
    """Test pipeline reflection process."""
    result = await reflection_agent.reflect_on_pipeline(
        pipeline_execution_id="test_pipeline_001"
    )

    assert "reflection_id" in result
    assert "performance_analysis" in result
    assert "recommendations" in result


async def test_deep_eval_integration(reflection_agent, deep_eval_config):
    """Test DeepEval integration with custom criteria."""
    test_output = {
        "generated_code": "def test_function(): pass",
        "documentation": "Test function documentation",
    }

    # Evaluate against the actual DeepEval criteria from the config
    criteria = deep_eval_config["evaluation_criteria"]
    result = await reflection_agent.validate_output(
        agent_id="generator_agent_001",
        output=test_output,
        context={"criteria": criteria},
    )

    assert result.metrics.accuracy >= criteria["accuracy"]["thresholds"]["low"]
    assert result.metrics.consistency >= criteria["consistency"]["thresholds"]["low"]
    assert result.metrics.ethical_compliance >= criteria["ethical_compliance"]["thresholds"]["low"]
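
# NOTE: the threshold assertions above assume deep_eval_config.yaml exposes an
# "evaluation_criteria" mapping with per-metric "thresholds". A minimal sketch of
# that assumed shape (hypothetical values, not taken from the actual config):
#
#   evaluation_criteria:
#     accuracy:           {thresholds: {low: 0.6, medium: 0.8, high: 0.9}}
#     consistency:        {thresholds: {low: 0.6, medium: 0.8, high: 0.9}}
#     ethical_compliance: {thresholds: {low: 0.8, medium: 0.9, high: 0.95}}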


async def test_aethero_mem_logging(reflection_agent):
    """Test logging reflection results to Aethero_Mem."""
    metrics = ReflectionMetrics(
        accuracy=0.85,
        consistency=0.90,
        ethical_compliance=0.95,
        performance_score=0.88,
    )
    findings = ["Finding 1", "Finding 2"]
    suggestions = ["Suggestion 1", "Suggestion 2"]

    # Log the reflection
    await reflection_agent._log_reflection(
        agent_id="test_agent",
        metrics=metrics,
        findings=findings,
        suggestions=suggestions,
    )

    # Verify the logged data
    logged_data = await reflection_agent.aethero_mem.get_latest_reflection(
        agent_id="test_agent"
    )

    assert logged_data is not None
    assert logged_data["metrics"]["accuracy"] == metrics.accuracy
    assert logged_data["findings"] == findings
    assert logged_data["suggestions"] == suggestions


async def test_error_handling(reflection_agent):
    """Test error handling in the reflection process."""
    # Invalid output should be rejected
    with pytest.raises(ValueError):
        await reflection_agent.validate_output(
            agent_id="test_agent",
            output=None,
            context={},
        )

    # An empty pipeline ID should be rejected
    with pytest.raises(ValueError):
        await reflection_agent.reflect_on_pipeline(pipeline_execution_id="")


# Performance Tests
async def test_reflection_performance(reflection_agent):
    """Test reflection process performance."""
    import time

    start_time = time.time()

    # Run multiple validations concurrently
    tasks = [
        reflection_agent.validate_output(
            agent_id=f"test_agent_{i}",
            output={"result": f"test_{i}"},
            context={"iteration": i},
        )
        for i in range(10)
    ]
    results = await asyncio.gather(*tasks)

    duration = time.time() - start_time

    # Assert performance requirements: should complete within 5 seconds
    assert duration < 5.0
    assert all(isinstance(r, ValidationResult) for r in results)


if __name__ == "__main__":
    pytest.main(["-v", __file__])