"""Comprehensive test suite for Level 2 features.

Each ``test_*`` coroutine below exercises one Level 2 component from
``agent.core`` and prints what it observes.  A test signals failure either
by returning ``False`` or by raising; ``run_all_tests`` tallies the outcomes.

Run this file directly as a script: the process exits with status 0 when
every test passes and 1 otherwise.
"""

import asyncio
import os
import shutil
import sys
import tempfile
import traceback

# Put this file's own directory on sys.path before importing ``agent``.
# NOTE(review): this assumes the ``agent`` package sits next to this file;
# if the file lives in a sub-directory the parent directory is needed
# instead -- confirm against the repository layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from agent.core.level2_config import llm_config, level2_config, LLMConfig, Level2Config
from agent.core.semantic_cache import SemanticCache, semantic_cache
from agent.core.observability import RealTimeObservabilityEngine, observability, ExecutionEvent
from agent.core.contextual_memory import ContextualMemoryEngine, memory_engine, ExecutionResult
from agent.core.adaptive_reasoning import AdaptiveReasoningEngine, reasoning_engine
from agent.core.optimized_executor import OptimizedToolExecutor, tool_executor, ToolCall


async def test_config() -> bool:
    """Print the module-level LLM and Level 2 configuration values.

    Returns:
        ``True`` when every attribute could be read (an ``AttributeError``
        propagates to the runner otherwise).
    """
    print("\n=== Testing Level 2 Configuration ===")

    # LLM settings
    print(f"LLM Model: {llm_config.model}")
    print(f"Temperature: {llm_config.temperature}")
    print(f"Max Tokens: {llm_config.max_tokens}")

    # Level 2 feature switches
    print(f"\nMulti-pass reasoning: {level2_config.enable_multi_pass_reasoning}")
    print(f"Semantic cache: {level2_config.enable_semantic_cache}")
    print(f"Parallel execution: {level2_config.enable_parallel_execution}")
    print(f"Auto retry: {level2_config.enable_auto_retry}")
    print(f"Max retries: {level2_config.max_tool_retries}")

    print("✅ Configuration test passed")
    # Explicit result so this test reports the same way as its siblings.
    return True


async def test_semantic_cache() -> bool:
    """Store one entry in a fresh ``SemanticCache`` and check hit/miss.

    Returns:
        ``False`` if the stored query is not found again, or if an unrelated
        query produces a hit; ``True`` otherwise.
    """
    print("\n=== Testing Semantic Cache ===")

    cache = SemanticCache()

    # Store a value
    query = "What is the capital of France?"
    result = {"answer": "Paris", "confidence": 0.99}
    key = await cache.store(query, result, {"type": "qa"})
    print(f"Stored with key: {key[:8]}...")

    # The identical query must come back from the cache.
    cached = await cache.check(query)
    if not cached:
        print("❌ Cache miss (unexpected)")
        return False
    print(f"Cache hit! Similarity: {cached.similarity:.3f}")
    print(f"Result: {cached.result}")

    # A different query must not match the stored entry.
    miss_query = "What is the weather today?"
    miss_cached = await cache.check(miss_query)
    if miss_cached:
        print("❌ Cache hit (unexpected for different query)")
        return False
    print("✅ Cache miss as expected for different query")

    # Check stats
    stats = cache.get_stats()
    print(f"\nCache stats: {stats}")

    print("✅ Semantic cache test passed")
    return True


async def test_observability() -> bool:
    """Feed ten synthetic events to a fresh observability engine.

    Eight of the ten events are marked successful and durations grow by
    0.1 per event; any anomaly or predictive warning the engine returns is
    printed.

    Returns:
        ``True`` when all calls complete without raising.
    """
    print("\n=== Testing Observability Engine ===")

    obs = RealTimeObservabilityEngine()

    # Track some executions
    for i in range(10):
        event = ExecutionEvent(
            event_type="tool_execution_complete",
            data={
                "tool": "web_search",
                "success": i < 8,  # 80% success rate
                "duration": 1.0 + i * 0.1,
                "cost": 0.05,
            },
        )
        anomaly = obs.track_execution(event)
        if anomaly:
            print(f"Anomaly detected: {anomaly.tool_name} - {anomaly.deviation_percent:.1f}%")

    # Get metrics
    metrics = obs.get_tool_metrics("web_search")
    print(f"\nTool metrics: {metrics}")

    # Get summary
    summary = obs.get_summary()
    print(f"\nObservability summary: {summary}")

    # Test predictive warning
    warning = obs.predict_failure()
    if warning:
        print(f"Predictive warning: {warning.predicted_issue}")

    print("✅ Observability test passed")
    return True


async def test_contextual_memory() -> bool:
    """Exercise context retrieval, learning and stats on the memory engine.

    The engine is pointed at a fresh temporary directory that is removed
    afterwards, so the test does not depend on (or leave behind) state in a
    fixed location shared between runs.

    Returns:
        ``True`` when all calls complete without raising.
    """
    print("\n=== Testing Contextual Memory Engine ===")

    storage_path = tempfile.mkdtemp(prefix="test_memory_")
    try:
        mem = ContextualMemoryEngine(storage_path=storage_path)

        # Test context retrieval
        context = await mem.retrieve_context(
            query="Build a React app with TypeScript",
            user_id="test_user",
            max_tokens=1000,
        )
        print("Retrieved context:")
        print(f" - User memory: {context.user_memory is not None}")
        print(f" - Similar examples: {len(context.similar_examples)}")
        print(f" - Domain knowledge: {len(context.domain_knowledge)}")
        print(f" - Compressed size: {context.compressed_size} tokens")

        # Test learning
        result = ExecutionResult(
            query="Build a React app",
            success=True,
            tools_used=["execute_code", "web_search"],
            reasoning=["Analyzed requirements", "Created plan"],
        )
        await mem.learn_from_execution("test_user", result)

        # Get user stats
        stats = mem.get_user_stats("test_user")
        print(f"\nUser stats: {stats}")
    finally:
        # Best-effort cleanup: never mask a test failure with a removal error.
        shutil.rmtree(storage_path, ignore_errors=True)

    print("✅ Contextual memory test passed")
    return True


async def test_adaptive_reasoning() -> bool:
    """Run analysis, planning and verification on one sample query.

    Returns:
        ``True`` when all three engine calls complete without raising.
    """
    print("\n=== Testing Adaptive Reasoning Engine ===")

    engine = AdaptiveReasoningEngine()
    query = "Create a Python function to sort a list"

    # Test problem analysis
    print("\nTesting problem analysis...")
    analysis = await engine.analyze_problem(query)
    print(f" Difficulty: {analysis.estimated_difficulty}")
    print(f" Tool calls: {analysis.estimated_tool_calls}")
    print(f" Domains: {analysis.domains}")

    # Test execution plan
    print("\nTesting execution plan creation...")
    plan = await engine.create_execution_plan(
        query=query,
        analysis=analysis,
        available_tools=["execute_code", "web_search"],
    )
    print(f" Steps: {len(plan.steps)}")
    print(f" Total cost: {plan.total_cost}")
    print(f" Estimated duration: {plan.estimated_duration}s")
    for step in plan.steps:
        print(f" - Step {step.step_number}: {step.tool} ({step.reasoning[:50]}...)")

    # Test verification
    print("\nTesting solution verification...")
    verification = await engine.verify_solution(
        query=query,
        result="def sort_list(lst): return sorted(lst)",
        steps_executed=["execute_code"],
    )
    print(f" Complete: {verification.is_complete}")
    print(f" Quality score: {verification.quality_score}")
    print(f" Improvements: {verification.improvements}")

    print("✅ Adaptive reasoning test passed")
    return True


async def test_optimized_executor() -> bool:
    """Run two mock tool calls through ``OptimizedToolExecutor``.

    Returns:
        ``True`` when execution completes and every result can be printed.
    """
    print("\n=== Testing Optimized Tool Executor ===")

    executor = OptimizedToolExecutor()

    # Stand-in for a real tool backend.
    async def mock_execute(tool_name: str, params: dict):
        await asyncio.sleep(0.1)  # Simulate work
        return f"Result from {tool_name}"

    # Create tool calls
    tools = [
        ToolCall(id="1", name="web_search", params={"query": "test"}),
        ToolCall(id="2", name="execute_code", params={"code": "print('hello')"}),
    ]

    # Execute with optimization
    print("\nExecuting tools with optimization...")
    results = await executor.execute_with_optimization(
        tools=tools,
        execute_fn=mock_execute,
    )

    # Only the result objects are reported; the ids they are keyed by are not.
    for result in results.values():
        print(f" Tool {result.tool_name}: success={result.success}, time={result.execution_time:.3f}s")

    print("✅ Optimized executor test passed")
    return True


async def run_all_tests() -> bool:
    """Run every test in order and print a pass/fail summary.

    A test counts as failed when it raises or returns ``False``; any other
    return value counts as passed.

    Returns:
        ``True`` when no test failed.
    """
    print("=" * 60)
    print("Level 2 Features Test Suite")
    print("=" * 60)

    tests = [
        ("Configuration", test_config),
        ("Semantic Cache", test_semantic_cache),
        ("Observability", test_observability),
        ("Contextual Memory", test_contextual_memory),
        ("Adaptive Reasoning", test_adaptive_reasoning),
        ("Optimized Executor", test_optimized_executor),
    ]

    passed = 0
    failed = 0

    for name, test_fn in tests:
        try:
            result = await test_fn()
        except Exception as e:
            # Top-level boundary: report the error and keep running the rest.
            print(f"❌ {name} test failed with error: {e}")
            traceback.print_exc()
            failed += 1
        else:
            if result is False:
                # Name the test so the summary can be traced back to it.
                print(f"❌ {name} test reported failure")
                failed += 1
            else:
                passed += 1

    print("\n" + "=" * 60)
    print(f"Test Results: {passed} passed, {failed} failed")
    print("=" * 60)

    return failed == 0


if __name__ == "__main__":
    success = asyncio.run(run_all_tests())
    sys.exit(0 if success else 1)