| | """ |
| | Comprehensive test suite for Level 2 features |
| | """ |
| |
|
| | import asyncio |
| | import sys |
| | import os |
| |
|
| | |
| | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) |
| |
|
| | from agent.core.level2_config import llm_config, level2_config, LLMConfig, Level2Config |
| | from agent.core.semantic_cache import SemanticCache, semantic_cache |
| | from agent.core.observability import RealTimeObservabilityEngine, observability, ExecutionEvent |
| | from agent.core.contextual_memory import ContextualMemoryEngine, memory_engine, ExecutionResult |
| | from agent.core.adaptive_reasoning import AdaptiveReasoningEngine, reasoning_engine |
| | from agent.core.optimized_executor import OptimizedToolExecutor, tool_executor, ToolCall |
| |
|
| |
|
async def test_config():
    """Test Level 2 configuration.

    Prints the LLM settings and feature flags so a failure to import or
    read any of them surfaces immediately. Returns True on success.
    """
    print("\n=== Testing Level 2 Configuration ===")

    # LLM settings
    print(f"LLM Model: {llm_config.model}")
    print(f"Temperature: {llm_config.temperature}")
    print(f"Max Tokens: {llm_config.max_tokens}")

    # Feature flags
    print(f"\nMulti-pass reasoning: {level2_config.enable_multi_pass_reasoning}")
    print(f"Semantic cache: {level2_config.enable_semantic_cache}")
    print(f"Parallel execution: {level2_config.enable_parallel_execution}")
    print(f"Auto retry: {level2_config.enable_auto_retry}")
    print(f"Max retries: {level2_config.max_tool_retries}")

    print("✅ Configuration test passed")
    # Fix: previously fell off the end (returned None). Return True for
    # consistency with the other tests; backward-compatible because
    # run_all_tests only checks `result is not False`.
    return True
| |
|
| |
|
async def test_semantic_cache():
    """Test semantic caching"""
    print("\n=== Testing Semantic Cache ===")

    sem_cache = SemanticCache()

    # Store a known query/result pair.
    question = "What is the capital of France?"
    answer = {"answer": "Paris", "confidence": 0.99}

    cache_key = await sem_cache.store(question, answer, {"type": "qa"})
    print(f"Stored with key: {cache_key[:8]}...")

    # Looking up the identical query must be a hit.
    hit = await sem_cache.check(question)
    if not hit:
        print("❌ Cache miss (unexpected)")
        return False
    print(f"Cache hit! Similarity: {hit.similarity:.3f}")
    print(f"Result: {hit.result}")

    # A semantically unrelated query must be a miss.
    unrelated = "What is the weather today?"
    if await sem_cache.check(unrelated):
        print("❌ Cache hit (unexpected for different query)")
        return False
    print("✅ Cache miss as expected for different query")

    # Report internal hit/miss counters.
    print(f"\nCache stats: {sem_cache.get_stats()}")

    print("✅ Semantic cache test passed")
    return True
| |
|
| |
|
async def test_observability():
    """Test observability engine"""
    print("\n=== Testing Observability Engine ===")

    engine = RealTimeObservabilityEngine()

    # Feed ten synthetic tool-completion events with rising durations;
    # the final two are marked as failures.
    for idx in range(10):
        evt = ExecutionEvent(
            event_type="tool_execution_complete",
            data={
                "tool": "web_search",
                "success": idx < 8,
                "duration": 1.0 + idx * 0.1,
                "cost": 0.05,
            },
        )
        detected = engine.track_execution(evt)
        if detected:
            print(f"Anomaly detected: {detected.tool_name} - {detected.deviation_percent:.1f}%")

    # Per-tool metrics for the tool we exercised.
    print(f"\nTool metrics: {engine.get_tool_metrics('web_search')}")

    # Aggregate summary across all tracked events.
    print(f"\nObservability summary: {engine.get_summary()}")

    # Predictive failure warning, if the engine produced one.
    prediction = engine.predict_failure()
    if prediction:
        print(f"Predictive warning: {prediction.predicted_issue}")

    print("✅ Observability test passed")
    return True
| |
|
| |
|
async def test_contextual_memory():
    """Test contextual memory engine.

    Exercises context retrieval for a fresh user, learning from an
    execution result, and the per-user stats API. Returns True on success.
    """
    import tempfile  # local import: only this test needs it

    print("\n=== Testing Contextual Memory Engine ===")

    # Fix: use a fresh temporary directory instead of the hard-coded
    # "/tmp/test_memory" — portable (no /tmp on Windows) and immune to
    # state left behind by a previous run.
    mem = ContextualMemoryEngine(storage_path=tempfile.mkdtemp(prefix="test_memory_"))

    # Retrieve (initially empty) context for a new user/query.
    context = await mem.retrieve_context(
        query="Build a React app with TypeScript",
        user_id="test_user",
        max_tokens=1000
    )

    print("Retrieved context:")  # was an f-string with no placeholders
    print(f" - User memory: {context.user_memory is not None}")
    print(f" - Similar examples: {len(context.similar_examples)}")
    print(f" - Domain knowledge: {len(context.domain_knowledge)}")
    print(f" - Compressed size: {context.compressed_size} tokens")

    # Record a successful execution so the engine can learn from it.
    result = ExecutionResult(
        query="Build a React app",
        success=True,
        tools_used=["execute_code", "web_search"],
        reasoning=["Analyzed requirements", "Created plan"]
    )
    await mem.learn_from_execution("test_user", result)

    stats = mem.get_user_stats("test_user")
    print(f"\nUser stats: {stats}")

    print("✅ Contextual memory test passed")
    return True
| |
|
| |
|
async def test_adaptive_reasoning():
    """Test adaptive reasoning engine"""
    print("\n=== Testing Adaptive Reasoning Engine ===")

    reasoner = AdaptiveReasoningEngine()
    task = "Create a Python function to sort a list"

    # Phase 1: analyze the problem.
    print("\nTesting problem analysis...")
    analysis = await reasoner.analyze_problem(task)
    print(f" Difficulty: {analysis.estimated_difficulty}")
    print(f" Tool calls: {analysis.estimated_tool_calls}")
    print(f" Domains: {analysis.domains}")

    # Phase 2: build an execution plan from the analysis.
    print("\nTesting execution plan creation...")
    plan = await reasoner.create_execution_plan(
        query=task,
        analysis=analysis,
        available_tools=["execute_code", "web_search"]
    )
    print(f" Steps: {len(plan.steps)}")
    print(f" Total cost: {plan.total_cost}")
    print(f" Estimated duration: {plan.estimated_duration}s")

    for planned_step in plan.steps:
        print(f" - Step {planned_step.step_number}: {planned_step.tool} ({planned_step.reasoning[:50]}...)")

    # Phase 3: verify a candidate solution.
    print("\nTesting solution verification...")
    verification = await reasoner.verify_solution(
        query=task,
        result="def sort_list(lst): return sorted(lst)",
        steps_executed=["execute_code"]
    )
    print(f" Complete: {verification.is_complete}")
    print(f" Quality score: {verification.quality_score}")
    print(f" Improvements: {verification.improvements}")

    print("✅ Adaptive reasoning test passed")
    return True
| |
|
| |
|
async def test_optimized_executor():
    """Test optimized tool executor.

    Runs two mock tool calls through OptimizedToolExecutor and prints the
    per-tool outcome. Returns True on success.
    """
    print("\n=== Testing Optimized Tool Executor ===")

    executor = OptimizedToolExecutor()

    # Stand-in for a real tool dispatcher: sleeps briefly so the executor's
    # timing/parallelism paths are actually exercised.
    async def mock_execute(tool_name: str, params: dict):
        await asyncio.sleep(0.1)
        return f"Result from {tool_name}"

    tools = [
        ToolCall(id="1", name="web_search", params={"query": "test"}),
        ToolCall(id="2", name="execute_code", params={"code": "print('hello')"}),
    ]

    print("\nExecuting tools with optimization...")
    results = await executor.execute_with_optimization(
        tools=tools,
        execute_fn=mock_execute
    )

    # Fix: the dict key was unpacked but never used — iterate the values
    # directly instead of .items().
    for result in results.values():
        print(f" Tool {result.tool_name}: success={result.success}, time={result.execution_time:.3f}s")

    print("✅ Optimized executor test passed")
    return True
| |
|
| |
|
async def run_all_tests():
    """Run all tests"""
    banner = "=" * 60
    print(banner)
    print("Level 2 Features Test Suite")
    print(banner)

    suite = [
        ("Configuration", test_config),
        ("Semantic Cache", test_semantic_cache),
        ("Observability", test_observability),
        ("Contextual Memory", test_contextual_memory),
        ("Adaptive Reasoning", test_adaptive_reasoning),
        ("Optimized Executor", test_optimized_executor),
    ]

    passed = 0
    failed = 0

    for label, test_coro in suite:
        try:
            outcome = await test_coro()
        except Exception as e:
            # A raised exception counts as a failure; show the traceback.
            print(f"❌ {label} test failed with error: {e}")
            import traceback
            traceback.print_exc()
            failed += 1
        else:
            # Tests signal failure by returning False; None counts as a pass.
            if outcome is not False:
                passed += 1
            else:
                failed += 1

    print("\n" + banner)
    print(f"Test Results: {passed} passed, {failed} failed")
    print(banner)

    return failed == 0
| |
|
| |
|
if __name__ == "__main__":
    # Exit code 0 only when every test passed.
    all_passed = asyncio.run(run_all_tests())
    sys.exit(0 if all_passed else 1)
| |
|