# water3 / test_level2_features.py
# onewayto's picture
# Upload 187 files
# 070daf8 verified
"""
Comprehensive test suite for Level 2 features
"""
import asyncio
import sys
import os
# Add this file's own directory to sys.path so the `agent.core` imports below can be resolved from here
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from agent.core.level2_config import llm_config, level2_config, LLMConfig, Level2Config
from agent.core.semantic_cache import SemanticCache, semantic_cache
from agent.core.observability import RealTimeObservabilityEngine, observability, ExecutionEvent
from agent.core.contextual_memory import ContextualMemoryEngine, memory_engine, ExecutionResult
from agent.core.adaptive_reasoning import AdaptiveReasoningEngine, reasoning_engine
from agent.core.optimized_executor import OptimizedToolExecutor, tool_executor, ToolCall
async def test_config():
    """Print the Level 2 configuration values held by the shared config objects.

    Reads attributes from the module-level ``llm_config`` and
    ``level2_config`` and prints each one. Nothing is asserted and nothing is
    returned, so a problem only surfaces as an exception from an attribute
    lookup.
    """
    print("\n=== Testing Level 2 Configuration ===")

    # (label, attribute) pairs; each value is looked up immediately before
    # its own line is printed, in the order listed.
    llm_rows = (
        ("LLM Model", "model"),
        ("Temperature", "temperature"),
        ("Max Tokens", "max_tokens"),
    )
    for label, attr in llm_rows:
        print(f"{label}: {getattr(llm_config, attr)}")

    # The leading newline on the first label visually separates the groups.
    feature_rows = (
        ("\nMulti-pass reasoning", "enable_multi_pass_reasoning"),
        ("Semantic cache", "enable_semantic_cache"),
        ("Parallel execution", "enable_parallel_execution"),
        ("Auto retry", "enable_auto_retry"),
        ("Max retries", "max_tool_retries"),
    )
    for label, attr in feature_rows:
        print(f"{label}: {getattr(level2_config, attr)}")

    print("✅ Configuration test passed")
async def test_semantic_cache():
    """Exercise a fresh ``SemanticCache``: store, expected hit, expected miss.

    Returns:
        True when the stored query is found again and an unrelated query is
        not; False as soon as either expectation is violated.
    """
    print("\n=== Testing Semantic Cache ===")
    cache = SemanticCache()

    # Store a single entry and show a prefix of the key handed back.
    stored_query = "What is the capital of France?"
    payload = {"answer": "Paris", "confidence": 0.99}
    entry_key = await cache.store(stored_query, payload, {"type": "qa"})
    print(f"Stored with key: {entry_key[:8]}...")

    # Looking up the identical query must produce a hit.
    hit = await cache.check(stored_query)
    if not hit:
        print("❌ Cache miss (unexpected)")
        return False
    print(f"Cache hit! Similarity: {hit.similarity:.3f}")
    print(f"Result: {hit.result}")

    # An unrelated query must not match the stored entry.
    unrelated_hit = await cache.check("What is the weather today?")
    if unrelated_hit:
        print("❌ Cache hit (unexpected for different query)")
        return False
    print("✅ Cache miss as expected for different query")

    # Stats are printed for inspection only; they are not checked.
    cache_stats = cache.get_stats()
    print(f"\nCache stats: {cache_stats}")

    print("✅ Semantic cache test passed")
    return True
async def test_observability():
    """Feed synthetic tool events to a fresh observability engine and print its output.

    Ten ``tool_execution_complete`` events for ``web_search`` are tracked, the
    first eight marked successful, with durations growing by 0.1 per event.
    Anomalies, per-tool metrics, the summary and any predictive warning are
    printed; none of them are asserted on.

    Returns:
        True once every call has completed without raising.
    """
    print("\n=== Testing Observability Engine ===")
    engine = RealTimeObservabilityEngine()

    for attempt in range(10):
        payload = {
            "tool": "web_search",
            "success": attempt < 8,  # first 8 of 10 succeed -> 80% success rate
            "duration": 1.0 + attempt * 0.1,
            "cost": 0.05,
        }
        tracked = ExecutionEvent(event_type="tool_execution_complete", data=payload)
        flagged = engine.track_execution(tracked)
        if flagged:
            print(f"Anomaly detected: {flagged.tool_name} - {flagged.deviation_percent:.1f}%")

    # Per-tool metrics first, then the engine-wide summary.
    print(f"\nTool metrics: {engine.get_tool_metrics('web_search')}")
    print(f"\nObservability summary: {engine.get_summary()}")

    # A predictive warning is optional; only report it when one is returned.
    forecast = engine.predict_failure()
    if forecast:
        print(f"Predictive warning: {forecast.predicted_issue}")

    print("✅ Observability test passed")
    return True
async def test_contextual_memory():
    """Exercise context retrieval and learning on a ``ContextualMemoryEngine``.

    The engine is pointed at a fresh path inside a temporary directory rather
    than a fixed ``/tmp`` location. This keeps anything written by
    ``learn_from_execution`` from leaking into later runs, and the test no
    longer depends on a POSIX-style ``/tmp`` existing.

    Returns:
        True once retrieval, learning and the stats lookup have all completed
        without raising. The printed values are not asserted on.
    """
    import tempfile  # local import: only this test needs scratch storage

    print("\n=== Testing Contextual Memory Engine ===")

    with tempfile.TemporaryDirectory(prefix="level2_memory_") as scratch_dir:
        # The path itself does not exist yet, mirroring a first run against
        # the old fixed path.
        # NOTE(review): assumes the engine creates storage_path on demand - confirm.
        storage_path = os.path.join(scratch_dir, "test_memory")
        mem = ContextualMemoryEngine(storage_path=storage_path)

        # Test context retrieval
        context = await mem.retrieve_context(
            query="Build a React app with TypeScript",
            user_id="test_user",
            max_tokens=1000,
        )
        print("Retrieved context:")
        print(f" - User memory: {context.user_memory is not None}")
        print(f" - Similar examples: {len(context.similar_examples)}")
        print(f" - Domain knowledge: {len(context.domain_knowledge)}")
        print(f" - Compressed size: {context.compressed_size} tokens")

        # Test learning from one successful execution record
        result = ExecutionResult(
            query="Build a React app",
            success=True,
            tools_used=["execute_code", "web_search"],
            reasoning=["Analyzed requirements", "Created plan"],
        )
        await mem.learn_from_execution("test_user", result)

        # Stats are read while the scratch storage still exists
        stats = mem.get_user_stats("test_user")
        print(f"\nUser stats: {stats}")

    print("✅ Contextual memory test passed")
    return True
async def test_adaptive_reasoning():
    """Run analysis, planning and verification on a fresh ``AdaptiveReasoningEngine``.

    The same query is used for all three stages, and the analysis result is
    fed into plan creation. Each stage's output is printed for inspection;
    nothing is asserted.

    Returns:
        True once all three stages have completed without raising.
    """
    print("\n=== Testing Adaptive Reasoning Engine ===")
    engine = AdaptiveReasoningEngine()
    task = "Create a Python function to sort a list"

    # Stage 1: problem analysis
    print("\nTesting problem analysis...")
    analysis = await engine.analyze_problem(task)
    print(f" Difficulty: {analysis.estimated_difficulty}")
    print(f" Tool calls: {analysis.estimated_tool_calls}")
    print(f" Domains: {analysis.domains}")

    # Stage 2: execution plan built from that analysis
    print("\nTesting execution plan creation...")
    plan = await engine.create_execution_plan(
        query=task,
        analysis=analysis,
        available_tools=["execute_code", "web_search"],
    )
    print(f" Steps: {len(plan.steps)}")
    print(f" Total cost: {plan.total_cost}")
    print(f" Estimated duration: {plan.estimated_duration}s")
    for planned in plan.steps:
        # Only the first 50 characters of the reasoning are shown.
        print(f" - Step {planned.step_number}: {planned.tool} ({planned.reasoning[:50]}...)")

    # Stage 3: verification of a candidate solution
    print("\nTesting solution verification...")
    verdict = await engine.verify_solution(
        query=task,
        result="def sort_list(lst): return sorted(lst)",
        steps_executed=["execute_code"],
    )
    print(f" Complete: {verdict.is_complete}")
    print(f" Quality score: {verdict.quality_score}")
    print(f" Improvements: {verdict.improvements}")

    print("✅ Adaptive reasoning test passed")
    return True
async def test_optimized_executor():
    """Run two mock tool calls through a fresh ``OptimizedToolExecutor``.

    A stand-in coroutine replaces real tool execution: it sleeps briefly and
    returns a string naming the tool. The per-tool results returned by the
    executor are printed but not asserted on.

    Returns:
        True once the executor call and the result printout have completed
        without raising.
    """
    print("\n=== Testing Optimized Tool Executor ===")
    executor = OptimizedToolExecutor()

    async def mock_execute(tool_name: str, params: dict):
        """Stand-in for a real tool: wait 0.1s, then echo the tool name."""
        await asyncio.sleep(0.1)  # Simulate work
        return f"Result from {tool_name}"

    # (id, name, params) for each tool call handed to the executor.
    call_specs = (
        ("1", "web_search", {"query": "test"}),
        ("2", "execute_code", {"code": "print('hello')"}),
    )
    tools = [
        ToolCall(id=call_id, name=tool_name, params=tool_params)
        for call_id, tool_name, tool_params in call_specs
    ]

    print("\nExecuting tools with optimization...")
    results = await executor.execute_with_optimization(
        tools=tools,
        execute_fn=mock_execute,
    )

    # The mapping key is not needed for the printout; only the result is used.
    for _tool_id, outcome in results.items():
        print(f" Tool {outcome.tool_name}: success={outcome.success}, time={outcome.execution_time:.3f}s")

    print("✅ Optimized executor test passed")
    return True
async def run_all_tests():
    """Run every Level 2 feature test in order and print a pass/fail tally.

    A test counts as failed when it returns exactly ``False`` or raises an
    ``Exception``; any other return value (including ``None``) counts as
    passed. A raised exception is reported with its traceback and does not
    stop the remaining tests.

    Returns:
        True when no test failed, otherwise False.
    """
    import traceback

    banner = "=" * 60
    print(banner)
    print("Level 2 Features Test Suite")
    print(banner)

    suite = [
        ("Configuration", test_config),
        ("Semantic Cache", test_semantic_cache),
        ("Observability", test_observability),
        ("Contextual Memory", test_contextual_memory),
        ("Adaptive Reasoning", test_adaptive_reasoning),
        ("Optimized Executor", test_optimized_executor),
    ]

    passed = 0
    failed = 0
    for name, test_fn in suite:
        try:
            outcome = await test_fn()
        except Exception as exc:
            print(f"❌ {name} test failed with error: {exc}")
            traceback.print_exc()
            failed += 1
        else:
            # Only an explicit False marks a failure; None is a pass.
            if outcome is False:
                failed += 1
            else:
                passed += 1

    print("\n" + banner)
    print(f"Test Results: {passed} passed, {failed} failed")
    print(banner)
    return failed == 0
if __name__ == "__main__":
    # Script entry point: run the suite and turn its result into the process
    # exit status (0 when nothing failed, 1 otherwise).
    all_passed = asyncio.run(run_all_tests())
    exit_status = 0 if all_passed else 1
    sys.exit(exit_status)