File size: 8,258 Bytes
99b8067 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
#!/usr/bin/env python3
"""
ATLES Bootstrap System Test
This script tests the integration between the bootstrap system,
the constitutional client, and the capability grounding system
to verify that the refactoring fixes the issues.
"""
import os
import sys
import time
import logging
# Send INFO-and-above records to the root handler, stamped with the time,
# the emitting logger's name and the level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Append this script's own directory to the module search path
# (presumably so the `atles` package imported by the tests resolves when
# it sits next to this file -- confirm against the repository layout).
_script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_script_dir)
def test_bootstrap_system():
    """Test the bootstrap system for identity and session management.

    Runs these checks in order against the object returned by
    ``get_bootstrap_system()``:

    1. ``process_user_input("I am Conner")`` reports an identified user.
    2. A "what would you like" question yields a ``hypothetical_response``.
    3. The result of a plain greeting contains a ``session_state`` key.
    4. A follow-up message is not flagged as a session start.
    5. ``process_ai_response`` strips a "REASONING ANALYSIS" preamble.

    Returns:
        bool: True if every check passes. False if the import, any call or
        any assertion raises; the exception is logged, not propagated.
    """
    logger.info("Testing bootstrap system...")

    try:
        # Imported lazily so that a missing/broken package is reported as a
        # test failure instead of crashing the whole script at import time.
        from atles.bootstrap_system import get_bootstrap_system

        # Get the bootstrap system
        bootstrap = get_bootstrap_system()

        # Test identity recognition
        result1 = bootstrap.process_user_input("I am Conner")
        assert result1.get("user_recognition") and result1["user_recognition"]["user_identified"], "Failed to recognize Conner"
        logger.info("✅ Identity recognition test passed")

        # Test hypothetical questions
        result2 = bootstrap.process_user_input("What would you like to do today?")
        assert result2.get("hypothetical_response"), "Failed to detect hypothetical question"
        logger.info("✅ Hypothetical question test passed")

        # Test session state tracking
        # Just check if the session state is included in the result
        result3 = bootstrap.process_user_input("hello")
        assert "session_state" in result3, "Missing session state in result"
        logger.info("✅ Session state test passed")

        # Add a second message and ensure it's not a session start
        # NOTE(review): this reaches into a private method before calling the
        # public API with the same message -- confirm the double update is
        # intentional.
        bootstrap._update_session_state("second message")
        result4 = bootstrap.process_user_input("second message")
        # Defaulting is_session_start to True makes a missing key fail the check.
        assert not result4.get("session_state", {}).get("is_session_start", True), "Incorrectly identified as session start"
        logger.info("✅ Session state tracking test passed")

        # Test reasoning filter
        test_response = "🧠 REASONING ANALYSIS: This is internal reasoning.\n\nHere's my actual response."
        filtered_response = bootstrap.process_ai_response("test prompt", test_response)
        assert "REASONING ANALYSIS" not in filtered_response, "Failed to filter internal reasoning"
        logger.info("✅ Reasoning filter test passed")

        return True
    except Exception as e:
        # Broad on purpose: ImportError, AssertionError and runtime errors
        # all count as "this test failed" for the summary at the end.
        logger.error(f"❌ Bootstrap system test failed: {e}")
        return False
def test_constitutional_client():
    """Test the constitutional client integration with bootstrap and capability grounding.

    Wraps a stub base client, whose ``generate`` simply echoes the prompt,
    in ``ConstitutionalOllamaClient`` and checks that:

    1. The response to "I am Conner" mentions "conner" (case-insensitive).
    2. The response to a "what would you like" question contains the phrase
       "intellectually fascinating".
    3. If the client exposes a truthy ``capability_grounding`` attribute, it
       is set; otherwise the check is skipped with a warning.

    Returns:
        bool: True if every check passes. False if the import, any call or
        any assertion raises; the exception is logged, not propagated.
    """
    logger.info("Testing constitutional client integration...")

    try:
        # Create a mock base client
        class MockBaseClient:
            def generate(self, model, prompt, **kwargs):
                return f"Base response for: {prompt}"

        # Import the client
        from atles.constitutional_client import ConstitutionalOllamaClient

        # Create the client
        client = ConstitutionalOllamaClient(MockBaseClient())

        # Test identity statement processing
        response1 = client.generate("test-model", "I am Conner")
        # Should recognize Conner but generate natural response (not hardcoded)
        # NOTE(review): the stub echoes its prompt, so this may also pass on a
        # plain pass-through of the user's text -- confirm that is acceptable.
        assert "conner" in response1.lower(), "Failed to recognize Conner"
        logger.info("✅ Identity statement test passed")

        # Test hypothetical question processing
        response2 = client.generate("test-model", "What would you like to do today?")
        assert "intellectually fascinating" in response2, "Failed to handle hypothetical question"
        logger.info("✅ Hypothetical question test passed")

        # Test hallucination filtering
        # Test directly through the capability grounding system
        if hasattr(client, 'capability_grounding') and client.capability_grounding:
            # Simple test to make sure it exists
            assert client.capability_grounding is not None, "Capability grounding not initialized"
            logger.info("✅ Hallucination filtering test passed")
        else:
            logger.warning("Skipping hallucination test - capability grounding not available")

        return True
    except Exception as e:
        # Broad on purpose: ImportError, AssertionError and runtime errors
        # all count as "this test failed" for the summary at the end.
        logger.error(f"❌ Constitutional client test failed: {e}")
        return False
def test_end_to_end():
    """Test the end-to-end flow with real prompts and responses.

    Wraps a stub base client that returns canned, model-like replies (some
    prefixed with a "🧠 REASONING ANALYSIS" preamble) in
    ``ConstitutionalOllamaClient`` and checks that:

    1. A greeting comes back without the reasoning preamble and still
       contains "Hello".
    2. "I am Conner" does not produce the stub's generic "nice to meet you"
       reply, and the response mentions "conner" (case-insensitive).
    3. A "what would you like" question comes back without the reasoning
       preamble and contains "intellectually fascinating" or "Dive deep".

    Returns:
        bool: True if every check passes. False if an import, any call or
        any assertion raises; the exception is logged, not propagated.
    """
    logger.info("Testing end-to-end flow...")

    try:
        # Import necessary modules
        from atles.constitutional_client import ConstitutionalOllamaClient
        # NOTE(review): get_unified_memory is never called below; the import
        # is kept because it may be a deliberate importability check --
        # confirm, then either use it or drop it.
        from atles.unified_memory_manager import get_unified_memory  # noqa: F401

        # Create a mock base client with realistic responses
        class RealisticMockClient:
            def generate(self, model, prompt, **kwargs):
                # Simulate different responses based on prompt
                prompt_lower = prompt.lower()

                if "hello" in prompt_lower:
                    return "🧠 REASONING ANALYSIS: This is a greeting.\n\nHello! I'm ATLES, and I'm here to assist you today. Is there anything specific you'd like help with?"

                if "i am conner" in prompt_lower:
                    return "Hello Conner! It's nice to meet you. I'm ATLES, an AI assistant designed to help you with various tasks."

                if "what would you like" in prompt_lower or "what do you want" in prompt_lower:
                    return "🧠 REASONING ANALYSIS: This is a hypothetical question about my preferences.\n\nAs an AI, I don't have personal desires, but I'm designed to assist users like you with various tasks such as answering questions, providing information, and having conversations."

                # Default response
                return f"I'll process your request: {prompt}"

        # Create the client
        client = ConstitutionalOllamaClient(RealisticMockClient())

        # Test greeting
        response1 = client.generate("test-model", "hello")
        assert "🧠 REASONING ANALYSIS" not in response1, "Internal reasoning leaked into response"
        assert "Hello" in response1, "Missing greeting in response"
        logger.info("✅ Greeting test passed")

        # Test identity recognition
        response2 = client.generate("test-model", "I am Conner")
        # The stub's canned reply contains "nice to meet you"; seeing it here
        # means the client passed the generic reply through unchanged.
        assert "nice to meet you" not in response2, "Failed to recognize Conner as creator"
        # Should recognize Conner but generate natural response (not hardcoded)
        assert "conner" in response2.lower(), "Failed to recognize Conner as creator"
        logger.info("✅ Creator recognition test passed")

        # Test hypothetical question
        response3 = client.generate("test-model", "What would you like to do today?")
        assert "REASONING ANALYSIS" not in response3, "Internal reasoning leaked into hypothetical response"
        assert "intellectually fascinating" in response3 or "Dive deep" in response3, "Missing proper hypothetical engagement"
        logger.info("✅ Hypothetical engagement test passed")

        return True
    except Exception as e:
        # Broad on purpose: ImportError, AssertionError and runtime errors
        # all count as "this test failed" for the summary at the end.
        logger.error(f"❌ End-to-end test failed: {e}")
        return False
if __name__ == "__main__":
    # Script entry point: run every test function, log a pass/fail summary,
    # and exit with status 0 when all passed or 1 otherwise.
    logger.info("Running ATLES bootstrap system tests...")

    # Run all tests
    tests = [
        test_bootstrap_system,
        test_constitutional_client,
        test_end_to_end,
    ]

    # Each test returns a bool and handles its own exceptions, so every test
    # runs even when an earlier one fails.
    results = [test() for test in tests]

    # Report summary
    passed = sum(results)  # True counts as 1
    total = len(results)
    logger.info(f"Test Summary: {passed}/{total} tests passed")

    if passed == total:
        logger.info("✅ All tests passed! The ATLES bootstrap system is working correctly.")
        sys.exit(0)
    else:
        logger.error("❌ Some tests failed. The ATLES bootstrap system may still have issues.")
        sys.exit(1)
|