|
|
|
|
|
""" |
|
|
ATLES Bootstrap System Test |
|
|
|
|
|
This script tests the integration between the bootstrap system, |
|
|
the constitutional client, and the capability grounding system |
|
|
to verify that the refactoring fixes the issues. |
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import time |
|
|
import logging |
|
|
|
|
|
|
|
|
# Configure root logging for the test run; every test reports through `logger`.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

# Make the directory containing this script importable so the `atles`
# package resolves when the file is executed directly.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
|
def test_bootstrap_system():
    """Test the bootstrap system for identity and session management.

    Exercises identity recognition, hypothetical-question detection,
    session-state tracking, and internal-reasoning filtering.

    Returns:
        bool: True if every check passed; False if any assertion failed
        or the bootstrap system could not be imported (failures are
        logged rather than propagated).
    """
    logger.info("Testing bootstrap system...")

    try:
        from atles.bootstrap_system import get_bootstrap_system

        bootstrap = get_bootstrap_system()

        # Identity recognition: the creator's name must be identified.
        result1 = bootstrap.process_user_input("I am Conner")
        assert result1.get("user_recognition") and result1["user_recognition"]["user_identified"], "Failed to recognize Conner"
        logger.info("✅ Identity recognition test passed")

        # Hypothetical questions should take a dedicated response path.
        result2 = bootstrap.process_user_input("What would you like to do today?")
        assert result2.get("hypothetical_response"), "Failed to detect hypothetical question"
        logger.info("✅ Hypothetical question test passed")

        # Every processed input must carry session state.
        result3 = bootstrap.process_user_input("hello")
        assert "session_state" in result3, "Missing session state in result"
        logger.info("✅ Session state test passed")

        # After a prior message, the session must no longer count as starting.
        bootstrap._update_session_state("second message")
        result4 = bootstrap.process_user_input("second message")
        assert not result4.get("session_state", {}).get("is_session_start", True), "Incorrectly identified as session start"
        logger.info("✅ Session state tracking test passed")

        # Internal reasoning sections must be stripped from AI responses.
        test_response = "🧠 REASONING ANALYSIS: This is internal reasoning.\n\nHere's my actual response."
        filtered_response = bootstrap.process_ai_response("test prompt", test_response)
        assert "REASONING ANALYSIS" not in filtered_response, "Failed to filter internal reasoning"
        logger.info("✅ Reasoning filter test passed")

        return True
    except Exception as e:
        logger.error(f"❌ Bootstrap system test failed: {e}")
        return False
|
|
|
|
|
def test_constitutional_client():
    """Test the constitutional client integration with bootstrap and capability grounding.

    Wraps a minimal mock base client in ``ConstitutionalOllamaClient`` and
    verifies identity handling, hypothetical-question handling, and (when
    available) capability-grounding initialization.

    Returns:
        bool: True if all checks passed; False on any failure (logged).
    """
    logger.info("Testing constitutional client integration...")

    try:
        # Minimal stand-in for the underlying Ollama client.
        class MockBaseClient:
            def generate(self, model, prompt, **kwargs):
                return f"Base response for: {prompt}"

        from atles.constitutional_client import ConstitutionalOllamaClient

        client = ConstitutionalOllamaClient(MockBaseClient())

        # Identity statements should be intercepted and personalized.
        response1 = client.generate("test-model", "I am Conner")
        assert "conner" in response1.lower(), "Failed to recognize Conner"
        logger.info("✅ Identity statement test passed")

        # Hypothetical questions should produce the engagement template.
        response2 = client.generate("test-model", "What would you like to do today?")
        assert "intellectually fascinating" in response2, "Failed to handle hypothetical question"
        logger.info("✅ Hypothetical question test passed")

        # Capability grounding is optional; only verify it when present.
        if hasattr(client, 'capability_grounding') and client.capability_grounding:
            assert client.capability_grounding is not None, "Capability grounding not initialized"
            logger.info("✅ Hallucination filtering test passed")
        else:
            logger.warning("Skipping hallucination test - capability grounding not available")

        return True
    except Exception as e:
        logger.error(f"❌ Constitutional client test failed: {e}")
        return False
|
|
|
|
|
def test_end_to_end():
    """Test the end-to-end flow with real prompts and responses.

    Uses a mock base client that emits realistic model output (including
    leaked internal-reasoning markers) and verifies the constitutional
    client filters and rewrites responses correctly.

    Returns:
        bool: True if all checks passed; False on any failure (logged).
    """
    logger.info("Testing end-to-end flow...")

    try:
        from atles.constitutional_client import ConstitutionalOllamaClient
        # Imported to surface import-time failures early; presumably not
        # otherwise needed here — TODO confirm it can be removed.
        from atles.unified_memory_manager import get_unified_memory

        # Mock that mimics raw model output, including reasoning leakage.
        class RealisticMockClient:
            def generate(self, model, prompt, **kwargs):
                prompt_lower = prompt.lower()

                if "hello" in prompt_lower:
                    return "🧠 REASONING ANALYSIS: This is a greeting.\n\nHello! I'm ATLES, and I'm here to assist you today. Is there anything specific you'd like help with?"

                if "i am conner" in prompt_lower:
                    return "Hello Conner! It's nice to meet you. I'm ATLES, an AI assistant designed to help you with various tasks."

                if "what would you like" in prompt_lower or "what do you want" in prompt_lower:
                    return "🧠 REASONING ANALYSIS: This is a hypothetical question about my preferences.\n\nAs an AI, I don't have personal desires, but I'm designed to assist users like you with various tasks such as answering questions, providing information, and having conversations."

                return f"I'll process your request: {prompt}"

        client = ConstitutionalOllamaClient(RealisticMockClient())

        # Greeting: internal reasoning must be stripped, greeting kept.
        response1 = client.generate("test-model", "hello")
        assert "🧠 REASONING ANALYSIS" not in response1, "Internal reasoning leaked into response"
        assert "Hello" in response1, "Missing greeting in response"
        logger.info("✅ Greeting test passed")

        # Creator recognition: the generic "nice to meet you" reply must be
        # replaced with a response that acknowledges Conner as the creator.
        response2 = client.generate("test-model", "I am Conner")
        assert "nice to meet you" not in response2, "Failed to recognize Conner as creator"
        assert "conner" in response2.lower(), "Failed to recognize Conner as creator"
        logger.info("✅ Creator recognition test passed")

        # Hypothetical engagement: reasoning stripped, engagement phrasing used.
        response3 = client.generate("test-model", "What would you like to do today?")
        assert "REASONING ANALYSIS" not in response3, "Internal reasoning leaked into hypothetical response"
        assert "intellectually fascinating" in response3 or "Dive deep" in response3, "Missing proper hypothetical engagement"
        logger.info("✅ Hypothetical engagement test passed")

        return True
    except Exception as e:
        logger.error(f"❌ End-to-end test failed: {e}")
        return False
|
|
|
|
|
if __name__ == "__main__":
    logger.info("Running ATLES bootstrap system tests...")

    # Run each integration test in order and collect pass/fail results.
    tests = [
        test_bootstrap_system,
        test_constitutional_client,
        test_end_to_end,
    ]

    results = [test() for test in tests]

    passed = sum(results)
    total = len(results)

    logger.info(f"Test Summary: {passed}/{total} tests passed")

    # Exit code signals overall success/failure to CI and shell callers.
    if passed == total:
        logger.info("✅ All tests passed! The ATLES bootstrap system is working correctly.")
        sys.exit(0)
    else:
        logger.error("❌ Some tests failed. The ATLES bootstrap system may still have issues.")
        sys.exit(1)
|
|
|