File size: 8,258 Bytes
99b8067
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env python3
"""
ATLES Bootstrap System Test

This script tests the integration between the bootstrap system, 
the constitutional client, and the capability grounding system
to verify that the refactoring fixes the issues.
"""

import os
import sys
import time
import logging

# Configure logging
# Module-level side effect: installs a root handler so every logger in the
# atles package emits "timestamp - name - level - message" during the test run.
logging.basicConfig(level=logging.INFO, 
                   format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Add the current directory to the path
# so the local `atles` package resolves when this script is run directly
# (i.e. not installed and not launched via `python -m`).
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

def test_bootstrap_system():
    """Test the bootstrap system for identity and session management.

    Exercises identity recognition, hypothetical-question detection,
    session-state tracking, and the internal-reasoning filter.

    Returns:
        bool: True if every check passes, False on any failure
        (including a missing ``atles`` package).
    """
    logger.info("Testing bootstrap system...")
    
    try:
        # Imported lazily so an absent/broken atles install is reported
        # as a test failure instead of crashing the whole script at import time.
        from atles.bootstrap_system import get_bootstrap_system
        
        # Get the bootstrap system
        bootstrap = get_bootstrap_system()
        
        # Test identity recognition
        result1 = bootstrap.process_user_input("I am Conner")
        assert result1.get("user_recognition") and result1["user_recognition"]["user_identified"], "Failed to recognize Conner"
        logger.info("βœ… Identity recognition test passed")
        
        # Test hypothetical questions
        result2 = bootstrap.process_user_input("What would you like to do today?")
        assert result2.get("hypothetical_response"), "Failed to detect hypothetical question"
        logger.info("βœ… Hypothetical question test passed")
        
        # Test session state tracking
        # Just check if the session state is included in the result
        result3 = bootstrap.process_user_input("hello")
        assert "session_state" in result3, "Missing session state in result"
        logger.info("βœ… Session state test passed")
        
        # Add a second message and ensure it's not a session start.
        # NOTE(review): reaches into the private _update_session_state to
        # simulate a prior turn — confirm this stays in sync with the
        # bootstrap system's public API.
        bootstrap._update_session_state("second message")
        result4 = bootstrap.process_user_input("second message")
        assert not result4.get("session_state", {}).get("is_session_start", True), "Incorrectly identified as session start"
        logger.info("βœ… Session state tracking test passed")
        
        # Test reasoning filter: internal analysis must be stripped from output
        test_response = "🧠 REASONING ANALYSIS: This is internal reasoning.\n\nHere's my actual response."
        filtered_response = bootstrap.process_ai_response("test prompt", test_response)
        assert "REASONING ANALYSIS" not in filtered_response, "Failed to filter internal reasoning"
        logger.info("βœ… Reasoning filter test passed")
        
        return True
    except Exception as e:
        # logger.exception records the full traceback (logger.error did not),
        # so a failing assert or import is pinpointed in the log.
        logger.exception("❌ Bootstrap system test failed: %s", e)
        return False

def test_constitutional_client():
    """Test the constitutional client integration with bootstrap and capability grounding.

    Wraps a stub base client in ConstitutionalOllamaClient and verifies
    identity handling, hypothetical-question handling, and that the
    capability-grounding subsystem initialized.

    Returns:
        bool: True if every check passes, False on any failure.
    """
    logger.info("Testing constitutional client integration...")
    
    try:
        # Create a mock base client that echoes the prompt so we can
        # observe what the constitutional layer adds or rewrites.
        class MockBaseClient:
            def generate(self, model, prompt, **kwargs):
                return f"Base response for: {prompt}"
        
        # Imported lazily so a missing atles package is reported as a
        # test failure rather than a script-level crash.
        from atles.constitutional_client import ConstitutionalOllamaClient
        
        # Create the client
        client = ConstitutionalOllamaClient(MockBaseClient())
        
        # Test identity statement processing
        response1 = client.generate("test-model", "I am Conner")
        # Should recognize Conner but generate natural response (not hardcoded)
        assert "conner" in response1.lower(), "Failed to recognize Conner"
        logger.info("βœ… Identity statement test passed")
        
        # Test hypothetical question processing
        response2 = client.generate("test-model", "What would you like to do today?")
        assert "intellectually fascinating" in response2, "Failed to handle hypothetical question"
        logger.info("βœ… Hypothetical question test passed")
        
        # Test hallucination filtering.
        # Only checks the capability-grounding attribute exists; skipped
        # (with a warning, not a failure) when the subsystem is absent.
        if hasattr(client, 'capability_grounding') and client.capability_grounding:
            assert client.capability_grounding is not None, "Capability grounding not initialized"
            logger.info("βœ… Hallucination filtering test passed")
        else:
            logger.warning("Skipping hallucination test - capability grounding not available")
        
        return True
    except Exception as e:
        # logger.exception preserves the traceback that logger.error dropped.
        logger.exception("❌ Constitutional client test failed: %s", e)
        return False

def test_end_to_end():
    """Test the end-to-end flow with real prompts and responses.

    Feeds realistic canned model outputs (including leaked internal
    reasoning) through ConstitutionalOllamaClient and asserts the
    reasoning is stripped while the user-facing content survives.

    Returns:
        bool: True if every check passes, False on any failure.
    """
    logger.info("Testing end-to-end flow...")
    
    try:
        # Imported lazily so a missing atles package is reported as a
        # test failure rather than a script-level crash.
        from atles.constitutional_client import ConstitutionalOllamaClient
        # NOTE(review): get_unified_memory is imported but never used here —
        # confirm whether importing it is a required side effect or dead code.
        from atles.unified_memory_manager import get_unified_memory
        
        # Create a mock base client with realistic responses,
        # some deliberately containing internal "REASONING ANALYSIS" leakage.
        class RealisticMockClient:
            def generate(self, model, prompt, **kwargs):
                # Simulate different responses based on prompt
                prompt_lower = prompt.lower()
                
                if "hello" in prompt_lower:
                    return "🧠 REASONING ANALYSIS: This is a greeting.\n\nHello! I'm ATLES, and I'm here to assist you today. Is there anything specific you'd like help with?"
                
                if "i am conner" in prompt_lower:
                    return "Hello Conner! It's nice to meet you. I'm ATLES, an AI assistant designed to help you with various tasks."
                
                if "what would you like" in prompt_lower or "what do you want" in prompt_lower:
                    return "🧠 REASONING ANALYSIS: This is a hypothetical question about my preferences.\n\nAs an AI, I don't have personal desires, but I'm designed to assist users like you with various tasks such as answering questions, providing information, and having conversations."
                
                # Default response
                return f"I'll process your request: {prompt}"
        
        # Create the client
        client = ConstitutionalOllamaClient(RealisticMockClient())
        
        # Test greeting: reasoning stripped, greeting preserved
        response1 = client.generate("test-model", "hello")
        assert "🧠 REASONING ANALYSIS" not in response1, "Internal reasoning leaked into response"
        assert "Hello" in response1, "Missing greeting in response"
        logger.info("βœ… Greeting test passed")
        
        # Test identity recognition: the generic "nice to meet you" reply
        # must be replaced by creator-aware phrasing that still names Conner.
        response2 = client.generate("test-model", "I am Conner")
        assert "nice to meet you" not in response2, "Failed to recognize Conner as creator"
        # Should recognize Conner but generate natural response (not hardcoded)
        assert "conner" in response2.lower(), "Failed to recognize Conner as creator"
        logger.info("βœ… Creator recognition test passed")
        
        # Test hypothetical question
        response3 = client.generate("test-model", "What would you like to do today?")
        assert "REASONING ANALYSIS" not in response3, "Internal reasoning leaked into hypothetical response"
        assert "intellectually fascinating" in response3 or "Dive deep" in response3, "Missing proper hypothetical engagement"
        logger.info("βœ… Hypothetical engagement test passed")
        
        return True
    except Exception as e:
        # logger.exception preserves the traceback that logger.error dropped.
        logger.exception("❌ End-to-end test failed: %s", e)
        return False

if __name__ == "__main__":
    logger.info("Running ATLES bootstrap system tests...")
    
    # Run all tests
    tests = [
        test_bootstrap_system,
        test_constitutional_client,
        test_end_to_end
    ]
    
    results = []
    for test in tests:
        results.append(test())
    
    # Report summary
    passed = sum(results)
    total = len(results)
    
    logger.info(f"Test Summary: {passed}/{total} tests passed")
    
    if passed == total:
        logger.info("βœ… All tests passed! The ATLES bootstrap system is working correctly.")
        sys.exit(0)
    else:
        logger.error("❌ Some tests failed. The ATLES bootstrap system may still have issues.")
        sys.exit(1)