| |
"""
Final Response Length Fix Test

This test verifies that both the original and unified constitutional clients
now provide appropriately detailed responses instead of short ones.
"""
|
|
| import sys |
| import os |
| from pathlib import Path |
|
|
| |
# Put this file's own directory on the import path. Appending (rather than
# inserting at the front) leaves every existing sys.path entry ahead of it.
# NOTE(review): presumably this is what lets the `atles` imports in the tests
# below resolve -- confirm against the repository layout.
sys.path.append(str(Path(__file__).parent))
|
|
def test_original_constitutional_client():
    """Check that the original constitutional client gives detailed replies.

    Each prompt in a fixed list is sent through ``client.chat`` and the
    character length of the reply is compared with that prompt's minimum.

    The result is returned as a bool rather than asserted, because
    ``run_final_response_length_test`` in this file aggregates the return
    values of the individual checks.

    Returns:
        bool: True when every reply is at least as long as its minimum.
        False when any reply is shorter, or when importing, creating or
        calling the client raises (the exception is printed, not re-raised).
    """
    print("🧪 Testing Original Constitutional Client")
    print("-" * 50)

    try:
        # Imported lazily so a missing/broken package is reported as a failed
        # check by the handler below instead of breaking module import.
        from atles.constitutional_client import create_constitutional_client

        client = create_constitutional_client()

        # (label, prompt, minimum acceptable reply length in characters)
        test_cases = [
            ("Detailed explanation", "Explain machine learning in detail", 200),
            ("General question", "What are the benefits of exercise?", 150),
            ("How-to question", "How do I learn Python programming?", 100),
            ("Complex topic", "What is quantum computing?", 180),
        ]

        success_count = 0

        for test_name, prompt, min_length in test_cases:
            print(f"\nTesting: {test_name}")
            print(f"Prompt: {prompt}")

            response = client.chat(prompt)
            length = len(response)
            words = len(response.split())

            print(f"Response length: {length} chars, {words} words")
            print(f"Preview: {response[:100]}...")

            # The threshold is inclusive: a reply of exactly min_length passes.
            if length >= min_length:
                print(f"✅ {test_name} - Appropriate length")
                success_count += 1
            else:
                print(f"❌ {test_name} - Too short ({length} < {min_length})")

        print(f"\nOriginal client success rate: {success_count}/{len(test_cases)}")
        return success_count == len(test_cases)

    except Exception as e:
        # Boundary handler: any failure is reported and counted as a failed
        # check so the remaining checks in the suite can still run.
        print(f"❌ ERROR: {e}")
        return False
|
|
def test_unified_constitutional_client():
    """Check that the unified constitutional client gives detailed replies.

    Each prompt in a fixed list is sent through ``client.chat`` and the
    character length of the reply is compared with that prompt's minimum.

    The result is returned as a bool rather than asserted, because
    ``run_final_response_length_test`` in this file aggregates the return
    values of the individual checks.

    Returns:
        bool: True when every reply is at least as long as its minimum.
        False when any reply is shorter, or when importing, creating or
        calling the client raises (the exception is printed, not re-raised).
    """
    print("\n🧪 Testing Unified Constitutional Client")
    print("-" * 50)

    try:
        # Imported lazily so a missing/broken package is reported as a failed
        # check by the handler below instead of breaking module import.
        from atles.unified_constitutional_client import create_unified_constitutional_client

        client = create_unified_constitutional_client()

        # (label, prompt, minimum acceptable reply length in characters)
        test_cases = [
            ("Detailed explanation", "Explain artificial intelligence in detail", 200),
            ("General question", "What are the causes of climate change?", 150),
            ("Technical question", "How does blockchain technology work?", 180),
        ]

        success_count = 0

        for test_name, prompt, min_length in test_cases:
            print(f"\nTesting: {test_name}")
            print(f"Prompt: {prompt}")

            response = client.chat(prompt)
            length = len(response)
            words = len(response.split())

            print(f"Response length: {length} chars, {words} words")
            print(f"Preview: {response[:100]}...")

            # The threshold is inclusive: a reply of exactly min_length passes.
            if length >= min_length:
                print(f"✅ {test_name} - Appropriate length")
                success_count += 1
            else:
                print(f"❌ {test_name} - Too short ({length} < {min_length})")

        print(f"\nUnified client success rate: {success_count}/{len(test_cases)}")
        return success_count == len(test_cases)

    except Exception as e:
        # Boundary handler: any failure is reported and counted as a failed
        # check so the remaining checks in the suite can still run.
        print(f"❌ ERROR: {e}")
        return False
|
|
def test_session_management():
    """Check that a follow-up message still gets a detailed reply.

    Two prompts are sent through the same client instance, one after the
    other, and both replies must be longer than 100 characters.

    The result is returned as a bool rather than asserted, because
    ``run_final_response_length_test`` in this file aggregates the return
    values of the individual checks.

    Returns:
        bool: True when both replies exceed 100 characters. False when
        either does not, or when importing, creating or calling the client
        raises (the exception is printed, not re-raised).
    """
    print("\n🧪 Testing Session Management vs Response Length")
    print("-" * 50)

    try:
        # Imported lazily so a missing/broken package is reported as a failed
        # check by the handler below instead of breaking module import.
        from atles.constitutional_client import create_constitutional_client

        client = create_constitutional_client()

        # First turn on a freshly created client.
        print("Test 1: First message")
        response1 = client.chat("Tell me about renewable energy")
        length1 = len(response1)
        print(f"First message length: {length1} chars")
        print(f"Preview: {response1[:100]}...")

        # Second turn on the same client object.
        print("\nTest 2: Follow-up message")
        response2 = client.chat("What about solar panels specifically?")
        length2 = len(response2)
        print(f"Follow-up length: {length2} chars")
        print(f"Preview: {response2[:100]}...")

        # Strict comparison: a reply of exactly 100 characters does not pass.
        both_detailed = length1 > 100 and length2 > 100

        if both_detailed:
            print("✅ Session management doesn't interfere with response length")
            return True
        else:
            print(f"❌ Session management issue: First: {length1}, Follow-up: {length2}")
            return False

    except Exception as e:
        # Boundary handler: any failure is reported and counted as a failed
        # check so the remaining checks in the suite can still run.
        print(f"❌ ERROR: {e}")
        return False
|
|
def test_identity_vs_normal_responses():
    """Check that identity replies stay short while normal replies are detailed.

    Sends an identity statement and then an ordinary question through the
    same client. The identity reply must be 30-200 characters (inclusive)
    and the ordinary reply must be longer than 150 characters.

    The result is returned as a bool rather than asserted, because
    ``run_final_response_length_test`` in this file aggregates the return
    values of the individual checks.

    Returns:
        bool: True when both length conditions hold. False when either
        fails, or when importing, creating or calling the client raises
        (the exception is printed, not re-raised).
    """
    print("\n🧪 Testing Identity vs Normal Response Lengths")
    print("-" * 50)

    try:
        # Imported lazily so a missing/broken package is reported as a failed
        # check by the handler below instead of breaking module import.
        from atles.constitutional_client import create_constitutional_client

        client = create_constitutional_client()

        # Identity statement: the reply is printed in full, not previewed.
        print("Test 1: Identity statement")
        identity_response = client.chat("I am Conner")
        identity_length = len(identity_response)
        print(f"Identity response length: {identity_length} chars")
        print(f"Identity response: {identity_response}")

        # Ordinary question on the same client object.
        print("\nTest 2: Normal question")
        normal_response = client.chat("Explain photosynthesis")
        normal_length = len(normal_response)
        print(f"Normal response length: {normal_length} chars")
        print(f"Normal preview: {normal_response[:100]}...")

        # Identity bounds are inclusive; the normal-reply bound is strict.
        identity_appropriate = 30 <= identity_length <= 200
        normal_detailed = normal_length > 150

        if identity_appropriate and normal_detailed:
            print("✅ Both identity and normal responses are appropriately sized")
            return True
        else:
            print(f"❌ Length issue - Identity: {identity_length}, Normal: {normal_length}")
            return False

    except Exception as e:
        # Boundary handler: any failure is reported and counted as a failed
        # check so the remaining checks in the suite can still run.
        print(f"❌ ERROR: {e}")
        return False
|
|
def run_final_response_length_test():
    """Run every response-length check and print a pass/fail summary.

    Each check is called in turn; its boolean return value is recorded. A
    check that raises is reported as crashed and recorded as failed, so one
    broken check cannot stop the others from running.

    Returns:
        bool: True when every check returned a truthy value, otherwise False.
    """
    # NOTE(review): the leading glyphs of the four banner lines ("Final
    # Response Length Fix Test", "... TEST SUMMARY", "... COMPLETELY FIXED!",
    # "Verified Solutions:") were unrecoverable mis-encoded bytes; the emoji
    # used for them here are a best guess -- confirm against version control.
    print("🚀 Final Response Length Fix Test")
    print("Verifying that the bootstrap session management fix resolved short responses")
    print("=" * 80)

    # (display name, zero-argument callable returning a bool)
    tests = [
        ("Original Constitutional Client", test_original_constitutional_client),
        ("Unified Constitutional Client", test_unified_constitutional_client),
        ("Session Management", test_session_management),
        ("Identity vs Normal Responses", test_identity_vs_normal_responses),
    ]

    results = []

    for test_name, test_func in tests:
        try:
            result = test_func()
            results.append((test_name, result))
        except Exception as e:
            # Top-level boundary: record the crash as a failure and continue.
            print(f"❌ {test_name} CRASHED: {e}")
            results.append((test_name, False))

    # Summary section.
    print("\n" + "=" * 80)
    print("📊 FINAL RESPONSE LENGTH TEST SUMMARY")
    print("=" * 80)

    passed = 0
    total = len(results)

    for test_name, result in results:
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{status}: {test_name}")
        if result:
            passed += 1

    print(f"\nOverall: {passed}/{total} final response length tests passed")

    if passed == total:
        print("🎉 RESPONSE LENGTH ISSUE COMPLETELY FIXED!")
        print("\n📋 Verified Solutions:")
        print("✅ Original constitutional client gives detailed responses")
        print("✅ Unified constitutional client gives detailed responses")
        print("✅ Session management doesn't interfere with response length")
        print("✅ Identity responses are appropriate, normal responses are detailed")
        print("✅ Bootstrap system only triggers for actual session starts")
        print("✅ No more generic short responses for normal questions")
        print("\n💡 Both client architectures now work correctly!")
        print("\n🔧 Root Cause Fixed:")
        print("- Bootstrap system was too aggressive about session detection")
        print("- Identity responses were being triggered for normal questions")
        print("- Session start logic has been refined to be more precise")
        return True
    else:
        print(f"⚠️ {total - passed} final response length tests failed.")
        return False
|
|
if __name__ == "__main__":
    # Mirror the suite result in the process exit status: 0 when every check
    # passed, 1 otherwise.
    success = run_final_response_length_test()
    sys.exit(0 if success else 1)
|
|