| |
| """ |
| Comprehensive Test for ATLES Conversation Fixes |
| |
| This test replicates the exact conversation scenarios that failed |
| and verifies that all fixes are working correctly together. |
| """ |
|
|
| import sys |
| import os |
| from pathlib import Path |
|
|
| |
# Make modules that live next to this script (e.g. the ``atles`` package)
# importable regardless of the current working directory.
_script_dir = Path(__file__).parent
sys.path.append(os.fspath(_script_dir))
|
|
def test_identity_and_memory(client=None):
    """Test identity recognition and memory access.

    Sends "i am conner" and then "can you see your past chats with conner"
    to the client and applies keyword heuristics to both replies.

    Args:
        client: Optional object exposing ``chat(message) -> str``. When
            omitted, one is built with ``create_constitutional_client()``.
            Supplying a client allows the heuristics to be exercised
            without the ATLES package.

    Returns:
        True when both checks pass; False when either check fails or any
        exception is raised (the exception is printed, not propagated).
    """
    print("🧪 Testing Identity Recognition & Memory")
    print("-" * 50)

    try:
        if client is None:
            from atles.constitutional_client import create_constitutional_client

            client = create_constitutional_client()

        print("Test 1: Identity Recognition")
        identity_response = client.chat("i am conner")
        print(f"Response: {identity_response}")

        identity_text = identity_response.lower()
        acknowledged = "good to see" in identity_text or "creator" in identity_text
        if "conner" in identity_text and acknowledged:
            print("✅ Identity recognition working")
        else:
            print("❌ Identity recognition failed")
            return False

        print("\nTest 2: Memory Access")
        memory_response = client.chat("can you see your past chats with conner")
        print(f"Response: {memory_response[:200]}...")

        memory_text = memory_response.lower()
        # A reply that merely mentions "memory" while refusing access must
        # not pass, so every common spelling of the refusal is rejected
        # (straight and typographic apostrophe, "cannot", "can not").
        denial_markers = ("can't", "can\u2019t", "cannot", "can not")
        denied = any(marker in memory_text for marker in denial_markers)

        if not denied and "memory" in memory_text:
            print("✅ Memory access working")
        else:
            print("❌ Memory access failed")
            return False

        return True

    except Exception as e:
        # Any failure (missing package, client error, non-string reply)
        # counts as a failed test rather than aborting the whole run.
        print(f"❌ ERROR: {e}")
        return False
|
|
def test_hypothetical_engagement(client=None):
    """Test hypothetical engagement scenarios.

    Sends "what do you wanna do today" and checks that the reply engages
    with the question instead of falling back to an assistant-style offer
    of help.

    Args:
        client: Optional object exposing ``chat(message) -> str``. When
            omitted, one is built with ``create_constitutional_client()``.

    Returns:
        True only when no help-mode phrase is present and at least one
        engagement phrase is; False otherwise, including on any exception.
    """
    print("\n🧪 Testing Hypothetical Engagement")
    print("-" * 50)

    try:
        if client is None:
            from atles.constitutional_client import create_constitutional_client

            client = create_constitutional_client()

        print("Test: 'what do you wanna do today'")
        response = client.chat("what do you wanna do today")
        print(f"Response: {response[:300]}...")

        # Phrases that signal the reply fell back to generic help mode.
        failure_indicators = [
            "how can i help",
            "what kind of action would you like",
            "what would you like me to do",
        ]

        # Phrases that signal the reply engaged with the hypothetical.
        success_indicators = [
            "interesting question",
            "fascinating",
            "intellectually",
            "creative",
            "explore",
        ]

        # Lower-case once; all indicators are lower-case.
        text = response.lower()
        has_failure = any(indicator in text for indicator in failure_indicators)
        has_success = any(indicator in text for indicator in success_indicators)

        # A help-mode phrase fails the test even if engagement words appear.
        if has_failure:
            print("❌ Still defaulting to help mode")
            return False
        if has_success:
            print("✅ Creative hypothetical engagement working")
            return True

        print("❌ Ambiguous response")
        return False

    except Exception as e:
        print(f"❌ ERROR: {e}")
        return False
|
|
def test_mathematical_accuracy(client=None):
    """Test mathematical calculation accuracy.

    Sends "10*8*855*21" and looks for the correct product among the
    numbers in the reply. Thousands separators are ignored, so both
    "1,436,400" and "1436400" are accepted.

    Args:
        client: Optional object exposing ``chat(message) -> str``. When
            omitted, one is built with ``create_constitutional_client()``.

    Returns:
        True when the correct product appears in the reply; False when it
        does not, or when any exception is raised.
    """
    import re

    print("\n🧪 Testing Mathematical Accuracy")
    print("-" * 50)

    try:
        if client is None:
            from atles.constitutional_client import create_constitutional_client

            client = create_constitutional_client()

        print("Test: '10*8*855*21'")
        response = client.chat("10*8*855*21")
        print(f"Response: {response}")

        # Compute the expected value instead of hard-coding a formatted string.
        correct_answer = str(10 * 8 * 855 * 21)
        # The specific wrong result this test reports with its own message.
        incorrect_answer = "1433600"

        # Extract whole numbers (optionally comma-grouped) and compare them
        # exactly, so "1436400" matches but "11,436,400" does not.
        numbers = {
            token.replace(",", "") for token in re.findall(r"\d[\d,]*", response)
        }

        if correct_answer in numbers:
            print("✅ Mathematical accuracy working")
            return True
        if incorrect_answer in numbers:
            print("❌ Still producing incorrect calculation")
            return False

        print("❌ No clear mathematical result")
        return False

    except Exception as e:
        print(f"❌ ERROR: {e}")
        return False
|
|
def _affirms_phrase(text, phrases, negations):
    """Return True if any phrase occurs in text without a negation before it in the same sentence."""
    for phrase in phrases:
        start = text.find(phrase)
        while start != -1:
            # Only the part of the sentence leading up to the phrase can
            # negate it; rfind yields -1 when no boundary exists, giving 0.
            sentence_start = max(text.rfind(mark, 0, start) for mark in ".!?\n") + 1
            lead_in = text[sentence_start:start]
            if not any(negation in lead_in for negation in negations):
                return True
            start = text.find(phrase, start + 1)
    return False


def test_capability_grounding(client=None):
    """Test capability grounding to prevent hallucinations.

    Sends "can you ask gemini to help" and checks that the reply does not
    claim it can reach an external AI, and that it states a limitation.
    A phrase such as "contact gemini" is only treated as a hallucination
    when it is not negated earlier in the same sentence, so a refusal like
    "I cannot contact Gemini" is not misread as a claim.

    Args:
        client: Optional object exposing ``chat(message) -> str``. When
            omitted, one is built with ``create_constitutional_client()``.

    Returns:
        True when the reply is grounded and free of affirmative capability
        claims; False otherwise, including on any exception.
    """
    print("\n🧪 Testing Capability Grounding")
    print("-" * 50)

    try:
        if client is None:
            from atles.constitutional_client import create_constitutional_client

            client = create_constitutional_client()

        print("Test: 'can you ask gemini to help'")
        response = client.chat("can you ask gemini to help")
        print(f"Response: {response[:300]}...")

        hallucination_indicators = [
            "i can ask gemini",
            "contact gemini",
            "reach out to gemini",
        ]

        # Wording that turns one of the phrases above into a refusal.
        negations = (
            "cannot",
            "can't",
            "can\u2019t",
            "can not",
            "unable to",
            "not able to",
            "no way to",
        )

        grounding_indicators = [
            "cannot",
            "can't",
            "can\u2019t",
            "offline-first",
            "local models",
            "what i can help with",
        ]

        text = response.lower()
        has_hallucination = _affirms_phrase(text, hallucination_indicators, negations)
        has_grounding = any(indicator in text for indicator in grounding_indicators)

        # An affirmative capability claim fails the test even when the
        # reply also contains grounding language.
        if has_hallucination:
            print("❌ Still hallucinating external AI capabilities")
            return False
        if has_grounding:
            print("✅ Capability grounding working")
            return True

        print("❌ Ambiguous capability response")
        return False

    except Exception as e:
        print(f"❌ ERROR: {e}")
        return False
|
|
def test_task_adherence(client=None):
    """Test task adherence and game scenarios.

    Sends "20 questions" and checks that the reply engages with the game
    and does not emit function-call style tokens.

    Args:
        client: Optional object exposing ``chat(message) -> str``. When
            omitted, one is built with ``create_constitutional_client()``.

    Returns:
        True when the reply shows game engagement without function-call
        tokens; False otherwise, including on any exception.
    """
    print("\n🧪 Testing Task Adherence")
    print("-" * 50)

    try:
        if client is None:
            from atles.constitutional_client import create_constitutional_client

            client = create_constitutional_client()

        print("Test: '20 questions'")
        response = client.chat("20 questions")
        print(f"Response: {response[:300]}...")

        # Matched case-sensitively against the raw reply: these are
        # upper-case tokens, not ordinary words.
        fake_function_indicators = [
            "GET_RANDOM_GAME",
            "FUNCTION_CALL:",
            "RUN_COMMAND[",
        ]

        # Matched case-insensitively.
        engagement_indicators = [
            "think of something",
            "i'm thinking",
            "ready to play",
            "let's play",
            "game",
        ]

        text = response.lower()
        has_fake_functions = any(
            indicator in response for indicator in fake_function_indicators
        )
        has_engagement = any(indicator in text for indicator in engagement_indicators)

        # A function-call token fails the test even if the reply also
        # mentions the game.
        if has_fake_functions:
            print("❌ Still producing fake function calls")
            return False
        if has_engagement:
            print("✅ Task adherence working")
            return True

        print("❌ No clear game engagement")
        return False

    except Exception as e:
        print(f"❌ ERROR: {e}")
        return False
|
|
def run_comprehensive_conversation_test(tests=None):
    """Run all conversation fix tests and print a summary.

    Args:
        tests: Optional iterable of ``(name, callable)`` pairs. Each
            callable takes no arguments and returns a truthy value on
            success. Defaults to the five conversation tests in this
            module.

    Returns:
        True when every test returned a truthy value; False otherwise.
        A test that raises is reported as crashed and counted as failed.
    """
    print("🚀 ATLES Comprehensive Conversation Fixes Test")
    print("Testing the exact scenarios that failed in the conversation log")
    print("=" * 70)

    if tests is None:
        tests = [
            ("Identity Recognition & Memory", test_identity_and_memory),
            ("Hypothetical Engagement", test_hypothetical_engagement),
            ("Mathematical Accuracy", test_mathematical_accuracy),
            ("Capability Grounding", test_capability_grounding),
            ("Task Adherence", test_task_adherence),
        ]

    results = []

    for test_name, test_func in tests:
        try:
            outcome = bool(test_func())
        except Exception as e:
            # One crashing test must not stop the remaining tests.
            print(f"❌ {test_name} CRASHED: {e}")
            outcome = False
        results.append((test_name, outcome))

    print("\n" + "=" * 70)
    print("📊 COMPREHENSIVE CONVERSATION TEST SUMMARY")
    print("=" * 70)

    for test_name, outcome in results:
        status = "✅ PASS" if outcome else "❌ FAIL"
        print(f"{status}: {test_name}")

    total = len(results)
    passed = sum(1 for _, outcome in results if outcome)

    print(f"\nOverall: {passed}/{total} conversation tests passed")

    if passed == total:
        print("🎉 ALL CONVERSATION TESTS PASSED!")
        print("\n📋 Verified Fixes:")
        print("✅ Identity Recognition - Properly recognizes Conner as creator")
        print("✅ Hypothetical Engagement - Engages creatively instead of defaulting to help")
        print("✅ Mathematical Accuracy - Prevents calculation errors with verification")
        print("✅ Capability Grounding - Prevents hallucinations about external AIs")
        print("✅ Task Adherence - Engages properly without fake function calls")
        print("\n💡 The conversation failures from the log have been resolved!")
        print("\n🎯 ATLES is now ready for production use!")
        return True

    print(f"⚠️ {total - passed} conversation tests failed.")
    print("\n🔧 Next Steps:")
    print("- Review failed tests above")
    print("- Check system logs for additional details")
    print("- Verify all components are properly integrated")
    return False
|
|
if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when every test passed,
    # 1 otherwise.
    all_passed = run_comprehensive_conversation_test()
    exit_code = 1
    if all_passed:
        exit_code = 0
    sys.exit(exit_code)
|
|