#!/usr/bin/env python3
"""
Debug script to test problematic responses and identify issues with Aura chatbot
"""
import sys
import os

sys.path.append(os.path.dirname(__file__))

# Import the respond function from app.py
from app import respond, is_inappropriate_response, get_fallback_aura_response


def _find_phrases(response_lower, phrases):
    """Return the phrases from *phrases* that occur in the (lowercased) response.

    Matching is case-insensitive: *response_lower* is assumed to already be
    lowercased, and each candidate phrase is lowercased before the substring
    test.
    """
    return [phrase for phrase in phrases if phrase.lower() in response_lower]


def _check_generated_response(test_case):
    """Generate a reply for one scenario dict and print an analysis of it.

    *test_case* must contain the keys 'input' (user message), 'expected_avoid'
    (phrases that indicate an inappropriate reply), and 'expected_include'
    (phrases that indicate an empathetic reply).  Any generation error is
    caught and reported rather than propagated, so one failing scenario does
    not abort the remaining ones.
    """
    try:
        # Generate response with the same sampling settings the app uses for debugging.
        response = respond(test_case['input'], [], max_length=70, temperature=0.6,
                           top_p=0.9, repetition_penalty=1.15)
        print(f"Response: '{response}'")

        response_lower = response.lower()
        found_bad = _find_phrases(response_lower, test_case['expected_avoid'])
        found_good = _find_phrases(response_lower, test_case['expected_include'])

        # Analyze response
        if found_bad:
            print(f"⚠️ INAPPROPRIATE content found: {found_bad}")
        else:
            print("✅ No inappropriate content detected")

        if found_good:
            print(f"✅ Good empathetic content found: {found_good}")
        else:
            print("⚠️ Missing expected empathetic content")

        # Test the filtering function
        if is_inappropriate_response(response, test_case['input']):
            print("🚫 Response flagged as inappropriate by filter")
        else:
            print("✅ Response passed inappropriate filter")
    except Exception as e:
        # Debug harness: report and continue rather than crash the whole run.
        print(f"❌ Error generating response: {e}")


def _test_filter_directly():
    """Feed known-bad responses straight into the filter and show fallbacks.

    Verifies that is_inappropriate_response() catches responses we already
    know are unacceptable, and prints the fallback reply that
    get_fallback_aura_response() would substitute for each input.
    """
    # Test the filter function with known bad responses
    bad_responses = [
        ("help me i am feeling depressed today morning i fell from my bed and my hand is broken",
         "Did you die? I know many people who fall there too."),
        ("I feel like I'm not good enough",
         "Don't get discouraged. It gets easier! You'll be there tomorrow, too :D"),
        ("I'm feeling overwhelmed lately",
         "Everything happens for a reason! Stay strong!"),
    ]

    for user_input, bad_response in bad_responses:
        print(f"\nTesting filter on: '{bad_response[:50]}...'")
        if is_inappropriate_response(bad_response, user_input):
            print("✅ Correctly identified as inappropriate")
        else:
            print("❌ Filter FAILED to catch inappropriate response")

        # Test fallback response
        fallback = get_fallback_aura_response(user_input)
        print(f"Fallback response: '{fallback}'")


def test_problematic_scenarios():
    """Test the specific scenarios that are producing inappropriate responses"""
    print("🔍 Testing Problematic Scenarios")
    print("=" * 50)

    # Test cases based on the provided chat examples
    test_cases = [
        {
            "input": "I feel like I'm not good enough",
            "expected_avoid": ["don't get discouraged", "it gets easier", "stay strong"],
            "expected_include": ["valid", "understand", "hear you"],
        },
        {
            "input": "I'm feeling overwhelmed lately",
            "expected_avoid": ["don't worry", "everything will be fine", "cheer up"],
            "expected_include": ["overwhelming", "understand", "heavy"],
        },
        {
            "input": "I'm having a really tough day...",
            "expected_avoid": ["it gets better", "stay positive", "think positive"],
            "expected_include": ["tough", "difficult", "heavy"],
        },
        {
            "input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "expected_avoid": ["did you die", "i know many people", "are you okay"],
            "expected_include": ["painful", "broken", "sorry", "doctor"],
        },
    ]

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n--- Test Case {i} ---")
        print(f"Input: '{test_case['input']}'")
        _check_generated_response(test_case)

    print("\n" + "=" * 50)
    print("🔍 Testing Filter Function Directly")
    _test_filter_directly()


if __name__ == "__main__":
    test_problematic_scenarios()