Spaces:

Deva1211
/

chatbot

Running

File size: 3,928 Bytes

b249146

#!/usr/bin/env python3
"""
Test script specifically for the fallback responses - the safety net used when the model generates bad content.
"""

import sys
import os
sys.path.append(os.path.dirname(__file__))

from app import get_fallback_aura_response, is_inappropriate_response

def test_fallback_responses():
    """Exercise the fallback responder and the inappropriate-response filter.

    Part 1 passes several distressed user messages to
    ``get_fallback_aura_response`` and prints heuristic checks on each reply:

    * length (more than 50 characters counts as "good"),
    * presence of at least one empathetic keyword,
    * for inputs mentioning a broken hand, presence of at least one
      injury-related keyword.

    Part 2 passes known-bad ``(user_input, reply)`` pairs to
    ``is_inappropriate_response`` and prints whether each was flagged.

    Keyword checks are plain case-insensitive substring tests, so e.g.
    "hear" also matches "heart". Results are only printed; nothing is
    asserted and the function returns ``None``.
    """

    print("🔍 Testing Fallback Response System")
    print("=" * 50)

    # Keyword lists are loop-invariant, so build them once (all lowercase).
    empathetic_words = ("sorry", "painful", "difficult", "understand", "hear",
                        "valid", "hard", "tough", "challenging")
    injury_words = ("hand", "broken", "fell", "injury", "painful", "doctor")

    test_cases = [
        {
            "input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "description": "Combined depression + broken hand injury",
        },
        {
            "input": "I fell and broke my hand",
            "description": "Simple broken hand injury",
        },
        {
            "input": "I feel like I'm not good enough",
            "description": "Low self-worth",
        },
        {
            "input": "I'm feeling overwhelmed lately",
            "description": "Overwhelmed feelings",
        },
        {
            "input": "I'm having a really tough day",
            "description": "Bad day",
        },
    ]

    for i, test_case in enumerate(test_cases, 1):
        user_text = test_case['input']
        print(f"\n--- Fallback Test {i}: {test_case['description']} ---")
        print(f"Input: '{user_text}'")

        # Get fallback response
        fallback_response = get_fallback_aura_response(user_text)
        print(f"Fallback response: '{fallback_response}'")

        # Treat a missing/empty reply as empty text so the checks below report
        # it as short and unempathetic instead of crashing on len()/lower().
        response_text = fallback_response or ""
        response_lower = response_text.lower()

        # Analyze the fallback response
        if len(response_text) > 50:
            print("✅ Good length response")
        else:
            print("⚠️  Short response")

        # Check for empathetic content
        found_empathy = [word for word in empathetic_words if word in response_lower]
        if found_empathy:
            print(f"✅ Contains empathetic language: {found_empathy}")
        else:
            print("⚠️  Missing empathetic language")

        # Check for specific injury acknowledgment in broken hand cases.
        # Match on the stem "broke" so that both "broke my hand" and
        # "my hand is broken" trigger the check (the original test for the
        # literal "broken" skipped the "I fell and broke my hand" case).
        input_lower = user_text.lower()
        if "broke" in input_lower and "hand" in input_lower:
            found_injury_ref = [word for word in injury_words if word in response_lower]
            if found_injury_ref:
                print(f"✅ Acknowledges injury: {found_injury_ref}")
            else:
                print("⚠️  Doesn't acknowledge specific injury")

    print("\n" + "=" * 50)
    print("🔍 Testing Inappropriate Response Detection on Bad Examples")

    # Test the filter with examples that should be caught.
    # Each entry is (user input, reply that the filter is expected to flag).
    bad_responses = [
        ("help me i am feeling depressed today morning i fell from my bed and my hand is broken",
         "Did you die? I know many people who fall there too."),
        ("I feel like I'm not good enough",
         "Don't get discouraged. It gets easier! You'll be there tomorrow, too :D"),
        ("I'm feeling overwhelmed lately",
         "Everything happens for a reason! Stay strong!"),
        ("help me i am feeling depressed today morning i fell from my bed and my hand is broken",
         "you can always try to get a new hand... or just wear a glove."),
        ("I fell and broke my hand",
         "You'll be fine! No worries! Happens to everyone!"),
    ]

    for user_input, bad_response in bad_responses:
        # Only the first 50 characters of the reply are echoed.
        print(f"\nTesting: '{bad_response[:50]}...'")
        if is_inappropriate_response(bad_response, user_input):
            print("✅ Correctly flagged as inappropriate")
        else:
            print("❌ FAILED to catch inappropriate response")

# Run the checks only when this file is executed directly, not when imported.
if __name__ == "__main__":
    test_fallback_responses()