Spaces:

Deva1211
/

chatbot

Running

File size: 4,480 Bytes

b249146

#!/usr/bin/env python3
"""
Debug script to test problematic responses and identify issues with Aura chatbot
"""

import sys
import os
sys.path.append(os.path.dirname(__file__))

# Import respond plus the inappropriate-response filter and fallback helpers from app.py
from app import respond, is_inappropriate_response, get_fallback_aura_response

def test_problematic_scenarios():
    """Run the chatbot against known-problematic prompts and print a report.

    The script works in two phases, both of which only write to stdout:

    1. For each scenario, call ``respond`` with the user input and an empty
       list as the second argument (presumably the chat history - confirm
       against ``app.respond``). The reply is then checked, case-insensitively,
       for phrases that should be avoided and phrases that should be present,
       and is passed through ``is_inappropriate_response``.
    2. Hand-written bad replies are fed directly to
       ``is_inappropriate_response``, and the fallback produced by
       ``get_fallback_aura_response`` is printed for every one of them.

    Nothing is asserted and nothing is returned; the output is meant to be
    read by a human while debugging.
    """

    print("🔍 Testing Problematic Scenarios")
    print("=" * 50)

    # Test cases based on the provided chat examples
    test_cases = [
        {
            "input": "I feel like I'm not good enough",
            "expected_avoid": ["don't get discouraged", "it gets easier", "stay strong"],
            "expected_include": ["valid", "understand", "hear you"]
        },
        {
            "input": "I'm feeling overwhelmed lately", 
            "expected_avoid": ["don't worry", "everything will be fine", "cheer up"],
            "expected_include": ["overwhelming", "understand", "heavy"]
        },
        {
            "input": "I'm having a really tough day...",
            "expected_avoid": ["it gets better", "stay positive", "think positive"],
            "expected_include": ["tough", "difficult", "heavy"]
        },
        {
            "input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "expected_avoid": ["did you die", "i know many people", "are you okay"],
            "expected_include": ["painful", "broken", "sorry", "doctor"]
        }
    ]

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n--- Test Case {i} ---")
        print(f"Input: '{test_case['input']}'")

        # The broad catch is deliberate: this is a best-effort debug run, so a
        # failure in one scenario is reported and the remaining ones still run.
        try:
            response = respond(test_case['input'], [], max_length=70, temperature=0.6, top_p=0.9, repetition_penalty=1.15)
            print(f"Response: '{response}'")

            # All phrase matching is case-insensitive substring matching.
            response_lower = response.lower()

            # Phrases that should NOT appear in an empathetic reply
            found_bad = [
                phrase for phrase in test_case['expected_avoid']
                if phrase.lower() in response_lower
            ]

            # Phrases of which at least one is expected in the reply
            found_good = [
                phrase for phrase in test_case['expected_include']
                if phrase.lower() in response_lower
            ]

            # Analyze response
            if found_bad:
                print(f"⚠️  INAPPROPRIATE content found: {found_bad}")
            else:
                print("✅ No inappropriate content detected")

            if found_good:
                print(f"✅ Good empathetic content found: {found_good}")
            else:
                print("⚠️  Missing expected empathetic content")

            # Test the filtering function
            if is_inappropriate_response(response, test_case['input']):
                print("🚫 Response flagged as inappropriate by filter")
            else:
                print("✅ Response passed inappropriate filter")

        except Exception as e:
            print(f"❌ Error generating response: {e}")

    print("\n" + "=" * 50)
    print("🔍 Testing Filter Function Directly")

    # (user input, known-bad reply) pairs the filter is expected to reject
    bad_responses = [
        ("help me i am feeling depressed today morning i fell from my bed and my hand is broken", 
         "Did you die? I know many people who fall there too."),
        ("I feel like I'm not good enough", 
         "Don't get discouraged. It gets easier! You'll be there tomorrow, too :D"),
        ("I'm feeling overwhelmed lately", 
         "Everything happens for a reason! Stay strong!")
    ]

    for user_input, bad_response in bad_responses:
        print(f"\nTesting filter on: '{bad_response[:50]}...'")
        if is_inappropriate_response(bad_response, user_input):
            print("✅ Correctly identified as inappropriate")
        else:
            print("❌ Filter FAILED to catch inappropriate response")

        # Exercise the fallback for every bad input, not only when the filter
        # misses; otherwise a working filter means the fallback is never shown.
        fallback = get_fallback_aura_response(user_input)
        print(f"Fallback response: '{fallback}'")

# Run the debug scenarios only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_problematic_scenarios()