File size: 4,480 Bytes
b249146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python3
"""
Debug script to test problematic responses and identify issues with Aura chatbot
"""

import sys
import os
sys.path.append(os.path.dirname(__file__))

# Import respond plus the response-filter and fallback helpers from app.py
from app import respond, is_inappropriate_response, get_fallback_aura_response

def _matching_phrases(phrases, text):
    """Return the entries of *phrases* that occur in *text*, ignoring case."""
    lowered = text.lower()
    return [phrase for phrase in phrases if phrase.lower() in lowered]


def _run_scenario(index, test_case):
    """Generate a reply for one scenario and print the phrase and filter checks.

    Args:
        index: 1-based position of the scenario, used only in the printed header.
        test_case: Dict with the keys ``input``, ``expected_avoid`` and
            ``expected_include``.
    """
    print(f"\n--- Test Case {index} ---")
    print(f"Input: '{test_case['input']}'")

    # Deliberately broad: this is a debug report, so a failure in one scenario
    # is printed and the remaining scenarios still run.
    try:
        # NOTE(review): assumes respond() returns a plain string — confirm
        # against app.py; a non-string result is reported by the handler below.
        response = respond(
            test_case['input'],
            [],
            max_length=70,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.15,
        )
        print(f"Response: '{response}'")

        found_bad = _matching_phrases(test_case['expected_avoid'], response)
        found_good = _matching_phrases(test_case['expected_include'], response)

        if found_bad:
            print(f"⚠️  INAPPROPRIATE content found: {found_bad}")
        else:
            print("✅ No inappropriate content detected")

        if found_good:
            print(f"✅ Good empathetic content found: {found_good}")
        else:
            print("⚠️  Missing expected empathetic content")

        # Run the application's own filter on the generated reply.
        if is_inappropriate_response(response, test_case['input']):
            print("🚫 Response flagged as inappropriate by filter")
        else:
            print("✅ Response passed inappropriate filter")

    except Exception as e:
        print(f"❌ Error generating response: {e}")


def _check_known_bad_response(user_input, bad_response):
    """Print whether the filter flags *bad_response*, then print the fallback.

    Args:
        user_input: The user message the bad response was given to.
        bad_response: A reply that the filter is expected to flag.
    """
    # Only the first 50 characters of the reply are shown in the header.
    print(f"\nTesting filter on: '{bad_response[:50]}...'")
    if is_inappropriate_response(bad_response, user_input):
        print("✅ Correctly identified as inappropriate")
    else:
        print("❌ Filter FAILED to catch inappropriate response")

    # Printed for every case (not only on filter failure) so the fallback
    # helper is exercised even when the filter behaves as expected.
    fallback = get_fallback_aura_response(user_input)
    print(f"Fallback response: '{fallback}'")


def test_problematic_scenarios():
    """Run the scripted problem scenarios and print a human-readable report.

    The report has two parts:

    1. Each scenario input is passed to ``respond``; the reply is checked
       case-insensitively for phrases to avoid and phrases to include, and is
       then passed to ``is_inappropriate_response``.
    2. A fixed list of known-bad replies is passed to
       ``is_inappropriate_response`` directly, and the output of
       ``get_fallback_aura_response`` for the same user input is printed.

    Nothing is returned or asserted; all results are written to stdout.
    """
    print("🔍 Testing Problematic Scenarios")
    print("=" * 50)

    # Test cases based on the provided chat examples
    test_cases = [
        {
            "input": "I feel like I'm not good enough",
            "expected_avoid": ["don't get discouraged", "it gets easier", "stay strong"],
            "expected_include": ["valid", "understand", "hear you"],
        },
        {
            "input": "I'm feeling overwhelmed lately",
            "expected_avoid": ["don't worry", "everything will be fine", "cheer up"],
            "expected_include": ["overwhelming", "understand", "heavy"],
        },
        {
            "input": "I'm having a really tough day...",
            "expected_avoid": ["it gets better", "stay positive", "think positive"],
            "expected_include": ["tough", "difficult", "heavy"],
        },
        {
            "input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "expected_avoid": ["did you die", "i know many people", "are you okay"],
            "expected_include": ["painful", "broken", "sorry", "doctor"],
        },
    ]

    for i, test_case in enumerate(test_cases, 1):
        _run_scenario(i, test_case)

    print("\n" + "=" * 50)
    print("🔍 Testing Filter Function Directly")

    # (user input, reply the filter is expected to reject)
    bad_responses = [
        ("help me i am feeling depressed today morning i fell from my bed and my hand is broken",
         "Did you die? I know many people who fall there too."),
        ("I feel like I'm not good enough",
         "Don't get discouraged. It gets easier! You'll be there tomorrow, too :D"),
        ("I'm feeling overwhelmed lately",
         "Everything happens for a reason! Stay strong!"),
    ]

    for user_input, bad_response in bad_responses:
        _check_known_bad_response(user_input, bad_response)

# Run the debug scenarios only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_problematic_scenarios()