File size: 4,480 Bytes
b249146 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
#!/usr/bin/env python3
"""
Debug script to test problematic responses and identify issues with Aura chatbot
"""
import sys
import os
sys.path.append(os.path.dirname(__file__))
# Import the response generator, the inappropriate-response filter, and the fallback helper from app.py
from app import respond, is_inappropriate_response, get_fallback_aura_response
def test_problematic_scenarios():
    """Exercise the Aura chatbot on inputs known to trigger poor replies.

    Results are only printed; the function makes no assertions and
    returns None. It runs two passes:

    1. For each sample user message, generate a reply with ``respond`` and
       report which dismissive phrases (``expected_avoid``) and which
       empathetic phrases (``expected_include``) occur in it, using a
       case-insensitive substring match. The reply is then passed to
       ``is_inappropriate_response`` and the verdict is printed.
    2. Known-bad replies are fed straight into
       ``is_inappropriate_response`` to confirm the filter flags them, and
       the output of ``get_fallback_aura_response`` for the same user
       message is printed.
    """
    print("🔍 Testing Problematic Scenarios")
    print("=" * 50)

    # Test cases based on the provided chat examples
    test_cases = [
        {
            "input": "I feel like I'm not good enough",
            "expected_avoid": ["don't get discouraged", "it gets easier", "stay strong"],
            "expected_include": ["valid", "understand", "hear you"],
        },
        {
            "input": "I'm feeling overwhelmed lately",
            "expected_avoid": ["don't worry", "everything will be fine", "cheer up"],
            "expected_include": ["overwhelming", "understand", "heavy"],
        },
        {
            "input": "I'm having a really tough day...",
            "expected_avoid": ["it gets better", "stay positive", "think positive"],
            "expected_include": ["tough", "difficult", "heavy"],
        },
        {
            "input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "expected_avoid": ["did you die", "i know many people", "are you okay"],
            "expected_include": ["painful", "broken", "sorry", "doctor"],
        },
    ]

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n--- Test Case {i} ---")
        print(f"Input: '{test_case['input']}'")

        # Any failure while generating or analysing one reply is reported and
        # the run continues with the next case, so a single bad case cannot
        # hide the results of the others.
        try:
            # The empty list is the second positional argument of respond()
            # (presumably the chat history - confirm against app.respond).
            response = respond(
                test_case['input'],
                [],
                max_length=70,
                temperature=0.6,
                top_p=0.9,
                repetition_penalty=1.15,
            )
            print(f"Response: '{response}'")

            # Phrase checks are case-insensitive substring matches.
            response_lower = response.lower()
            found_bad = [
                phrase
                for phrase in test_case['expected_avoid']
                if phrase.lower() in response_lower
            ]
            found_good = [
                phrase
                for phrase in test_case['expected_include']
                if phrase.lower() in response_lower
            ]

            # Analyze response
            if found_bad:
                print(f"⚠️ INAPPROPRIATE content found: {found_bad}")
            else:
                print("✅ No inappropriate content detected")

            if found_good:
                print(f"✅ Good empathetic content found: {found_good}")
            else:
                print("⚠️ Missing expected empathetic content")

            # Test the filtering function
            if is_inappropriate_response(response, test_case['input']):
                print("🚫 Response flagged as inappropriate by filter")
            else:
                print("✅ Response passed inappropriate filter")
        except Exception as exc:
            print(f"❌ Error generating response: {exc}")

    print("\n" + "=" * 50)
    print("🔍 Testing Filter Function Directly")

    # Test the filter function with known bad responses, given as
    # (user message, bad reply) pairs.
    bad_responses = [
        (
            "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "Did you die? I know many people who fall there too.",
        ),
        (
            "I feel like I'm not good enough",
            "Don't get discouraged. It gets easier! You'll be there tomorrow, too :D",
        ),
        (
            "I'm feeling overwhelmed lately",
            "Everything happens for a reason! Stay strong!",
        ),
    ]

    for user_input, bad_response in bad_responses:
        # Only the first 50 characters of the reply are echoed.
        print(f"\nTesting filter on: '{bad_response[:50]}...'")
        if is_inappropriate_response(bad_response, user_input):
            print("✅ Correctly identified as inappropriate")
        else:
            print("❌ Filter FAILED to catch inappropriate response")

        # Test fallback response for this same user message. Kept inside the
        # loop so every bad reply is paired with its fallback and the code
        # never depends on a loop variable outliving the loop.
        fallback = get_fallback_aura_response(user_input)
        print(f"Fallback response: '{fallback}'")
# Run the debug scenarios only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_problematic_scenarios()
|