|
|
|
|
|
""" |
|
|
Debug script to test problematic responses and identify issues with the Aura chatbot
|
|
""" |
|
|
|
|
|
import sys |
|
|
import os |
|
|
sys.path.append(os.path.dirname(__file__)) |
|
|
|
|
|
|
|
|
from app import respond, is_inappropriate_response, get_fallback_aura_response |
|
|
|
|
|
def test_problematic_scenarios():
    """Run the chatbot against known-problematic prompts and print a report.

    For every entry in the built-in scenario list this calls ``respond`` with
    the prompt, an empty list as the second argument (presumably the chat
    history -- confirm against ``app.respond``) and fixed generation settings,
    then prints:

    * which ``expected_avoid`` phrases occur in the response,
    * which ``expected_include`` phrases occur in the response,
    * the verdict of ``is_inappropriate_response`` for that response.

    Phrase checks are case-insensitive substring matches.

    A second pass feeds hand-written bad replies directly to
    ``is_inappropriate_response`` and prints the result of
    ``get_fallback_aura_response`` for each associated prompt.

    Returns:
        None. All results are written to stdout.

    Any exception raised while generating or checking a response in the first
    pass is caught and printed, so the remaining scenarios still run.
    Exceptions in the second pass are not caught.
    """
    # Status glyphs are spelled as \N{...} escapes so the source stays pure
    # ASCII and cannot be corrupted by a wrong-codec read of the file.
    # NOTE(review): the glyphs were reconstructed from mis-decoded text; the
    # magnifier and cross marks are best guesses -- confirm against the
    # intended output.
    search = "\N{LEFT-POINTING MAGNIFYING GLASS}"
    ok = "\N{WHITE HEAVY CHECK MARK}"
    warn = "\N{WARNING SIGN}\N{VARIATION SELECTOR-16}"
    fail = "\N{CROSS MARK}"
    blocked = "\N{NO ENTRY SIGN}"
    divider = "=" * 50

    print(f"{search} Testing Problematic Scenarios")
    print(divider)

    test_cases = [
        {
            "input": "I feel like I'm not good enough",
            "expected_avoid": ["don't get discouraged", "it gets easier", "stay strong"],
            "expected_include": ["valid", "understand", "hear you"],
        },
        {
            "input": "I'm feeling overwhelmed lately",
            "expected_avoid": ["don't worry", "everything will be fine", "cheer up"],
            "expected_include": ["overwhelming", "understand", "heavy"],
        },
        {
            "input": "I'm having a really tough day...",
            "expected_avoid": ["it gets better", "stay positive", "think positive"],
            "expected_include": ["tough", "difficult", "heavy"],
        },
        {
            "input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "expected_avoid": ["did you die", "i know many people", "are you okay"],
            "expected_include": ["painful", "broken", "sorry", "doctor"],
        },
    ]

    for i, test_case in enumerate(test_cases, 1):
        user_text = test_case["input"]
        print(f"\n--- Test Case {i} ---")
        print(f"Input: '{user_text}'")

        # Deliberately broad: this is a diagnostic script, and one failing
        # scenario should be reported without aborting the others.
        try:
            response = respond(
                user_text,
                [],
                max_length=70,
                temperature=0.6,
                top_p=0.9,
                repetition_penalty=1.15,
            )
            print(f"Response: '{response}'")

            # Lower-case once so every phrase check is case-insensitive.
            response_lower = response.lower()

            found_bad = [
                phrase
                for phrase in test_case["expected_avoid"]
                if phrase.lower() in response_lower
            ]
            found_good = [
                phrase
                for phrase in test_case["expected_include"]
                if phrase.lower() in response_lower
            ]

            if found_bad:
                print(f"{warn} INAPPROPRIATE content found: {found_bad}")
            else:
                print(f"{ok} No inappropriate content detected")

            if found_good:
                print(f"{ok} Good empathetic content found: {found_good}")
            else:
                print(f"{warn} Missing expected empathetic content")

            # Cross-check the phrase lists against the application's own filter.
            if is_inappropriate_response(response, user_text):
                print(f"{blocked} Response flagged as inappropriate by filter")
            else:
                print(f"{ok} Response passed inappropriate filter")

        except Exception as e:
            print(f"{fail} Error generating response: {e}")

    print("\n" + divider)
    print(f"{search} Testing Filter Function Directly")

    # (user prompt, reply the filter is expected to reject)
    bad_responses = [
        (
            "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
            "Did you die? I know many people who fall there too.",
        ),
        (
            "I feel like I'm not good enough",
            "Don't get discouraged. It gets easier! You'll be there tomorrow, too :D",
        ),
        (
            "I'm feeling overwhelmed lately",
            "Everything happens for a reason! Stay strong!",
        ),
    ]

    for user_input, bad_response in bad_responses:
        # Only the first 50 characters of the reply are echoed.
        print(f"\nTesting filter on: '{bad_response[:50]}...'")
        if is_inappropriate_response(bad_response, user_input):
            print(f"{ok} Correctly identified as inappropriate")
        else:
            print(f"{fail} Filter FAILED to catch inappropriate response")

        # Show what the fallback generator returns for the same prompt.
        fallback = get_fallback_aura_response(user_input)
        print(f"Fallback response: '{fallback}'")
|
|
|
|
|
# Run the debug scenarios only when this file is executed directly as a
# script; importing the module does not trigger them.
if __name__ == "__main__":
    test_problematic_scenarios()
|
|
|