Spaces:

Deva1211
/

chatbot

Running

App Files Files Community

chatbot / test_fallbacks.py

Deva1211

🔧 MAJOR FIX: Comprehensive inappropriate response prevention

b249146 4 months ago

raw

history blame contribute delete

3.93 kB

	#!/usr/bin/env python3
	"""
	Test script specifically for fallback responses - the safety net when model generates bad content
	"""

	import sys
	import os
	sys.path.append(os.path.dirname(__file__))

	from app import get_fallback_aura_response, is_inappropriate_response

	def test_fallback_responses():
	"""Test the fallback response system with problematic cases"""

	print("🔍 Testing Fallback Response System")
	print("=" * 50)

	test_cases = [
	{
	"input": "help me i am feeling depressed today morning i fell from my bed and my hand is broken",
	"description": "Combined depression + broken hand injury"
	},
	{
	"input": "I fell and broke my hand",
	"description": "Simple broken hand injury"
	},
	{
	"input": "I feel like I'm not good enough",
	"description": "Low self-worth"
	},
	{
	"input": "I'm feeling overwhelmed lately",
	"description": "Overwhelmed feelings"
	},
	{
	"input": "I'm having a really tough day",
	"description": "Bad day"
	}
	]

	for i, test_case in enumerate(test_cases, 1):
	print(f"\n--- Fallback Test {i}: {test_case['description']} ---")
	print(f"Input: '{test_case['input']}'")

	# Get fallback response
	fallback_response = get_fallback_aura_response(test_case['input'])
	print(f"Fallback response: '{fallback_response}'")

	# Analyze the fallback response
	if len(fallback_response) > 50:
	print("✅ Good length response")
	else:
	print("⚠️ Short response")

	# Check for empathetic content
	empathetic_words = ["sorry", "painful", "difficult", "understand", "hear", "valid", "hard", "tough", "challenging"]
	found_empathy = [word for word in empathetic_words if word.lower() in fallback_response.lower()]
	if found_empathy:
	print(f"✅ Contains empathetic language: {found_empathy}")
	else:
	print("⚠️ Missing empathetic language")

	# Check for specific injury acknowledgment in broken hand case
	if "broken" in test_case['input'] and "hand" in test_case['input']:
	injury_words = ["hand", "broken", "fell", "injury", "painful", "doctor"]
	found_injury_ref = [word for word in injury_words if word.lower() in fallback_response.lower()]
	if found_injury_ref:
	print(f"✅ Acknowledges injury: {found_injury_ref}")
	else:
	print("⚠️ Doesn't acknowledge specific injury")

	print("\n" + "=" * 50)
	print("🔍 Testing Inappropriate Response Detection on Bad Examples")

	# Test the filter with examples that should be caught
	bad_responses = [
	("help me i am feeling depressed today morning i fell from my bed and my hand is broken",
	"Did you die? I know many people who fall there too."),
	("I feel like I'm not good enough",
	"Don't get discouraged. It gets easier! You'll be there tomorrow, too :D"),
	("I'm feeling overwhelmed lately",
	"Everything happens for a reason! Stay strong!"),
	("help me i am feeling depressed today morning i fell from my bed and my hand is broken",
	"you can always try to get a new hand... or just wear a glove."),
	("I fell and broke my hand",
	"You'll be fine! No worries! Happens to everyone!")
	]

	for user_input, bad_response in bad_responses:
	print(f"\nTesting: '{bad_response[:50]}...'")
	if is_inappropriate_response(bad_response, user_input):
	print("✅ Correctly flagged as inappropriate")
	else:
	print("❌ FAILED to catch inappropriate response")

	if __name__ == "__main__":
	test_fallback_responses()