Spaces:

DocUA
/

Spiritual_Health_Project

Sleeping

App Files Files Community

Spiritual_Health_Project / tests /unit /test_question_validator.py

DocUA

feat: Complete prompt optimization system implementation

24214fc 29 days ago

raw

history blame contribute delete

8.09 kB

	#!/usr/bin/env python3
	"""
	Test script for QuestionEffectivenessValidator functionality.

	Exercises single-question validation, batch validation and report
	generation, printing the scores it gets back. Can be executed directly
	as a script; the process exit status reflects the overall result.
	"""

	import sys
	import os
	# Add <this file's directory>/../../src to the import path before the
	# project imports below run — presumably that is where the `config`
	# package lives (TODO confirm against the repository layout).
	sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'src'))

	from config.prompt_management.question_validator import QuestionEffectivenessValidator, QuestionQuality
	from config.prompt_management.data_models import ScenarioType

	def test_question_validator():
	    """Run every QuestionEffectivenessValidator smoke check and report the outcome.

	    Builds one validator and passes it through seven numbered sections,
	    each implemented by a private helper below. Sections 1-5 and 7 only
	    print what the validator returned (a "⚠" line is informational and
	    does not affect the result). Section 6 is the only one that can make
	    this function return False.

	    Returns:
	        True when all sections ran to completion, False when one of the
	        edge-case inputs made the validator raise. Exceptions raised in
	        any other section propagate to the caller.

	    NOTE(review): pytest does not treat a returned ``False`` as a failure
	    and flags test functions that return a non-None value; the boolean is
	    what the ``__main__`` runner of this script consumes.
	    """
	    print("Testing QuestionEffectivenessValidator...")

	    # Initialize validator
	    validator = QuestionEffectivenessValidator()
	    print("✓ QuestionEffectivenessValidator initialized")

	    _report_high_quality_questions(validator)
	    _report_poor_quality_questions(validator)
	    _report_component_scores(validator)
	    batch_results = _report_batch_validation(validator)
	    _report_effectiveness_summary(validator, batch_results)

	    # Stop early, skipping section 7 and the success banner, as soon as an
	    # edge case blows up.
	    if not _check_edge_cases(validator):
	        return False

	    _report_scenario_targeting(validator)

	    print("\n✓ All QuestionEffectivenessValidator tests passed!")
	    return True


	def _report_high_quality_questions(validator):
	    """Section 1: score well-formed questions and print whether each reaches 0.6."""
	    print("\n1. Testing high-quality question validation...")

	    high_quality_questions = [
	        (
	            "You mentioned you can't garden anymore. Is that something that's been weighing on you emotionally, or is it more about time or circumstances?",
	            ScenarioType.LOSS_OF_INTEREST,
	        ),
	        (
	            "I'm sorry for your loss. How have you been coping with this? Is there anything that's been particularly difficult for you?",
	            ScenarioType.LOSS_OF_LOVED_ONE,
	        ),
	        (
	            "It sounds like you're managing a lot on your own. How is that affecting you? Is it more of a practical challenge, or is it weighing on you emotionally?",
	            ScenarioType.NO_SUPPORT,
	        ),
	        (
	            "I hear that things have been stressful. Can you tell me more about what's been causing that stress?",
	            ScenarioType.VAGUE_STRESS,
	        ),
	        (
	            "Sleep difficulties can be really challenging. Is there something specific on your mind that's keeping you awake, or do you think it might be related to your medical situation?",
	            ScenarioType.SLEEP_ISSUES,
	        ),
	    ]

	    for question, scenario_type in high_quality_questions:
	        analysis = validator.validate_question_effectiveness(question, scenario_type)

	        print(f" Question: {question[:50]}...")
	        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")
	        print(f" Targeting: {analysis.targeting_score:.2f}, Empathy: {analysis.empathy_score:.2f}, Clarity: {analysis.clarity_score:.2f}")

	        # Informational only: a low score is reported, not treated as failure.
	        if analysis.effectiveness_score >= 0.6:
	            print(" ✓ High quality achieved")
	        else:
	            print(" ⚠ Lower than expected quality")

	        if analysis.strengths:
	            print(f" Strengths: {len(analysis.strengths)} identified")

	        print()


	def _report_poor_quality_questions(validator):
	    """Section 2: score weak questions and print whether each stays below 0.5."""
	    print("2. Testing poor-quality question validation...")

	    poor_quality_questions = [
	        ("How are you feeling?", ScenarioType.LOSS_OF_INTEREST),
	        ("That's sad.", ScenarioType.LOSS_OF_LOVED_ONE),
	        ("Okay.", ScenarioType.NO_SUPPORT),
	        ("Tell me more", ScenarioType.VAGUE_STRESS),
	        (
	            "Are you sleeping well or not sleeping well or maybe sleeping okay but not great and what do you think about that situation with your sleep patterns?",
	            ScenarioType.SLEEP_ISSUES,
	        ),
	    ]

	    for question, scenario_type in poor_quality_questions:
	        analysis = validator.validate_question_effectiveness(question, scenario_type)

	        print(f" Question: {question[:50]}...")
	        print(f" Score: {analysis.effectiveness_score:.2f} ({analysis.quality_level.value})")

	        # Informational only: a high score is reported, not treated as failure.
	        if analysis.effectiveness_score < 0.5:
	            print(" ✓ Correctly identified as low quality")
	        else:
	            print(" ⚠ Higher than expected quality")

	        # Show at most the first two entries of each list.
	        if analysis.weaknesses:
	            print(f" Weaknesses: {analysis.weaknesses[:2]}")

	        if analysis.suggestions:
	            print(f" Suggestions: {analysis.suggestions[:2]}")

	        print()


	def _report_component_scores(validator):
	    """Section 3: print the targeting, empathy and clarity score of one probe each."""
	    print("3. Testing component scoring...")

	    # Test targeting score
	    targeting_test = "Is that something that's been weighing on you emotionally, or is it more about circumstances?"
	    analysis = validator.validate_question_effectiveness(targeting_test, ScenarioType.LOSS_OF_INTEREST)
	    print(f" Targeting test: {analysis.targeting_score:.2f}")

	    # Test empathy score
	    empathy_test = "I'm sorry for your loss. I understand this must be very difficult for you."
	    analysis = validator.validate_question_effectiveness(empathy_test, ScenarioType.LOSS_OF_LOVED_ONE)
	    print(f" Empathy test: {analysis.empathy_score:.2f}")

	    # Test clarity score
	    clarity_test = "What specifically has been causing your sleep problems?"
	    analysis = validator.validate_question_effectiveness(clarity_test, ScenarioType.SLEEP_ISSUES)
	    print(f" Clarity test: {analysis.clarity_score:.2f}")


	def _report_batch_validation(validator):
	    """Section 4: validate three questions in one batch call and return the results."""
	    print("\n4. Testing batch validation...")

	    batch_questions = [
	        ("You mentioned you can't garden anymore. Is that weighing on you emotionally?", ScenarioType.LOSS_OF_INTEREST),
	        ("How are you coping with your loss?", ScenarioType.LOSS_OF_LOVED_ONE),
	        ("What's causing your stress?", ScenarioType.VAGUE_STRESS),
	    ]

	    batch_results = validator.batch_validate_questions(batch_questions)
	    print(f" ✓ Batch validated {len(batch_results)} questions")

	    for position, result in enumerate(batch_results, start=1):
	        print(f" Question {position}: {result.effectiveness_score:.2f} ({result.quality_level.value})")

	    return batch_results


	def _report_effectiveness_summary(validator, batch_results):
	    """Section 5: build the effectiveness report for *batch_results* and print its headline fields."""
	    print("\n5. Testing effectiveness report generation...")

	    report = validator.generate_effectiveness_report(batch_results)

	    print(f" ✓ Report generated for {report['total_questions']} questions")
	    print(f" Average effectiveness: {report['average_scores']['effectiveness']}")
	    print(f" Quality distribution: {report['quality_distribution']}")

	    # Only the first element of the first entry is shown for each list.
	    if report['common_strengths']:
	        print(f" Most common strength: {report['common_strengths'][0][0]}")

	    if report['common_weaknesses']:
	        print(f" Most common weakness: {report['common_weaknesses'][0][0]}")


	def _check_edge_cases(validator):
	    """Section 6: feed unusual inputs to the validator; return False on the first one that raises."""
	    print("\n6. Testing edge cases...")

	    edge_cases = [
	        ("", None),  # Empty question
	        ("This is not a question", ScenarioType.VAGUE_STRESS),  # No question mark
	        ("What? How? Why? When? Where?", ScenarioType.LOSS_OF_INTEREST),  # Multiple questions
	        ("A" * 200, ScenarioType.NO_SUPPORT),  # Very long question
	    ]

	    for question, scenario_type in edge_cases:
	        # Deliberately broad: any exception for these inputs counts as the
	        # failure being tested for, and is reported rather than propagated.
	        try:
	            analysis = validator.validate_question_effectiveness(question, scenario_type)
	            print(f" ✓ Handled edge case: {len(question)} chars → {analysis.effectiveness_score:.2f}")
	        except Exception as e:
	            print(f" ✗ Edge case failed: {e}")
	            return False

	    return True


	def _report_scenario_targeting(validator):
	    """Section 7: print each scenario's targeting score and whether it reaches 0.5."""
	    print("\n7. Testing scenario-specific validation...")

	    scenario_tests = {
	        ScenarioType.LOSS_OF_INTEREST: "Is this change meaningful to you, or is it more about practical circumstances?",
	        ScenarioType.LOSS_OF_LOVED_ONE: "How are you processing this grief emotionally?",
	        ScenarioType.NO_SUPPORT: "Is this isolation causing you distress, or is it more about practical assistance?",
	        ScenarioType.VAGUE_STRESS: "What specifically is contributing to that stress?",
	        ScenarioType.SLEEP_ISSUES: "Is something on your mind keeping you awake, or might it be medical?",
	    }

	    for scenario_type, question in scenario_tests.items():
	        analysis = validator.validate_question_effectiveness(question, scenario_type)
	        print(f" {scenario_type.value}: {analysis.targeting_score:.2f} targeting score")

	        # Informational only: weak targeting is reported, not treated as failure.
	        if analysis.targeting_score >= 0.5:
	            print(" ✓ Good scenario targeting")
	        else:
	            print(" ⚠ Weak scenario targeting")

	if __name__ == "__main__":
	    # Script entry point: map the boolean result onto a process exit
	    # status (0 when the run succeeded, 1 otherwise).
	    exit_status = 0 if test_question_validator() else 1
	    sys.exit(exit_status)