Spaces:

vimalk78
/

abc123

Sleeping

App Files Files Community

abc123 / crossword-app /backend-py /test-integration /test_word_variety.py

vimalk78

Add complete Python backend with AI-powered crossword generation

38c016b 4 months ago

raw

history blame

5.6 kB

	#!/usr/bin/env python3
	"""
	Test word variety to ensure we get different words each time.
	"""

	import asyncio
	import sys
	from pathlib import Path
	from unittest.mock import Mock

	# Put the backend-py directory (the parent of this test-integration folder)
	# at the front of sys.path so the `src.` package import below resolves
	# when this file is executed directly as a script.
	project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py
	sys.path.insert(0, str(project_root))

	from src.services.vector_search import VectorSearchService

	def mock_vector_search():
	    """Build the fixed candidate list used to exercise weighted selection.

	    Returns:
	        A new list of 25 candidate dicts, each with the keys ``word``,
	        ``clue``, ``similarity`` and ``source``. The first word gets a
	        similarity of 0.9 and every following word 0.02 less than the
	        one before it.
	    """
	    science_terms = (
	        "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST",
	        "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY",
	        "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY",
	        "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS",
	        "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE",
	    )

	    # The position in the tuple drives the score, so earlier words are
	    # always the "closer" matches.
	    return [
	        {
	            "word": term,
	            "clue": f"{term.lower()} (scientific term)",
	            "similarity": 0.9 - (rank * 0.02),
	            "source": "vector_search",
	        }
	        for rank, term in enumerate(science_terms)
	    ]

	async def test_word_variety():
	    """Check that repeated weighted selections do not keep returning the same words.

	    Draws 12 words from the mock candidate list five times, counts how many
	    distinct words appear at each of the first five positions across those
	    draws, and prints the total as a percentage of the maximum possible.

	    Returns:
	        True when the variety score is above 60%, otherwise False.
	    """
	    print("🧪 Testing word variety with weighted random selection\n")

	    service = VectorSearchService()
	    pool = mock_vector_search()

	    # Draw several selections from the same candidate pool.
	    draws = []
	    for round_no in range(1, 6):
	        picked = service._weighted_random_selection(pool, 12)
	        picked_words = [entry["word"] for entry in picked]
	        draws.append(picked_words)
	        print(f"Selection {round_no}: {picked_words[:5]}...")  # first 5 words only

	    # For each of the first five slots, count the distinct words seen there.
	    distinct_counts = []
	    for slot in range(5):
	        # Draws shorter than the slot index are skipped for that slot.
	        seen_here = {draw[slot] for draw in draws if len(draw) > slot}
	        distinct_counts.append(len(seen_here))
	        print(f"Position {slot}: {len(seen_here)} different words across 5 selections")

	    # Best case is a different word in every slot of every draw.
	    best_case = len(distinct_counts) * len(draws)
	    variety_percentage = (sum(distinct_counts) / best_case) * 100

	    print(f"\n📊 Variety Score: {variety_percentage:.1f}% (higher is more varied)")

	    if variety_percentage <= 60:
	        print("⚠️ Low variety - same words appearing too often")
	        return False

	    print("✅ Good variety - words are sufficiently randomized")
	    return True

	def test_weighted_tiers():
	    """Check that higher-similarity candidates are selected more often.

	    Builds nine candidates in three similarity tiers (0.95, 0.75, 0.50),
	    runs 100 selections of 6 words each, and compares the average number
	    of times a word from each tier was picked.

	    Returns:
	        True when the tier averages are strictly decreasing from tier 1
	        to tier 3, otherwise False.
	    """
	    print("\n🧪 Testing weighted tier selection\n")

	    service = VectorSearchService()

	    tier1_words = ["EXCELLENT", "PERFECT", "AMAZING"]  # high similarity
	    tier2_words = ["GOOD", "NICE", "FINE"]  # medium-high similarity
	    tier3_words = ["OKAY", "AVERAGE", "BASIC"]  # lower similarity

	    # Candidates are built tier by tier, highest similarity first.
	    pool = []
	    for tier_terms, score in (
	        (tier1_words, 0.95),
	        (tier2_words, 0.75),
	        (tier3_words, 0.50),
	    ):
	        pool.extend(
	            {"word": term, "clue": f"{term} clue", "similarity": score, "source": "test"}
	            for term in tier_terms
	        )

	    # Tally how often each word is picked over many selections.
	    tally = {}
	    for _ in range(100):
	        for entry in service._weighted_random_selection(pool, 6):
	            picked = entry["word"]
	            tally[picked] = tally.get(picked, 0) + 1

	    print("Word selection frequencies:")

	    def mean_hits(tier_terms):
	        """Return the average pick count over the words of one tier."""
	        return sum(tally.get(term, 0) for term in tier_terms) / len(tier_terms)

	    tier1_avg = mean_hits(tier1_words)
	    tier2_avg = mean_hits(tier2_words)
	    tier3_avg = mean_hits(tier3_words)

	    print(f"Tier 1 (high similarity): {tier1_avg:.1f} avg selections")
	    print(f"Tier 2 (medium similarity): {tier2_avg:.1f} avg selections")
	    print(f"Tier 3 (low similarity): {tier3_avg:.1f} avg selections")

	    # Weighting works only if each tier is picked more than the one below it.
	    strictly_ordered = tier1_avg > tier2_avg and tier2_avg > tier3_avg
	    if not strictly_ordered:
	        print("⚠️ Weighted selection not respecting similarity scores")
	        return False

	    print("✅ Weighted selection working correctly")
	    return True

	async def main():
	    """Run both variety checks and report the combined outcome.

	    Returns:
	        True when both checks passed, otherwise False. The value is used
	        by the script entry point to set the process exit status.
	    """
	    print("🎲 Testing Word Selection Variety\n")

	    # Both checks always run, so each prints its own report even when the
	    # other one fails.
	    variety_test = await test_word_variety()
	    tier_test = test_weighted_tiers()

	    all_passed = bool(variety_test and tier_test)
	    if all_passed:
	        print("\n🎉 All variety tests passed!")
	        print("🔄 Word selection should now be much more varied between requests")
	    else:
	        print("\n❌ Some variety tests failed")

	    return all_passed

	if __name__ == "__main__":
	    # Exit non-zero on failure so shells and CI can detect a failed run;
	    # previously the script exited 0 regardless of the outcome.
	    sys.exit(0 if asyncio.run(main()) else 1)