vimalk78's picture
Add complete Python backend with AI-powered crossword generation
38c016b
raw
history blame
5.6 kB
#!/usr/bin/env python3
"""
Test word variety to ensure we get different words each time.
"""
import asyncio
import sys
from pathlib import Path
from unittest.mock import Mock
# Put the directory two levels above this file at the front of sys.path.
# This has to run before the `src.` import that follows, otherwise that
# package cannot be resolved from this script's location.
project_root = Path(__file__).parent.parent # Go up from test-integration to backend-py
sys.path.insert(0, str(project_root))
from src.services.vector_search import VectorSearchService
def mock_vector_search():
    """Build a fixed list of mock vector-search candidates.

    Each of the 25 science-themed words becomes one candidate dict with
    the keys ``word``, ``clue``, ``similarity`` and ``source``.  The
    similarity is 0.9 for the first word and drops by 0.02 per position,
    so the list is ordered from most to least similar.

    Returns:
        A new list of candidate dicts on every call.  Nothing in here is
        random: two calls produce equal lists.
    """
    words = [
        "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST",
        "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY",
        "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY",
        "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS",
        "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE",
    ]
    # The score is derived from the position only; the arithmetic is kept
    # unrounded so the values match what callers have always received.
    return [
        {
            "word": word,
            "clue": f"{word.lower()} (scientific term)",
            "similarity": 0.9 - (i * 0.02),
            "source": "vector_search",
        }
        for i, word in enumerate(words)
    ]
async def test_word_variety():
    """Check that repeated weighted selections do not keep returning the same words.

    Draws 12 words five times from the mock candidate list and prints the
    first five words of each draw.  It then counts how many distinct words
    appeared at each of the first five positions.  The variety score is
    the sum of those counts as a percentage of
    (positions checked * number of draws).

    Returns:
        bool: True when the variety score is above 60, False otherwise.
    """
    print("🧪 Testing word variety with weighted random selection\n")

    # A VectorSearchService instance is created here; only the candidate
    # list that is fed into its selection method is mocked.
    service = VectorSearchService()
    pool = mock_vector_search()

    # Repeat the same selection and remember the words of every draw.
    draws = []
    for round_no in range(1, 6):
        picked = service._weighted_random_selection(pool, 12)
        picked_words = [entry["word"] for entry in picked]
        draws.append(picked_words)
        print(f"Selection {round_no}: {picked_words[:5]}...")  # Show first 5 words

    # Count the distinct words seen at each of the first five positions.
    distinct_counts = []
    for slot in range(5):
        # A draw shorter than slot + 1 does not contribute to this position.
        seen = {draw[slot] for draw in draws if len(draw) > slot}
        distinct_counts.append(len(seen))
        print(f"Position {slot}: {len(seen)} different words across 5 selections")

    # Best case: every draw puts a different word at every checked position.
    ceiling = len(distinct_counts) * len(draws)
    variety_percentage = (sum(distinct_counts) / ceiling) * 100
    print(f"\n📊 Variety Score: {variety_percentage:.1f}% (higher is more varied)")

    if variety_percentage <= 60:
        print("⚠️ Low variety - same words appearing too often")
        return False
    print("✅ Good variety - words are sufficiently randomized")
    return True
def test_weighted_tiers():
    """Check that higher-similarity candidates are selected more often.

    Builds nine candidates in three tiers (similarity 0.95, 0.75 and 0.50,
    three words each), asks the service for 6 of them 100 times, and
    compares how often, on average, a word from each tier was picked.

    Returns:
        bool: True when the per-tier averages are strictly ordered
        tier 1 > tier 2 > tier 3, False otherwise.
    """
    # Local import so this function does not depend on the file's import block.
    from collections import Counter

    print("\n🧪 Testing weighted tier selection\n")
    vector_service = VectorSearchService()

    # Candidates with clear tier separation, listed from highest to lowest.
    tier1_words = ["EXCELLENT", "PERFECT", "AMAZING"]  # should appear often
    tier2_words = ["GOOD", "NICE", "FINE"]
    tier3_words = ["OKAY", "AVERAGE", "BASIC"]  # should appear rarely
    tiers = (
        (tier1_words, 0.95),
        (tier2_words, 0.75),
        (tier3_words, 0.50),
    )

    candidates = []
    for tier_words, similarity in tiers:
        candidates.extend(
            {
                "word": word,
                "clue": f"{word} clue",
                "similarity": similarity,
                "source": "test",
            }
            for word in tier_words
        )

    # Run many selections and count how often each word is picked.
    num_trials = 100
    word_counts = Counter()
    for _ in range(num_trials):
        selected = vector_service._weighted_random_selection(candidates, 6)
        word_counts.update(word_obj["word"] for word_obj in selected)

    def tier_average(tier_words):
        # A Counter yields 0 for words that were never selected.
        return sum(word_counts[w] for w in tier_words) / len(tier_words)

    print("Word selection frequencies:")
    tier1_avg = tier_average(tier1_words)
    tier2_avg = tier_average(tier2_words)
    tier3_avg = tier_average(tier3_words)
    print(f"Tier 1 (high similarity): {tier1_avg:.1f} avg selections")
    print(f"Tier 2 (medium similarity): {tier2_avg:.1f} avg selections")
    print(f"Tier 3 (low similarity): {tier3_avg:.1f} avg selections")

    # Weighting works when each tier is picked more often than the one below it.
    if tier1_avg > tier2_avg > tier3_avg:
        print("✅ Weighted selection working correctly")
        return True
    print("⚠️ Weighted selection not respecting similarity scores")
    return False
async def main():
    """Run both variety checks and report the combined outcome.

    Returns:
        bool: True when both checks passed, False otherwise.  Callers that
        ignore the return value behave exactly as before.
    """
    print("🎲 Testing Word Selection Variety\n")
    variety_test = await test_word_variety()
    tier_test = test_weighted_tiers()

    all_passed = bool(variety_test and tier_test)
    if all_passed:
        print("\n🎉 All variety tests passed!")
        print("🔄 Word selection should now be much more varied between requests")
    else:
        print("\n❌ Some variety tests failed")
    return all_passed


if __name__ == "__main__":
    # Propagate a failed check through the process exit status; previously
    # the script printed the failure message but still exited with 0.
    sys.exit(0 if asyncio.run(main()) else 1)