# Listing metadata (from the file viewer): file size 8,363 bytes, revision 676533d.
#!/usr/bin/env python3
"""
Test script demonstrating difficulty-aware softmax selection with frequency percentiles.
This script shows how the extended softmax approach incorporates both semantic similarity
and word frequency percentiles to create difficulty-aware probability distributions.
"""
import os
import sys
import numpy as np
# Put the ``src`` directory that sits next to this script at the very front of
# the module search path, ahead of every existing entry.
sys.path[:0] = [os.path.join(os.path.dirname(__file__), 'src')]
def test_difficulty_aware_selection():
    """Generate themed words at each difficulty level and print the results.

    Sets ``SIMILARITY_TEMPERATURE``, ``USE_SOFTMAX_SELECTION`` and
    ``DIFFICULTY_WEIGHT`` in ``os.environ``, builds and initializes a
    ``ThematicWordService``, echoes the configuration the service reports,
    and then requests 10 words for the theme ``"animals"`` at the "easy",
    "medium" and "hard" difficulties. Each returned ``(word, similarity,
    tier)`` tuple is printed together with the word's frequency percentile
    (``0.0`` when the word has no entry in ``service.word_percentiles``).

    The environment variables are left set when the function returns.
    """
    # NOTE(review): the "π" glyph in the per-difficulty heading below is the
    # residue of a mis-encoded emoji whose original code point cannot be
    # recovered from this file, so it is left unchanged.
    print("🧪 Testing difficulty-aware softmax selection...")

    # Set up environment for softmax selection. This happens before the
    # service import below -- presumably so the service sees these values;
    # verify against where ThematicWordService reads its configuration.
    os.environ['SIMILARITY_TEMPERATURE'] = '0.7'
    os.environ['USE_SOFTMAX_SELECTION'] = 'true'
    os.environ['DIFFICULTY_WEIGHT'] = '0.3'

    from services.thematic_word_service import ThematicWordService

    # Create service instance
    service = ThematicWordService()
    service.initialize()

    # Test configuration loading
    print("✅ Configuration:")
    print(f" Temperature: {service.similarity_temperature}")
    print(f" Softmax enabled: {service.use_softmax_selection}")
    print(f" Difficulty weight: {service.difficulty_weight}")

    # Test theme
    theme = "animals"
    difficulties = ["easy", "medium", "hard"]
    print(f"\n🎯 Testing theme: '{theme}' across difficulty levels")

    for difficulty in difficulties:
        print(f"\nπ Difficulty: {difficulty.upper()}")

        # Generate words for each difficulty
        words = service.generate_thematic_words(
            [theme],
            num_words=10,
            difficulty=difficulty,
        )

        print(" Selected words:")
        for word, similarity, tier in words:
            # Percentiles are looked up by lower-cased word.
            percentile = service.word_percentiles.get(word.lower(), 0.0)
            print(f" {word}: similarity={similarity:.3f}, percentile={percentile:.3f} ({tier})")

    print("\n✅ Difficulty-aware selection test completed!")
def test_composite_scoring():
    """Rank a fixed set of sample words by composite score per difficulty.

    Sets ``DIFFICULTY_WEIGHT`` to ``'0.4'`` in ``os.environ``, builds and
    initializes a ``ThematicWordService``, and for each of "easy", "medium"
    and "hard" calls ``service._compute_composite_score(similarity, word,
    difficulty)`` on every sample word. The words are then printed in
    descending order of composite score, alongside their similarity and
    frequency percentile (``0.0`` when the word has no entry in
    ``service.word_percentiles``).

    The environment variable is left set when the function returns.
    """
    # NOTE(review): the "π" glyph in the per-difficulty heading below is the
    # residue of a mis-encoded emoji whose original code point cannot be
    # recovered from this file, so it is left unchanged.
    print("\n🧪 Testing composite scoring function...")

    os.environ['DIFFICULTY_WEIGHT'] = '0.4'  # Higher weight for demonstration

    from services.thematic_word_service import ThematicWordService

    service = ThematicWordService()
    service.initialize()

    # Mock test data - words with different frequency characteristics.
    # The frequency labels are the author's expectations, not values read
    # from the service.
    test_words = [
        ("CAT", 0.8),        # Common word, high similarity
        ("ELEPHANT", 0.9),   # Moderately common, very high similarity
        ("QUETZAL", 0.7),    # Rare word, good similarity
        ("DOG", 0.75),       # Very common, good similarity
        ("PLATYPUS", 0.85),  # Rare word, high similarity
    ]

    print(f"🎯 Testing composite scoring with difficulty weight: {service.difficulty_weight}")

    for difficulty in ("easy", "medium", "hard"):
        print(f"\nπ Difficulty: {difficulty.upper()}")

        scored_words = []
        for word, similarity in test_words:
            composite = service._compute_composite_score(similarity, word, difficulty)
            percentile = service.word_percentiles.get(word.lower(), 0.0)
            scored_words.append((word, similarity, percentile, composite))

        # Sort by composite score (tuple index 3), highest first, to show ranking
        scored_words.sort(key=lambda entry: entry[3], reverse=True)

        print(" Word ranking by composite score:")
        for word, sim, perc, comp in scored_words:
            print(f" {word}: similarity={sim:.3f}, percentile={perc:.3f}, composite={comp:.3f}")
def test_probability_distributions():
    """Estimate how often each mock candidate is picked at each difficulty.

    Sets ``SIMILARITY_TEMPERATURE`` and ``DIFFICULTY_WEIGHT`` in
    ``os.environ``, builds and initializes a ``ThematicWordService``, and for
    each of "easy", "medium" and "hard" runs 100 trials of
    ``service._softmax_weighted_selection(candidates, num_words=3,
    difficulty=...)``. For every candidate it prints the fraction of trials
    in which the word was selected, plus its frequency percentile.

    Because three words are requested per trial, the printed fractions are
    per-word inclusion rates and need not sum to 1.

    The environment variables are left set when the function returns.
    """
    from collections import Counter

    # NOTE(review): the "π" glyph in the per-difficulty heading below is the
    # residue of a mis-encoded emoji whose original code point cannot be
    # recovered from this file, so it is left unchanged.
    print("\n🧪 Testing probability distributions across difficulties...")

    os.environ['SIMILARITY_TEMPERATURE'] = '0.7'
    os.environ['DIFFICULTY_WEIGHT'] = '0.3'

    from services.thematic_word_service import ThematicWordService

    service = ThematicWordService()
    service.initialize()

    # Create mock candidates with varied frequency profiles
    candidates = [
        {"word": "CAT", "similarity": 0.8, "tier": "tier_3_very_common"},
        {"word": "DOG", "similarity": 0.75, "tier": "tier_2_extremely_common"},
        {"word": "ELEPHANT", "similarity": 0.9, "tier": "tier_6_moderately_common"},
        {"word": "TIGER", "similarity": 0.85, "tier": "tier_7_somewhat_uncommon"},
        {"word": "QUETZAL", "similarity": 0.7, "tier": "tier_9_rare"},
        {"word": "PLATYPUS", "similarity": 0.8, "tier": "tier_10_very_rare"},
    ]

    print("🎯 Analyzing selection probability distributions:")

    num_trials = 100
    for difficulty in ("easy", "medium", "hard"):
        print(f"\nπ Difficulty: {difficulty.upper()}")

        # Run multiple selections to estimate probabilities
        selections = Counter()
        for _ in range(num_trials):
            # Hand each trial its own dict copies (not just a new list), so a
            # selection routine that annotates its inputs cannot carry state
            # from one trial into the next.
            selected = service._softmax_weighted_selection(
                [dict(candidate) for candidate in candidates],
                num_words=3,
                difficulty=difficulty,
            )
            selections.update(word_data["word"] for word_data in selected)

        # Calculate and display probabilities
        print(" Selection probabilities:")
        for word_data in candidates:
            word = word_data["word"]
            # Counter yields 0 for words that were never selected.
            probability = selections[word] / num_trials
            percentile = service.word_percentiles.get(word.lower(), 0.0)
            print(f" {word}: {probability:.2f} (percentile: {percentile:.3f})")
def test_environment_configuration():
    """Show composite scores under several ``DIFFICULTY_WEIGHT`` settings.

    For each scenario the function writes the scenario's environment values
    into ``os.environ``, removes ``services.thematic_word_service`` from
    ``sys.modules`` so that the following import executes the module again,
    constructs a new ``ThematicWordService`` and prints the
    ``difficulty_weight`` it reports. It then prints the composite score of
    two sample (word, similarity, difficulty) cases.

    Unlike the other tests in this file, ``service.initialize()`` is not
    called here, so ``word_percentiles`` may be missing or empty; the
    percentile is reported as ``0.0`` in that case.

    The last scenario's environment values are left set on return.
    """
    # NOTE(review): the "π" glyph in the per-scenario heading below is the
    # residue of a mis-encoded emoji whose original code point cannot be
    # recovered from this file, so it is left unchanged.
    print("\n🧪 Testing environment configuration scenarios...")

    scenarios = [
        {"DIFFICULTY_WEIGHT": "0.1", "desc": "Low difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.3", "desc": "Balanced (default)"},
        {"DIFFICULTY_WEIGHT": "0.5", "desc": "High difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.8", "desc": "Frequency-dominant"},
    ]

    # Sample inputs for composite scoring; identical for every scenario.
    test_cases = [
        ("CAT", 0.8, "easy"),      # Common word, easy difficulty
        ("QUETZAL", 0.7, "hard"),  # Rare word, hard difficulty
    ]

    for scenario in scenarios:
        print(f"\nπ Scenario: {scenario['desc']} (weight={scenario['DIFFICULTY_WEIGHT']})")

        # Set environment: every key except the human-readable description
        # is treated as an environment variable name.
        for key, value in scenario.items():
            if key != "desc":
                os.environ[key] = value

        # Test with fresh service: drop any cached module so the import
        # below runs the module body again under the new environment.
        sys.modules.pop('services.thematic_word_service', None)
        from services.thematic_word_service import ThematicWordService

        service = ThematicWordService()
        print(f" Configuration loaded: difficulty_weight={service.difficulty_weight}")

        # Test composite scoring for different words
        for word, sim, diff in test_cases:
            composite = service._compute_composite_score(sim, word, diff)
            # Read the percentile table only after scoring, and tolerate it
            # being absent or empty on an uninitialized service.
            percentiles = getattr(service, 'word_percentiles', None) or {}
            percentile = percentiles.get(word.lower(), 0.0)
            print(f" {word} ({diff}): similarity={sim:.3f}, percentile={percentile:.3f}, composite={composite:.3f}")
# Script entry point: run the four demonstrations in order, then print a
# summary. An exception raised by any test propagates and stops the run
# before the "completed successfully" banner is printed.
if __name__ == "__main__":
    # NOTE(review): the leading "π" glyphs in the banners below are residue
    # of mis-encoded emoji whose original code points cannot be recovered
    # from this file, so they are left unchanged. The list bullets decode
    # unambiguously and have been restored.
    print("π Difficulty-Aware Softmax Selection Test Suite")
    print("=" * 60)

    test_difficulty_aware_selection()
    test_composite_scoring()
    test_probability_distributions()
    test_environment_configuration()

    print("\n" + "=" * 60)
    print("π All tests completed successfully!")
    print("\nπ Summary of features:")
    for feature in (
        "Continuous frequency percentiles replace discrete tiers",
        "Difficulty-aware composite scoring (similarity + frequency alignment)",
        "Configurable difficulty weight via DIFFICULTY_WEIGHT environment variable",
        "Smooth probability distributions for easy/medium/hard selection",
        "Gaussian peaks for optimal frequency ranges per difficulty",
    ):
        print(f" • {feature}")
    print("\nπ Ready for production use with crossword backend!")