File size: 8,363 Bytes
676533d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python3
"""
Test script demonstrating difficulty-aware softmax selection with frequency percentiles.

This script shows how the extended softmax approach incorporates both semantic similarity
and word frequency percentiles to create difficulty-aware probability distributions.
"""

import os
import sys
import numpy as np

# Add src directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

def test_difficulty_aware_selection():
    """Show which words the service selects for one theme at each difficulty.

    Sets the softmax-related environment variables, builds and initializes a
    ``ThematicWordService``, echoes the configuration it reports, and then
    requests ten words for the theme "animals" at easy, medium and hard
    difficulty, printing each word's similarity, percentile and tier.

    Nothing is asserted; the output is meant to be inspected by eye. The
    environment variables set here are left in place afterwards.
    """
    print("πŸ§ͺ Testing difficulty-aware softmax selection...")

    # Configure the run before the service module is imported and the
    # service object is constructed (same ordering as the import below).
    os.environ.update({
        'SIMILARITY_TEMPERATURE': '0.7',
        'USE_SOFTMAX_SELECTION': 'true',
        'DIFFICULTY_WEIGHT': '0.3',
    })

    from services.thematic_word_service import ThematicWordService

    service = ThematicWordService()
    service.initialize()

    # Echo back what the service reports as its configuration.
    print("βœ… Configuration:")
    print(f"   Temperature: {service.similarity_temperature}")
    print(f"   Softmax enabled: {service.use_softmax_selection}")
    print(f"   Difficulty weight: {service.difficulty_weight}")

    theme = "animals"
    print(f"\n🎯 Testing theme: '{theme}' across difficulty levels")

    for level in ("easy", "medium", "hard"):
        print(f"\nπŸ“Š Difficulty: {level.upper()}")

        generated = service.generate_thematic_words(
            [theme],
            num_words=10,
            difficulty=level,
        )

        print("   Selected words:")
        for term, score, tier_name in generated:
            # Words without a known percentile are shown as 0.0.
            pct = service.word_percentiles.get(term.lower(), 0.0)
            print(f"      {term}: similarity={score:.3f}, percentile={pct:.3f} ({tier_name})")

    print("\nβœ… Difficulty-aware selection test completed!")

def test_composite_scoring():
    """Print how a fixed set of words ranks by composite score per difficulty.

    Sets ``DIFFICULTY_WEIGHT`` to ``0.4``, builds and initializes a
    ``ThematicWordService``, and for easy, medium and hard difficulty calls
    ``service._compute_composite_score`` on five hand-picked
    (word, similarity) pairs. The words are then listed from highest to
    lowest composite score. Nothing is asserted.
    """
    print("\nπŸ§ͺ Testing composite scoring function...")

    # Higher weight than the other tests use, for demonstration.
    os.environ['DIFFICULTY_WEIGHT'] = '0.4'

    from services.thematic_word_service import ThematicWordService

    service = ThematicWordService()
    service.initialize()

    # Mock (word, similarity) pairs; the trailing notes describe the
    # frequency profile the author had in mind for each word.
    samples = (
        ("CAT", 0.8),       # Common word, high similarity
        ("ELEPHANT", 0.9),  # Moderately common, very high similarity
        ("QUETZAL", 0.7),   # Rare word, good similarity
        ("DOG", 0.75),      # Very common, good similarity
        ("PLATYPUS", 0.85), # Rare word, high similarity
    )

    def score_row(term, sim_value, level):
        """Return (word, similarity, percentile, composite) for one sample."""
        composite_value = service._compute_composite_score(sim_value, term, level)
        pct_value = service.word_percentiles.get(term.lower(), 0.0)
        return (term, sim_value, pct_value, composite_value)

    print(f"🎯 Testing composite scoring with difficulty weight: {service.difficulty_weight}")

    for level in ("easy", "medium", "hard"):
        print(f"\nπŸ“Š Difficulty: {level.upper()}")

        rows = [score_row(term, sim_value, level) for term, sim_value in samples]

        # Highest composite score first; ties keep their input order.
        ranked = sorted(rows, key=lambda row: row[3], reverse=True)

        print("   Word ranking by composite score:")
        for term, sim_value, pct_value, composite_value in ranked:
            print(f"      {term}: similarity={sim_value:.3f}, percentile={pct_value:.3f}, composite={composite_value:.3f}")

def test_probability_distributions():
    """Estimate empirically how often each mock candidate is selected.

    Sets ``SIMILARITY_TEMPERATURE`` and ``DIFFICULTY_WEIGHT``, builds and
    initializes a ``ThematicWordService``, and for easy, medium and hard
    difficulty calls ``service._softmax_weighted_selection`` 100 times,
    asking for 3 words out of six mock candidates each time.

    For every candidate it prints the number of times the word was returned
    divided by the number of trials. Because three words are requested per
    trial, these values need not sum to 1. Nothing is asserted.
    """
    print("\nπŸ§ͺ Testing probability distributions across difficulties...")

    os.environ['SIMILARITY_TEMPERATURE'] = '0.7'
    os.environ['DIFFICULTY_WEIGHT'] = '0.3'

    from services.thematic_word_service import ThematicWordService

    service = ThematicWordService()
    service.initialize()

    # Mock candidates with varied frequency profiles: (word, similarity, tier).
    profiles = (
        ("CAT", 0.8, "tier_3_very_common"),
        ("DOG", 0.75, "tier_2_extremely_common"),
        ("ELEPHANT", 0.9, "tier_6_moderately_common"),
        ("TIGER", 0.85, "tier_7_somewhat_uncommon"),
        ("QUETZAL", 0.7, "tier_9_rare"),
        ("PLATYPUS", 0.8, "tier_10_very_rare"),
    )
    candidates = [
        {"word": name, "similarity": sim_value, "tier": tier_name}
        for name, sim_value, tier_name in profiles
    ]

    num_trials = 100

    print("🎯 Analyzing selection probability distributions:")

    for level in ("easy", "medium", "hard"):
        print(f"\nπŸ“Š Difficulty: {level.upper()}")

        # word -> number of times it was returned across all trials
        hit_counts = {}
        for _ in range(num_trials):
            # A shallow copy is handed over so the selector gets its own list;
            # the candidate dicts themselves are shared between trials.
            picked = service._softmax_weighted_selection(
                candidates.copy(),
                num_words=3,
                difficulty=level,
            )
            for entry in picked:
                name = entry["word"]
                hit_counts[name] = hit_counts.get(name, 0) + 1

        print("   Selection probabilities:")
        for entry in candidates:
            name = entry["word"]
            frequency = hit_counts.get(name, 0) / num_trials
            pct_value = service.word_percentiles.get(name.lower(), 0.0)
            print(f"      {name}: {frequency:.2f} (percentile: {pct_value:.3f})")

def test_environment_configuration():
    """Show how different ``DIFFICULTY_WEIGHT`` settings affect composite scores.

    For each scenario the environment variable is set, the
    ``services.thematic_word_service`` module is dropped from ``sys.modules``
    so that the following import loads it afresh, and a new
    ``ThematicWordService`` is constructed. The configured weight and the
    composite score of two sample words are then printed. Nothing is asserted.

    Every environment variable touched by the scenarios is restored to its
    previous value (or removed if it was unset) when the function exits, even
    if a scenario raises, so the last scenario's value does not leak into
    whatever runs next in the same process.
    """
    print("\nπŸ§ͺ Testing environment configuration scenarios...")

    # Each scenario maps environment variable names to values; the "desc"
    # key is a human-readable label and is never exported.
    scenarios = [
        {"DIFFICULTY_WEIGHT": "0.1", "desc": "Low difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.3", "desc": "Balanced (default)"},
        {"DIFFICULTY_WEIGHT": "0.5", "desc": "High difficulty influence"},
        {"DIFFICULTY_WEIGHT": "0.8", "desc": "Frequency-dominant"}
    ]

    # Sample (word, similarity, difficulty) triples scored in every scenario.
    test_cases = [
        ("CAT", 0.8, "easy"),    # Common word, easy difficulty
        ("QUETZAL", 0.7, "hard") # Rare word, hard difficulty
    ]

    # Snapshot the current value of every variable the scenarios overwrite,
    # so the caller's environment can be put back afterwards.
    saved_env = {
        key: os.environ.get(key)
        for scenario in scenarios
        for key in scenario
        if key != "desc"
    }

    try:
        for scenario in scenarios:
            print(f"\nπŸ“Š Scenario: {scenario['desc']} (weight={scenario['DIFFICULTY_WEIGHT']})")

            # Export the scenario's settings.
            for key, value in scenario.items():
                if key != "desc":
                    os.environ[key] = value

            # Force a fresh import of the service module for this scenario.
            sys.modules.pop('services.thematic_word_service', None)

            from services.thematic_word_service import ThematicWordService
            # NOTE(review): initialize() is not called here, unlike the other
            # tests in this file, so word_percentiles may be missing or empty;
            # the percentile lookup below is guarded for that reason.
            service = ThematicWordService()

            print(f"   Configuration loaded: difficulty_weight={service.difficulty_weight}")

            for word, sim, diff in test_cases:
                composite = service._compute_composite_score(sim, word, diff)
                # Missing or empty percentile table -> report 0.0.
                percentiles = getattr(service, 'word_percentiles', None) or {}
                percentile = percentiles.get(word.lower(), 0.0)
                print(f"      {word} ({diff}): similarity={sim:.3f}, percentile={percentile:.3f}, composite={composite:.3f}")
    finally:
        # Put the environment back exactly as it was found.
        for key, previous in saved_env.items():
            if previous is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = previous

if __name__ == "__main__":
    # Script entry point: run every demonstration in order, framed by a
    # banner, then print a short summary of the features exercised.
    rule = "=" * 60

    print("πŸš€ Difficulty-Aware Softmax Selection Test Suite")
    print(rule)

    demos = (
        test_difficulty_aware_selection,
        test_composite_scoring,
        test_probability_distributions,
        test_environment_configuration,
    )
    for run_demo in demos:
        run_demo()

    print("\n" + rule)
    print("πŸŽ‰ All tests completed successfully!")
    print("\nπŸ“‹ Summary of features:")

    feature_lines = (
        "   β€’ Continuous frequency percentiles replace discrete tiers",
        "   β€’ Difficulty-aware composite scoring (similarity + frequency alignment)",
        "   β€’ Configurable difficulty weight via DIFFICULTY_WEIGHT environment variable",
        "   β€’ Smooth probability distributions for easy/medium/hard selection",
        "   β€’ Gaussian peaks for optimal frequency ranges per difficulty",
    )
    for feature_line in feature_lines:
        print(feature_line)

    print("\nπŸš€ Ready for production use with crossword backend!")