File size: 4,698 Bytes
38c016b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
"""
Simple test for word variety logic without dependencies.
"""

import random
from typing import List, Dict, Any

def weighted_random_selection(candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]:
    """
    Pick up to ``max_words`` candidates, favouring higher similarity scores.

    Candidates are ranked by their ``"similarity"`` value (descending) and
    split into four tiers: the top quarter, the next quarter, the next 35%,
    and whatever remains. ``max_words // 3`` picks (capped at the tier size)
    are drawn uniformly from tier 1; the remaining slots are filled by
    weighted sampling without replacement over every candidate not yet
    picked, using weights 4/3/2/1 for tiers 1-4. The result is shuffled
    before it is returned.

    Candidates are tracked by their position in the ranking rather than by
    dict equality, so equal-valued entries are treated as distinct and the
    function always fills every requested slot.

    Args:
        candidates: Dicts that each provide a sortable ``"similarity"`` value.
        max_words: Maximum number of candidates to return.

    Returns:
        A new list: empty when ``max_words`` is not positive; a copy of
        ``candidates`` (input order) when there are at most ``max_words`` of
        them; otherwise exactly ``max_words`` candidates in random order.

    Raises:
        KeyError: If ranking is needed and a candidate has no
            ``"similarity"`` key.
    """
    # A non-positive budget selects nothing (a negative value would otherwise
    # reach random.sample and raise an unhelpful ValueError).
    if max_words <= 0:
        return []

    # Nothing to choose between: hand back a copy so callers never alias the
    # input list, matching the fresh list returned by the sampling path.
    if len(candidates) <= max_words:
        return list(candidates)

    ranked = sorted(candidates, key=lambda w: w["similarity"], reverse=True)
    total = len(ranked)

    # Exclusive end index of each tier within ``ranked``; tier 4 is the rest.
    tier1_end = max(1, total // 4)                     # top 25%
    tier2_end = tier1_end + max(1, total // 4)         # next 25%
    tier3_end = tier2_end + max(1, total * 35 // 100)  # next 35%

    def tier_weight(index: int) -> int:
        """Return the sampling weight for the candidate at rank ``index``."""
        if index < tier1_end:
            return 4
        if index < tier2_end:
            return 3
        if index < tier3_end:
            return 2
        return 1

    # Always include some of tier 1 (but not necessarily all of it).
    tier1_count = min(max_words // 3, tier1_end)
    chosen = set(random.sample(range(tier1_end), tier1_count))
    selected = [ranked[i] for i in sorted(chosen)]

    # Every unpicked candidate competes for the remaining slots.
    pool = [i for i in range(total) if i not in chosen]
    weights = [tier_weight(i) for i in pool]

    # total > max_words guarantees the pool outlasts the remaining slots.
    for _ in range(max_words - len(selected)):
        slot = random.choices(range(len(pool)), weights=weights, k=1)[0]
        selected.append(ranked[pool.pop(slot)])
        weights.pop(slot)  # keep weights aligned with the shrinking pool

    # Final shuffle so tier membership is not revealed by position.
    random.shuffle(selected)

    return selected

def create_test_candidates():
    """Build the fixed list of science-themed candidate dicts used by the test.

    Each entry has ``"word"``, ``"clue"``, ``"similarity"`` and ``"source"``
    keys; similarity starts at 0.9 and drops by 0.02 per position.
    """
    vocabulary = (
        "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST",
        "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY",
        "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY",
        "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS",
        "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE",
    )

    # Earlier words get higher scores, so list order doubles as ranking.
    return [
        {
            "word": word,
            "clue": f"{word.lower()} (scientific term)",
            "similarity": 0.9 - (rank * 0.02),
            "source": "vector_search",
        }
        for rank, word in enumerate(vocabulary)
    ]

def test_variety():
    """Run the selection five times and report how much the picks differ.

    Prints the first five words of each run, then, for each of the first
    five positions, how many distinct words appeared there across the runs.
    Returns True when the resulting variety score exceeds 60%.
    """
    print("🧪 Testing word variety\n")

    pool = create_test_candidates()

    # Collect the word lists from repeated selections over the same pool.
    runs = []
    for i in range(5):
        word_list = [entry["word"] for entry in weighted_random_selection(pool, 12)]
        runs.append(word_list)
        print(f"Selection {i+1}: {word_list[:5]}...")

    # Count distinct words seen at each leading position across the runs.
    distinct_counts = []
    for pos in range(5):
        unique_at_pos = len({run[pos] for run in runs if len(run) > pos})
        distinct_counts.append(unique_at_pos)
        print(f"Position {pos}: {unique_at_pos} different words across 5 selections")

    # Best case: every run shows a different word at every position.
    best_case = len(distinct_counts) * len(runs)
    variety_percentage = (sum(distinct_counts) / best_case) * 100

    print(f"\n📊 Variety Score: {variety_percentage:.1f}%")

    return variety_percentage > 60

if __name__ == "__main__":
    # Run the variety check and report the outcome when executed as a script.
    success = test_variety()
    print("✅ Word variety test passed!" if success else "❌ Word variety test failed!")