Spaces:

vimalk78
/

abc123

Sleeping

File size: 4,698 Bytes

38c016b

#!/usr/bin/env python3
"""
Simple test for word variety logic without dependencies.
"""

import random
from typing import List, Dict, Any

def weighted_random_selection(candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]:
    """
    Pick ``max_words`` candidates at random, favouring higher similarity.

    Candidates are ranked by their ``"similarity"`` value (highest first) and
    split into four tiers: top 25%, next 25%, next 35%, and the remainder.
    Up to ``max_words // 3`` entries are drawn uniformly from the top tier.
    The remaining slots are filled by weighted sampling without replacement,
    using weight 4 for unused top-tier entries and 3 / 2 / 1 for tiers 2-4.

    Args:
        candidates: Word records; each needs a sortable ``"similarity"`` entry
            whenever ranking is required.
        max_words: Maximum number of records to return.

    Returns:
        ``candidates`` itself (same object, original order) when it holds at
        most ``max_words`` entries. Otherwise a new, shuffled list of exactly
        ``max_words`` records. A non-positive ``max_words`` yields an empty
        list.

    Raises:
        KeyError: If ranking is required and a candidate lacks ``"similarity"``.
    """
    if len(candidates) <= max_words:
        return candidates

    if max_words <= 0:
        # Nothing to pick; random.sample() would reject a negative count.
        return []

    ranked = sorted(candidates, key=lambda w: w["similarity"], reverse=True)
    total = len(ranked)

    # Tier boundaries: 25% / 25% / 35% / rest, each sized tier at least 1.
    quarter = max(1, total // 4)
    tier2_end = 2 * quarter
    tier3_end = tier2_end + max(1, total * 35 // 100)

    tier1 = ranked[:quarter]
    tier2 = ranked[quarter:tier2_end]
    tier3 = ranked[tier2_end:tier3_end]
    tier4 = ranked[tier3_end:]

    # Guarantee a share of the best matches. Sampling positions (not values)
    # keeps equal-valued records distinct from one another.
    tier1_count = min(max_words // 3, len(tier1))
    picked_positions = random.sample(range(len(tier1)), tier1_count)
    selected = [tier1[pos] for pos in picked_positions]
    already_picked = set(picked_positions)

    # Parallel lists: pool[i] is a record, weights[i] its relative likelihood.
    pool: List[Dict[str, Any]] = []
    weights: List[int] = []
    for tier, weight in ((tier2, 3), (tier3, 2), (tier4, 1)):
        pool.extend(tier)
        weights.extend([weight] * len(tier))
    leftover_tier1 = [w for pos, w in enumerate(tier1) if pos not in already_picked]
    pool.extend(leftover_tier1)
    weights.extend([4] * len(leftover_tier1))

    # Weighted sampling without replacement. Removing by index means a pick
    # evicts only itself, never other records that merely compare equal.
    remaining_slots = max_words - len(selected)
    for _ in range(min(remaining_slots, len(pool))):
        idx = random.choices(range(len(pool)), weights=weights)[0]
        selected.append(pool.pop(idx))
        del weights[idx]

    # Final shuffle so the guaranteed top-tier picks are not always first.
    random.shuffle(selected)

    return selected

def create_test_candidates():
    """Build the fixed list of science-themed candidate records for the test.

    Each record carries the word, a generated clue, a similarity score that
    starts at 0.9 and drops by 0.02 per position, and a constant source tag.
    """
    vocabulary = (
        "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST",
        "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY",
        "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY",
        "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS",
        "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE",
    )

    return [
        {
            "word": term,
            "clue": f"{term.lower()} (scientific term)",
            # Earlier words rank higher: scores decrease with position.
            "similarity": 0.9 - (rank * 0.02),
            "source": "vector_search",
        }
        for rank, term in enumerate(vocabulary)
    ]

def test_variety():
    """Run the selection five times and score how much the leading words vary.

    Prints the first five words of each run, then for each of the first five
    positions the number of distinct words seen across the runs. Returns True
    when the overall variety score exceeds 60%.
    """
    print("🧪 Testing word variety\n")

    pool = create_test_candidates()

    # Collect the word lists produced by five independent selections.
    runs = []
    for run_number in range(1, 6):
        picked = weighted_random_selection(pool, 12)
        names = [entry["word"] for entry in picked]
        runs.append(names)
        print(f"Selection {run_number}: {names[:5]}...")

    # Count the distinct words that appeared in each of the first five slots.
    distinct_counts = []
    for slot in range(5):
        seen = {names[slot] for names in runs if len(names) > slot}
        distinct_counts.append(len(seen))
        print(f"Position {slot}: {len(seen)} different words across 5 selections")

    # Best case: every run shows a different word in every inspected slot.
    ceiling = len(distinct_counts) * len(runs)
    variety_percentage = (sum(distinct_counts) / ceiling) * 100

    print(f"\n📊 Variety Score: {variety_percentage:.1f}%")

    return variety_percentage > 60

if __name__ == "__main__":
    # Run the variety check and report the outcome on stdout.
    verdict = "✅ Word variety test passed!" if test_variety() else "❌ Word variety test failed!"
    print(verdict)