File size: 3,456 Bytes
b05514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python3
"""
Test adaptive beta fix with full vocabulary to see if it now correctly 
uses the adjusted threshold for filtering
"""

import os
import sys
import logging

# Configure root logging at INFO level with a timestamped format so that
# INFO-and-above records emitted through `logging` during the run are shown.
# NOTE: DEBUG-level records are NOT displayed at this level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

def setup_environment():
    """Point the model caches at ``../cache-dir`` and expose the backend sources.

    Both locations are resolved relative to the directory containing this
    script and converted to absolute paths. The cache directory is written to
    the ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME``
    environment variables. The backend ``src`` directory is put at the front
    of ``sys.path`` unless it is already listed there. Finally the chosen
    cache directory is printed.
    """
    script_dir = os.path.dirname(__file__)

    # All three cache-related variables share the same absolute location.
    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
    for env_name in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[env_name] = cache_dir

    # Make the backend package importable; skip if a previous call already did.
    backend_path = os.path.abspath(
        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
    )
    already_listed = backend_path in sys.path
    if not already_listed:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")

def _print_top_words(results, limit):
    """Print the first ``limit`` entries of ``results`` as a numbered list.

    Each entry is unpacked as a ``(word, similarity, tier)`` triple; only the
    word and its similarity (formatted to four decimals) are printed.
    """
    print(f"Top {limit} words:")
    for rank, (word, similarity, _tier) in enumerate(results[:limit], 1):
        print(f"   {rank}. {word}: {similarity:.4f}")


def test_adaptive_fix():
    """Run two multi-topic word generations with the full vocabulary.

    Sets the soft-minimum / adaptive-beta configuration through environment
    variables, creates and initializes a ``ThematicWordService``, and calls
    ``generate_thematic_words`` for two topic lists. For each call the number
    of returned words and the leading entries are printed.

    Any exception raised while importing or running the service is caught,
    reported together with its traceback, and not re-raised, so the function
    always returns ``None``.
    """

    setup_environment()

    print("🔧 Testing Adaptive Beta Fix")
    print("=" * 50)

    # Set environment variables for soft minimum with debug - USE FULL VOCABULARY
    # NOTE(review): presumably read by ThematicWordService when it is created
    # or initialized — confirm against the service implementation.
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'SOFT_MIN_ADAPTIVE': 'true',
        'SOFT_MIN_MIN_WORDS': '15',
        'SOFT_MIN_MAX_RETRIES': '5',
        'SOFT_MIN_BETA_DECAY': '0.7',
        'THEMATIC_VOCAB_SIZE_LIMIT': '100000',  # Full vocabulary
    })

    try:
        # Imported here because the backend path is only added to sys.path by
        # setup_environment() above.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService...")
        service = ThematicWordService()
        service.initialize()

        # Test the original problematic case with full vocabulary
        inputs = ["universe", "movies", "languages"]
        print(f"\nTesting original case: {inputs} (with full vocabulary)")
        print("Expected: Should now get words using adjusted threshold")
        print("-" * 50)

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.25,  # Use 0.25 like the original log
            multi_theme=True
        )

        print(f"\n✅ Final result: {len(results)} words generated")
        if results:
            _print_top_words(results, 10)
        else:
            print("   ⚠️ Still no words generated!")

        print("\n🔬 Test another challenging case: ['science', 'art', 'music']")
        results2 = service.generate_thematic_words(
            ["science", "art", "music"],
            num_words=30,
            min_similarity=0.25,
            multi_theme=True
        )

        print(f"\n✅ Second result: {len(results2)} words generated")
        if results2:
            _print_top_words(results2, 5)

    except Exception as e:
        # Deliberately report-and-continue: this is a diagnostic script, so a
        # failure is shown with its traceback instead of aborting the process.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()

# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_adaptive_fix()