File size: 6,839 Bytes
b05514b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python3
"""
Test Adaptive Beta with Cricket+Sports Example

Tests that the adaptive beta mechanism generates more words for constrained cases
like "cricket sentence" + "sports topic".
"""

import os
import sys
import warnings
import logging

# Root logger at INFO with a bare "%(message)s" format, so INFO-level
# messages (the adaptive beta ones this script wants to see) are shown
# without any prefix.
logging.basicConfig(level=logging.INFO, format='%(message)s')

# Install a blanket "ignore" filter so warnings do not clutter the output.
warnings.filterwarnings("ignore")

def setup_environment():
    """Export the cache location and make the backend sources importable.

    Sets HF_HOME, TRANSFORMERS_CACHE and SENTENCE_TRANSFORMERS_HOME to the
    absolute path of the ``cache-dir`` folder one level above this script,
    prepends ``../crossword-app/backend-py/src`` (also made absolute) to
    ``sys.path`` unless it is already listed, and prints the cache directory.
    """
    here = os.path.dirname(__file__)

    # All three cache-related variables receive the same directory.
    cache_dir = os.path.abspath(os.path.join(here, '..', 'cache-dir'))
    for variable in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[variable] = cache_dir

    # Backend source root goes to the front of the import search path,
    # but only once, so repeated calls do not stack duplicates.
    backend_path = os.path.abspath(
        os.path.join(here, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")

def test_adaptive_beta_cricket_sports():
    """Run three scenarios through ThematicWordService with adaptive beta on.

    Calls setup_environment(), exports the soft-minimum settings (adaptive
    mode enabled, beta 10.0, minimum 15 words, 5 retries, decay 0.7, vocab
    limit 5000), then asks the service for up to 50 words per scenario and
    prints the top 15 along with a verdict against the 15-word threshold.
    The cricket+sports scenario is the one this file says previously produced
    only 16 words.

    Any exception raised while importing, initializing or querying the
    service is printed together with its traceback instead of propagating.
    """

    setup_environment()

    print("πŸ§ͺ Testing Adaptive Beta with Cricket+Sports Example")
    print("=" * 60)

    # Soft minimum with adaptive beta; the values are plain strings because
    # they travel through the process environment.
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'SOFT_MIN_ADAPTIVE': 'true',
        'SOFT_MIN_MIN_WORDS': '15',
        'SOFT_MIN_MAX_RETRIES': '5',
        'SOFT_MIN_BETA_DECAY': '0.7',
        'THEMATIC_VOCAB_SIZE_LIMIT': '5000',  # Smaller vocab for faster testing
    })

    # Scenario table: pure data, so it can live outside the guarded section.
    scenarios = [
        {
            "name": "Cricket sentence only",
            "inputs": ["india won test series against england"],
            "expected": ">30 words (no constraint)",
            "description": "Single sentence - should generate many words",
        },
        {
            "name": "Cricket sentence + Sports topic",
            "inputs": ["india won test series against england", "Sports"],
            "expected": "~15-25 words (adaptive beta should kick in)",
            "description": "Sentence + topic - adaptive beta should relax to get more words",
        },
        {
            "name": "Multiple sports topics",
            "inputs": ["Cricket", "Tennis", "Football"],
            "expected": "~15-20 words (adaptive beta for 3 topics)",
            "description": "Three topics - should auto-adapt for more words",
        },
    ]

    try:
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService with adaptive soft minimum...")
        service = ThematicWordService()

        print("Initializing service (adaptive beta configuration will be logged)...")
        service.initialize()

        for number, scenario in enumerate(scenarios, 1):
            name = scenario['name']
            description = scenario['description']
            expected = scenario['expected']
            inputs = scenario['inputs']

            print(f"\nπŸ“Š Test {number}: {name}")
            print(f"   Description: {description}")
            print(f"   Expected: {expected}")
            print(f"   Inputs: {inputs}")
            print("-" * 50)

            # Ask the service for words matching this scenario's inputs
            words = service.generate_thematic_words(
                inputs,
                num_words=50,
                min_similarity=0.3,
                multi_theme=False,
            )
            total = len(words)

            print(f"βœ… Generated {total} words")
            print("Top 15 words:")
            for rank, (word, similarity, tier) in enumerate(words[:15], 1):
                print(f"   {rank:2d}. {word:15s}: {similarity:.4f} ({tier})")

            # Verdict against the 15-word minimum
            if total < 15:
                print(f"   ⚠️ Warning: Only {total} words generated (< 15 minimum)")
                print("   This suggests adaptive beta may need tuning")
            else:
                print(f"   βœ… Success: Generated {total} words (β‰₯ 15 minimum)")

    except Exception as exc:
        print(f"❌ Test failed: {exc}")
        import traceback
        traceback.print_exc()

def test_adaptive_beta_disabled():
    """Run the cricket+sports query with adaptive beta off, for comparison.

    The soft-minimum settings this comparison relies on (method, beta 10.0,
    vocab limit 5000) are exported here explicitly together with
    SOFT_MIN_ADAPTIVE='false'. That keeps the printed "fixed beta=10.0"
    accurate no matter what was, or was not, left in the environment by
    earlier code, instead of silently inheriting it.

    The service is then asked for up to 50 words; the top 10 are printed and
    the count is compared with 15.

    NOTE: the ``services`` import only resolves when the backend ``src``
    directory is already on ``sys.path`` (setup_environment() in this file
    adds it). If it is not, the import error is reported like any other
    failure: the exception and its traceback are printed, nothing is raised.
    """

    print("\n\nπŸ”’ Testing with Adaptive Beta DISABLED")
    print("=" * 60)

    # Pin every setting the comparison depends on, then disable adaptive beta.
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'THEMATIC_VOCAB_SIZE_LIMIT': '5000',  # Smaller vocab for faster testing
        'SOFT_MIN_ADAPTIVE': 'false',
    })

    try:
        from services.thematic_word_service import ThematicWordService

        service = ThematicWordService()
        service.initialize()

        # The problematic sentence + topic combination
        inputs = ["india won test series against england", "Sports"]
        print("Testing cricket+sports with fixed beta=10.0...")

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False
        )

        print(f"βœ… Generated {len(results)} words (with fixed beta)")
        print("Top 10 words:")
        # The third tuple element is not shown in this listing.
        for j, (word, similarity, _tier) in enumerate(results[:10], 1):
            print(f"   {j:2d}. {word:15s}: {similarity:.4f}")

        if len(results) < 15:
            print(f"   ⚠️ As expected: Only {len(results)} words with fixed beta (too strict)")
        else:
            print(f"   βœ… Surprisingly good: {len(results)} words even with fixed beta")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()

def main():
    """Print the banner, run both scenarios in order, then print the summary.

    The enabled run goes first and the disabled run second. An exception
    escaping either run is printed with its traceback rather than raised,
    and in that case the summary is skipped.
    """
    rule = "=" * 70

    print("πŸ§ͺ Adaptive Beta Integration Test")
    print("Testing automatic beta relaxation for constrained word generation")
    print(rule)

    try:
        # Adaptive beta enabled first, then disabled for comparison
        for run_scenario in (test_adaptive_beta_cricket_sports, test_adaptive_beta_disabled):
            run_scenario()

        summary = (
            "🎯 ADAPTIVE BETA TEST RESULTS:",
            "1. Adaptive beta should automatically relax when < 15 words found",
            "2. Cricket+Sports should now generate 15+ words (was 16)",
            "3. Complex multi-topic queries should auto-adapt for sufficient words",
            "4. Logging shows beta adjustment process",
        )
        print("\n" + rule)
        for line in summary:
            print(line)
        print(rule)

    except Exception as exc:
        print(f"❌ Adaptive beta test failed: {exc}")
        import traceback
        traceback.print_exc()

# Run the scenarios only when this file is executed as a script; importing
# the module does not trigger main().
if __name__ == "__main__":
    main()