# Snapshot b05514b (file size: 6,839 bytes)
#!/usr/bin/env python3
"""
Test Adaptive Beta with Cricket+Sports Example
Tests that the adaptive beta mechanism generates more words for constrained cases
like "cricket sentence" + "sports topic".
"""
import os
import sys
import warnings
import logging
# Configure the root logger at INFO level with a message-only format so the
# adaptive beta messages logged during the run are visible on the console.
logging.basicConfig(level=logging.INFO, format='%(message)s')
# Suppress every Python warning for cleaner output.
# NOTE(review): this is process-wide and also hides deprecation warnings.
warnings.filterwarnings("ignore")
def setup_environment():
    """Point the model caches at the repo cache dir and expose the backend sources.

    Side effects:
      * Sets ``HF_HOME``, ``TRANSFORMERS_CACHE`` and
        ``SENTENCE_TRANSFORMERS_HOME`` to the absolute path of
        ``<this file's directory>/../cache-dir``.
      * Inserts the absolute path of
        ``<this file's directory>/../crossword-app/backend-py/src`` at the
        front of ``sys.path`` unless it is already listed there.
      * Prints the cache directory in use.

    Calling it more than once never adds a duplicate ``sys.path`` entry.
    """
    here = os.path.dirname(__file__)

    # Set cache directory to root cache-dir folder
    cache_dir = os.path.abspath(os.path.join(here, '..', 'cache-dir'))
    for env_var in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[env_var] = cache_dir

    # Add backend source to path
    backend_path = os.path.abspath(
        os.path.join(here, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")
def test_adaptive_beta_cricket_sports():
    """Test the cricket+sports case that previously generated only 16 words.

    Configures the soft-minimum method with adaptive beta through environment
    variables, builds a ``ThematicWordService`` and asks it for up to 50 words
    for three inputs: a cricket sentence alone, the same sentence plus the
    "Sports" topic, and three sports topics. For each case it prints the word
    count and the top 15 words, then reports whether the 15-word minimum was
    reached.

    There are no assertions: a shortfall is only printed as a warning, and any
    exception raised while importing, initializing or querying the service is
    caught, printed together with its traceback, and not re-raised.

    Side effects: calls ``setup_environment()`` and leaves the ``SOFT_MIN_*``,
    ``MULTI_TOPIC_METHOD`` and ``THEMATIC_VOCAB_SIZE_LIMIT`` environment
    variables set for the rest of the process.
    """
    setup_environment()

    print("🧪 Testing Adaptive Beta with Cricket+Sports Example")
    print("=" * 60)

    # One source of truth for the threshold that is both handed to the service
    # and used for the success/warning report below.
    min_words = 15

    # Set environment variables for soft minimum with adaptive beta
    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
    os.environ['SOFT_MIN_BETA'] = '10.0'
    os.environ['SOFT_MIN_ADAPTIVE'] = 'true'
    os.environ['SOFT_MIN_MIN_WORDS'] = str(min_words)
    os.environ['SOFT_MIN_MAX_RETRIES'] = '5'
    os.environ['SOFT_MIN_BETA_DECAY'] = '0.7'
    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '5000'  # Smaller vocab for faster testing

    try:
        # Imported here, not at module top: the backend directory is only on
        # sys.path after setup_environment() has run.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService with adaptive soft minimum...")
        service = ThematicWordService()

        print("Initializing service (adaptive beta configuration will be logged)...")
        service.initialize()

        # Test cases
        test_cases = [
            {
                "name": "Cricket sentence only",
                "inputs": ["india won test series against england"],
                "expected": ">30 words (no constraint)",
                "description": "Single sentence - should generate many words",
            },
            {
                "name": "Cricket sentence + Sports topic",
                "inputs": ["india won test series against england", "Sports"],
                "expected": "~15-25 words (adaptive beta should kick in)",
                "description": "Sentence + topic - adaptive beta should relax to get more words",
            },
            {
                "name": "Multiple sports topics",
                "inputs": ["Cricket", "Tennis", "Football"],
                "expected": "~15-20 words (adaptive beta for 3 topics)",
                "description": "Three topics - should auto-adapt for more words",
            },
        ]

        for i, test_case in enumerate(test_cases, 1):
            # NOTE(review): the leading emoji was unrecoverable mojibake in the
            # source; the magnifying glass is a stand-in - confirm or replace.
            print(f"\n🔍 Test {i}: {test_case['name']}")
            print(f" Description: {test_case['description']}")
            print(f" Expected: {test_case['expected']}")
            print(f" Inputs: {test_case['inputs']}")
            print("-" * 50)

            # Generate words
            results = service.generate_thematic_words(
                test_case['inputs'],
                num_words=50,
                min_similarity=0.3,
                multi_theme=False,
            )

            print(f"✅ Generated {len(results)} words")
            print("Top 15 words:")
            for j, (word, similarity, tier) in enumerate(results[:15], 1):
                print(f" {j:2d}. {word:15s}: {similarity:.4f} ({tier})")

            # Analysis
            if len(results) >= min_words:
                print(f" ✅ Success: Generated {len(results)} words (≥ {min_words} minimum)")
            else:
                print(f" ⚠️ Warning: Only {len(results)} words generated (< {min_words} minimum)")
                print(" This suggests adaptive beta may need tuning")

    except Exception as e:
        # Best-effort script: report the failure and let the caller continue.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
def test_adaptive_beta_disabled():
    """Test with adaptive beta disabled for comparison.

    Sets ``SOFT_MIN_ADAPTIVE`` to ``'false'``, builds a fresh
    ``ThematicWordService`` and requests up to 50 words for the cricket
    sentence + "Sports" topic input. Prints the word count, the top 10 words,
    and whether fewer than 15 words came back.

    The function is self-contained: it calls ``setup_environment()`` itself
    and fills in the shared soft-minimum settings only when they are not
    already present, so values set by an earlier test or by the caller are
    left alone.

    There are no assertions, and any exception raised while importing,
    initializing or querying the service is caught, printed together with its
    traceback, and not re-raised.
    """
    # Previously this relied on test_adaptive_beta_cricket_sports() having run
    # first to extend sys.path and configure the environment.
    setup_environment()

    # NOTE(review): the leading emoji was unrecoverable mojibake in the
    # source; the arrows glyph is a stand-in - confirm or replace.
    print("\n\n🔄 Testing with Adaptive Beta DISABLED")
    print("=" * 60)

    # Same baseline as the adaptive run, without overriding existing values.
    os.environ.setdefault('MULTI_TOPIC_METHOD', 'soft_minimum')
    os.environ.setdefault('SOFT_MIN_BETA', '10.0')
    os.environ.setdefault('THEMATIC_VOCAB_SIZE_LIMIT', '5000')

    # Disable adaptive beta
    os.environ['SOFT_MIN_ADAPTIVE'] = 'false'

    try:
        # Imported here, not at module top: the backend directory is only on
        # sys.path after setup_environment() has run.
        from services.thematic_word_service import ThematicWordService

        service = ThematicWordService()
        service.initialize()

        # Test the problematic case
        inputs = ["india won test series against england", "Sports"]

        print("Testing cricket+sports with fixed beta=10.0...")
        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False,
        )

        print(f"✅ Generated {len(results)} words (with fixed beta)")
        print("Top 10 words:")
        for j, (word, similarity, _tier) in enumerate(results[:10], 1):
            print(f" {j:2d}. {word:15s}: {similarity:.4f}")

        if len(results) < 15:
            print(f" ⚠️ As expected: Only {len(results)} words with fixed beta (too strict)")
        else:
            print(f" ✅ Surprisingly good: {len(results)} words even with fixed beta")

    except Exception as e:
        # Best-effort script: report the failure and let the caller continue.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
def main():
    """Main test runner.

    Prints a banner, runs the adaptive-beta scenario followed by the
    fixed-beta comparison, then prints a summary of what the run is expected
    to demonstrate. Any exception escaping either scenario is caught, printed
    together with its traceback, and not re-raised, so the function always
    returns ``None``.
    """
    print("🧪 Adaptive Beta Integration Test")
    print("Testing automatic beta relaxation for constrained word generation")
    print("=" * 70)

    try:
        # Test with adaptive beta enabled
        test_adaptive_beta_cricket_sports()

        # Test with adaptive beta disabled for comparison
        test_adaptive_beta_disabled()

        print("\n" + "=" * 70)
        print("🎯 ADAPTIVE BETA TEST RESULTS:")
        print("1. Adaptive beta should automatically relax when < 15 words found")
        print("2. Cricket+Sports should now generate 15+ words (was 16)")
        print("3. Complex multi-topic queries should auto-adapt for sufficient words")
        print("4. Logging shows beta adjustment process")
        print("=" * 70)

    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        print(f"❌ Adaptive beta test failed: {e}")
        import traceback
        traceback.print_exc()
# Run the integration checks only when this file is executed as a script.
if __name__ == "__main__":
    main()