abc123 / hack /test_adaptive_beta.py
vimalk78's picture
feat: add multi-topic intersection methods with adaptive beta for word selection
b05514b
#!/usr/bin/env python3
"""
Test Adaptive Beta with Cricket+Sports Example
Tests that the adaptive beta mechanism generates more words for constrained cases
like "cricket sentence" + "sports topic".
"""
import os
import sys
import warnings
import logging
# Emit bare INFO-level messages so the adaptive beta log lines show up in the output
logging.basicConfig(format='%(message)s', level=logging.INFO)
# Silence every warning so the printed results stay readable
warnings.filterwarnings(action="ignore")
def setup_environment():
    """Configure the cache environment variables and make the backend importable.

    Both locations are resolved relative to this file's directory: the cache
    directory is ``../cache-dir`` and the backend source root is
    ``../crossword-app/backend-py/src``. The backend path is prepended to
    ``sys.path`` only when it is not already listed, so repeated calls do not
    add duplicate entries.
    """
    here = os.path.dirname(__file__)

    # Every cache-related variable points at the same absolute directory
    cache_dir = os.path.abspath(os.path.join(here, '..', 'cache-dir'))
    for variable in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[variable] = cache_dir

    # Put the backend source root first on the import path
    backend_path = os.path.abspath(
        os.path.join(here, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")
def test_adaptive_beta_cricket_sports():
    """Exercise the soft-minimum word generator with adaptive beta enabled.

    Sets the ``SOFT_MIN_*`` environment variables, creates and initializes a
    ``ThematicWordService``, and prints the words generated for three inputs,
    including the cricket sentence + "Sports" topic case that previously
    generated only 16 words.

    Any exception raised while importing, initializing or querying the
    service is printed with its traceback instead of being propagated, so the
    function always returns ``None``.
    """
    setup_environment()

    # Single source for the minimum word count: it feeds both the service
    # configuration and the pass/warn check below, so the two cannot drift.
    min_words = 15

    print("🧪 Testing Adaptive Beta with Cricket+Sports Example")
    print("=" * 60)

    # Soft minimum with adaptive beta. These are set before the service is
    # created (presumably the service reads them on construction/initialize —
    # confirm against ThematicWordService).
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'SOFT_MIN_ADAPTIVE': 'true',
        'SOFT_MIN_MIN_WORDS': str(min_words),
        'SOFT_MIN_MAX_RETRIES': '5',
        'SOFT_MIN_BETA_DECAY': '0.7',
        'THEMATIC_VOCAB_SIZE_LIMIT': '5000',  # Smaller vocab for faster testing
    })

    try:
        # Imported here because the backend path is only added to sys.path
        # by setup_environment() above.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService with adaptive soft minimum...")
        service = ThematicWordService()
        print("Initializing service (adaptive beta configuration will be logged)...")
        service.initialize()

        # Test cases
        test_cases = [
            {
                "name": "Cricket sentence only",
                "inputs": ["india won test series against england"],
                "expected": ">30 words (no constraint)",
                "description": "Single sentence - should generate many words",
            },
            {
                "name": "Cricket sentence + Sports topic",
                "inputs": ["india won test series against england", "Sports"],
                "expected": "~15-25 words (adaptive beta should kick in)",
                "description": "Sentence + topic - adaptive beta should relax to get more words",
            },
            {
                "name": "Multiple sports topics",
                "inputs": ["Cricket", "Tennis", "Football"],
                "expected": "~15-20 words (adaptive beta for 3 topics)",
                "description": "Three topics - should auto-adapt for more words",
            },
        ]

        for i, test_case in enumerate(test_cases, 1):
            print(f"\n📊 Test {i}: {test_case['name']}")
            print(f" Description: {test_case['description']}")
            print(f" Expected: {test_case['expected']}")
            print(f" Inputs: {test_case['inputs']}")
            print("-" * 50)

            # Generate words
            results = service.generate_thematic_words(
                test_case['inputs'],
                num_words=50,
                min_similarity=0.3,
                multi_theme=False,
            )

            print(f"✅ Generated {len(results)} words")
            print("Top 15 words:")
            for j, (word, similarity, tier) in enumerate(results[:15], 1):
                print(f" {j:2d}. {word:15s}: {similarity:.4f} ({tier})")

            # Analysis: compare against the configured minimum
            if len(results) >= min_words:
                print(f" ✅ Success: Generated {len(results)} words (≥ {min_words} minimum)")
            else:
                print(f" ⚠️ Warning: Only {len(results)} words generated (< {min_words} minimum)")
                print(" This suggests adaptive beta may need tuning")
    except Exception as e:
        # Report and continue so the rest of the script can still run
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
def test_adaptive_beta_disabled():
    """Run the cricket+sports case with adaptive beta disabled, for comparison.

    The function sets up its own environment (import path, cache variables
    and soft-minimum configuration) so it gives the same result whether it
    runs after ``test_adaptive_beta_cricket_sports`` or on its own.

    Any exception raised while importing, initializing or querying the
    service is printed with its traceback instead of being propagated, so the
    function always returns ``None``.
    """
    # Without this the backend import below only works if another test has
    # already extended sys.path.
    setup_environment()

    fixed_beta = '10.0'
    min_words = 15

    print("\n\n🔒 Testing with Adaptive Beta DISABLED")
    print("=" * 60)

    # Pin the full configuration rather than inheriting whatever a previous
    # test left in os.environ; only SOFT_MIN_ADAPTIVE differs from the
    # adaptive run.
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': fixed_beta,
        'SOFT_MIN_ADAPTIVE': 'false',
        'THEMATIC_VOCAB_SIZE_LIMIT': '5000',
    })

    try:
        from services.thematic_word_service import ThematicWordService

        service = ThematicWordService()
        service.initialize()

        # Test the problematic case
        inputs = ["india won test series against england", "Sports"]
        print(f"Testing cricket+sports with fixed beta={fixed_beta}...")
        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False,
        )

        print(f"✅ Generated {len(results)} words (with fixed beta)")
        print("Top 10 words:")
        # The third element of each result is not shown in this listing
        for j, (word, similarity, _tier) in enumerate(results[:10], 1):
            print(f" {j:2d}. {word:15s}: {similarity:.4f}")

        if len(results) < min_words:
            print(f" ⚠️ As expected: Only {len(results)} words with fixed beta (too strict)")
        else:
            print(f" ✅ Surprisingly good: {len(results)} words even with fixed beta")
    except Exception as e:
        # Report and continue so the caller's summary is still printed
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
def main():
    """Run the adaptive-beta comparison and print a summary of expectations.

    Runs the adaptive-beta-enabled test first, then the disabled variant, and
    finally prints the list of behaviors the run is meant to demonstrate.
    An exception escaping either test is printed with its traceback instead
    of being propagated.
    """
    print("🧪 Adaptive Beta Integration Test")
    print("Testing automatic beta relaxation for constrained word generation")
    print("=" * 70)
    try:
        # Test with adaptive beta enabled
        test_adaptive_beta_cricket_sports()
        # Test with adaptive beta disabled for comparison
        test_adaptive_beta_disabled()

        print("\n" + "=" * 70)
        print("🎯 ADAPTIVE BETA TEST RESULTS:")
        print("1. Adaptive beta should automatically relax when < 15 words found")
        print("2. Cricket+Sports should now generate 15+ words (was 16)")
        print("3. Complex multi-topic queries should auto-adapt for sufficient words")
        print("4. Logging shows beta adjustment process")
        print("=" * 70)
    except Exception as e:
        print(f"❌ Adaptive beta test failed: {e}")
        import traceback
        traceback.print_exc()
# Run the comparison only when executed as a script, not when imported
if __name__ == "__main__":
    main()