# abc123 / hack /debug_adaptive_beta_bug.py
# vimalk78's picture
# feat: add multi-topic intersection methods with adaptive beta for word selection
# b05514b
#!/usr/bin/env python3
"""
Debug Adaptive Beta Bug
Quick test to reproduce the bug where word count decreases when beta is relaxed.
"""
import os
import sys
import logging
# Configure the root logger at INFO level with a "<timestamp> - <message>"
# format so that messages logged at INFO and above are visible while the
# script runs.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
def setup_environment():
    """Setup environment and add src to path.

    Sets ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME``
    to the absolute path of ``<script dir>/../cache-dir``, and inserts the
    absolute path of ``<script dir>/../crossword-app/backend-py/src`` at the
    front of ``sys.path`` unless it is already listed there. The chosen cache
    directory is printed.
    """
    script_dir = os.path.dirname(__file__)

    # Set cache directory to root cache-dir folder
    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
    for var_name in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[var_name] = cache_dir

    # Add backend source to path (only once, so repeated calls do not
    # accumulate duplicate entries)
    backend_path = os.path.abspath(
        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")
def test_debug_adaptive_beta():
    """Test the problematic case with debug logging.

    Prepares the environment, sets the soft-minimum / adaptive-beta
    environment variables, creates and initializes a ``ThematicWordService``
    and asks it for thematic words for a fixed three-topic input. The number
    of returned words and the top five entries are printed.

    Any exception raised while importing, initializing or querying the
    service is reported on stdout together with its traceback and is not
    re-raised, so the caller always continues.
    """
    setup_environment()

    print("🐛 Debug Adaptive Beta Bug")
    print("=" * 50)

    # Set environment variables for soft minimum with debug
    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
    os.environ['SOFT_MIN_BETA'] = '10.0'
    os.environ['SOFT_MIN_ADAPTIVE'] = 'true'
    os.environ['SOFT_MIN_MIN_WORDS'] = '15'
    os.environ['SOFT_MIN_MAX_RETRIES'] = '5'
    os.environ['SOFT_MIN_BETA_DECAY'] = '0.7'
    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '1000'  # Small for faster testing

    try:
        # Imported here rather than at module level: setup_environment() has
        # to put the backend source directory on sys.path first.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService...")
        service = ThematicWordService()
        service.initialize()

        # Test the problematic case
        inputs = ["universe", "movies", "languages"]
        print(f"\nTesting problematic case: {inputs}")
        print("Expected: Word count should INCREASE as beta decreases")
        print("-" * 50)

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False  # Force single theme processing
        )

        print(f"\n✅ Final result: {len(results)} words generated")
        if results:
            print("Top 5 words:")
            # Each result is unpacked as (word, similarity, tier); the tier
            # is not shown in this summary.
            for i, (word, similarity, _tier) in enumerate(results[:5], 1):
                print(f" {i}. {word}: {similarity:.4f}")
        else:
            print(" ⚠️ No words generated!")
    except Exception as e:
        # Top-level boundary of a debug script: report the failure and keep
        # going so the closing hints in the caller are still printed.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
def main():
    """Run the adaptive-beta reproduction between a banner and a checklist.

    Prints a header, calls ``test_debug_adaptive_beta()``, then prints the
    questions to check against the debug output.
    """
    print("🧪 Debugging Adaptive Beta Bug")
    print("This will show detailed score statistics at each beta level")
    print("=" * 60)

    test_debug_adaptive_beta()

    print("\n" + "=" * 60)
    print("🔍 Look for patterns in the debug output:")
    print("1. Do score ranges change as expected?")
    print("2. Is the threshold comparison working correctly?")
    print("3. Are scores getting more permissive with lower beta?")
    print("=" * 60)
# Run the reproduction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()