|
|
|
|
|
""" |
|
|
Debug Adaptive Beta Bug

Quick test to reproduce the bug where the word count decreases when beta is relaxed.
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import logging |
|
|
|
|
|
|
|
|
# Root logger: INFO level, each record prefixed with its timestamp.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
|
|
|
|
|
def setup_environment():
    """Export the model-cache variables and make the backend sources importable.

    ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME`` are
    all set to the absolute path of ``cache-dir`` in the parent of this file's
    directory. The absolute path of ``../crossword-app/backend-py/src`` is
    then placed at the front of ``sys.path`` unless it is already listed, and
    the chosen cache directory is printed.
    """
    script_dir = os.path.dirname(__file__)

    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
    for env_name in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[env_name] = cache_dir

    backend_path = os.path.abspath(
        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
    )
    already_listed = backend_path in sys.path
    if not already_listed:
        # Front of the list so these sources win over any same-named modules.
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")
|
|
|
|
|
def test_debug_adaptive_beta():
    """Run the three-topic case used to reproduce the adaptive-beta bug.

    Calls ``setup_environment()``, sets the soft-minimum / adaptive-beta
    environment variables, builds and initializes a ``ThematicWordService``,
    requests 50 words for ``["universe", "movies", "languages"]`` and prints
    how many results came back together with the first five entries.

    Exceptions raised while importing or running the service are caught,
    reported on stdout and followed by a traceback; nothing is re-raised.
    """
    setup_environment()

    print("🐛 Debug Adaptive Beta Bug")
    print("=" * 50)

    # Set before the service module is imported below.
    # NOTE(review): presumably read by ThematicWordService when it is
    # imported or initialized - confirm against the service code.
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'SOFT_MIN_ADAPTIVE': 'true',
        'SOFT_MIN_MIN_WORDS': '15',
        'SOFT_MIN_MAX_RETRIES': '5',
        'SOFT_MIN_BETA_DECAY': '0.7',
        'THEMATIC_VOCAB_SIZE_LIMIT': '1000',
    })

    try:
        # Imported here because the module only becomes importable after
        # setup_environment() has extended sys.path.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService...")
        service = ThematicWordService()
        service.initialize()

        inputs = ["universe", "movies", "languages"]
        # A real newline escape: the doubled backslash printed a literal "\n".
        print(f"\nTesting problematic case: {inputs}")
        print("Expected: Word count should INCREASE as beta decreases")
        print("-" * 50)

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False,
        )

        word_count = len(results)
        print(f"\n✅ Final result: {word_count} words generated")
        if word_count > 0:
            print("Top 5 words:")
            # Each entry unpacks to (word, similarity, tier); tier is not shown.
            for i, (word, similarity, _tier) in enumerate(results[:5], 1):
                print(f" {i}. {word}: {similarity:.4f}")
        else:
            print(" ⚠️ No words generated!")

    except Exception as e:
        # Top-level boundary of a debug script: report and keep going so the
        # caller can still print its closing hints.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
|
def main():
    """Print a banner, run the adaptive-beta debug case, then print review hints."""
    print("🧪 Debugging Adaptive Beta Bug")
    print("This will show detailed score statistics at each beta level")
    print("=" * 60)

    test_debug_adaptive_beta()

    # A real newline escape: the doubled backslash printed a literal "\n".
    print("\n" + "=" * 60)
    print("🔍 Look for patterns in the debug output:")
    print("1. Do score ranges change as expected?")
    print("2. Is the threshold comparison working correctly?")
    print("3. Are scores getting more permissive with lower beta?")
    print("=" * 60)
|
|
|
|
|
# Run the debug session only when executed as a script, not on import.
if __name__ == '__main__':
    main()