|
|
|
|
|
""" |
|
|
Test adaptive beta fix with full vocabulary to see if it now correctly
uses the adjusted threshold for filtering.
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import logging |
|
|
|
|
|
|
|
|
# Configure root logging once at import time: INFO level, timestamped messages.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
|
|
|
|
|
def setup_environment():
    """Configure cache environment variables and make the backend sources importable.

    Both locations are resolved relative to the directory containing this
    file:

    * ``../cache-dir`` is written (as an absolute path) to the ``HF_HOME``,
      ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME`` environment
      variables.
    * ``../crossword-app/backend-py/src`` is put at the front of ``sys.path``
      unless it is already listed there.

    The chosen cache directory is printed to stdout.
    """
    here = os.path.dirname(__file__)

    # All three cache variables share one directory.
    cache_root = os.path.abspath(os.path.join(here, '..', 'cache-dir'))
    for var_name in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[var_name] = cache_root

    src_root = os.path.abspath(
        os.path.join(here, '..', 'crossword-app', 'backend-py', 'src')
    )
    # Guard against duplicate entries when this function is called repeatedly.
    if src_root not in sys.path:
        sys.path.insert(0, src_root)

    print(f"Using cache directory: {cache_root}")
|
|
|
|
|
def _print_top_words(results, limit):
    """Print the first ``limit`` entries of ``results`` as a numbered list.

    Each entry is unpacked as a ``(word, similarity, tier)`` triple; only the
    word and its similarity (four decimals) are displayed.
    """
    print(f"Top {limit} words:")
    for rank, (word, similarity, _tier) in enumerate(results[:limit], 1):
        print(f" {rank}. {word}: {similarity:.4f}")


def test_adaptive_fix():
    """Run the adaptive-beta soft-minimum scenario against the full vocabulary.

    Sets the ``MULTI_TOPIC_METHOD``, ``SOFT_MIN_*`` and
    ``THEMATIC_VOCAB_SIZE_LIMIT`` environment variables, creates and
    initializes a ``ThematicWordService``, and requests multi-theme words for
    two topic sets, printing how many words came back and the top entries.

    Raises:
        Exception: any error raised while importing, initializing or querying
            the service is re-raised after a failure banner is printed, so a
            test runner (or the script's exit code) reports the failure
            instead of silently passing.
    """
    setup_environment()

    print("🔧 Testing Adaptive Beta Fix")
    print("=" * 50)

    # Set before the service module is imported below.
    # NOTE(review): presumably ThematicWordService reads these settings from
    # the environment - confirm against the service implementation.
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'SOFT_MIN_ADAPTIVE': 'true',
        'SOFT_MIN_MIN_WORDS': '15',
        'SOFT_MIN_MAX_RETRIES': '5',
        'SOFT_MIN_BETA_DECAY': '0.7',
        'THEMATIC_VOCAB_SIZE_LIMIT': '100000',
    })

    try:
        # Imported here because the module only becomes importable after
        # setup_environment() has put the backend sources on sys.path.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService...")
        service = ThematicWordService()
        service.initialize()

        inputs = ["universe", "movies", "languages"]
        print(f"\nTesting original case: {inputs} (with full vocabulary)")
        print("Expected: Should now get words using adjusted threshold")
        print("-" * 50)

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.25,
            multi_theme=True,
        )

        print(f"\n✅ Final result: {len(results)} words generated")
        if results:
            _print_top_words(results, 10)
        else:
            print(" ⚠️ Still no words generated!")

        second_inputs = ["science", "art", "music"]
        print(f"\n🔬 Test another challenging case: {second_inputs}")
        results2 = service.generate_thematic_words(
            second_inputs,
            num_words=30,
            min_similarity=0.25,
            multi_theme=True,
        )

        print(f"\n✅ Second result: {len(results2)} words generated")
        if results2:
            _print_top_words(results2, 5)

    except Exception as exc:
        print(f"❌ Test failed: {exc}")
        # Re-raise so the failure is visible to pytest / the process exit
        # code; the propagated exception carries the full traceback.
        raise
|
|
|
|
|
# Script entry point: run the adaptive-beta check when executed directly.
if __name__ == '__main__':
    test_adaptive_fix()