|
|
|
|
|
""" |
|
|
Test Adaptive Beta with Cricket+Sports Example |
|
|
|
|
|
Tests that the adaptive beta mechanism generates more words for constrained cases |
|
|
like "cricket sentence" + "sports topic". |
|
|
""" |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import warnings |
|
|
import logging |
|
|
|
|
|
|
|
|
# Show log records as bare messages at INFO level and above, so whatever the
# service logs during the run is printed without timestamps or logger names.
logging.basicConfig(format='%(message)s', level=logging.INFO)

# Hide every warning category for the rest of the process to keep the
# printed report readable.
warnings.filterwarnings(action="ignore")
|
|
|
|
|
def setup_environment():
    """Point the model caches at ../cache-dir and make the backend importable.

    Sets ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME``
    to the absolute path of the ``cache-dir`` folder that sits one level above
    this file, prepends ``../crossword-app/backend-py/src`` to ``sys.path`` if
    it is not already listed, and prints the cache directory in use.
    """
    script_dir = os.path.dirname(__file__)

    # All three cache variables share a single on-disk location.
    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
    for variable in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[variable] = cache_dir

    # Put the backend sources first on the import path, but only once, so
    # repeated calls do not keep growing sys.path.
    backend_path = os.path.abspath(
        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")
|
|
|
|
|
def test_adaptive_beta_cricket_sports():
    """Exercise the cricket+sports case that previously generated only 16 words.

    Calls ``setup_environment()``, enables the adaptive soft-minimum settings
    through environment variables, builds a ``ThematicWordService`` and, for
    three input sets, prints the number of generated words, the top 15 of them
    and whether the run reached the 15-word minimum.

    This is a print-based diagnostic, not an asserting test: any exception
    raised while importing, initializing or querying the service is caught and
    reported together with its traceback.
    """
    setup_environment()

    print("🧪 Testing Adaptive Beta with Cricket+Sports Example")
    print("=" * 60)

    # One threshold feeds both the service setting and the verdict printed
    # below, so the two cannot drift apart.
    min_words = 15

    # Presumably read by ThematicWordService when it is constructed or
    # initialized, hence set before the import below -- confirm in the service.
    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
    os.environ['SOFT_MIN_BETA'] = '10.0'
    os.environ['SOFT_MIN_ADAPTIVE'] = 'true'
    os.environ['SOFT_MIN_MIN_WORDS'] = str(min_words)
    os.environ['SOFT_MIN_MAX_RETRIES'] = '5'
    os.environ['SOFT_MIN_BETA_DECAY'] = '0.7'
    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '5000'

    try:
        # Imported here because the module only becomes importable after
        # setup_environment() has extended sys.path.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService with adaptive soft minimum...")
        service = ThematicWordService()

        print("Initializing service (adaptive beta configuration will be logged)...")
        service.initialize()

        test_cases = [
            {
                "name": "Cricket sentence only",
                "inputs": ["india won test series against england"],
                "expected": ">30 words (no constraint)",
                "description": "Single sentence - should generate many words",
            },
            {
                "name": "Cricket sentence + Sports topic",
                "inputs": ["india won test series against england", "Sports"],
                "expected": "~15-25 words (adaptive beta should kick in)",
                "description": "Sentence + topic - adaptive beta should relax to get more words",
            },
            {
                "name": "Multiple sports topics",
                "inputs": ["Cricket", "Tennis", "Football"],
                "expected": "~15-20 words (adaptive beta for 3 topics)",
                "description": "Three topics - should auto-adapt for more words",
            },
        ]

        for i, test_case in enumerate(test_cases, 1):
            # NOTE(review): the header icon's bytes were lost to mis-encoding;
            # the clipboard emoji is a stand-in -- confirm the intended icon.
            print(f"\n📋 Test {i}: {test_case['name']}")
            print(f" Description: {test_case['description']}")
            print(f" Expected: {test_case['expected']}")
            print(f" Inputs: {test_case['inputs']}")
            print("-" * 50)

            results = service.generate_thematic_words(
                test_case['inputs'],
                num_words=50,
                min_similarity=0.3,
                multi_theme=False,
            )

            print(f"✅ Generated {len(results)} words")
            print("Top 15 words:")
            # Each result is unpacked as a (word, similarity, tier) triple.
            for j, (word, similarity, tier) in enumerate(results[:15], 1):
                print(f" {j:2d}. {word:15s}: {similarity:.4f} ({tier})")

            # Report against the minimum; a shortfall is a warning only.
            if len(results) >= min_words:
                print(f" ✅ Success: Generated {len(results)} words (≥ {min_words} minimum)")
            else:
                print(f" ⚠️ Warning: Only {len(results)} words generated (< {min_words} minimum)")
                print(" This suggests adaptive beta may need tuning")

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller go on
        # to the next scenario.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
|
def test_adaptive_beta_disabled():
    """Run the cricket+sports input with adaptive beta switched off.

    Serves as the baseline for ``test_adaptive_beta_cricket_sports``: the same
    sentence + topic input is generated with ``SOFT_MIN_ADAPTIVE`` set to
    ``'false'`` and the word count is printed for comparison.

    The function prepares its own environment (import path, cache location and
    the soft-minimum settings it shares with the adaptive run), so it gives the
    same result whether it runs after the adaptive test or on its own.

    This is a print-based diagnostic, not an asserting test: any exception
    raised while importing, initializing or querying the service is caught and
    reported together with its traceback.
    """
    # Without this the service import below only works when another test has
    # already extended sys.path.
    setup_environment()

    # NOTE(review): the banner icon's bytes were lost to mis-encoding; the
    # arrows emoji is a stand-in -- confirm the intended icon.
    print("\n\n🔄 Testing with Adaptive Beta DISABLED")
    print("=" * 60)

    fixed_beta = '10.0'

    # Mirror the adaptive run's settings so that only the adaptive switch
    # differs between the two runs.
    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
    os.environ['SOFT_MIN_BETA'] = fixed_beta
    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '5000'
    os.environ['SOFT_MIN_ADAPTIVE'] = 'false'

    try:
        from services.thematic_word_service import ThematicWordService

        service = ThematicWordService()
        service.initialize()

        inputs = ["india won test series against england", "Sports"]
        print(f"Testing cricket+sports with fixed beta={fixed_beta}...")

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.3,
            multi_theme=False,
        )

        print(f"✅ Generated {len(results)} words (with fixed beta)")
        print("Top 10 words:")
        # Each result is unpacked as a (word, similarity, tier) triple; the
        # tier is not shown in this listing.
        for j, (word, similarity, _tier) in enumerate(results[:10], 1):
            print(f" {j:2d}. {word:15s}: {similarity:.4f}")

        # Fewer than 15 words is the expected outcome with a fixed beta.
        if len(results) < 15:
            print(f" ⚠️ As expected: Only {len(results)} words with fixed beta (too strict)")
        else:
            print(f" ✅ Surprisingly good: {len(results)} words even with fixed beta")

    except Exception as e:
        # Deliberate best-effort: report the failure instead of aborting the
        # whole script.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
|
def main():
    """Run both adaptive-beta scenarios and print a summary of expectations.

    Runs the adaptive test first and the fixed-beta baseline second, then
    prints a fixed checklist of what the output above should show. The
    checklist is static text; it is not derived from the results.

    Any exception escaping either scenario is caught and reported with its
    traceback, in which case the checklist is not printed.
    """
    print("🧪 Adaptive Beta Integration Test")
    print("Testing automatic beta relaxation for constrained word generation")
    print("=" * 70)

    try:
        # Adaptive run first, fixed-beta baseline second.
        test_adaptive_beta_cricket_sports()
        test_adaptive_beta_disabled()

        print("\n" + "=" * 70)
        print("🎯 ADAPTIVE BETA TEST RESULTS:")
        print("1. Adaptive beta should automatically relax when < 15 words found")
        print("2. Cricket+Sports should now generate 15+ words (was 16)")
        print("3. Complex multi-topic queries should auto-adapt for sufficient words")
        print("4. Logging shows beta adjustment process")
        print("=" * 70)

    except Exception as e:
        # Top-level boundary of the script: report the failure and return
        # normally rather than ending on an unhandled exception.
        print(f"❌ Adaptive beta test failed: {e}")
        import traceback
        traceback.print_exc()
|
|
|
|
|
# Run the integration checks only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()