Spaces:

egumasa
/

simple-text-analyzer

Building

File size: 5,184 Bytes

e7279e4

#!/usr/bin/env python3
"""
Test script to verify the fix for bigram/trigram plot sample words
"""

import sys
import os

# Put the current working directory first on sys.path so it is searched
# before any other location when the project-local imports below run.
# NOTE(review): this assumes the script is launched from the project root
# (the directory containing web_app/ and text_analyzer/) — confirm.
sys.path.insert(0, os.getcwd())

from web_app.config_manager import ConfigManager
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer

def _check_ngram_columns(results, ngram):
    """Check that each *ngram* score key maps to a column in the detail rows.

    For every key in ``results['raw_scores']`` that contains ``_<ngram>_``,
    the expected detail column name is the key with ``_<ngram>`` removed.
    That column is looked up in the first row of ``results['<ngram>_details']``
    and, when present, a ``word_score_map`` (n-gram text -> score) is built
    from all rows whose value for that column is not None.

    Args:
        results: Mapping returned by ``analyzer.analyze_text``. Must contain
            ``'raw_scores'``; ``'<ngram>_details'`` is expected to be a list
            of dict-like rows and is treated as empty when missing.
        ngram: ``'bigram'`` or ``'trigram'``.

    Returns:
        A list with one boolean per matching score key: True when the
        expected column was found in the detail rows, False otherwise
        (including when there are no detail rows to inspect).
    """
    marker = f'_{ngram}_'
    details = results.get(f'{ngram}_details') or []
    outcomes = []

    for key in results['raw_scores']:
        if marker not in key:
            continue

        print(f"\nTesting {ngram} key: {key}")
        # Use the new algorithm: remove '_bigram' / '_trigram' from the key
        index_measure_col = key.replace(f'_{ngram}', '')
        print(f"  Fixed algorithm expects column: '{index_measure_col}'")

        # Without detail rows nothing can be verified; report it instead of
        # silently skipping so the final verdict is not a false positive.
        if not details:
            print(f"  ❌ No {ngram}_details available to check")
            outcomes.append(False)
            continue

        if index_measure_col not in details[0]:
            print(f"  ❌ Column still NOT found in {ngram}_details")
            outcomes.append(False)
            continue

        print(f"  ✅ Column found in {ngram}_details")

        # Test if we can build word_score_map successfully
        word_score_map = {
            detail.get(ngram, ''): detail[index_measure_col]
            for detail in details
            if detail.get(index_measure_col) is not None
        }
        print(f"  ✅ Successfully built word_score_map with {len(word_score_map)} entries")
        if word_score_map:
            sample_entries = list(word_score_map.items())[:3]
            print(f"  Sample entries: {sample_entries}")
        outcomes.append(True)

    return outcomes


def test_plot_fix():
    """Verify that bigram/trigram score keys resolve to detail columns.

    Loads one unigram, one bigram and one trigram reference list through
    ``ConfigManager``, analyzes a short sample text with
    ``LexicalSophisticationAnalyzer`` and, for every bigram/trigram key in
    ``results['raw_scores']``, checks that the column derived from the key
    exists in the corresponding detail rows. Progress and the final verdict
    are printed; the verdict is positive only if every checked key resolved.

    Raises:
        KeyError: If one of the expected reference entries is missing from
            the loaded configuration.
    """
    print("=== Testing Plot Fix ===")

    # Load config and create reference lists
    config = ConfigManager.load_reference_config()
    english_config = config.get('english', {})

    # (config section, reference name) for a unigram, bigram and trigram list
    reference_sources = (
        ('unigrams', 'COCA_spoken_frequency_token'),
        ('bigrams', 'COCA_spoken_bigram_frequency_token'),
        ('trigrams', 'COCA_trigram_frequency_token'),
    )
    reference_lists = {
        name: ConfigManager.load_reference_list_data(english_config[section][name])
        for section, name in reference_sources
    }

    # Create analyzer and analyze text
    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')
    analyzer.load_reference_lists(reference_lists)

    test_text = "The cat sat on the mat. The dog ran quickly."
    results = analyzer.analyze_text(test_text, list(reference_lists), apply_log=False)

    print("\n=== Testing Column Matching with Fixed Algorithm ===")

    bigram_checks = _check_ngram_columns(results, 'bigram')
    trigram_checks = _check_ngram_columns(results, 'trigram')

    print("\n=== Fix Verification Complete ===")
    if not bigram_checks or not trigram_checks:
        print("❌ No bigram/trigram results found to test")
    elif all(bigram_checks) and all(trigram_checks):
        print("✅ Fix appears to be working correctly!")
        print("Sample words should now appear in bigram and trigram plots.")
    else:
        # Keys exist but at least one expected column could not be verified,
        # so the success message would be misleading.
        print("❌ Fix is NOT working: expected columns are missing from bigram/trigram details")

# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_plot_fix()