#!/usr/bin/env python3
"""
Test script to verify the fix for bigram/trigram plot sample words.

The script loads one unigram, one bigram and one trigram reference list,
analyzes a short text, and checks that the column name obtained by removing
``_bigram`` / ``_trigram`` from each raw-score key is present in the
corresponding ``bigram_details`` / ``trigram_details`` rows.

Run it from the project root: the current working directory is put on
``sys.path`` so the project packages can be imported.
"""

import os
import sys
from typing import Any, Mapping, Tuple

# Add the project root to the path (must happen before the project imports below)
sys.path.insert(0, os.getcwd())

from web_app.config_manager import ConfigManager
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer


def _check_ngram_columns(results: Mapping[str, Any], ngram_kind: str) -> Tuple[int, int]:
    """Check the derived score column for every ``ngram_kind`` raw-score key.

    For each key of ``results['raw_scores']`` that contains
    ``_<ngram_kind>_``, the expected detail column is that key with
    ``_<ngram_kind>`` removed.  The column is looked up in the first row of
    ``results['<ngram_kind>_details']``; when present, an n-gram -> score map
    is built from all rows and a short sample of it is printed.

    Args:
        results: Analysis output; read via ``results['raw_scores']`` and
            ``results['<ngram_kind>_details']``.
        ngram_kind: ``'bigram'`` or ``'trigram'``; also the key under which
            each detail row stores the n-gram text.

    Returns:
        ``(keys_tested, keys_verified)``: how many raw-score keys matched the
        n-gram kind, and for how many of them the derived column was found.
    """
    marker = f"_{ngram_kind}_"
    details_key = f"{ngram_kind}_details"
    keys_tested = 0
    keys_verified = 0

    for key in results['raw_scores']:
        if marker not in key:
            continue
        keys_tested += 1
        print(f"\nTesting {ngram_kind} key: {key}")

        # Use the new algorithm: remove '_bigram' / '_trigram' from the key
        index_measure_col = key.replace(f"_{ngram_kind}", '')
        print(f" Fixed algorithm expects column: '{index_measure_col}'")

        # Looked up lazily so a missing details entry only matters when a
        # key of this kind actually exists.
        details = results[details_key]
        if not details:
            # Without any detail rows the column cannot be confirmed, so the
            # key must not count as verified.
            print(f" ❌ No {details_key} returned; column cannot be checked")
            continue

        # The first row is used as a representative sample of the columns.
        if index_measure_col not in details[0]:
            print(f" ❌ Column still NOT found in {details_key}")
            continue

        print(f" ✅ Column found in {details_key}")
        keys_verified += 1

        # Test if we can build word_score_map successfully; rows where the
        # score is missing or None are skipped.
        word_score_map = {
            detail.get(ngram_kind, ''): detail[index_measure_col]
            for detail in details
            if detail.get(index_measure_col) is not None
        }
        print(f" ✅ Successfully built word_score_map with {len(word_score_map)} entries")
        if word_score_map:
            sample_entries = list(word_score_map.items())[:3]
            print(f" Sample entries: {sample_entries}")

    return keys_tested, keys_verified


def test_plot_fix():
    """Run the bigram/trigram column-matching check and print a verdict.

    Success is reported only when at least one bigram key and one trigram
    key were present in the raw scores AND every such key's derived column
    was found in the matching details rows.
    """
    print("=== Testing Plot Fix ===")

    # Load config and create reference lists
    config = ConfigManager.load_reference_config()
    english_config = config.get('english', {})

    # Load a unigram, bigram, and trigram reference
    selected_references = (
        ('unigrams', 'COCA_spoken_frequency_token'),
        ('bigrams', 'COCA_spoken_bigram_frequency_token'),
        ('trigrams', 'COCA_trigram_frequency_token'),
    )
    reference_lists = {
        name: ConfigManager.load_reference_list_data(english_config[section][name])
        for section, name in selected_references
    }

    # Create analyzer and analyze text
    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')
    analyzer.load_reference_lists(reference_lists)

    test_text = "The cat sat on the mat. The dog ran quickly."
    results = analyzer.analyze_text(test_text, list(reference_lists), apply_log=False)

    print("\n=== Testing Column Matching with Fixed Algorithm ===")

    bigram_tested, bigram_verified = _check_ngram_columns(results, 'bigram')
    trigram_tested, trigram_verified = _check_ngram_columns(results, 'trigram')

    print("\n=== Fix Verification Complete ===")
    if not (bigram_tested and trigram_tested):
        print("❌ No bigram/trigram results found to test")
    elif bigram_verified == bigram_tested and trigram_verified == trigram_tested:
        print("✅ Fix appears to be working correctly!")
        print("Sample words should now appear in bigram and trigram plots.")
    else:
        # Keys existed but at least one derived column was missing: the
        # verdict must reflect the failed checks printed above.
        print(
            "❌ Fix NOT verified: "
            f"{bigram_verified}/{bigram_tested} bigram and "
            f"{trigram_verified}/{trigram_tested} trigram columns were found"
        )


if __name__ == "__main__":
    test_plot_fix()