Spaces:
Building
Building
| #!/usr/bin/env python3 | |
| """ | |
| Debug script to examine column naming issues in bigram/trigram plots | |
| """ | |
| import sys | |
| import os | |
| # Add the project root to the path | |
| sys.path.insert(0, os.getcwd()) | |
| from web_app.config_manager import ConfigManager | |
| from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer | |
def debug_plot_columns() -> None:
    """Print analyzer output column names and check n-gram column matching.

    Loads one unigram, one bigram and one trigram reference list through
    ``ConfigManager``, analyzes a short fixed sentence with
    ``LexicalSophisticationAnalyzer``, and prints:

    * every key of ``results['raw_scores']``;
    * the keys of the first entry of ``token_details``, ``bigram_details``
      and ``trigram_details`` (when those lists are non-empty);
    * for every raw-score key containing ``_bigram_`` / ``_trigram_``, the
      detail column name the matching algorithm derives from it and whether
      that column is present in the first detail entry.

    Raises:
        KeyError: if one of the expected reference lists is missing from the
            English section of the reference config.
    """
    print("=== Debugging Plot Column Names ==")

    config = ConfigManager.load_reference_config()
    english_config = config.get('english', {})

    # (config section, reference list name) for one list of each n-gram size.
    wanted = (
        ('unigrams', 'COCA_spoken_frequency_token'),
        ('bigrams', 'COCA_spoken_bigram_frequency_token'),
        ('trigrams', 'COCA_trigram_frequency_token'),
    )
    # Resolve every config entry first so a missing entry fails before any
    # reference data is loaded (same order of failure as looking them up
    # one by one up front).
    list_configs = {name: english_config[section][name] for section, name in wanted}
    reference_lists = {
        name: ConfigManager.load_reference_list_data(list_config)
        for name, list_config in list_configs.items()
    }

    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')
    analyzer.load_reference_lists(reference_lists)

    test_text = "The cat sat on the mat. The dog ran quickly."
    results = analyzer.analyze_text(test_text, list(reference_lists.keys()), apply_log=False)

    print("\n=== Raw Scores Keys ===")
    for key in results['raw_scores']:
        print(f" {key}")

    for unit in ('token', 'bigram', 'trigram'):
        print(f"\n=== {unit.capitalize()} Details Columns ===")
        details = results[f'{unit}_details']
        if details:
            print(f" Sample {unit}: {list(details[0].keys())}")

    print("\n=== Column Matching Analysis ===")
    # All bigram keys are reported first, then all trigram keys.
    for ngram in ('bigram', 'trigram'):
        _report_ngram_column_matches(results, ngram)


def _report_ngram_column_matches(results, ngram: str) -> None:
    """Report, per raw-score key for *ngram*, whether its detail column exists.

    Args:
        results: analyzer output; read via ``results['raw_scores']`` and
            ``results[f'{ngram}_details']``.
        ngram: ``'bigram'`` or ``'trigram'``.
    """
    marker = f"_{ngram}_"
    for key in results['raw_scores']:
        if marker not in key:
            continue

        print(f"\nAnalyzing {ngram} key: {key}")
        key_parts = key.split('_')
        # Because `marker` occurs in `key`, `ngram` is guaranteed to be one of
        # the split parts, so no extra length/membership guard is needed.
        measure_name = '_'.join(key_parts[key_parts.index(ngram) + 1:])
        expected_col = f"{key_parts[0]}_{measure_name}"
        print(f" Algorithm expects column: '{expected_col}'")

        details = results[f'{ngram}_details']
        if not details:
            continue

        sample = details[0]
        # NOTE(review): the original status markers were mis-encoded and both
        # outcomes printed the same character; distinct check/cross marks are
        # restored here as the most likely intent.
        if expected_col in sample:
            print(f" ✓ Column found in {ngram}_details")
            continue

        print(f" ✗ Column NOT found in {ngram}_details")
        print(f" Available columns: {list(sample.keys())}")
        # Suggest candidates: any column whose name contains the measure name.
        for col in sample.keys():
            if measure_name in col:
                print(f" Possible match: '{col}'")
if __name__ == "__main__":
    # Run the diagnostic only when executed as a script, not when imported.
    debug_plot_columns()