Spaces:
Building
Building
| #!/usr/bin/env python3 | |
| """ | |
| Test script to diagnose the reference loading issue in the web app | |
| """ | |
| import sys | |
| import os | |
| # Add the current working directory (assumed to be the project root) to the import path | |
| sys.path.insert(0, os.getcwd()) | |
| from web_app.config_manager import ConfigManager | |
| from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer | |
def _print_entry_counts(mapping):
    """Print one line per item of *mapping*: its length if it has one, else its type."""
    for name, value in mapping.items():
        if hasattr(value, '__len__'):
            print(f" {name}: {len(value)} entries")
        else:
            print(f" {name}: {type(value)}")


def test_reference_loading():
    """Diagnose whether bigram/trigram reference lists survive loading and analysis.

    Steps, each reported on stdout:

    1. Load the reference config via ``ConfigManager.load_reference_config()``
       and read its ``'english'`` section.
    2. Select one unigram, one bigram and one trigram list, if the config
       contains them.
    3. Load each selected list with ``ConfigManager.load_reference_list_data``
       and print the size of every item in the returned mapping.
    4. Feed the loaded lists to a ``LexicalSophisticationAnalyzer``, analyze a
       short sample text, and report whether the result summary contains keys
       mentioning ``'bigram'`` and ``'trigram'``.

    The function only prints; it returns ``None``.
    """
    print("=== Testing Reference Loading Issue ===")

    # Load config
    config = ConfigManager.load_reference_config()
    english_config = config.get('english', {})
    print(f"\nAvailable sections in config: {list(english_config.keys())}")

    # Simulate the user picking one list of each n-gram size.
    print("\n=== Simulating Reference List Selection ===")
    candidates = (
        ('unigrams', 'unigram', 'COCA_spoken_frequency_token'),
        ('bigrams', 'bigram', 'COCA_spoken_bigram_frequency_token'),
        ('trigrams', 'trigram', 'COCA_trigram_frequency_token'),
    )
    selected_lists = []
    for section, label, list_key in candidates:
        if section in english_config and list_key in english_config[section]:
            selected_lists.append((section, list_key, english_config[section][list_key]))
            print(f"Added {label}: {list_key}")
    print(f"\nTotal selected lists: {len(selected_lists)}")

    # Load reference data directly
    reference_lists = {}
    for ngram_type, list_key, list_config in selected_lists:
        print(f"\nLoading {ngram_type}: {list_key}")
        data = ConfigManager.load_reference_list_data(list_config)
        if not data:
            print(f" Failed to load data for {list_key}")
            continue
        print(f" Data keys: {list(data.keys())}")
        _print_entry_counts(data)
        reference_lists[list_key] = data

    # Check what was loaded
    print("\n=== Loaded Reference Lists ===")
    print(f"Keys loaded: {list(reference_lists.keys())}")
    for key, data in reference_lists.items():
        print(f"\n{key}:")
        _print_entry_counts(data)

    # With nothing loaded the analysis below could only produce an empty
    # summary, and the final diagnosis would wrongly blame the analyzer.
    if not reference_lists:
        print("\n⚠️ WARNING: No reference lists were loaded; skipping analysis.")
        print("This suggests the issue is in the config or in loading the reference data.")
        return

    # Test analysis with these reference lists
    print("\n=== Testing Analysis ===")
    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')
    analyzer.load_reference_lists(reference_lists)

    test_text = "The cat sat on the mat. The dog ran quickly."
    results = analyzer.analyze_text(
        test_text,
        list(reference_lists.keys()),
        apply_log=False
    )
    summary = results['summary']

    print(f"\nAnalysis summary keys: {list(summary.keys())}")
    print(f"Bigram details count: {len(results.get('bigram_details', []))}")
    print(f"Trigram details count: {len(results.get('trigram_details', []))}")

    # Check for bigram/trigram entries in summary
    bigram_summary_keys = [k for k in summary if 'bigram' in k]
    trigram_summary_keys = [k for k in summary if 'trigram' in k]
    print(f"\nBigram summary keys: {bigram_summary_keys}")
    print(f"Trigram summary keys: {trigram_summary_keys}")

    if not bigram_summary_keys and not trigram_summary_keys:
        print("\n⚠️ WARNING: No bigram or trigram results in summary!")
        print("This suggests the issue is in the analysis process, not the display function.")
    elif not bigram_summary_keys or not trigram_summary_keys:
        # Exactly one kind is missing: say which, instead of claiming that
        # both were found.
        if bigram_summary_keys:
            present, missing = "bigram", "trigram"
        else:
            present, missing = "trigram", "bigram"
        print(f"\n⚠️ WARNING: Only {present} results in summary; no {missing} results!")
        print(f"Check above whether the {missing} reference list was selected and loaded.")
    else:
        print("\n✓ Bigram and trigram results found in summary.")
        print("The issue might be in how the web app loads reference lists.")
# Run the diagnostic only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_reference_loading()