Spaces:
Building
Building
File size: 4,674 Bytes
e7279e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
#!/usr/bin/env python3
"""
Test script to diagnose the reference loading issue in the web app
"""
import sys
import os
# Add the project root to the path
sys.path.insert(0, os.getcwd())
from web_app.config_manager import ConfigManager
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer
# (config section, reference-list key, singular label used in log output) for
# each reference list the diagnostic pretends the user selected.
_SIMULATED_SELECTIONS = (
    ('unigrams', 'COCA_spoken_frequency_token', 'unigram'),
    ('bigrams', 'COCA_spoken_bigram_frequency_token', 'bigram'),
    ('trigrams', 'COCA_trigram_frequency_token', 'trigram'),
)


def _select_reference_lists(english_config):
    """Return ``(section, list_key, list_config)`` for each simulated selection found in the config."""
    selected = []
    for ngram_type, list_key, label in _SIMULATED_SELECTIONS:
        # A missing (or empty/None) section simply means nothing to select.
        section = english_config.get(ngram_type) or {}
        if list_key in section:
            selected.append((ngram_type, list_key, section[list_key]))
            print(f"Added {label}: {list_key}")
    return selected


def _print_entry_sizes(data):
    """Print one line per item of *data*: its length when it has one, otherwise its type."""
    for name, value in data.items():
        if hasattr(value, '__len__'):
            print(f" {name}: {len(value)} entries")
        else:
            print(f" {name}: {type(value)}")


def test_reference_loading():
    """Diagnose why bigram/trigram reference results may be missing in the web app.

    The diagnostic runs these steps and prints what it finds at each one:

    1. Load the reference configuration via ``ConfigManager`` and take its
       ``'english'`` section.
    2. Simulate selecting one unigram, one bigram and one trigram list
       (only those actually present in the configuration).
    3. Load the data for each selected list and report what was loaded.
    4. Feed the loaded lists to a ``LexicalSophisticationAnalyzer`` and
       analyze a short test text.
    5. Report whether the analysis summary contains bigram and trigram keys.

    Returns:
        None. All findings are written to standard output.
    """
    print("=== Testing Reference Loading Issue ===")

    # Load config
    config = ConfigManager.load_reference_config()
    english_config = config.get('english') or {}
    print(f"\nAvailable sections in config: {list(english_config.keys())}")

    # Simulate selecting some unigrams, bigrams, and trigrams
    print("\n=== Simulating Reference List Selection ===")
    selected_lists = _select_reference_lists(english_config)
    print(f"\nTotal selected lists: {len(selected_lists)}")

    # Load reference data directly
    reference_lists = {}
    for ngram_type, list_key, list_config in selected_lists:
        print(f"\nLoading {ngram_type}: {list_key}")
        data = ConfigManager.load_reference_list_data(list_config)
        if data:
            print(f" Data keys: {list(data.keys())}")
            _print_entry_sizes(data)
            reference_lists[list_key] = data
        else:
            print(f" Failed to load data for {list_key}")

    # Check what was loaded
    print("\n=== Loaded Reference Lists ===")
    print(f"Keys loaded: {list(reference_lists.keys())}")
    for key, data in reference_lists.items():
        print(f"\n{key}:")
        _print_entry_sizes(data)

    # Without any loaded list the analysis below cannot say anything about
    # the analysis step itself, so stop here instead of blaming it.
    if not reference_lists:
        print("\n⚠️ WARNING: No reference lists were loaded!")
        print("The issue is in reference selection/loading, before any analysis runs.")
        return

    # Test analysis with these reference lists
    print("\n=== Testing Analysis ===")
    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')
    analyzer.load_reference_lists(reference_lists)

    test_text = "The cat sat on the mat. The dog ran quickly."
    results = analyzer.analyze_text(
        test_text,
        list(reference_lists.keys()),
        apply_log=False
    )

    # Tolerate a missing summary so the diagnostic reports it rather than crashing.
    summary = results.get('summary') or {}
    print(f"\nAnalysis summary keys: {list(summary.keys())}")
    print(f"Bigram details count: {len(results.get('bigram_details', []))}")
    print(f"Trigram details count: {len(results.get('trigram_details', []))}")

    # Check for bigram/trigram entries in summary.
    # Note: 'bigram' is not a substring of 'trigram', so the two filters
    # do not overlap on key names that mention only one of them.
    bigram_summary_keys = [k for k in summary if 'bigram' in k]
    trigram_summary_keys = [k for k in summary if 'trigram' in k]
    print(f"\nBigram summary keys: {bigram_summary_keys}")
    print(f"Trigram summary keys: {trigram_summary_keys}")

    if not bigram_summary_keys and not trigram_summary_keys:
        print("\n⚠️ WARNING: No bigram or trigram results in summary!")
        print("This suggests the issue is in the analysis process, not the display function.")
    elif bigram_summary_keys and trigram_summary_keys:
        print("\n✓ Bigram and trigram results found in summary.")
        print("The issue might be in how the web app loads reference lists.")
    else:
        # Only one of the two n-gram kinds produced results; say which is missing
        # instead of claiming both were found.
        missing = "bigram" if not bigram_summary_keys else "trigram"
        print(f"\n⚠️ WARNING: No {missing} results in summary!")
        print(f"Check whether the {missing} reference list was selected, loaded and analyzed.")
# Run the diagnostic only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_reference_loading()
|