Spaces:

egumasa
/

simple-text-analyzer

Building

File size: 4,674 Bytes

e7279e4

#!/usr/bin/env python3
"""
Test script to diagnose the reference loading issue in the web app
"""

import sys
import os

# Add the project root to the path
sys.path.insert(0, os.getcwd())

from web_app.config_manager import ConfigManager
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer

# Reference lists this diagnostic tries to select, as
# (config section, list key, label used in the "Added ..." message).
_CANDIDATE_REFERENCE_LISTS = (
    ('unigrams', 'COCA_spoken_frequency_token', 'unigram'),
    ('bigrams', 'COCA_spoken_bigram_frequency_token', 'bigram'),
    ('trigrams', 'COCA_trigram_frequency_token', 'trigram'),
)


def _format_size(label, value, indent):
    """Return one report line: the entry count for sized values, else the type."""
    if hasattr(value, '__len__'):
        return f"{indent}{label}: {len(value)} entries"
    return f"{indent}{label}: {type(value)}"


def _select_reference_lists(english_config):
    """Return (section, list_key, list_config) for each candidate found in the config."""
    selected = []
    for section, list_key, label in _CANDIDATE_REFERENCE_LISTS:
        # `or {}` treats a missing or empty section as "nothing to select".
        section_config = english_config.get(section) or {}
        if list_key in section_config:
            selected.append((section, list_key, section_config[list_key]))
            print(f"Added {label}: {list_key}")
    return selected


def _load_reference_lists(selected_lists):
    """Load data for every selected list, reporting what each one contains.

    Returns a dict mapping list key to the loaded data; lists whose data
    comes back empty/falsy are reported as failures and left out.
    """
    reference_lists = {}
    for ngram_type, list_key, list_config in selected_lists:
        print(f"\nLoading {ngram_type}: {list_key}")
        data = ConfigManager.load_reference_list_data(list_config)

        if not data:
            print(f"  Failed to load data for {list_key}")
            continue

        print(f"  Data keys: {list(data.keys())}")
        for key, value in data.items():
            print(_format_size(key, value, "    "))
        reference_lists[list_key] = data
    return reference_lists


def test_reference_loading():
    """Diagnose whether bigram/trigram reference lists reach the analysis summary.

    Steps, each reported on stdout:
      1. Load the reference config and look at its 'english' section.
      2. Select one unigram, one bigram and one trigram list when present.
      3. Load the data for each selected list via ConfigManager.
      4. Feed the loaded lists to a LexicalSophisticationAnalyzer and analyze
         a short test text.
      5. Report which summary keys mention 'bigram' / 'trigram'.

    Returns None. The analysis step is skipped when no reference list could be
    loaded, because its outcome would say nothing about the analyzer.
    """
    print("=== Testing Reference Loading Issue ===")

    # Load config
    config = ConfigManager.load_reference_config()
    english_config = config.get('english', {})

    print(f"\nAvailable sections in config: {list(english_config.keys())}")

    # Test what happens when we simulate loading different types of references
    print("\n=== Simulating Reference List Selection ===")
    selected_lists = _select_reference_lists(english_config)
    print(f"\nTotal selected lists: {len(selected_lists)}")

    # Load reference data directly
    reference_lists = _load_reference_lists(selected_lists)

    # Check what was loaded
    print("\n=== Loaded Reference Lists ===")
    print(f"Keys loaded: {list(reference_lists.keys())}")

    for key, data in reference_lists.items():
        print(f"\n{key}:")
        for file_type, file_data in data.items():
            print(_format_size(file_type, file_data, "  "))

    # Without any loaded list the analysis below would trivially produce no
    # n-gram keys and wrongly point at the analyzer, so stop here instead.
    if not reference_lists:
        print("\n⚠️  WARNING: No reference lists could be loaded; skipping analysis.")
        print("Check the reference config and data files before diagnosing the analysis step.")
        return

    # Test analysis with these reference lists
    print("\n=== Testing Analysis ===")
    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')

    # Load reference lists into analyzer
    analyzer.load_reference_lists(reference_lists)

    # Test text
    test_text = "The cat sat on the mat. The dog ran quickly."

    # Analyze
    results = analyzer.analyze_text(
        test_text,
        list(reference_lists.keys()),
        apply_log=False
    )

    # Use .get like the detail lookups below, so a missing summary is reported
    # as "no keys" rather than aborting the diagnostic with a KeyError.
    summary = results.get('summary', {})

    print(f"\nAnalysis summary keys: {list(summary)}")
    print(f"Bigram details count: {len(results.get('bigram_details', []))}")
    print(f"Trigram details count: {len(results.get('trigram_details', []))}")

    # Check for bigram/trigram entries in summary
    bigram_summary_keys = [k for k in summary if 'bigram' in k]
    trigram_summary_keys = [k for k in summary if 'trigram' in k]

    print(f"\nBigram summary keys: {bigram_summary_keys}")
    print(f"Trigram summary keys: {trigram_summary_keys}")

    if not bigram_summary_keys and not trigram_summary_keys:
        print("\n⚠️  WARNING: No bigram or trigram results in summary!")
        print("This suggests the issue is in the analysis process, not the display function.")
    elif bigram_summary_keys and trigram_summary_keys:
        print("\n✓ Bigram and trigram results found in summary.")
        print("The issue might be in how the web app loads reference lists.")
    else:
        # Only one of the two n-gram types made it into the summary; do not
        # report this as a full success.
        found, missing = (
            ('bigram', 'trigram') if bigram_summary_keys else ('trigram', 'bigram')
        )
        print(f"\n⚠️  WARNING: Found {found} results but no {missing} results in summary!")
        print(f"Check whether the {missing} reference list was selected and loaded above.")

# Run the diagnostic only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_reference_loading()