Spaces:

egumasa
/

simple-text-analyzer

Building

App Files Files Community

simple-text-analyzer / test /test_reference_loading_issue.py

egumasa

tag set

025fa56 7 months ago

raw

history blame contribute delete

4.67 kB

	#!/usr/bin/env python3
	"""
	Test script to diagnose the reference loading issue in the web app
	"""

	import sys
	import os

	# Make the project packages importable.
	# The current working directory stays first on sys.path, so launching the
	# script from the project root behaves exactly as before.
	sys.path.insert(0, os.getcwd())
	# Fallback for launches from any other directory: derive the root from this
	# file's location.
	# NOTE(review): assumes this file lives one directory below the project
	# root (e.g. <root>/test/) - confirm against the repository layout.
	_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	if _PROJECT_ROOT not in sys.path:
	    sys.path.insert(1, _PROJECT_ROOT)

	from web_app.config_manager import ConfigManager
	from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer

	def _print_entry_counts(data):
	    """Print one line per item of *data*: its length, or its type if unsized."""
	    for name, value in data.items():
	        if hasattr(value, '__len__'):
	            print(f" {name}: {len(value)} entries")
	        else:
	            print(f" {name}: {type(value)}")


	def test_reference_loading():
	    """Walk through reference-list loading and analysis, printing each stage.

	    Steps, all reported on stdout:

	    1. Load the reference config and pick one unigram, one bigram and one
	       trigram list from its 'english' section (when present).
	    2. Load the data for each selected list via ``ConfigManager``.
	    3. Hand the loaded lists to a ``LexicalSophisticationAnalyzer`` and
	       analyze a short fixed sentence pair.
	    4. Report whether bigram/trigram keys show up in the analysis summary.

	    Returns None; the function only prints diagnostics.
	    """
	    print("=== Testing Reference Loading Issue ===")

	    # Load config
	    config = ConfigManager.load_reference_config()
	    english_config = config.get('english', {})

	    print(f"\nAvailable sections in config: {list(english_config.keys())}")

	    # Simulate selecting one unigram, one bigram and one trigram list
	    print("\n=== Simulating Reference List Selection ===")

	    # (config section, label used in messages, list key)
	    wanted = (
	        ('unigrams', 'unigram', 'COCA_spoken_frequency_token'),
	        ('bigrams', 'bigram', 'COCA_spoken_bigram_frequency_token'),
	        ('trigrams', 'trigram', 'COCA_trigram_frequency_token'),
	    )

	    selected_lists = []
	    for section, label, list_key in wanted:
	        section_config = english_config.get(section, {})
	        if list_key in section_config:
	            selected_lists.append((section, list_key, section_config[list_key]))
	            print(f"Added {label}: {list_key}")
	        else:
	            # Report the gap instead of skipping silently: a missing entry
	            # here explains missing results further down.
	            print(f"Missing {label} in config: {list_key}")

	    print(f"\nTotal selected lists: {len(selected_lists)}")

	    # Load reference data directly
	    reference_lists = {}
	    for ngram_type, list_key, list_config in selected_lists:
	        print(f"\nLoading {ngram_type}: {list_key}")
	        data = ConfigManager.load_reference_list_data(list_config)

	        if not data:
	            print(f" Failed to load data for {list_key}")
	            continue

	        print(f" Data keys: {list(data.keys())}")
	        _print_entry_counts(data)
	        reference_lists[list_key] = data

	    # Check what was loaded
	    print("\n=== Loaded Reference Lists ===")
	    print(f"Keys loaded: {list(reference_lists.keys())}")

	    for key, data in reference_lists.items():
	        print(f"\n{key}:")
	        _print_entry_counts(data)

	    # Without any loaded list the analysis below cannot produce n-gram
	    # results, and its verdict would wrongly blame the analysis step.
	    if not reference_lists:
	        print("\n⚠️ WARNING: No reference lists could be loaded; skipping analysis.")
	        print("This suggests the issue is in the config or the reference data loading.")
	        return

	    # Test analysis with these reference lists
	    print("\n=== Testing Analysis ===")
	    analyzer = LexicalSophisticationAnalyzer(language='en', model_size='md')

	    # Load reference lists into analyzer
	    analyzer.load_reference_lists(reference_lists)

	    # Test text
	    test_text = "The cat sat on the mat. The dog ran quickly."

	    # Analyze
	    results = analyzer.analyze_text(
	        test_text,
	        list(reference_lists.keys()),
	        apply_log=False,
	    )

	    # Read the summary defensively, like the detail lookups below, so a
	    # missing section is reported as "no results" rather than a KeyError.
	    summary = results.get('summary', {})

	    print(f"\nAnalysis summary keys: {list(summary.keys())}")
	    print(f"Bigram details count: {len(results.get('bigram_details', []))}")
	    print(f"Trigram details count: {len(results.get('trigram_details', []))}")

	    # Check for bigram/trigram entries in summary
	    bigram_summary_keys = [k for k in summary if 'bigram' in k]
	    trigram_summary_keys = [k for k in summary if 'trigram' in k]

	    print(f"\nBigram summary keys: {bigram_summary_keys}")
	    print(f"Trigram summary keys: {trigram_summary_keys}")

	    if not bigram_summary_keys and not trigram_summary_keys:
	        print("\n⚠️ WARNING: No bigram or trigram results in summary!")
	        print("This suggests the issue is in the analysis process, not the display function.")
	    elif bigram_summary_keys and trigram_summary_keys:
	        print("\n✓ Bigram and trigram results found in summary.")
	        print("The issue might be in how the web app loads reference lists.")
	    else:
	        # Only one of the two n-gram types made it into the summary.
	        missing = 'bigram' if not bigram_summary_keys else 'trigram'
	        print(f"\n⚠️ WARNING: No {missing} results in summary!")
	        print("Check above whether the matching reference list was loaded before blaming the analysis process.")

	# Run the diagnostic when this file is executed directly as a script.
	if __name__ == "__main__":
	    test_reference_loading()