#!/usr/bin/env python3
import sys
import os
import shutil
import tempfile

from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer


def test_multi_index_functionality():
    """Smoke-test building multiple scoring indices from a single CSV file.

    Writes a small frequency table to a temporary CSV, derives three
    reference lists from it (Freq, Range, NormFreq columns), loads them into
    a LexicalSophisticationAnalyzer, and runs a short text analysis to
    confirm each index yields per-token scores. Progress and failures are
    reported via print; nothing is returned.
    """
    print("Testing multi-index functionality...")

    # Create test data with multiple score columns
    test_data = """Type,POS,Headword,Rank,Freq,Range,NormFreq,NormRange
the,,,1,60056,500,59001.119,1.000
of,,,2,30331,500,29798.237,1.000
and,,,3,28973,500,28464.091,1.000
to,,,4,26036,500,25578.679,1.000
a,,,5,23926,500,23505.741,1.000
in,,,6,19923,500,19573.053,1.000
that,,,7,12279,500,12063.320,1.000"""

    # Save to temporary file. The directory is removed in the finally block
    # below so repeated runs don't leak temp files (the original never
    # cleaned it up).
    temp_dir = tempfile.mkdtemp()
    try:
        test_file = os.path.join(temp_dir, "multi_freq.csv")
        with open(test_file, 'w') as f:
            f.write(test_data)
        print(f"✓ Created test file: {test_file}")

        # Test multiple indices from single file
        configs = [
            {
                'index_name': 'test_freq',
                'word_column': 'Type',
                'score_column': 'Freq'
            },
            {
                'index_name': 'test_range',
                'word_column': 'Type',
                'score_column': 'Range'
            },
            {
                'index_name': 'test_normfreq',
                'word_column': 'Type',
                'score_column': 'NormFreq'
            }
        ]

        analyzer = LexicalSophisticationAnalyzer()
        reference_lists = {}

        # Create multiple reference lists from same file.
        # (Fixed: the original used `for i, config in enumerate(configs)`
        # but never used `i`.)
        for config in configs:
            custom_config = {
                'file_path': test_file,
                'word_column': config['word_column'],
                'freq_column': config['score_column'],
                'delimiter': ',',
                'is_custom_config': True
            }
            reference_lists[config['index_name']] = {
                'token': custom_config
            }

        print(f"✓ Created {len(reference_lists)} reference configurations")

        # Test loading
        try:
            analyzer.load_reference_lists(reference_lists)
            print("✓ Successfully loaded all reference lists")

            # Verify each index was loaded correctly
            for config in configs:
                index_name = config['index_name']
                if index_name in analyzer.reference_lists:
                    token_data = analyzer.reference_lists[index_name].get('token', {})
                    print(f"✓ {index_name}: {len(token_data)} entries")

                    # Test some words
                    test_words = ['the', 'of', 'and']
                    for word in test_words[:1]:  # Just test first word
                        if word in token_data:
                            print(f" - '{word}': {token_data[word]}")
                else:
                    print(f"✗ {index_name}: not found in loaded lists")
        except Exception as e:
            print(f"✗ Error loading reference lists: {e}")
            import traceback
            traceback.print_exc()
            return

        # Test analysis with multiple indices
        print("\n✓ Testing analysis with multiple indices...")
        test_text = "The quick brown fox jumps over the lazy dog and runs to the park."

        try:
            results = analyzer.analyze_text(
                test_text,
                ['test_freq', 'test_range', 'test_normfreq'],
                apply_log=False,
                word_type_filter=None
            )
            print("✓ Analysis completed successfully")

            # Check results
            if results['summary']:
                print("✓ Summary results:")
                for key, stats in results['summary'].items():
                    print(f" - {key}: mean={stats['mean']:.3f}, count={stats['count']}")

            # Check token details
            if results['token_details']:
                print("✓ Token details (first 3 tokens):")
                for token in results['token_details'][:3]:
                    token_word = token.get('token', 'N/A')
                    freq_score = token.get('test_freq', 'N/A')
                    range_score = token.get('test_range', 'N/A')
                    normfreq_score = token.get('test_normfreq', 'N/A')
                    print(f" - {token_word}: freq={freq_score}, range={range_score}, normfreq={normfreq_score}")
        except Exception as e:
            print(f"✗ Error during analysis: {e}")
            import traceback
            traceback.print_exc()
    finally:
        # Best-effort cleanup of the temporary directory and the CSV in it.
        shutil.rmtree(temp_dir, ignore_errors=True)


if __name__ == "__main__":
    test_multi_index_functionality()