Spaces:
Building
Building
#!/usr/bin/env python3
import os
import shutil
import sys
import tempfile

from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer
def test_multi_index_functionality():
    """Smoke-test building several reference indices from a single CSV.

    Creates a small frequency CSV containing multiple score columns,
    registers three reference lists (Freq, Range, NormFreq) that all read
    from that one file, loads them into a ``LexicalSophisticationAnalyzer``,
    and runs a sample analysis to confirm every index produces summary
    statistics and per-token scores.  Progress and failures are reported
    via ``print``; the function returns ``None``.
    """
    print("Testing multi-index functionality...")

    # Test data: one CSV with several candidate score columns.
    test_data = """Type,POS,Headword,Rank,Freq,Range,NormFreq,NormRange
the,,,1,60056,500,59001.119,1.000
of,,,2,30331,500,29798.237,1.000
and,,,3,28973,500,28464.091,1.000
to,,,4,26036,500,25578.679,1.000
a,,,5,23926,500,23505.741,1.000
in,,,6,19923,500,19573.053,1.000
that,,,7,12279,500,12063.320,1.000"""

    # Save to a temporary file.  The directory is removed in the finally
    # block below (the original version leaked it on every run).
    temp_dir = tempfile.mkdtemp()
    try:
        test_file = os.path.join(temp_dir, "multi_freq.csv")
        with open(test_file, 'w') as f:
            f.write(test_data)
        print(f"β Created test file: {test_file}")

        # Three indices sourced from the same file, differing only in
        # which score column each one reads.
        configs = [
            {
                'index_name': 'test_freq',
                'word_column': 'Type',
                'score_column': 'Freq'
            },
            {
                'index_name': 'test_range',
                'word_column': 'Type',
                'score_column': 'Range'
            },
            {
                'index_name': 'test_normfreq',
                'word_column': 'Type',
                'score_column': 'NormFreq'
            }
        ]

        analyzer = LexicalSophisticationAnalyzer()
        reference_lists = {}

        # Build one custom reference-list configuration per index; all of
        # them point at the same CSV.  (The original loop used enumerate
        # but never read the index variable.)
        for config in configs:
            custom_config = {
                'file_path': test_file,
                'word_column': config['word_column'],
                'freq_column': config['score_column'],
                'delimiter': ',',
                'is_custom_config': True
            }
            reference_lists[config['index_name']] = {
                'token': custom_config
            }
        print(f"β Created {len(reference_lists)} reference configurations")

        # Test loading
        try:
            analyzer.load_reference_lists(reference_lists)
            print("β Successfully loaded all reference lists")

            # Verify each index was loaded correctly
            for config in configs:
                index_name = config['index_name']
                if index_name in analyzer.reference_lists:
                    token_data = analyzer.reference_lists[index_name].get('token', {})
                    print(f"β {index_name}: {len(token_data)} entries")

                    # Spot-check a known word from the CSV.
                    test_words = ['the', 'of', 'and']
                    for word in test_words[:1]:  # Just test first word
                        if word in token_data:
                            print(f" - '{word}': {token_data[word]}")
                else:
                    print(f"β {index_name}: not found in loaded lists")
        except Exception as e:
            print(f"β Error loading reference lists: {e}")
            import traceback
            traceback.print_exc()
            return

        # Test analysis with multiple indices
        print("\nβ Testing analysis with multiple indices...")
        test_text = "The quick brown fox jumps over the lazy dog and runs to the park."

        try:
            results = analyzer.analyze_text(
                test_text,
                ['test_freq', 'test_range', 'test_normfreq'],
                apply_log=False,
                word_type_filter=None
            )
            print("β Analysis completed successfully")

            # Check summary statistics for each requested index.
            if results['summary']:
                print("β Summary results:")
                for key, stats in results['summary'].items():
                    print(f" - {key}: mean={stats['mean']:.3f}, count={stats['count']}")

            # Check that per-token rows carry a score for every index.
            if results['token_details']:
                print("β Token details (first 3 tokens):")
                for token in results['token_details'][:3]:
                    token_word = token.get('token', 'N/A')
                    freq_score = token.get('test_freq', 'N/A')
                    range_score = token.get('test_range', 'N/A')
                    normfreq_score = token.get('test_normfreq', 'N/A')
                    print(f" - {token_word}: freq={freq_score}, range={range_score}, normfreq={normfreq_score}")
        except Exception as e:
            print(f"β Error during analysis: {e}")
            import traceback
            traceback.print_exc()
    finally:
        # Always remove the temporary directory, including on the early
        # return and exception paths (fixes the original resource leak).
        shutil.rmtree(temp_dir, ignore_errors=True)
# Run the smoke test only when executed directly, not when imported.
if __name__ == "__main__":
    test_multi_index_functionality()