Spaces:
Building
Building
| """ | |
| Reference list management module. | |
| Handles default and custom reference list configuration and management. | |
| """ | |
| import streamlit as st | |
| from typing import Dict, List, Any, Optional | |
| from pathlib import Path | |
| from web_app.session_manager import SessionManager | |
| from web_app.config_manager import ConfigManager | |
| from web_app.components.ui_components import UIComponents | |
class ReferenceManager:
    """Manages reference lists (both default and custom).

    All methods are static: the class is a namespace of Streamlit rendering
    and bookkeeping helpers. Loaded reference lists are read and written
    through ``SessionManager``; configuration and file parsing are delegated
    to ``ConfigManager``; widgets for uploaded files come from
    ``UIComponents``.
    """

    # Upper bound on the number of checkbox columns rendered per n-gram section.
    _MAX_CHECKBOX_COLUMNS = 3

    @staticmethod
    def configure_reference_lists(analyzer):
        """Render the default reference list checkboxes and load the selection.

        The reference configuration is read from ``ConfigManager`` and the
        section matching ``st.session_state.language`` ("english" for ``'en'``,
        otherwise "japanese") is rendered, one group of checkboxes per n-gram
        type. Ticked lists are then synchronised into the session and the
        currently loaded lists are displayed.

        Args:
            analyzer: Not used by this method; kept so existing callers that
                pass it keep working.
        """
        # Fall back to an empty config so a failed load renders an empty
        # section instead of raising on ``None.get``.
        config = ConfigManager.load_reference_config() or {}
        language_key = "english" if st.session_state.language == 'en' else "japanese"
        language_config = config.get(
            language_key, {"unigrams": {}, "bigrams": {}, "trigrams": {}}
        )

        st.write("**Default Reference Lists**")

        # (ngram_type, list_key, list_config) for every ticked checkbox.
        selected_lists = []

        for ngram_type, type_lists in language_config.items():
            if not type_lists:  # Skip empty sections
                continue

            # Lists are enabled unless the config explicitly says otherwise.
            enabled_lists = {
                k: v for k, v in type_lists.items() if v.get('enabled', True)
            }
            if not enabled_lists:
                continue

            st.write(f"**{ngram_type.title()}:**")

            # One column per list, capped; extra lists wrap around the columns.
            column_count = min(
                len(enabled_lists), ReferenceManager._MAX_CHECKBOX_COLUMNS
            )
            cols = st.columns(column_count)

            for i, (list_key, list_config) in enumerate(enabled_lists.items()):
                with cols[i % column_count]:
                    checkbox_key = f"{ngram_type}_{list_key}_checkbox"
                    selected = st.checkbox(
                        # Fall back to the config key when no label is given.
                        list_config.get("display_name", list_key),
                        key=checkbox_key,
                        help=list_config.get("description", ""),
                    )
                    if selected:
                        selected_lists.append((ngram_type, list_key, list_config))

        # Load selected reference lists
        ReferenceManager._update_default_reference_lists(selected_lists)

        # Display currently loaded lists
        ReferenceManager._display_loaded_lists()

    @staticmethod
    def _update_default_reference_lists(selected_lists: List[tuple]):
        """Synchronise the session's default reference lists with the selection.

        Args:
            selected_lists: ``(ngram_type, list_key, list_config)`` tuples for
                the lists that are currently ticked. ``list_key`` alone is used
                as the session key.
        """
        current_keys = set(SessionManager.get_reference_lists().keys())
        new_keys = {list_key for _, list_key, _ in selected_lists}

        # Remove deselected lists (only default lists, not custom uploads).
        for key in current_keys - new_keys:
            # Re-read the stored lists each time rather than trusting the
            # snapshot taken above.
            if key in SessionManager.get_reference_lists():
                if not SessionManager.is_custom_reference_list(key):
                    SessionManager.remove_reference_list(key)

        # Add newly selected lists that are not loaded yet.
        for _, list_key, list_config in selected_lists:
            if list_key not in SessionManager.get_reference_lists():
                data = ConfigManager.load_reference_list_data(list_config)
                if data:
                    SessionManager.add_reference_list(list_key, data)

    @staticmethod
    def _count_entries(data: Any) -> int:
        """Return the number of vocabulary entries held by one reference list.

        Sized values are summed. Dict values flagged with ``is_custom_config``
        are configuration records, not vocabulary, so they contribute nothing.
        Anything that is not a dict yields 0.
        """
        if not isinstance(data, dict):
            return 0

        total = 0
        for file_data in data.values():
            if isinstance(file_data, dict):
                # Handle dicts entirely in this branch so a flagged config
                # dict cannot fall through to the generic ``__len__`` case.
                if not file_data.get('is_custom_config', False):
                    total += len(file_data)
            elif hasattr(file_data, '__len__'):
                total += len(file_data)
        return total

    @staticmethod
    def _display_loaded_lists():
        """Display currently loaded reference lists with their entry counts."""
        reference_lists = SessionManager.get_reference_lists()
        if not reference_lists:
            return

        st.write("**Currently Loaded Lists:**")
        for key, data in reference_lists.items():
            vocab_size = ReferenceManager._count_entries(data)
            if vocab_size > 0:
                st.write(f"- {key}: {vocab_size:,} entries")
            else:
                st.write(f"- {key}: No data loaded")

    @staticmethod
    def process_custom_reference_uploads(uploaded_files):
        """Render configuration widgets for uploaded files and apply on request.

        Each uploaded file is processed once (keyed by its ``name``) and cached
        through ``SessionManager``. For every file with a stored configuration
        a preview and per-index widgets are rendered. Pressing the
        "Apply Configuration" button hands the collected settings to
        ``ConfigManager.apply_configurations``.

        Args:
            uploaded_files: Iterable of uploaded file objects exposing a
                ``name`` attribute; nothing is rendered when it is empty.
        """
        if not uploaded_files:
            return

        st.write("**Configure Uploaded Files:**")

        # file name -> collected configuration, filled while rendering.
        all_configs = {}

        for uploaded_file in uploaded_files:
            file_key = uploaded_file.name

            # Only parse files that have not been processed before.
            if file_key not in SessionManager.get_uploaded_file_configs():
                new_config = ConfigManager.process_uploaded_file(uploaded_file)
                if new_config:
                    SessionManager.set_uploaded_file_config(file_key, new_config)

            config = SessionManager.get_uploaded_file_configs().get(file_key)
            if not config:
                continue

            UIComponents.render_file_preview(file_key, config)

            # Number of indices to create from this file.
            index_count = UIComponents.render_index_count_selector(file_key, config)

            # Guard against a selector that returns None as well as 0.
            if index_count and index_count > 0:
                st.write(f"**Configure {index_count} indices:**")

                file_configs = [
                    UIComponents.render_index_configuration(
                        file_key, config, i, index_count
                    )
                    for i in range(index_count)
                ]

                all_configs[file_key] = {
                    'file_name': config['file_name'],
                    'content': config['content'],
                    'delimiter': config['delimiter'],
                    'indices': file_configs,
                }

            st.write("---")

        # Apply configuration button
        if st.button("Apply Configuration", type="primary"):
            success_count, errors = ConfigManager.apply_configurations(all_configs)
            UIComponents.render_configuration_results(success_count, errors)

        # Display currently configured indices
        UIComponents.display_configured_indices()

    @staticmethod
    def render_custom_upload_section():
        """Render the custom reference list upload section."""
        st.write("**Custom Reference Lists**")

        uploaded_refs = st.file_uploader(
            "Upload Custom Reference Lists",
            type=['csv', 'tsv', 'txt'],
            accept_multiple_files=True,
            help="Upload CSV/TSV files with frequency data",
            key="custom_refs_upload",
        )

        if uploaded_refs:
            ReferenceManager.process_custom_reference_uploads(uploaded_refs)

    @staticmethod
    def validate_reference_lists() -> bool:
        """Return True when at least one reference list is loaded."""
        return bool(SessionManager.get_reference_lists())

    @staticmethod
    def get_available_indices() -> List[str]:
        """Return the keys of all currently loaded reference lists."""
        return list(SessionManager.get_reference_lists().keys())

    @staticmethod
    def clear_custom_reference_lists():
        """Remove every reference list that SessionManager reports as custom."""
        reference_lists = SessionManager.get_reference_lists()
        # Collect the keys first so the lists are not modified while iterating.
        custom_keys = [
            key for key in reference_lists.keys()
            if SessionManager.is_custom_reference_list(key)
        ]
        for key in custom_keys:
            SessionManager.remove_reference_list(key)

    @staticmethod
    def _find_custom_config(data: Any) -> Dict[str, Any]:
        """Return the configuration dict stored inside a custom reference list.

        The ``'token'`` entry is preferred. When it is absent or not a dict,
        the first dict value flagged ``is_custom_config`` is used instead. An
        empty dict is returned when nothing suitable is found.
        """
        if not isinstance(data, dict):
            return {}

        token_entry = data.get('token')
        if isinstance(token_entry, dict):
            return token_entry

        for entry in data.values():
            if isinstance(entry, dict) and entry.get('is_custom_config', False):
                return entry
        return {}

    @staticmethod
    def export_reference_list_config() -> Dict[str, Any]:
        """Export a summary of the currently loaded reference lists.

        Returns:
            A dict keyed by list name. Custom lists map to ``type``,
            ``word_column``, ``freq_column`` and ``file_path`` (``None`` where
            unknown). Default lists map to ``type`` and ``data_size``, the size
            of their ``'token'`` dict or 0 when there is none.
        """
        reference_lists = SessionManager.get_reference_lists()
        config = {}

        for name, data in reference_lists.items():
            if SessionManager.is_custom_reference_list(name):
                # Avoid a KeyError for custom lists without a 'token' entry.
                custom_data = ReferenceManager._find_custom_config(data)
                config[name] = {
                    'type': 'custom',
                    'word_column': custom_data.get('word_column'),
                    'freq_column': custom_data.get('freq_column'),
                    'file_path': custom_data.get('file_path'),
                }
            else:
                token_data = data.get('token') if isinstance(data, dict) else None
                config[name] = {
                    'type': 'default',
                    'data_size': len(token_data) if isinstance(token_data, dict) else 0,
                }

        return config