""" Reference list management module. Handles default and custom reference list configuration and management. """ import streamlit as st from typing import Dict, List, Any, Optional from pathlib import Path from web_app.session_manager import SessionManager from web_app.config_manager import ConfigManager from web_app.components.ui_components import UIComponents class ReferenceManager: """Manages reference lists (both default and custom).""" @staticmethod def configure_reference_lists(analyzer): """Configure reference lists based on language.""" config = ConfigManager.load_reference_config() language_key = "english" if st.session_state.language == 'en' else "japanese" language_config = config.get(language_key, {"unigrams": {}, "bigrams": {}, "trigrams": {}}) st.write("**Default Reference Lists**") # Track selected lists selected_lists = [] # Process each type of n-gram for ngram_type, type_lists in language_config.items(): if not type_lists: # Skip empty sections continue # Get enabled lists for this type enabled_lists = {k: v for k, v in type_lists.items() if v.get('enabled', True)} if enabled_lists: # Create section header st.write(f"**{ngram_type.title()}:**") # Create checkboxes for this type cols = st.columns(min(len(enabled_lists), 3)) # Max 3 columns for i, (list_key, list_config) in enumerate(enabled_lists.items()): with cols[i % 3]: checkbox_key = f"{ngram_type}_{list_key}_checkbox" selected = st.checkbox( list_config["display_name"], key=checkbox_key, help=list_config.get("description", "") ) if selected: selected_lists.append((ngram_type, list_key, list_config)) # Load selected reference lists ReferenceManager._update_default_reference_lists(selected_lists) # Display currently loaded lists ReferenceManager._display_loaded_lists() @staticmethod def _update_default_reference_lists(selected_lists: List[tuple]): """Update default reference lists based on selections.""" current_keys = set(SessionManager.get_reference_lists().keys()) new_keys = set(list_key for ngram_type, list_key, _ in selected_lists) # Use list_key directly # Remove deselected lists (only default lists, not custom ones) for key in current_keys - new_keys: if key in SessionManager.get_reference_lists(): # Only remove if it's a default list (not custom uploaded) if not SessionManager.is_custom_reference_list(key): SessionManager.remove_reference_list(key) # Add newly selected lists for ngram_type, list_key, list_config in selected_lists: # Use the YAML entry name directly (list_key) instead of combining with ngram_type if list_key not in SessionManager.get_reference_lists(): # Load the actual data data = ConfigManager.load_reference_list_data(list_config) if data: SessionManager.add_reference_list(list_key, data) # Use list_key directly @staticmethod def _display_loaded_lists(): """Display currently loaded reference lists.""" reference_lists = SessionManager.get_reference_lists() if reference_lists: st.write("**Currently Loaded Lists:**") for key, data in reference_lists.items(): # Calculate total vocabulary size vocab_size = 0 for file_type, file_data in data.items(): if isinstance(file_data, dict) and not file_data.get('is_custom_config', False): vocab_size += len(file_data) elif hasattr(file_data, '__len__'): vocab_size += len(file_data) if vocab_size > 0: st.write(f"- {key}: {vocab_size:,} entries") else: st.write(f"- {key}: No data loaded") @staticmethod def process_custom_reference_uploads(uploaded_files): """Process uploaded custom reference files.""" if not uploaded_files: return st.write("**Configure Uploaded Files:**") # Process each uploaded file all_configs = {} for uploaded_file in uploaded_files: file_key = uploaded_file.name # Check if already processed uploaded_configs = SessionManager.get_uploaded_file_configs() if file_key not in uploaded_configs: # Process new file config = ConfigManager.process_uploaded_file(uploaded_file) if config: SessionManager.set_uploaded_file_config(file_key, config) # Get configuration config = SessionManager.get_uploaded_file_configs().get(file_key) if not config: continue # Render configuration UI UIComponents.render_file_preview(file_key, config) # Get number of indices to create index_count = UIComponents.render_index_count_selector(file_key, config) if index_count > 0: st.write(f"**Configure {index_count} indices:**") # Collect configurations for this file file_configs = [] for i in range(index_count): index_config = UIComponents.render_index_configuration(file_key, config, i, index_count) file_configs.append(index_config) all_configs[file_key] = { 'file_name': config['file_name'], 'content': config['content'], 'delimiter': config['delimiter'], 'indices': file_configs } st.write("---") # Apply configuration button if st.button("Apply Configuration", type="primary"): success_count, errors = ConfigManager.apply_configurations(all_configs) UIComponents.render_configuration_results(success_count, errors) # Display currently configured indices UIComponents.display_configured_indices() @staticmethod def render_custom_upload_section(): """Render the custom reference list upload section.""" st.write("**Custom Reference Lists**") uploaded_refs = st.file_uploader( "Upload Custom Reference Lists", type=['csv', 'tsv', 'txt'], accept_multiple_files=True, help="Upload CSV/TSV files with frequency data", key="custom_refs_upload" ) if uploaded_refs: ReferenceManager.process_custom_reference_uploads(uploaded_refs) @staticmethod def validate_reference_lists() -> bool: """Validate that reference lists are available for analysis.""" reference_lists = SessionManager.get_reference_lists() return bool(reference_lists) @staticmethod def get_available_indices() -> List[str]: """Get list of available reference list indices.""" reference_lists = SessionManager.get_reference_lists() return list(reference_lists.keys()) @staticmethod def clear_custom_reference_lists(): """Clear all custom reference lists.""" reference_lists = SessionManager.get_reference_lists() custom_keys = [key for key in reference_lists.keys() if SessionManager.is_custom_reference_list(key)] for key in custom_keys: SessionManager.remove_reference_list(key) @staticmethod def export_reference_list_config() -> Dict[str, Any]: """Export current reference list configuration.""" reference_lists = SessionManager.get_reference_lists() config = {} for name, data in reference_lists.items(): if SessionManager.is_custom_reference_list(name): custom_data = data['token'] config[name] = { 'type': 'custom', 'word_column': custom_data.get('word_column'), 'freq_column': custom_data.get('freq_column'), 'file_path': custom_data.get('file_path') } else: config[name] = { 'type': 'default', 'data_size': len(data.get('token', {})) if isinstance(data.get('token'), dict) else 0 } return config