Spaces:

egumasa
/

simple-text-analyzer

Building

File size: 9,270 Bytes

"""
Reference list management module.
Handles default and custom reference list configuration and management.
"""

import streamlit as st
from typing import Dict, List, Any, Optional
from pathlib import Path

from web_app.session_manager import SessionManager
from web_app.config_manager import ConfigManager
from web_app.components.ui_components import UIComponents


class ReferenceManager:
    """Manages reference lists (both default and custom).

    Every method is a static method: state is read and written through
    ``SessionManager``, configuration is loaded through ``ConfigManager``,
    and widgets are rendered with Streamlit / ``UIComponents``.
    """

    @staticmethod
    def configure_reference_lists(analyzer):
        """Render checkboxes for the default reference lists and load the selection.

        The language section ("english" when ``st.session_state.language`` is
        ``'en'``, otherwise "japanese") is read from the reference config. One
        checkbox is rendered per enabled list, grouped by n-gram type, and the
        ticked lists are then synchronised into the session.

        Args:
            analyzer: Not used by this method; kept so existing callers keep
                working.
        """
        # Tolerate a missing/empty config instead of failing on `.get`.
        config = ConfigManager.load_reference_config() or {}
        language_key = "english" if st.session_state.language == 'en' else "japanese"
        # `or` also covers a section that is present but empty/null.
        language_config = config.get(language_key) or {"unigrams": {}, "bigrams": {}, "trigrams": {}}

        st.write("**Default Reference Lists**")

        # (ngram_type, list_key, list_config) for every ticked checkbox
        selected_lists = []

        for ngram_type, type_lists in language_config.items():
            if not type_lists:  # Skip empty sections
                continue

            # Lists are enabled unless explicitly switched off in the config
            enabled_lists = {k: v for k, v in type_lists.items() if v.get('enabled', True)}
            if not enabled_lists:
                continue

            st.write(f"**{ngram_type.title()}:**")

            # At most 3 columns; checkboxes wrap around onto the same columns
            cols = st.columns(min(len(enabled_lists), 3))

            for i, (list_key, list_config) in enumerate(enabled_lists.items()):
                # Index by the real column count rather than a hard-coded 3
                with cols[i % len(cols)]:
                    checkbox_key = f"{ngram_type}_{list_key}_checkbox"
                    selected = st.checkbox(
                        # Fall back to the list key when no display name is configured
                        list_config.get("display_name", list_key),
                        key=checkbox_key,
                        help=list_config.get("description", "")
                    )

                    if selected:
                        selected_lists.append((ngram_type, list_key, list_config))

        # Load selected reference lists
        ReferenceManager._update_default_reference_lists(selected_lists)

        # Display currently loaded lists
        ReferenceManager._display_loaded_lists()

    @staticmethod
    def _update_default_reference_lists(selected_lists: List[tuple]):
        """Synchronise the session's default reference lists with the selection.

        Lists that are loaded but no longer selected are removed, unless
        ``SessionManager`` reports them as custom. Selected lists that are not
        loaded yet are loaded through ``ConfigManager`` and added.

        Args:
            selected_lists: ``(ngram_type, list_key, list_config)`` tuples for
                the ticked checkboxes.
        """
        # Lists are stored under the bare list_key (not combined with the
        # n-gram type), so the same key in two sections maps to one entry.
        selected_keys = {list_key for _, list_key, _ in selected_lists}
        loaded_keys = set(SessionManager.get_reference_lists().keys())

        # Remove deselected lists (only default lists, not custom ones)
        for key in loaded_keys - selected_keys:
            # Re-check membership in case an earlier removal also dropped this key
            if key not in SessionManager.get_reference_lists():
                continue
            if not SessionManager.is_custom_reference_list(key):
                SessionManager.remove_reference_list(key)

        # Add newly selected lists
        for _, list_key, list_config in selected_lists:
            if list_key in SessionManager.get_reference_lists():
                continue

            data = ConfigManager.load_reference_list_data(list_config)
            if data:
                SessionManager.add_reference_list(list_key, data)

    @staticmethod
    def _count_vocabulary(data: Any) -> int:
        """Return the number of countable entries in one reference list.

        Each value of ``data`` contributes its length, except dicts flagged
        with ``is_custom_config``: those hold settings rather than vocabulary,
        so their keys must not be counted. Values without a length are ignored.

        Args:
            data: Mapping of file type to loaded data for one reference list.

        Returns:
            Sum of the lengths of all countable values.
        """
        total = 0
        for _, file_data in data.items():
            if isinstance(file_data, dict):
                # Keep the custom-config test inside the dict branch so a
                # flagged dict cannot fall through to the generic length check.
                if not file_data.get('is_custom_config', False):
                    total += len(file_data)
            elif hasattr(file_data, '__len__'):
                total += len(file_data)
        return total

    @staticmethod
    def _display_loaded_lists():
        """Display currently loaded reference lists with their entry counts."""
        reference_lists = SessionManager.get_reference_lists()
        if not reference_lists:
            return

        st.write("**Currently Loaded Lists:**")
        for key, data in reference_lists.items():
            vocab_size = ReferenceManager._count_vocabulary(data)

            if vocab_size > 0:
                st.write(f"- {key}: {vocab_size:,} entries")
            elif SessionManager.is_custom_reference_list(key):
                # Custom lists may carry only a configuration dict, which is
                # deliberately excluded from the count above.
                st.write(f"- {key}: custom configuration")
            else:
                st.write(f"- {key}: No data loaded")

    @staticmethod
    def process_custom_reference_uploads(uploaded_files):
        """Render configuration widgets for uploaded reference files.

        Each file is processed once (its config is cached in the session under
        the file name), previewed, and given one configuration form per
        requested index. Pressing "Apply Configuration" hands the collected
        configurations to ``ConfigManager.apply_configurations``.

        Args:
            uploaded_files: Uploaded file objects; only ``.name`` is read here,
                the objects themselves are passed on to ``ConfigManager``.
        """
        if not uploaded_files:
            return

        st.write("**Configure Uploaded Files:**")

        # file name -> collected configuration, filled in while rendering
        all_configs = {}

        for uploaded_file in uploaded_files:
            file_key = uploaded_file.name

            # Only process files that have no cached config yet
            if file_key not in SessionManager.get_uploaded_file_configs():
                new_config = ConfigManager.process_uploaded_file(uploaded_file)
                if new_config:
                    SessionManager.set_uploaded_file_config(file_key, new_config)

            # Re-read from the session so cached and fresh configs share one path
            config = SessionManager.get_uploaded_file_configs().get(file_key)
            if not config:
                continue

            UIComponents.render_file_preview(file_key, config)

            # Number of indices the user wants to derive from this file
            index_count = UIComponents.render_index_count_selector(file_key, config)

            # Guard against a selector that returns None
            if index_count and index_count > 0:
                st.write(f"**Configure {index_count} indices:**")

                file_configs = [
                    UIComponents.render_index_configuration(file_key, config, i, index_count)
                    for i in range(index_count)
                ]

                all_configs[file_key] = {
                    'file_name': config['file_name'],
                    'content': config['content'],
                    'delimiter': config['delimiter'],
                    'indices': file_configs
                }

            st.write("---")

        # Apply configuration button
        if st.button("Apply Configuration", type="primary"):
            success_count, errors = ConfigManager.apply_configurations(all_configs)
            UIComponents.render_configuration_results(success_count, errors)

        # Display currently configured indices
        UIComponents.display_configured_indices()

    @staticmethod
    def render_custom_upload_section():
        """Render the custom reference list upload section."""
        st.write("**Custom Reference Lists**")
        uploaded_refs = st.file_uploader(
            "Upload Custom Reference Lists",
            type=['csv', 'tsv', 'txt'],
            accept_multiple_files=True,
            help="Upload CSV/TSV files with frequency data",
            key="custom_refs_upload"
        )

        if uploaded_refs:
            ReferenceManager.process_custom_reference_uploads(uploaded_refs)

    @staticmethod
    def validate_reference_lists() -> bool:
        """Return True when at least one reference list is loaded."""
        return bool(SessionManager.get_reference_lists())

    @staticmethod
    def get_available_indices() -> List[str]:
        """Return the keys of all currently loaded reference lists."""
        return list(SessionManager.get_reference_lists().keys())

    @staticmethod
    def clear_custom_reference_lists():
        """Remove every reference list that ``SessionManager`` reports as custom."""
        reference_lists = SessionManager.get_reference_lists()
        # Snapshot the keys first so removal cannot disturb the iteration
        custom_keys = [key for key in reference_lists.keys()
                       if SessionManager.is_custom_reference_list(key)]

        for key in custom_keys:
            SessionManager.remove_reference_list(key)

    @staticmethod
    def export_reference_list_config() -> Dict[str, Any]:
        """Export a summary of the current reference list configuration.

        Returns:
            Mapping of list name to a summary dict. Custom lists get
            ``type='custom'`` plus ``word_column``, ``freq_column`` and
            ``file_path`` read from their ``'token'`` entry (``None`` when
            absent). Default lists get ``type='default'`` and ``data_size``,
            the length of their ``'token'`` dict (0 when it is not a dict).
        """
        reference_lists = SessionManager.get_reference_lists()
        config = {}

        for name, data in reference_lists.items():
            # A list without a dict under 'token' must not abort the export
            token_data = data.get('token') if isinstance(data, dict) else None
            if not isinstance(token_data, dict):
                token_data = {}

            if SessionManager.is_custom_reference_list(name):
                config[name] = {
                    'type': 'custom',
                    'word_column': token_data.get('word_column'),
                    'freq_column': token_data.get('freq_column'),
                    'file_path': token_data.get('file_path')
                }
            else:
                config[name] = {
                    'type': 'default',
                    'data_size': len(token_data)
                }

        return config