# simple-text-analyzer / web_app / reference_manager.py
# egumasa's picture
# more sophistication indice selection
# 42f8800
"""
Reference list management module.
Handles default and custom reference list configuration and management.
"""
import streamlit as st
from typing import Dict, List, Any, Optional
from pathlib import Path
from web_app.session_manager import SessionManager
from web_app.config_manager import ConfigManager
from web_app.components.ui_components import UIComponents
class ReferenceManager:
    """Manages reference lists (both default and custom).

    Every method is a static method; the class is used as a namespace.
    Reference lists are read and written through ``SessionManager``,
    configuration and file loading go through ``ConfigManager``, and the
    widgets are drawn with Streamlit (``st``) and ``UIComponents``.
    """

    @staticmethod
    def configure_reference_lists(analyzer):
        """Render the default reference list checkboxes and apply the selection.

        The configuration section is chosen from ``st.session_state.language``
        (``'en'`` selects ``"english"``, anything else ``"japanese"``). For
        each non-empty n-gram section, one checkbox is drawn per enabled list,
        laid out over at most three columns. The ticked lists are then passed
        to ``_update_default_reference_lists`` and the loaded lists are shown.

        Args:
            analyzer: Not used by this method; kept so existing callers
                continue to work.
        """
        # Fall back to an empty mapping if the loader returns nothing, so the
        # per-language default below still applies.
        config = ConfigManager.load_reference_config() or {}
        language_key = "english" if st.session_state.language == 'en' else "japanese"
        language_config = config.get(language_key, {"unigrams": {}, "bigrams": {}, "trigrams": {}})

        st.write("**Default Reference Lists**")

        max_columns = 3
        selected_lists = []

        for ngram_type, type_lists in language_config.items():
            if not type_lists:  # Skip empty sections
                continue

            # Entries are shown unless they are explicitly disabled.
            enabled_lists = {
                name: entry
                for name, entry in type_lists.items()
                if entry.get('enabled', True)
            }
            if not enabled_lists:
                continue

            st.write(f"**{ngram_type.title()}:**")

            column_count = min(len(enabled_lists), max_columns)
            cols = st.columns(column_count)
            for i, (list_key, list_config) in enumerate(enabled_lists.items()):
                with cols[i % column_count]:
                    checkbox_key = f"{ngram_type}_{list_key}_checkbox"
                    selected = st.checkbox(
                        # Use the entry key as label when no display name is set.
                        list_config.get("display_name", list_key),
                        key=checkbox_key,
                        help=list_config.get("description", ""),
                    )
                if selected:
                    selected_lists.append((ngram_type, list_key, list_config))

        # Load selected reference lists
        ReferenceManager._update_default_reference_lists(selected_lists)

        # Display currently loaded lists
        ReferenceManager._display_loaded_lists()

    @staticmethod
    def _update_default_reference_lists(selected_lists: List[tuple]) -> None:
        """Synchronise the stored default lists with the current selection.

        Args:
            selected_lists: ``(ngram_type, list_key, list_config)`` tuples for
                the lists that are currently ticked. Lists are stored under
                ``list_key`` alone; ``ngram_type`` is not part of the key.
        """
        current_keys = set(SessionManager.get_reference_lists().keys())
        new_keys = {list_key for _, list_key, _ in selected_lists}

        # Remove deselected lists; custom uploaded lists are never removed here.
        for key in current_keys - new_keys:
            if (key in SessionManager.get_reference_lists()
                    and not SessionManager.is_custom_reference_list(key)):
                SessionManager.remove_reference_list(key)

        # Add newly selected lists that are not stored yet.
        for _, list_key, list_config in selected_lists:
            if list_key not in SessionManager.get_reference_lists():
                data = ConfigManager.load_reference_list_data(list_config)
                if data:
                    SessionManager.add_reference_list(list_key, data)

    @staticmethod
    def _count_entries(data: Any) -> int:
        """Return the number of entries held by one reference list.

        The sizes of all values in ``data`` are summed. Dict values flagged
        with a truthy ``'is_custom_config'`` are skipped: they describe a
        custom file rather than hold vocabulary. Values without ``__len__``
        contribute nothing.

        Args:
            data: The stored payload of one reference list.

        Returns:
            The summed size, or ``0`` when ``data`` is not a dict.
        """
        if not isinstance(data, dict):
            return 0

        total = 0
        for file_data in data.values():
            if isinstance(file_data, dict):
                # A dict is either real data or a custom-file config; only
                # the former counts. This must be decided before the generic
                # __len__ check, because dicts are sized as well.
                if not file_data.get('is_custom_config', False):
                    total += len(file_data)
            elif hasattr(file_data, '__len__'):
                total += len(file_data)
        return total

    @staticmethod
    def _display_loaded_lists() -> None:
        """Display currently loaded reference lists with their entry counts."""
        reference_lists = SessionManager.get_reference_lists()
        if not reference_lists:
            return

        st.write("**Currently Loaded Lists:**")
        for key, data in reference_lists.items():
            vocab_size = ReferenceManager._count_entries(data)
            if vocab_size > 0:
                st.write(f"- {key}: {vocab_size:,} entries")
            else:
                st.write(f"- {key}: No data loaded")

    @staticmethod
    def process_custom_reference_uploads(uploaded_files):
        """Render the configuration UI for uploaded custom reference files.

        Each file is processed once via ``ConfigManager.process_uploaded_file``
        and its result stored under the file name; later calls reuse the
        stored configuration. Files for which no configuration is available
        are skipped. Pressing "Apply Configuration" hands the collected index
        configurations to ``ConfigManager.apply_configurations``.

        Args:
            uploaded_files: Uploaded file objects exposing a ``name``
                attribute. Nothing is rendered when this is empty.
        """
        if not uploaded_files:
            return

        st.write("**Configure Uploaded Files:**")

        all_configs = {}
        for uploaded_file in uploaded_files:
            file_key = uploaded_file.name

            # Only process files that have no stored configuration yet.
            uploaded_configs = SessionManager.get_uploaded_file_configs()
            if file_key not in uploaded_configs:
                config = ConfigManager.process_uploaded_file(uploaded_file)
                if config:
                    SessionManager.set_uploaded_file_config(file_key, config)

            config = SessionManager.get_uploaded_file_configs().get(file_key)
            if not config:
                continue

            UIComponents.render_file_preview(file_key, config)

            # Number of indices the user wants to create from this file
            index_count = UIComponents.render_index_count_selector(file_key, config)
            if index_count > 0:
                st.write(f"**Configure {index_count} indices:**")

                file_configs = [
                    UIComponents.render_index_configuration(file_key, config, i, index_count)
                    for i in range(index_count)
                ]

                all_configs[file_key] = {
                    'file_name': config['file_name'],
                    'content': config['content'],
                    'delimiter': config['delimiter'],
                    'indices': file_configs,
                }

            st.write("---")

        # Apply configuration button
        if st.button("Apply Configuration", type="primary"):
            success_count, errors = ConfigManager.apply_configurations(all_configs)
            UIComponents.render_configuration_results(success_count, errors)

        # Display currently configured indices
        UIComponents.display_configured_indices()

    @staticmethod
    def render_custom_upload_section():
        """Render the custom reference list upload section.

        Shows a multi-file uploader for ``csv``/``tsv``/``txt`` files and, when
        files are present, passes them to ``process_custom_reference_uploads``.
        """
        st.write("**Custom Reference Lists**")
        uploaded_refs = st.file_uploader(
            "Upload Custom Reference Lists",
            type=['csv', 'tsv', 'txt'],
            accept_multiple_files=True,
            help="Upload CSV/TSV files with frequency data",
            key="custom_refs_upload",
        )
        if uploaded_refs:
            ReferenceManager.process_custom_reference_uploads(uploaded_refs)

    @staticmethod
    def validate_reference_lists() -> bool:
        """Return ``True`` when at least one reference list is stored."""
        return bool(SessionManager.get_reference_lists())

    @staticmethod
    def get_available_indices() -> List[str]:
        """Return the keys of all stored reference lists."""
        return list(SessionManager.get_reference_lists().keys())

    @staticmethod
    def clear_custom_reference_lists():
        """Remove every stored reference list that is marked as custom."""
        reference_lists = SessionManager.get_reference_lists()
        # Collect the keys first so nothing is removed while iterating.
        custom_keys = [
            key for key in reference_lists.keys()
            if SessionManager.is_custom_reference_list(key)
        ]
        for key in custom_keys:
            SessionManager.remove_reference_list(key)

    @staticmethod
    def export_reference_list_config() -> Dict[str, Any]:
        """Export a summary of the current reference list configuration.

        Returns:
            A dict keyed by list name. Custom lists map to ``type``,
            ``word_column``, ``freq_column`` and ``file_path`` (taken from the
            list's ``'token'`` entry; ``None`` when unavailable). Default
            lists map to ``type`` and ``data_size``, the size of the
            ``'token'`` dict, or ``0`` when that entry is not a dict.
        """
        reference_lists = SessionManager.get_reference_lists()
        config = {}
        for name, data in reference_lists.items():
            token_data = data.get('token') if isinstance(data, dict) else None
            if SessionManager.is_custom_reference_list(name):
                # Tolerate a missing or non-dict 'token' entry instead of
                # failing the whole export on one list.
                custom_data = token_data if isinstance(token_data, dict) else {}
                config[name] = {
                    'type': 'custom',
                    'word_column': custom_data.get('word_column'),
                    'freq_column': custom_data.get('freq_column'),
                    'file_path': custom_data.get('file_path'),
                }
            else:
                config[name] = {
                    'type': 'default',
                    'data_size': len(token_data) if isinstance(token_data, dict) else 0,
                }
        return config