Spaces:
Building
Building
File size: 9,270 Bytes
a543e33 42f8800 a543e33 42f8800 a543e33 42f8800 a543e33 ca02ec3 a543e33 42f8800 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
"""
Reference list management module.
Handles default and custom reference list configuration and management.
"""
import streamlit as st
from typing import Dict, List, Any, Optional
from pathlib import Path
from web_app.session_manager import SessionManager
from web_app.config_manager import ConfigManager
from web_app.components.ui_components import UIComponents
class ReferenceManager:
    """Manages reference lists (both default and custom).

    All methods are static. State is read and written through
    ``SessionManager``; list definitions and file parsing are delegated to
    ``ConfigManager``; widgets are rendered with Streamlit and
    ``UIComponents``.
    """

    @staticmethod
    def configure_reference_lists(analyzer):
        """Render the default-list checkboxes and sync the loaded lists.

        The language section of the reference config is chosen from
        ``st.session_state.language`` (``'en'`` selects ``"english"``, any
        other value selects ``"japanese"``). One checkbox is drawn per
        enabled list, the ticked lists are loaded, unticked default lists are
        unloaded, and a summary of what is loaded is displayed.

        Args:
            analyzer: Not used by this method; kept so existing callers
                continue to work.
        """
        config = ConfigManager.load_reference_config()
        language_key = "english" if st.session_state.language == 'en' else "japanese"
        language_config = config.get(language_key, {"unigrams": {}, "bigrams": {}, "trigrams": {}})

        st.write("**Default Reference Lists**")

        # (ngram_type, list_key, list_config) for every ticked checkbox.
        selected_lists = []

        for ngram_type, type_lists in language_config.items():
            if not type_lists:  # Skip empty sections
                continue

            # An entry is considered enabled unless it says otherwise.
            enabled_lists = {k: v for k, v in type_lists.items() if v.get('enabled', True)}
            if not enabled_lists:
                continue

            st.write(f"**{ngram_type.title()}:**")

            # At most 3 columns; additional checkboxes wrap around to the
            # first column again.
            cols = st.columns(min(len(enabled_lists), 3))

            for i, (list_key, list_config) in enumerate(enabled_lists.items()):
                with cols[i % len(cols)]:
                    checkbox_key = f"{ngram_type}_{list_key}_checkbox"
                    selected = st.checkbox(
                        list_config["display_name"],
                        key=checkbox_key,
                        help=list_config.get("description", "")
                    )
                    if selected:
                        selected_lists.append((ngram_type, list_key, list_config))

        # Load selected reference lists
        ReferenceManager._update_default_reference_lists(selected_lists)

        # Display currently loaded lists
        ReferenceManager._display_loaded_lists()

    @staticmethod
    def _update_default_reference_lists(selected_lists: List[tuple]):
        """Make the loaded default lists match the current selection.

        Args:
            selected_lists: ``(ngram_type, list_key, list_config)`` tuples
                for the lists that are currently ticked.

        Lists are stored under ``list_key`` alone; ``ngram_type`` is not part
        of the key.
        NOTE(review): two entries with the same key under different n-gram
        types would share one slot - confirm keys are unique across types.
        """
        current_keys = set(SessionManager.get_reference_lists().keys())
        new_keys = {list_key for _, list_key, _ in selected_lists}

        # Remove deselected lists, but never custom (uploaded) ones.
        for key in current_keys - new_keys:
            if (key in SessionManager.get_reference_lists()
                    and not SessionManager.is_custom_reference_list(key)):
                SessionManager.remove_reference_list(key)

        # Add newly selected lists; lists that are already loaded are kept
        # as they are and not reloaded.
        for _, list_key, list_config in selected_lists:
            if list_key not in SessionManager.get_reference_lists():
                data = ConfigManager.load_reference_list_data(list_config)
                if data:
                    SessionManager.add_reference_list(list_key, data)

    @staticmethod
    def _count_vocabulary_entries(data: Dict[str, Any]) -> int:
        """Return the total number of entries across the parts of one list.

        Args:
            data: Mapping whose values are the individual data parts of a
                reference list.

        Returns:
            The sum of ``len(part)`` over all sized parts. A dict part
            flagged with a truthy ``'is_custom_config'`` is skipped, and so
            are parts that have no length.
        """
        total = 0
        for part in data.values():
            if isinstance(part, dict):
                # Handle dicts in their own branch so that a flagged custom
                # config cannot fall through to the generic sized-object
                # branch below and be counted as vocabulary.
                if not part.get('is_custom_config', False):
                    total += len(part)
            elif hasattr(part, '__len__'):
                total += len(part)
        return total

    @staticmethod
    def _display_loaded_lists():
        """Display currently loaded reference lists with their entry counts."""
        reference_lists = SessionManager.get_reference_lists()
        if not reference_lists:
            return

        st.write("**Currently Loaded Lists:**")
        for key, data in reference_lists.items():
            vocab_size = ReferenceManager._count_vocabulary_entries(data)
            if vocab_size > 0:
                st.write(f"- {key}: {vocab_size:,} entries")
            else:
                st.write(f"- {key}: No data loaded")

    @staticmethod
    def process_custom_reference_uploads(uploaded_files):
        """Render the configuration UI for uploaded custom reference files.

        Each upload is processed once and its config is cached in the
        session under the file name. For every file with a config, a preview
        and an index-count selector are rendered, followed by one
        configuration form per requested index. The collected settings are
        applied when the "Apply Configuration" button is pressed.

        Args:
            uploaded_files: Uploaded file objects exposing a ``name``
                attribute. Nothing is rendered when this is empty.
        """
        if not uploaded_files:
            return

        st.write("**Configure Uploaded Files:**")

        # file name -> settings gathered from the widgets below
        all_configs = {}

        for uploaded_file in uploaded_files:
            file_key = uploaded_file.name

            # Only process a file the first time it is seen; afterwards the
            # config stored in the session is reused.
            if file_key not in SessionManager.get_uploaded_file_configs():
                config = ConfigManager.process_uploaded_file(uploaded_file)
                if config:
                    SessionManager.set_uploaded_file_config(file_key, config)

            config = SessionManager.get_uploaded_file_configs().get(file_key)
            if not config:
                # No usable config for this file; skip it.
                continue

            UIComponents.render_file_preview(file_key, config)

            # Number of indices to create from this file
            index_count = UIComponents.render_index_count_selector(file_key, config)

            if index_count > 0:
                st.write(f"**Configure {index_count} indices:**")

                file_configs = [
                    UIComponents.render_index_configuration(file_key, config, i, index_count)
                    for i in range(index_count)
                ]

                all_configs[file_key] = {
                    'file_name': config['file_name'],
                    'content': config['content'],
                    'delimiter': config['delimiter'],
                    'indices': file_configs
                }

            # Visual separator between files
            st.write("---")

        if st.button("Apply Configuration", type="primary"):
            success_count, errors = ConfigManager.apply_configurations(all_configs)
            UIComponents.render_configuration_results(success_count, errors)

        # Display currently configured indices
        UIComponents.display_configured_indices()

    @staticmethod
    def render_custom_upload_section():
        """Render the custom reference list upload section."""
        st.write("**Custom Reference Lists**")

        uploaded_refs = st.file_uploader(
            "Upload Custom Reference Lists",
            type=['csv', 'tsv', 'txt'],
            accept_multiple_files=True,
            help="Upload CSV/TSV files with frequency data",
            key="custom_refs_upload"
        )

        if uploaded_refs:
            ReferenceManager.process_custom_reference_uploads(uploaded_refs)

    @staticmethod
    def validate_reference_lists() -> bool:
        """Return True when at least one reference list is loaded."""
        return bool(SessionManager.get_reference_lists())

    @staticmethod
    def get_available_indices() -> List[str]:
        """Return the keys of all currently loaded reference lists."""
        return list(SessionManager.get_reference_lists().keys())

    @staticmethod
    def clear_custom_reference_lists():
        """Remove every custom reference list; default lists are kept."""
        reference_lists = SessionManager.get_reference_lists()
        # Collect the keys first so nothing is removed while iterating.
        custom_keys = [key for key in reference_lists.keys()
                       if SessionManager.is_custom_reference_list(key)]
        for key in custom_keys:
            SessionManager.remove_reference_list(key)

    @staticmethod
    def _describe_reference_list(data: Dict[str, Any], is_custom: bool) -> Dict[str, Any]:
        """Build the export entry for a single reference list.

        Args:
            data: The stored data of the list; only its ``'token'`` part is
                inspected.
            is_custom: Whether the list is a custom one.

        Returns:
            For custom lists, ``type`` plus the ``word_column``,
            ``freq_column`` and ``file_path`` values of the ``'token'`` part
            (``None`` for any that are absent). For default lists, ``type``
            plus ``data_size``, the length of the ``'token'`` part. A missing
            or non-dict ``'token'`` part is treated as empty in both cases
            instead of raising.
        """
        token_part = data.get('token')
        if not isinstance(token_part, dict):
            token_part = {}

        if is_custom:
            return {
                'type': 'custom',
                'word_column': token_part.get('word_column'),
                'freq_column': token_part.get('freq_column'),
                'file_path': token_part.get('file_path')
            }
        return {
            'type': 'default',
            'data_size': len(token_part)
        }

    @staticmethod
    def export_reference_list_config() -> Dict[str, Any]:
        """Export a summary of every loaded reference list, keyed by name."""
        reference_lists = SessionManager.get_reference_lists()
        return {
            name: ReferenceManager._describe_reference_list(
                data, SessionManager.is_custom_reference_list(name)
            )
            for name, data in reference_lists.items()
        }
|