File size: 9,270 Bytes
a543e33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42f8800
a543e33
 
 
 
 
 
 
 
 
 
42f8800
 
a543e33
 
 
 
42f8800
a543e33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca02ec3
 
a543e33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42f8800
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
"""
Reference list management module.
Handles default and custom reference list configuration and management.
"""

import streamlit as st
from typing import Dict, List, Any, Optional
from pathlib import Path

from web_app.session_manager import SessionManager
from web_app.config_manager import ConfigManager
from web_app.components.ui_components import UIComponents


class ReferenceManager:
    """Manages reference lists (both default and custom).

    Every method is a ``@staticmethod``: the class holds no state of its own.
    Loaded lists are read and written through ``SessionManager``; configuration
    and file parsing are delegated to ``ConfigManager``; widgets are drawn with
    Streamlit (``st``) and ``UIComponents``.
    """

    @staticmethod
    def configure_reference_lists(analyzer):
        """Render checkboxes for the default reference lists and apply the selection.

        The language section is chosen from ``st.session_state.language``:
        ``'en'`` selects ``"english"``, any other value selects ``"japanese"``.
        Each non-empty n-gram section gets a header and one checkbox per enabled
        list, laid out in at most three columns.

        Args:
            analyzer: Not used by this method; kept so existing callers that
                pass it keep working.
        """
        config = ConfigManager.load_reference_config()
        language_key = "english" if st.session_state.language == 'en' else "japanese"
        language_config = config.get(
            language_key, {"unigrams": {}, "bigrams": {}, "trigrams": {}}
        )

        st.write("**Default Reference Lists**")

        # (ngram_type, list_key, list_config) for every ticked checkbox.
        selected_lists = []

        for ngram_type, type_lists in language_config.items():
            if not type_lists:  # Skip empty sections
                continue

            # Lists are enabled unless the config explicitly says otherwise.
            enabled_lists = {
                key: cfg for key, cfg in type_lists.items() if cfg.get('enabled', True)
            }
            if not enabled_lists:
                continue

            st.write(f"**{ngram_type.title()}:**")

            # Never create more columns than there are lists (max 3); items
            # beyond the third wrap around to the first column again.
            column_count = min(len(enabled_lists), 3)
            cols = st.columns(column_count)

            for i, (list_key, list_config) in enumerate(enabled_lists.items()):
                with cols[i % column_count]:
                    selected = st.checkbox(
                        # Fall back to the key so one incomplete config entry
                        # does not crash the whole section.
                        list_config.get("display_name", list_key),
                        key=f"{ngram_type}_{list_key}_checkbox",
                        help=list_config.get("description", "")
                    )

                    if selected:
                        selected_lists.append((ngram_type, list_key, list_config))

        # Load selected reference lists
        ReferenceManager._update_default_reference_lists(selected_lists)

        # Display currently loaded lists
        ReferenceManager._display_loaded_lists()

    @staticmethod
    def _update_default_reference_lists(selected_lists: List[tuple]):
        """Synchronise the loaded default lists with the current selection.

        Loaded lists that are no longer selected are removed unless
        ``SessionManager.is_custom_reference_list`` reports them as custom.
        Selected lists that are not loaded yet are loaded through
        ``ConfigManager.load_reference_list_data`` and stored under their
        ``list_key`` (the n-gram type is not part of the key).

        Args:
            selected_lists: ``(ngram_type, list_key, list_config)`` tuples.
        """
        current_keys = set(SessionManager.get_reference_lists().keys())
        new_keys = {list_key for _, list_key, _ in selected_lists}

        # Remove deselected lists (only default lists, not custom ones)
        for key in current_keys - new_keys:
            if (key in SessionManager.get_reference_lists()
                    and not SessionManager.is_custom_reference_list(key)):
                SessionManager.remove_reference_list(key)

        # Add newly selected lists
        for _, list_key, list_config in selected_lists:
            # Re-query on every iteration so a key added earlier in this loop
            # is seen and its data is not loaded a second time.
            if list_key in SessionManager.get_reference_lists():
                continue

            data = ConfigManager.load_reference_list_data(list_config)
            if data:
                SessionManager.add_reference_list(list_key, data)

    @staticmethod
    def _count_entries(data: Dict[str, Any]) -> int:
        """Return the total number of entries across the parts of one reference list.

        Dicts flagged with a truthy ``'is_custom_config'`` are configuration,
        not vocabulary, and are skipped. Every other value that supports
        ``len()`` contributes its length; values without a length are ignored.

        Args:
            data: Mapping of part name to that part's data.
        """
        total = 0
        for file_data in data.values():
            if isinstance(file_data, dict) and file_data.get('is_custom_config', False):
                # A dict also has __len__, so it must be skipped explicitly;
                # otherwise its config keys would be counted as entries.
                continue
            if hasattr(file_data, '__len__'):
                total += len(file_data)
        return total

    @staticmethod
    def _display_loaded_lists():
        """Display currently loaded reference lists with their entry counts."""
        reference_lists = SessionManager.get_reference_lists()
        if not reference_lists:
            return

        st.write("**Currently Loaded Lists:**")
        for key, data in reference_lists.items():
            entry_count = ReferenceManager._count_entries(data)
            if entry_count > 0:
                st.write(f"- {key}: {entry_count:,} entries")
            else:
                st.write(f"- {key}: No data loaded")

    @staticmethod
    def process_custom_reference_uploads(uploaded_files):
        """Render the configuration UI for uploaded files and apply it on request.

        For each file: parse it once (the result is cached via
        ``SessionManager`` under the file name), show a preview, ask how many
        indices to create and render one configuration form per index. The
        collected configurations are handed to
        ``ConfigManager.apply_configurations`` when the button is pressed.

        Args:
            uploaded_files: Uploaded file objects exposing a ``name`` attribute.
                Nothing is rendered when this is empty or ``None``.
        """
        if not uploaded_files:
            return

        st.write("**Configure Uploaded Files:**")

        # file name -> configuration payload for ConfigManager.apply_configurations
        all_configs = {}

        for uploaded_file in uploaded_files:
            file_key = uploaded_file.name

            # Parse only files that have not been processed before.
            if file_key not in SessionManager.get_uploaded_file_configs():
                parsed = ConfigManager.process_uploaded_file(uploaded_file)
                if parsed:
                    SessionManager.set_uploaded_file_config(file_key, parsed)

            # Files that could not be processed have no stored config; skip them.
            config = SessionManager.get_uploaded_file_configs().get(file_key)
            if not config:
                continue

            UIComponents.render_file_preview(file_key, config)

            index_count = UIComponents.render_index_count_selector(file_key, config)

            if index_count > 0:
                st.write(f"**Configure {index_count} indices:**")

                all_configs[file_key] = {
                    'file_name': config['file_name'],
                    'content': config['content'],
                    'delimiter': config['delimiter'],
                    'indices': [
                        UIComponents.render_index_configuration(
                            file_key, config, i, index_count
                        )
                        for i in range(index_count)
                    ]
                }

            st.write("---")

        # Apply configuration button
        if st.button("Apply Configuration", type="primary"):
            success_count, errors = ConfigManager.apply_configurations(all_configs)
            UIComponents.render_configuration_results(success_count, errors)

        # Display currently configured indices
        UIComponents.display_configured_indices()

    @staticmethod
    def render_custom_upload_section():
        """Render the custom reference list upload section.

        Shows a multi-file uploader (csv/tsv/txt) and, once files are present,
        passes them to ``process_custom_reference_uploads``.
        """
        st.write("**Custom Reference Lists**")
        uploaded_refs = st.file_uploader(
            "Upload Custom Reference Lists",
            type=['csv', 'tsv', 'txt'],
            accept_multiple_files=True,
            help="Upload CSV/TSV files with frequency data",
            key="custom_refs_upload"
        )

        if uploaded_refs:
            ReferenceManager.process_custom_reference_uploads(uploaded_refs)

    @staticmethod
    def validate_reference_lists() -> bool:
        """Return ``True`` when at least one reference list is loaded."""
        return bool(SessionManager.get_reference_lists())

    @staticmethod
    def get_available_indices() -> List[str]:
        """Return the names of all currently loaded reference lists."""
        return list(SessionManager.get_reference_lists().keys())

    @staticmethod
    def clear_custom_reference_lists():
        """Remove every reference list that ``SessionManager`` reports as custom."""
        # Snapshot the names first so removal never happens while the
        # underlying mapping is being iterated.
        custom_keys = [
            key for key in list(SessionManager.get_reference_lists().keys())
            if SessionManager.is_custom_reference_list(key)
        ]

        for key in custom_keys:
            SessionManager.remove_reference_list(key)

    @staticmethod
    def _summarize_reference_list(data: Any, is_custom: bool) -> Dict[str, Any]:
        """Build the export entry for a single reference list.

        Args:
            data: The stored reference list; its ``'token'`` entry is inspected.
            is_custom: Whether the list is a custom (uploaded) one.

        Returns:
            For custom lists: ``type``, ``word_column``, ``freq_column`` and
            ``file_path`` (each ``None`` when absent). For default lists:
            ``type`` and ``data_size``, the length of the ``'token'`` dict or
            ``0`` when it is missing or not a dict.
        """
        token_data = data.get('token') if isinstance(data, dict) else None

        if is_custom:
            # Tolerate a missing or malformed 'token' entry instead of raising,
            # matching the defensive handling of default lists below.
            custom_data = token_data if isinstance(token_data, dict) else {}
            return {
                'type': 'custom',
                'word_column': custom_data.get('word_column'),
                'freq_column': custom_data.get('freq_column'),
                'file_path': custom_data.get('file_path')
            }

        return {
            'type': 'default',
            'data_size': len(token_data) if isinstance(token_data, dict) else 0
        }

    @staticmethod
    def export_reference_list_config() -> Dict[str, Any]:
        """Export current reference list configuration.

        Returns:
            Mapping of list name to the summary produced by
            ``_summarize_reference_list``.
        """
        return {
            name: ReferenceManager._summarize_reference_list(
                data, SessionManager.is_custom_reference_list(name)
            )
            for name, data in SessionManager.get_reference_lists().items()
        }