Spaces:
Building
Building
| """ | |
| UI components module for the text analysis application. | |
| Contains reusable UI components and rendering functions. | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| from typing import Dict, List, Any, Optional, Tuple | |
| from pathlib import Path | |
| from web_app.utils import MemoryFileHandler | |
| from web_app.config_manager import ConfigManager | |
| from web_app.session_manager import SessionManager | |
| class UIComponents: | |
| """Reusable UI components for the application.""" | |
def render_file_preview(file_key: str, config: Dict[str, Any]):
    """Show a heading for one file followed by its preview table.

    Args:
        file_key: Identifier of the file; rendered as the section heading.
        config: Per-file configuration. ``config['preview']`` is handed to
            ``st.dataframe`` unchanged.
    """
    # Heading first, then the fixed "Preview" caption.
    for markdown_line in (f"### {file_key}", "**Preview:**"):
        st.write(markdown_line)

    st.dataframe(data=config['preview'], use_container_width=True)
def render_index_count_selector(file_key: str, config: Dict[str, Any]) -> int:
    """Ask the user how many indices to build from one file.

    The upper bound is the number of columns that
    ``ConfigManager.get_numeric_columns`` reports for ``config['preview']``.

    Args:
        file_key: Identifier of the file; used to build the widget key.
        config: Per-file configuration containing a ``'preview'`` entry.

    Returns:
        The selected count, or ``0`` (after showing a warning) when no numeric
        column is available.
    """
    available = len(ConfigManager.get_numeric_columns(config['preview']))

    # Nothing to index: warn and bail out before rendering the selector.
    if available == 0:
        st.warning("No numeric columns found in this file.")
        return 0

    choices = list(range(1, available + 1))
    return st.selectbox(
        "Number of indices to create",
        options=choices,
        key=f"index_count_{file_key}",
        help=f"You can create up to {available} indices from this file",
    )
def render_index_configuration(file_key: str, config: Dict[str, Any],
                               index_num: int, count: int) -> Dict[str, str]:
    """Render the three-column form that configures one index.

    Args:
        file_key: Identifier of the file; part of every widget key.
        config: Per-file configuration; reads ``config['columns']`` for the
            column pickers and ``config['base_name']`` for the default name.
        index_num: Zero-based position of the index being configured.
        count: Total number of indices requested. Not used by this function;
            kept so existing callers keep working.

    Returns:
        A dict with the keys ``'word_column'``, ``'score_column'`` and
        ``'index_name'`` holding the current widget values.
    """
    ordinal = index_num + 1
    st.write(f"**Index {ordinal}:**")

    word_slot, score_slot, name_slot = st.columns(3)
    # Shared tail that keeps widget keys unique per file and per index.
    key_tail = f"{file_key}_{index_num}"

    with word_slot:
        chosen_word_column = st.selectbox(
            "Word Column",
            options=config['columns'],
            key=f"word_col_{key_tail}",
            help="Column containing words/tokens",
        )

    with score_slot:
        chosen_score_column = st.selectbox(
            "Score Column",
            options=config['columns'],
            key=f"score_col_{key_tail}",
            help="Column containing frequency/score values",
        )

    with name_slot:
        chosen_name = st.text_input(
            "Index Name",
            value=f"{config['base_name']}_{ordinal}",
            key=f"index_name_{key_tail}",
            help="Name for this reference index",
        )

    return dict(
        word_column=chosen_word_column,
        score_column=chosen_score_column,
        index_name=chosen_name,
    )
def render_language_selector():
    """Render the language picker and apply a change once it is confirmed.

    Reads ``st.session_state.language`` to preselect the current language.
    When the picked value differs, the warning flag is raised, the warning is
    displayed, and a confirmation button is offered; pressing it stores the
    new language, calls ``SessionManager.handle_language_change`` and reruns
    the script.
    """
    def _language_label(code):
        if code == 'en':
            return 'English'
        return 'Japanese'

    st.subheader("Language")

    preselected = 1 if st.session_state.language != 'en' else 0
    picked = st.selectbox(
        "Select Language",
        options=['en', 'ja'],
        format_func=_language_label,
        index=preselected,
        key='language_selector',
    )

    # Nothing to do while the picker still shows the active language.
    if picked == st.session_state.language:
        return

    st.session_state.show_language_warning = True
    UIComponents.display_language_warning()

    if not st.button("Confirm Language Change"):
        return

    st.session_state.language = picked
    SessionManager.handle_language_change()
    st.rerun()
def render_model_selector():
    """Render the SpaCy model-size picker.

    Reads ``st.session_state.model_size`` to preselect the current size. When
    the user picks a different size, the new value is stored and
    ``SessionManager.clear_analyzers`` is called.
    """
    def _size_label(size):
        if size == 'trf':
            return 'Transformer (trf)'
        return 'Medium (md)'

    st.subheader("SpaCy Model")

    preselected = 1 if st.session_state.model_size != 'md' else 0
    picked_size = st.selectbox(
        "Model Size",
        options=['md', 'trf'],
        format_func=_size_label,
        index=preselected,
    )

    # Leave session state and analyzers alone unless the size really changed.
    if picked_size == st.session_state.model_size:
        return

    st.session_state.model_size = picked_size
    SessionManager.clear_analyzers()
def render_tool_selector():
    """Render the analysis-tool radio group and return the chosen tool name.

    The widget is bound to the session-state key ``'tool_choice'``.
    """
    tool_names = [
        'Lexical Sophistication',
        'POS & Dependency Parser',
        'Frequency Analysis',
        'Corpus Data Visualizer',
    ]

    st.subheader("Analysis Tools")
    chosen_tool = st.radio("Select Tool", options=tool_names, key='tool_choice')
    return chosen_tool
def display_language_warning():
    """Show the "language change clears everything" warning when flagged.

    The warning is rendered only while the session-state flag
    ``'show_language_warning'`` is truthy; a missing flag counts as off.
    """
    if not st.session_state.get('show_language_warning', False):
        return

    # The leading glyph was mis-decoded UTF-8 ("β οΈ"); restored to the
    # warning sign it encodes.
    st.warning("⚠️ Changing language will clear all current inputs and outputs.")
def render_text_input(label: str, key_suffix: str) -> str:
    """Collect text either from a pasted text area or an uploaded ``.txt`` file.

    Args:
        label: Human-readable name of the text, used in the text-area label
            and placeholder.
        key_suffix: Suffix appended to every widget key so the component can
            be rendered more than once.

    Returns:
        The entered or uploaded text. An empty string is returned when no
        file has been uploaded yet or when reading the upload fails (an error
        message is shown in the failure case).
    """
    input_source = st.radio(
        "Input Method",
        options=['Paste Text', 'Upload File'],
        horizontal=True,
        key=f"input_method_{key_suffix}",
    )

    # Paste mode: the text area's value is the result.
    if input_source != 'Upload File':
        return st.text_area(
            f"Enter {label}",
            height=200,
            placeholder=f"Paste your {label.lower()} here...",
            key=f"text_area_{key_suffix}",
        )

    upload = st.file_uploader(
        "Upload Text File",
        type=['txt'],
        accept_multiple_files=False,
        key=f"file_upload_{key_suffix}",
    )
    if not upload:
        return ""

    try:
        # Read from memory instead of touching the filesystem.
        loaded_text = MemoryFileHandler.process_uploaded_file(upload, as_text=True)
        if not loaded_text:
            st.error("Failed to read uploaded file. Please try again.")
            return ""
    except Exception as e:
        st.error(f"Error reading uploaded file: {e!s}")
        return ""

    return loaded_text
def render_analysis_options():
    """Render the analysis configuration panel and return the chosen options.

    The panel consists of the hierarchical reference-list / measure selection
    (delegated to ``UIComponents.render_enhanced_reference_selection``), the
    token/lemma analysis toggles, a word-type filter and an "advanced"
    expander holding the legacy log-transform switch.

    Returns:
        A dict with the keys ``'token_analysis'``, ``'lemma_analysis'``,
        ``'word_type_filter'`` (``None``, ``'CW'`` or ``'FW'``),
        ``'selected_measures'``, ``'log_transforms'``,
        ``'use_smart_defaults'`` (the negation of the legacy switch) and
        ``'legacy_log_transform'``.
    """
    def _word_type_label(code):
        if code is None:
            return 'All Words ▼'
        return 'Content Words' if code == 'CW' else 'Function Words'

    # Glyphs in the user-facing strings below were mis-decoded UTF-8 and have
    # been restored where the original character is unambiguous.
    st.subheader("🔧 Analysis Configuration")

    config = ConfigManager.load_reference_config()
    reference_lists = SessionManager.get_reference_lists()

    # NOTE(review): the glyph after "###" is undecodable mojibake (a 4-byte
    # emoji whose trailing bytes were lost); restore it from the original file.
    st.write("### π Reference Lists & Measures")
    selected_measures, log_transforms = UIComponents.render_enhanced_reference_selection(
        config, reference_lists
    )

    st.write("### 🎯 Analysis Types")
    token_slot, lemma_slot = st.columns(2)
    with token_slot:
        token_analysis = st.checkbox("Token-based", value=True, key="token_analysis_enabled")
    with lemma_slot:
        lemma_analysis = st.checkbox("Lemma-based", value=True, key="lemma_analysis_enabled")

    st.write("### ⚙️ Global Options")
    word_type_filter = st.selectbox(
        "Word Type Filter:",
        options=[None, 'CW', 'FW'],
        format_func=_word_type_label,
        key="word_type_filter",
    )

    with st.expander("🎯 Advanced Configuration (Optional)", expanded=False):
        st.info("ℹ️ **Smart Defaults Active**: The system automatically applies appropriate settings. "
                "Expand this section only if you need custom control.")

        legacy_log_toggle = st.checkbox(
            "Apply log₁₀ transformation to ALL measures (Legacy Mode)",
            value=False,
            help="⚠️ Not recommended: This applies log transformation to all measures, "
                 "including those where it's scientifically inappropriate (e.g., concreteness ratings).",
            key="legacy_log_transform",
        )

        if legacy_log_toggle:
            st.warning("⚠️ Legacy mode enabled: Log transformation will be applied to ALL numerical measures. "
                       "This may produce scientifically invalid results for psycholinguistic measures.")

    # The checkbox value is the same one stored under 'legacy_log_transform'
    # in session state, so use it directly instead of reading it back twice.
    legacy_enabled = bool(legacy_log_toggle)
    return {
        'token_analysis': token_analysis,
        'lemma_analysis': lemma_analysis,
        'word_type_filter': word_type_filter,
        'selected_measures': selected_measures,
        'log_transforms': log_transforms,
        'use_smart_defaults': not legacy_enabled,
        'legacy_log_transform': legacy_enabled,
    }
| def _find_entry_config(entry_name: str, config: Dict[str, Any]) -> Optional[Dict[str, Any]]: | |
| """Find configuration entry by name.""" | |
| for language, lang_data in config.items(): | |
| if not isinstance(lang_data, dict): | |
| continue | |
| for ngram_type, type_data in lang_data.items(): | |
| if not isinstance(type_data, dict): | |
| continue | |
| if entry_name in type_data: | |
| return type_data[entry_name] | |
| return None | |
def display_configured_indices():
    """List the reference indices currently held by ``SessionManager``.

    Entries that ``SessionManager.is_custom_reference_list`` recognises are
    shown under "Custom Indices" with their word and frequency column names
    (read from ``data['token']``). Other entries that are dicts with a
    ``'token'`` key are shown under "Default Indices", either with the number
    of token entries (when ``data['token']`` is a dict) or as "configured".
    Nothing is rendered when there are no reference lists.
    """
    reference_lists = SessionManager.get_reference_lists()
    if not reference_lists:
        return

    st.write("**Currently Configured Indices:**")

    custom_indices = []
    default_indices = []
    for index_name, data in reference_lists.items():
        if SessionManager.is_custom_reference_list(index_name):
            token_config = data['token']
            # The separator was mis-decoded UTF-8 ("β"); restored to the
            # arrow it most plausibly encodes.
            custom_indices.append(
                f"- {index_name}: {token_config['word_column']} → {token_config['freq_column']}"
            )
        elif isinstance(data, dict) and 'token' in data:
            # NOTE(review): the scraped source has no indentation; the
            # "configured" fallback is assumed to pair with the inner
            # isinstance check. Confirm against the original file.
            if isinstance(data['token'], dict):
                default_indices.append(f"- {index_name}: {len(data['token'])} entries")
            else:
                default_indices.append(f"- {index_name}: configured")

    # Same rendering for both sections: heading, then one line per index.
    for heading, lines in (("*Custom Indices:*", custom_indices),
                           ("*Default Indices:*", default_indices)):
        if not lines:
            continue
        st.write(heading)
        for line in lines:
            st.write(line)
def render_configuration_results(success_count: int, errors: List[str]):
    """Report the outcome of applying index configurations.

    Args:
        success_count: Number of indices that were configured successfully.
        errors: Error messages to list; an empty list renders no error section.
    """
    nothing_configured = success_count == 0

    if success_count > 0:
        st.success(f"Successfully configured {success_count} indices")

    if errors:
        st.error("Configuration errors:")
        for message in errors:
            st.write(f"- {message}")

    if nothing_configured:
        st.error("No valid configurations found")
def render_enhanced_reference_selection(config: Dict[str, Any], reference_lists: Dict[str, Any]) -> Tuple[Dict[str, List[str]], Dict[str, List[str]]]:
    """Render the grouped reference-list UI with per-measure controls.

    Reference lists are grouped by ``UIComponents._group_reference_lists``.
    Each group gets an enable checkbox; for an enabled group the first token
    entry and the first lemma entry (when present) each get an expander whose
    contents are rendered by ``UIComponents._render_measure_selection``.

    Args:
        config: Reference configuration used to resolve each entry.
        reference_lists: Currently selected reference lists keyed by entry name.

    Returns:
        ``(selected_measures, log_transforms)``: two dicts keyed by entry
        name, holding the selected measure names and the subset to be
        log-transformed. Both are empty when ``reference_lists`` is empty or
        every group is disabled.
    """
    selected_measures: Dict[str, List[str]] = {}
    log_transforms: Dict[str, List[str]] = {}

    if not reference_lists:
        st.info("No reference lists selected. Please configure reference lists first.")
        return selected_measures, log_transforms

    groups = UIComponents._group_reference_lists(reference_lists, config)
    st.write("**Reference Lists & Measures:**")

    # NOTE(review): the lone "π" / "β" glyphs in the strings below are
    # mojibake whose original characters cannot be recovered from this view;
    # they are kept verbatim. "⬇️" was decodable and has been restored.
    for base_name, group_data in groups.items():
        group_enabled = st.checkbox(
            f"**{base_name}**",
            value=True,
            key=f"group_enabled_{base_name}",
            help=f"Enable/disable all {base_name} analyses",
        )

        if group_enabled:
            # Only the first entry of each analysis type is rendered.
            active = [
                (analysis_type, group_data[analysis_type][0])
                for analysis_type in ('token', 'lemma')
                if group_data.get(analysis_type)
            ]

            badges = [f"[{analysis_type.title()} β]" for analysis_type, _ in active]
            if badges:
                st.write(f" {' '.join(badges)}")

            total_measures = 0
            total_logs = 0
            for analysis_type, entry in active:
                expander_label = f"π {analysis_type.title()} Measures ⬇️ (click to customize)"
                with st.expander(expander_label, expanded=False):
                    measures, logs = UIComponents._render_measure_selection(
                        entry, analysis_type, base_name
                    )
                # Store results even when empty so every rendered entry has a key.
                entry_name = entry[0]
                selected_measures[entry_name] = measures
                log_transforms[entry_name] = logs
                total_measures += len(measures)
                total_logs += len(logs)

            st.write(f" π {total_measures} measures selected, π {total_logs} log-transformed")

        st.write("")  # spacing between groups

    return selected_measures, log_transforms
| def _group_reference_lists(reference_lists: Dict[str, Any], config: Dict[str, Any]) -> Dict[str, Dict[str, List]]: | |
| """Group related reference lists for hierarchical display.""" | |
| from collections import defaultdict | |
| groups = defaultdict(lambda: {'token': [], 'lemma': []}) | |
| for entry_name in reference_lists.keys(): | |
| # Extract base name (remove _token/_lemma suffix) | |
| base_name = entry_name.replace('_token', '').replace('_lemma', '') | |
| # Get analysis type from config | |
| entry_config = UIComponents._find_entry_config(entry_name, config) | |
| if entry_config: | |
| analysis_type = entry_config.get('analysis_type', 'token') | |
| groups[base_name][analysis_type].append((entry_name, entry_config)) | |
| return groups | |
def _render_measure_selection(entry_data: Tuple[str, Dict], analysis_type: str, base_name: str) -> Tuple[List[str], List[str]]:
    """Render one checkbox per measure plus an optional log-transform toggle.

    Args:
        entry_data: ``(entry_name, entry_config)`` pair. The config is read
            for ``'selectable_measures'``, ``'log_transformable'``,
            ``'default_measures'``, ``'default_log_transforms'`` and
            ``'display_name'``.
        analysis_type: Not used by this function; kept for callers.
        base_name: Not used by this function; kept for callers.

    Returns:
        ``(selected_measures, selected_logs)``: the measures whose checkbox is
        ticked and, among those, the ones whose log toggle is ticked. Both
        lists are also written back to session state under
        ``custom_measures_<entry_name>`` and ``custom_logs_<entry_name>``.
    """
    entry_name, entry_config = entry_data

    # `or []` also covers a key that is present but set to None.
    selectable_measures = entry_config.get('selectable_measures') or []
    log_transformable = entry_config.get('log_transformable') or []

    measures_key = f'custom_measures_{entry_name}'
    logs_key = f'custom_logs_{entry_name}'

    # Seed session state from the configured defaults on first render;
    # list(...) copies and tolerates tuple/None defaults.
    if measures_key not in st.session_state:
        st.session_state[measures_key] = list(entry_config.get('default_measures') or [])
    if logs_key not in st.session_state:
        st.session_state[logs_key] = list(entry_config.get('default_log_transforms') or [])

    st.write(f"**Available Measures for {entry_config.get('display_name', entry_name)}:**")

    # NOTE(review): the lone "π" / "β" glyphs below are mojibake whose
    # original characters cannot be recovered from this view; they are kept
    # verbatim. "log₁₀" and "⚠️" were decodable and have been restored.
    selected_measures = []
    selected_logs = []
    for measure in selectable_measures:
        measure_slot, log_slot = st.columns([3, 1])
        can_log = measure in log_transformable

        with measure_slot:
            selected = st.checkbox(
                f"{measure.replace('_', ' ').title()}",
                value=measure in st.session_state[measures_key],
                key=f"measure_{entry_name}_{measure}",
                help=f"Include {measure} in analysis",
            )
            if selected:
                selected_measures.append(measure)

        with log_slot:
            if can_log and selected:
                log_enabled = st.checkbox(
                    "π log₁₀",
                    value=measure in st.session_state[logs_key],
                    key=f"log_{entry_name}_{measure}",
                    help=f"Apply log₁₀ transformation to {measure}",
                )
                if log_enabled:
                    selected_logs.append(measure)
            elif can_log:
                # Transformable, but the measure itself is not selected.
                st.write("π (disabled)")
            else:
                st.write("β (not transformable)")

    st.session_state[measures_key] = selected_measures
    st.session_state[logs_key] = selected_logs

    if selected_measures:
        st.success(f"β {len(selected_measures)} measures selected, {len(selected_logs)} log-transformed")
    else:
        st.warning("⚠️ No measures selected for this analysis type")

    return selected_measures, selected_logs
def group_has_smart_defaults(group_entries: List[str], config: Dict[str, Any]) -> bool:
    """Tell whether any entry in the group defines default measures.

    Args:
        group_entries: Entry names belonging to one group.
        config: Reference configuration searched with
            ``UIComponents._find_entry_config``.

    Returns:
        ``True`` as soon as one entry resolves to a config with a truthy
        ``'default_measures'`` value, otherwise ``False``.
    """
    resolved = (
        UIComponents._find_entry_config(name, config) for name in group_entries
    )
    return any(found and found.get('default_measures') for found in resolved)