Spaces:
Building
Building
| """ | |
| Streamlit web application for Linguistic Data Analysis I tutorials. | |
| Provides lexical sophistication analysis and POS/dependency parsing. | |
| Refactored version with modular architecture for better maintainability. | |
| """ | |
| import sys | |
| import os | |
| from pathlib import Path | |
| # Add parent directory to path for imports | |
| sys.path.append(os.path.dirname(os.path.dirname(__file__))) | |
| # CRITICAL: Initialize GPU BEFORE any SpaCy/model imports | |
| from web_app.gpu_init import GPU_AVAILABLE | |
| import streamlit as st | |
| # Import custom modules | |
| from web_app.session_manager import SessionManager | |
| from web_app.components.ui_components import UIComponents | |
| from web_app.handlers.analysis_handlers import AnalysisHandlers | |
| from web_app.reference_manager import ReferenceManager | |
| from web_app.handlers.pos_handlers import POSHandlers | |
| from web_app.handlers.frequency_handlers import FrequencyHandlers | |
| from web_app.handlers.corpus_viz_handlers import CorpusVizHandlers | |
| # Import logging for GPU verification | |
| import logging | |
| logger = logging.getLogger(__name__) | |
# Page-wide Streamlit settings: title, icon, wide layout, and an expanded sidebar.
# Issued at module top level, ahead of the Streamlit calls made inside main().
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_icon="π",
    page_title="Linguistic Data Analysis I - Text Analysis Tools",
)
def main():
    """Draw the page header, set up session state, and show the selected tool.

    The tool name comes from ``render_sidebar()``. Three names map to a
    dedicated interface; any other value falls through to the frequency
    analysis interface.
    """
    st.title("π Linguistic Data Analysis I - Text Analysis Tools")
    st.markdown("*Educational tools for lexical sophistication analysis, POS/dependency parsing, and word frequency visualization*")

    # GPU_AVAILABLE is imported from web_app.gpu_init; here we only log its value.
    gpu_message = (
        "GPU initialization successful - models will use GPU"
        if GPU_AVAILABLE
        else "GPU not available - models will use CPU"
    )
    logger.info(gpu_message)

    # Session state is prepared before the sidebar is drawn; the language-change
    # hook runs only after the sidebar has been rendered.
    SessionManager.initialize_session_state()
    selected_tool = render_sidebar()
    SessionManager.handle_language_change()

    # Tool name -> renderer. Anything not listed is treated as Frequency Analysis.
    tool_views = {
        'Lexical Sophistication': render_lexical_sophistication_interface,
        'POS & Dependency Parser': render_pos_parser_interface,
        'Corpus Data Visualizer': render_corpus_visualization_interface,
    }
    show_tool = tool_views.get(selected_tool, render_frequency_analysis_interface)
    show_tool()
def render_sidebar():
    """Draw the sidebar controls and report which tool was picked.

    The sidebar holds the language selector, the model selector, the tool
    selector, and a debug checkbox. Ticking the checkbox reveals four
    collapsed expanders backed by ``web_app.debug_utils``.

    Returns:
        The value returned by ``UIComponents.render_tool_selector()``.
    """
    with st.sidebar:
        st.title("Configuration")

        UIComponents.render_language_selector()
        UIComponents.render_model_selector()
        selected_tool = UIComponents.render_tool_selector()

        st.write("---")
        if st.checkbox("π Debug Mode", key="debug_mode", help="Enable debug information for troubleshooting"):
            # Imported here so the debug helpers are only loaded when debug mode is on.
            from web_app.debug_utils import show_environment_info, test_file_operations, debug_file_upload, show_gpu_status

            debug_panels = (
                ("Environment Info", show_environment_info),
                ("File Operations Test", test_file_operations),
                ("File Upload Test", debug_file_upload),
                ("GPU Status", show_gpu_status),
            )
            for panel_title, render_panel in debug_panels:
                with st.expander(panel_title, expanded=False):
                    render_panel()

    return selected_tool
def render_lexical_sophistication_interface():
    """Show the lexical sophistication (emuTAALES) tool.

    Obtains the analyzer from ``AnalysisHandlers.get_analyzer()``. If that
    returns ``None``, an error is shown and nothing else is rendered.
    Otherwise a horizontal radio picks the analysis mode and the matching
    ``AnalysisHandlers`` handler is called with the analyzer.
    """
    st.header("π Emulation of the Tool for Automatic Analysis of Lexical Sophistication (emuTAALES)")

    analyzer = AnalysisHandlers.get_analyzer()
    if analyzer is None:
        st.error("Failed to load analyzer. Please check your SpaCy model installation.")
        return

    chosen_mode = st.radio(
        "Analysis Mode",
        options=['Single Text', 'Two-Text Comparison', 'Batch Analysis'],
        horizontal=True
    )

    # Two modes are listed explicitly; every other value gets the two-text comparison.
    mode_handlers = {
        'Single Text': AnalysisHandlers.handle_single_text_analysis,
        'Batch Analysis': AnalysisHandlers.handle_batch_analysis,
    }
    run_analysis = mode_handlers.get(chosen_mode, AnalysisHandlers.handle_two_text_comparison)
    run_analysis(analyzer)
def render_pos_parser_interface():
    """Show the POS and dependency parser tool.

    Obtains the parser from ``AnalysisHandlers.get_pos_parser()``. If that
    returns ``None``, an error is shown and nothing else is rendered.
    Otherwise two radios select a handler: the first chooses between POS
    analysis and construction extraction, the second between single-text
    and batch processing. The chosen ``POSHandlers`` handler is then called
    with the parser.
    """
    st.header("π·οΈ POS and Dependency Parser")

    parser = AnalysisHandlers.get_pos_parser()
    if parser is None:
        st.error("Failed to load POS parser. Please check your SpaCy model installation.")
        return

    top_mode = st.radio(
        "Analysis Mode",
        options=['POS Analysis', 'Construction Extraction'],
        horizontal=True,
        key='pos_analysis_mode'
    )

    # Each top-level mode has its own sub-mode radio (distinct label and widget key)
    # and its own pair of single/batch handlers.
    if top_mode == 'POS Analysis':
        sub_label, sub_key = "POS Analysis Type", 'pos_sub_mode'
        single_handler = POSHandlers.handle_single_text_pos_analysis
        batch_handler = POSHandlers.handle_batch_pos_analysis
    else:  # Construction Extraction
        sub_label, sub_key = "Construction Analysis Type", 'construction_sub_mode'
        single_handler = POSHandlers.handle_construction_analysis
        batch_handler = POSHandlers.handle_batch_construction_analysis

    sub_mode = st.radio(
        sub_label,
        options=['Single Text', 'Batch Analysis'],
        horizontal=True,
        key=sub_key
    )

    run_parser = single_handler if sub_mode == 'Single Text' else batch_handler
    run_parser(parser)
def render_frequency_analysis_interface():
    """Show the word frequency tool.

    Writes the section header and a one-line description, then delegates
    to ``FrequencyHandlers.handle_frequency_analysis()``.
    """
    st.header("π Word Frequency Analysis")
    st.markdown("Analyze and visualize word frequency distributions from TSV data files.")

    FrequencyHandlers.handle_frequency_analysis()
def render_corpus_visualization_interface():
    """Show the corpus data visualizer.

    Writes the section header and a one-line description, then delegates
    to ``CorpusVizHandlers.handle_corpus_visualization()``.
    """
    st.header("ποΈ Corpus Data Visualizer")
    st.markdown("Merge and visualize corpus metadata with analysis results to create insightful visualizations.")

    CorpusVizHandlers.handle_corpus_visualization()
# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()