egumasa's picture
plot function update
864b9a2
"""
Streamlit web application for Linguistic Data Analysis I tutorials.
Provides lexical sophistication analysis and POS/dependency parsing.
Refactored version with modular architecture for better maintainability.
"""
import sys
import os
from pathlib import Path
# Add parent directory to path for imports: the parent of this file's
# directory is appended to sys.path so the absolute `web_app.*` imports below
# can resolve (presumably the `web_app` package lives there - confirm layout).
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
# CRITICAL: Initialize GPU BEFORE any SpaCy/model imports.
# Per this note, the gpu_init import must stay ahead of the handler imports
# below; do not let an import sorter reorder this block.
from web_app.gpu_init import GPU_AVAILABLE
import streamlit as st
# Import custom modules (project-local session, UI and analysis handlers)
from web_app.session_manager import SessionManager
from web_app.components.ui_components import UIComponents
from web_app.handlers.analysis_handlers import AnalysisHandlers
from web_app.reference_manager import ReferenceManager
from web_app.handlers.pos_handlers import POSHandlers
from web_app.handlers.frequency_handlers import FrequencyHandlers
from web_app.handlers.corpus_viz_handlers import CorpusVizHandlers
# Import logging for GPU verification; main() logs the GPU status through
# this module-level logger.
import logging
logger = logging.getLogger(__name__)
# Configure the Streamlit page (browser-tab title, icon, layout, sidebar).
# Kept at module level, ahead of main(), so it runs before any rendering call.
st.set_page_config(
    page_title="Linguistic Data Analysis I - Text Analysis Tools",
    page_icon="📊",  # U+1F4CA bar chart
    layout="wide",
    initial_sidebar_state="expanded",
)
def main():
    """Main application entry point.

    Steps, in order: draw the page title and tagline, log whether the GPU
    is available, initialize session state, render the sidebar (which
    yields the selected tool), handle language changes, and finally render
    the interface for the selected tool.

    Any tool choice other than 'Lexical Sophistication',
    'POS & Dependency Parser' or 'Corpus Data Visualizer' falls through to
    the frequency-analysis interface.
    """
    st.title("📊 Linguistic Data Analysis I - Text Analysis Tools")
    st.markdown(
        "*Educational tools for lexical sophistication analysis, "
        "POS/dependency parsing, and word frequency visualization*"
    )

    # GPU_AVAILABLE is imported from web_app.gpu_init; here it is only
    # reported, not computed.
    if GPU_AVAILABLE:
        logger.info("GPU initialization successful - models will use GPU")
    else:
        logger.info("GPU not available - models will use CPU")

    # Session state must exist before the sidebar widgets are rendered.
    SessionManager.initialize_session_state()

    tool_choice = render_sidebar()

    # Runs after the sidebar so the language selector has been rendered.
    SessionManager.handle_language_change()

    # Route to the interface for the selected tool; frequency analysis is
    # the default for every other value.
    interfaces = {
        'Lexical Sophistication': render_lexical_sophistication_interface,
        'POS & Dependency Parser': render_pos_parser_interface,
        'Corpus Data Visualizer': render_corpus_visualization_interface,
    }
    render_interface = interfaces.get(
        tool_choice, render_frequency_analysis_interface
    )
    render_interface()
def render_sidebar():
    """Render sidebar configuration options and return the selected tool.

    Inside the sidebar this draws the language selector, the model-size
    selector and the tool selector, followed by a separator and a
    "Debug Mode" checkbox. When the checkbox is ticked, four collapsed
    expanders with diagnostic helpers from ``web_app.debug_utils`` are
    shown.

    Returns:
        The value returned by ``UIComponents.render_tool_selector()``.
    """
    with st.sidebar:
        st.title("Configuration")

        UIComponents.render_language_selector()
        UIComponents.render_model_selector()
        tool_choice = UIComponents.render_tool_selector()

        # Debug mode toggle
        st.write("---")
        # NOTE(review): one byte of this emoji was lost in the mis-encoded
        # original; U+1F41B (bug) is the best-fit reconstruction - confirm.
        debug_mode = st.checkbox(
            "🐛 Debug Mode",
            key="debug_mode",
            help="Enable debug information for troubleshooting",
        )
        if debug_mode:
            # Imported only when debug mode is on, as in the original.
            from web_app.debug_utils import (
                show_environment_info,
                test_file_operations,
                debug_file_upload,
                show_gpu_status,
            )

            # (expander label, helper to call inside it), in display order.
            debug_panels = (
                ("Environment Info", show_environment_info),
                ("File Operations Test", test_file_operations),
                ("File Upload Test", debug_file_upload),
                ("GPU Status", show_gpu_status),
            )
            for panel_label, show_panel in debug_panels:
                with st.expander(panel_label, expanded=False):
                    show_panel()

    return tool_choice
def render_lexical_sophistication_interface():
    """Render lexical sophistication analysis interface.

    Shows the section header, obtains the analyzer from
    ``AnalysisHandlers.get_analyzer()`` and, if that returns ``None``,
    displays an error and stops. Otherwise a horizontal radio selects the
    analysis mode and the matching ``AnalysisHandlers`` method is called
    with the analyzer.
    """
    # NOTE(review): the last byte of this emoji was lost in the mis-encoded
    # original; U+1F50D (magnifying glass) is the best-fit reconstruction -
    # confirm against the intended header.
    st.header(
        "🔍 Emulation of the Tool for Automatic Analysis of "
        "Lexical Sophistication (emuTAALES)"
    )

    analyzer = AnalysisHandlers.get_analyzer()
    if analyzer is None:
        st.error("Failed to load analyzer. Please check your SpaCy model installation.")
        return

    analysis_mode = st.radio(
        "Analysis Mode",
        options=['Single Text', 'Two-Text Comparison', 'Batch Analysis'],
        horizontal=True,
    )

    # Route to the handler for the chosen mode; two-text comparison is the
    # fall-through case.
    if analysis_mode == 'Single Text':
        AnalysisHandlers.handle_single_text_analysis(analyzer)
    elif analysis_mode == 'Batch Analysis':
        AnalysisHandlers.handle_batch_analysis(analyzer)
    else:
        AnalysisHandlers.handle_two_text_comparison(analyzer)
def render_pos_parser_interface():
    """Render the POS and dependency parser interface.

    Shows the section header, obtains the parser from
    ``AnalysisHandlers.get_pos_parser()`` and, if that returns ``None``,
    displays an error and stops. Otherwise two horizontal radios select
    the analysis mode ('POS Analysis' or 'Construction Extraction') and
    then single-text versus batch processing; the matching ``POSHandlers``
    method is called with the parser. There is no separate rule-testing
    mode.
    """
    st.header("🏷️ POS and Dependency Parser")  # U+1F3F7 U+FE0F label

    parser = AnalysisHandlers.get_pos_parser()
    if parser is None:
        st.error("Failed to load POS parser. Please check your SpaCy model installation.")
        return

    analysis_mode = st.radio(
        "Analysis Mode",
        options=['POS Analysis', 'Construction Extraction'],
        horizontal=True,
        key='pos_analysis_mode',
    )

    if analysis_mode == 'POS Analysis':
        # Each top-level mode has its own sub-mode radio with a distinct
        # widget key.
        pos_sub_mode = st.radio(
            "POS Analysis Type",
            options=['Single Text', 'Batch Analysis'],
            horizontal=True,
            key='pos_sub_mode',
        )
        if pos_sub_mode == 'Single Text':
            POSHandlers.handle_single_text_pos_analysis(parser)
        else:
            POSHandlers.handle_batch_pos_analysis(parser)
        return

    # Construction Extraction (now includes integrated rule testing)
    construction_sub_mode = st.radio(
        "Construction Analysis Type",
        options=['Single Text', 'Batch Analysis'],
        horizontal=True,
        key='construction_sub_mode',
    )
    if construction_sub_mode == 'Single Text':
        # Now includes visual parsing
        POSHandlers.handle_construction_analysis(parser)
    else:
        POSHandlers.handle_batch_construction_analysis(parser)
def render_frequency_analysis_interface():
    """Render frequency analysis interface.

    Shows the section header and a one-line description, then delegates
    to ``FrequencyHandlers.handle_frequency_analysis()``.
    """
    st.header("📊 Word Frequency Analysis")  # U+1F4CA bar chart
    st.markdown("Analyze and visualize word frequency distributions from TSV data files.")

    FrequencyHandlers.handle_frequency_analysis()
def render_corpus_visualization_interface():
    """Render corpus data visualization interface.

    Shows the section header and a one-line description, then delegates
    to ``CorpusVizHandlers.handle_corpus_visualization()``.
    """
    st.header("🗂️ Corpus Data Visualizer")  # U+1F5C2 U+FE0F card index dividers
    st.markdown("Merge and visualize corpus metadata with analysis results to create insightful visualizations.")

    CorpusVizHandlers.handle_corpus_visualization()
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()