Spaces:

egumasa
/

simple-text-analyzer

Building

App Files Files Community

simple-text-analyzer / web_app /app.py

egumasa

plot function update

864b9a2 7 months ago

raw

history blame contribute delete

6.84 kB

	"""
	Streamlit web application for Linguistic Data Analysis I tutorials.
	Provides lexical sophistication analysis and POS/dependency parsing.

	Refactored version with modular architecture for better maintainability.
	"""

	import sys
	import os
	from pathlib import Path

	# Make the directory that contains the ``web_app`` package importable.
	# The path is made absolute first so a relative ``__file__`` cannot yield an
	# empty entry, and it is appended only when missing so that re-executing this
	# script in the same process does not keep growing ``sys.path``.
	_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	if _project_root not in sys.path:
	    sys.path.append(_project_root)

	# CRITICAL: Initialize GPU BEFORE any SpaCy/model imports
	from web_app.gpu_init import GPU_AVAILABLE

	import streamlit as st

	# Import custom modules
	from web_app.session_manager import SessionManager
	from web_app.components.ui_components import UIComponents
	from web_app.handlers.analysis_handlers import AnalysisHandlers
	from web_app.reference_manager import ReferenceManager
	from web_app.handlers.pos_handlers import POSHandlers
	from web_app.handlers.frequency_handlers import FrequencyHandlers
	from web_app.handlers.corpus_viz_handlers import CorpusVizHandlers

	# Import logging for GPU verification
	import logging
	logger = logging.getLogger(__name__)

	# Configure Streamlit page
	# Page-level settings: wide layout with the sidebar opened by default,
	# plus the browser tab title and icon.
	st.set_page_config(
	    layout="wide",
	    initial_sidebar_state="expanded",
	    page_icon="📊",
	    page_title="Linguistic Data Analysis I - Text Analysis Tools",
	)


	def main():
	    """Draw the page header, prepare session state, and show the chosen tool.

	    The tool selected in the sidebar decides which interface is rendered.
	    Any selection other than the three listed in the dispatch table falls
	    back to the frequency analysis interface.
	    """
	    st.title("📊 Linguistic Data Analysis I - Text Analysis Tools")
	    st.markdown("Educational tools for lexical sophistication analysis, POS/dependency parsing, and word frequency visualization")

	    # GPU_AVAILABLE comes from web_app.gpu_init (imported at module load);
	    # here it is only reported to the log.
	    device_message = (
	        "GPU initialization successful - models will use GPU"
	        if GPU_AVAILABLE
	        else "GPU not available - models will use CPU"
	    )
	    logger.info(device_message)

	    # Order matters: session state first, then the sidebar (which yields the
	    # tool choice), then the language-change handling.
	    SessionManager.initialize_session_state()
	    selected_tool = render_sidebar()
	    SessionManager.handle_language_change()

	    interfaces = {
	        'Lexical Sophistication': render_lexical_sophistication_interface,
	        'POS & Dependency Parser': render_pos_parser_interface,
	        'Corpus Data Visualizer': render_corpus_visualization_interface,
	    }
	    # Frequency Analysis is the default for every other selection.
	    render_interface = interfaces.get(selected_tool, render_frequency_analysis_interface)
	    render_interface()


	def render_sidebar():
	    """Draw the sidebar controls and return the selected tool.

	    Renders the language, model and tool selectors followed by a debug-mode
	    checkbox. When the checkbox is ticked, four collapsed expanders with
	    diagnostic output are added below it.

	    Returns:
	        The value returned by ``UIComponents.render_tool_selector()``.
	    """
	    with st.sidebar:
	        st.title("Configuration")

	        UIComponents.render_language_selector()
	        UIComponents.render_model_selector()
	        selected_tool = UIComponents.render_tool_selector()

	        st.write("---")
	        if st.checkbox("🐛 Debug Mode", key="debug_mode", help="Enable debug information for troubleshooting"):
	            # Imported here so the debug helpers are only loaded when requested.
	            from web_app.debug_utils import show_environment_info, test_file_operations, debug_file_upload, show_gpu_status

	            debug_panels = (
	                ("Environment Info", show_environment_info),
	                ("File Operations Test", test_file_operations),
	                ("File Upload Test", debug_file_upload),
	                ("GPU Status", show_gpu_status),
	            )
	            for panel_title, show_panel in debug_panels:
	                with st.expander(panel_title, expanded=False):
	                    show_panel()

	    return selected_tool


	def render_lexical_sophistication_interface():
	    """Show the lexical sophistication page and run the selected analysis mode.

	    Displays an error and stops if no analyzer could be obtained. Otherwise
	    offers three analysis modes and hands the analyzer to the matching
	    handler on ``AnalysisHandlers``.
	    """
	    st.header("🔍 Emulation of the Tool for Automatic Analysis of Lexical Sophistication (emuTAALES)")

	    analyzer = AnalysisHandlers.get_analyzer()
	    if analyzer is None:
	        st.error("Failed to load analyzer. Please check your SpaCy model installation.")
	        return

	    selected_mode = st.radio(
	        "Analysis Mode",
	        options=['Single Text', 'Two-Text Comparison', 'Batch Analysis'],
	        horizontal=True,
	    )

	    handlers_by_mode = {
	        'Single Text': AnalysisHandlers.handle_single_text_analysis,
	        'Batch Analysis': AnalysisHandlers.handle_batch_analysis,
	    }
	    # Anything that is not single-text or batch is treated as the two-text comparison.
	    run_analysis = handlers_by_mode.get(selected_mode, AnalysisHandlers.handle_two_text_comparison)
	    run_analysis(analyzer)


	def render_pos_parser_interface():
	    """Show the POS/dependency parser page and run the selected analysis.

	    Displays an error and stops if no parser could be obtained. The user
	    first picks between POS analysis and construction extraction, then
	    between single-text and batch processing; the parser is passed to the
	    corresponding ``POSHandlers`` method.
	    """
	    st.header("🏷️ POS and Dependency Parser")

	    parser = AnalysisHandlers.get_pos_parser()
	    if parser is None:
	        st.error("Failed to load POS parser. Please check your SpaCy model installation.")
	        return

	    # Top-level choice (there is no separate rule-testing mode).
	    top_mode = st.radio(
	        "Analysis Mode",
	        options=['POS Analysis', 'Construction Extraction'],
	        horizontal=True,
	        key='pos_analysis_mode',
	    )

	    # Both top-level modes share the same single/batch sub-choice; only the
	    # widget label, widget key and target handlers differ.
	    if top_mode == 'POS Analysis':
	        sub_label = "POS Analysis Type"
	        sub_key = 'pos_sub_mode'
	        single_handler = POSHandlers.handle_single_text_pos_analysis
	        batch_handler = POSHandlers.handle_batch_pos_analysis
	    else:
	        # Construction Extraction
	        sub_label = "Construction Analysis Type"
	        sub_key = 'construction_sub_mode'
	        single_handler = POSHandlers.handle_construction_analysis
	        batch_handler = POSHandlers.handle_batch_construction_analysis

	    sub_mode = st.radio(
	        sub_label,
	        options=['Single Text', 'Batch Analysis'],
	        horizontal=True,
	        key=sub_key,
	    )

	    run_handler = single_handler if sub_mode == 'Single Text' else batch_handler
	    run_handler(parser)


	def render_frequency_analysis_interface():
	    """Show the word frequency page: header, short description, then the handler."""
	    st.header("📊 Word Frequency Analysis")
	    description = "Analyze and visualize word frequency distributions from TSV data files."
	    st.markdown(description)

	    # The rest of the page is produced by FrequencyHandlers.
	    FrequencyHandlers.handle_frequency_analysis()


	def render_corpus_visualization_interface():
	    """Show the corpus visualizer page: header, short description, then the handler."""
	    st.header("🗂️ Corpus Data Visualizer")
	    description = "Merge and visualize corpus metadata with analysis results to create insightful visualizations."
	    st.markdown(description)

	    # The rest of the page is produced by CorpusVizHandlers.
	    CorpusVizHandlers.handle_corpus_visualization()


	# Script entry point: build the page only when this file is executed as the
	# main script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()