Spaces:

janduplessis886
/

text-lense2

Build error

App Files Files Community

text-lense2 / src /streamlit_app.py

janduplessis886

Update src/streamlit_app.py

f18a780 verified 11 months ago

raw

history blame contribute delete

13.2 kB

	import os

	# Redirect the Hugging Face and XDG cache locations to folders relative to
	# the working directory so they are writable.
	# NOTE(review): presumably this has to run before the `main` import further
	# down so the libraries it loads see these values — confirm.
	os.environ.update({
	    "TRANSFORMERS_CACHE": "./hf_cache",
	    "HF_HOME": "./hf_home",
	    "XDG_CACHE_HOME": "./.cache",
	})

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import io # Import io for handling file uploads
	import traceback # Import traceback for detailed error info
	import altair as alt
	# Import functions from main.py
	from main import analyze_text, visualize_results


	# Page chrome: browser-tab title and a centered content column.
	st.set_page_config(
	    page_title="Text Analysis Module",
	    layout="centered",
	)

	# Main heading shown at the top of the page.
	st.title(":material/reset_focus: TextLense")

	# --- Sidebar: pick an input method and load the reviews ---
	with st.sidebar:
	    st.header("Input Reviews")
	    st.write("Upload a CSV file or enter free text reviews.")
	    input_method = st.radio("Choose input method:", ("Upload CSV", "Enter Text"))

	    # Reviews live in session state as {source label: [review text, ...]}.
	    if 'raw_reviews_by_source' not in st.session_state:
	        st.session_state.raw_reviews_by_source = {}

	    if input_method == "Upload CSV":
	        uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
	        if uploaded_file is None:
	            # No file selected: drop whatever was loaded before.
	            st.session_state.raw_reviews_by_source = {}
	        else:
	            has_header = st.checkbox("CSV has header row", value=True)

	            try:
	                # The upload is raw bytes; decode it and parse from memory.
	                csv_text = uploaded_file.getvalue().decode("utf-8")
	                df = pd.read_csv(io.StringIO(csv_text), header=0 if has_header else None)

	                # Pair every column with the label it will be stored under.
	                if has_header:
	                    labelled_columns = [(str(name), df[name]) for name in df.columns]
	                elif df.shape[1] > 0:
	                    st.write(f"CSV has {df.shape[1]} column(s). Reviews loaded from each column.")
	                    labelled_columns = [(f"Column {pos+1}", df[pos]) for pos in range(df.shape[1])]
	                else:
	                    st.warning("CSV is empty or could not be read.")
	                    labelled_columns = []

	                # Keep only columns that still hold text once missing cells are dropped.
	                loaded = {}
	                for label, column in labelled_columns:
	                    texts = column.dropna().astype(str).tolist()
	                    if texts:
	                        loaded[label] = texts
	                st.session_state.raw_reviews_by_source = loaded

	            except Exception as e:
	                st.error(f"Error reading CSV file: {e}")
	                # A failed load must not leave earlier or partial data behind.
	                st.session_state.raw_reviews_by_source = {}
	                st.exception(e)

	    elif input_method == "Enter Text":
	        reviews_input = st.text_area("Enter Free Text Reviews (one per line):", height=200)
	        # One review per non-blank line, trimmed of surrounding whitespace.
	        trimmed_lines = (line.strip() for line in (reviews_input or "").split('\n'))
	        entered = [line for line in trimmed_lines if line]
	        if entered:
	            st.session_state.raw_reviews_by_source = {"Entered Text Reviews": entered}
	        else:
	            st.session_state.raw_reviews_by_source = {}


	# --- Main Content Area ---
	#
	# With no reviews loaded the page shows only the landing text and logo.
	# Once reviews exist it shows the classification-question editor, the
	# "Analyze Reviews" button and, after a click, the analysis results.

	if not st.session_state.raw_reviews_by_source:
	    st.write("""Please use the sidebar to upload a CSV file or enter free text reviews to start the analysis.
	:material/csv: CSV files should contain one or more columns of text reviews. It can include headers or be a simple list of reviews. :material/more_horiz: You will be given the option to select which columns to analyze.""")

	    st.image('images/textlense_logo.png', width=600)
	else:
	    # --- Classification Configuration Section (only shown when reviews are loaded) ---
	    st.header("Classification Configuration")
	    st.write("Define the questions and categories for zero-shot classification and select which reviews apply to each question.")

	    # Each entry is a dict with 'id', 'question', 'categories' and 'review_sources'.
	    if 'classification_configs' not in st.session_state:
	        st.session_state.classification_configs = []

	    def _next_config_id():
	        """Return an integer id not yet handed out in this session."""
	        next_id = st.session_state.get('classification_config_seq', 0)
	        st.session_state.classification_config_seq = next_id + 1
	        return next_id

	    def add_classification_config():
	        """Button callback: append an empty classification question."""
	        st.session_state.classification_configs.append({
	            'id': _next_config_id(),
	            'question': '',
	            'categories': [],
	            'review_sources': [],
	        })

	    st.button("Add Classification Question", on_click=add_classification_config)

	    # Loop invariants: the prefill list and the currently loaded source labels.
	    default_categories = [
	        "Staff Professionalism", "Communication Effectiveness", "Appointment Availability",
	        "Waiting Time", "Facility Cleanliness", "Patient Respect", "Treatment Quality",
	        "Staff Empathy and Compassion", "Administrative Efficiency", "Reception Staff Interaction",
	        "Environment and Ambiance", "Follow-up and Continuity of Care", "Accessibility and Convenience",
	        "Patient Education and Information", "Feedback and Complaints Handling", "Test Results",
	        "Surgery Website", "Telehealth", "Vaccinations", "Prescriptions and Medication Management",
	        "Mental Health Support",
	    ]
	    available_sources = list(st.session_state.raw_reviews_by_source)

	    # Iterate over a snapshot so that removing an entry never disturbs the loop.
	    for i, config in enumerate(list(st.session_state.classification_configs)):
	        # Widget keys use a per-config id instead of the list position, so
	        # removing a question does not hand its widget state to whichever
	        # question slides into the freed index. Entries created without an
	        # id receive one here.
	        if 'id' not in config:
	            config['id'] = _next_config_id()
	        cfg_id = config['id']

	        st.write("---")
	        st.write(f"Classification Question {i+1}")
	        config['question'] = st.text_input("Question:", value=config.get('question', ''), key=f'q_{cfg_id}')

	        # Optionally prefill the category box with the common category list;
	        # otherwise show the categories already stored for this question.
	        prefill_categories = st.checkbox(f"Prefill common categories for Question {i+1}?", value=False, key=f'prefill_{cfg_id}')
	        if prefill_categories:
	            categories_str = ", ".join(default_categories)
	        else:
	            categories_str = ", ".join(config.get('categories', []))

	        categories_input = st.text_area("Categories (comma-separated):", value=categories_str, key=f'cat_{cfg_id}')
	        config['categories'] = [cat.strip() for cat in categories_input.split(',') if cat.strip()]

	        # Map review sources to this question.
	        if available_sources:
	            # Sources saved earlier may be gone (different CSV, other input
	            # method). st.multiselect rejects defaults that are not among
	            # its options, so drop the ones that no longer exist.
	            still_valid = [src for src in config.get('review_sources', []) if src in available_sources]
	            config['review_sources'] = st.multiselect(
	                "Select review sources for this question:",
	                available_sources,
	                default=still_valid,
	                key=f'sources_{cfg_id}'
	            )
	        else:
	            # Defensive: this branch only runs when reviews are loaded, so
	            # the source list is not expected to be empty here.
	            st.info("Upload a CSV or enter text reviews in the sidebar to select sources.")
	            config['review_sources'] = []

	        if st.button("Remove Question", key=f'remove_{cfg_id}'):
	            # No earlier deletion happened in this run, so index i still
	            # points at this entry. Rerun immediately to redraw the list.
	            del st.session_state.classification_configs[i]
	            st.rerun()

	    analyze_button = st.button("Analyze Reviews")

	    # Placeholders that are filled during analysis and emptied afterwards.
	    progress_container = st.empty()
	    status_container = st.empty()

	    if analyze_button:
	        loaded_reviews = st.session_state.raw_reviews_by_source
	        questions_data_for_analysis = []
	        total_reviews_for_progress = 0

	        # Build the analyze_text payload from every complete configuration.
	        for config in st.session_state.classification_configs:
	            question = config.get('question', '').strip()
	            categories = config.get('categories', [])
	            selected_sources = config.get('review_sources', [])

	            # A usable configuration needs a question, categories and sources.
	            if not (question and categories and selected_sources):
	                continue

	            reviews_with_source_for_this_question = []
	            for source in selected_sources:
	                # Sources that are no longer loaded contribute nothing.
	                reviews_with_source_for_this_question.extend(
	                    {"review": review, "source": source}
	                    for review in loaded_reviews.get(source, [])
	                )

	            if reviews_with_source_for_this_question:
	                questions_data_for_analysis.append({
	                    "question": question,
	                    "reviews": reviews_with_source_for_this_question,
	                    "categories": categories
	                })
	                total_reviews_for_progress += len(reviews_with_source_for_this_question)

	        if questions_data_for_analysis:
	            progress_bar = progress_container.progress(0)

	            def update_progress(current_progress):
	                """Forward a progress value from analyze_text to the progress bar."""
	                progress_bar.progress(current_progress)

	            # Stays empty if the analysis or the DataFrame construction fails.
	            results_df = pd.DataFrame()

	            try:
	                with st.status("Analyzing reviews: Sentiment, Emotion & Zero-shot classification...", expanded=True) as status:

	                    def update_status(message):
	                        """Write a status message inside the status expander."""
	                        status.write(message)

	                    analysis_results_list = analyze_text(
	                        questions_data_for_analysis,
	                        total_reviews_for_progress,
	                        progress_callback=update_progress,
	                        status_callback=update_status
	                    )

	                    # Building the DataFrame is guarded separately so a malformed
	                    # result list is reported as its own error.
	                    try:
	                        results_df = pd.DataFrame(analysis_results_list)
	                        print("\n--- Debugging results_df ---")
	                        print(f"DataFrame created successfully. Shape: {results_df.shape}")
	                        print(f"Columns: {results_df.columns.tolist()}")
	                        print("Head of DataFrame:")
	                        print(results_df.head())
	                        print("--- End Debugging results_df ---\n")
	                    except Exception as df_error:
	                        st.error(f"Error creating DataFrame: {df_error}")
	                        st.exception(df_error)
	                        results_df = pd.DataFrame()

	                    status.update(label="Analysis complete!", state="complete", expanded=False)
	                    progress_bar.progress(1.0)  # make sure the bar ends at 100%

	            except Exception as e:
	                st.error(f"An error occurred during analysis: {e}")
	                st.exception(e)
	            finally:
	                # Remove the progress bar and status placeholder on success and failure alike.
	                progress_container.empty()
	                status_container.empty()

	            # Results are rendered outside the st.status block.
	            st.subheader("Analysis Results")

	            if not results_df.empty:
	                visualize_results(results_df)

	                # Raw data goes below the visualizations.
	                st.write("### Raw Analysis Data")
	                st.dataframe(results_df, width=2000)
	            else:
	                st.info("No results to display. Please check your input data and configurations.")

	        else:
	            st.warning("Please configure at least one classification question with categories and select review sources, and ensure reviews are loaded.")