Spaces:

Julseb42
/

nlp-project

Sleeping

Test deploy

8e83170 12 months ago

1.17 kB

	# modules/cluster.py

	import sys, os

	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

	import streamlit as st
	import pandas as pd
	from scripts.visualize_pca import visualize_pca
	from utils.text_cleaning import clean_text # cleaning function


	def run_clustering_interface():
	"""
	Streamlit UI: Upload CSV → clean reviews → run PCA visualization.
	"""
	st.subheader("🔍 Cluster Reviews with PCA")

	uploaded_file = st.file_uploader("Upload a CSV file with 'reviews.text' and 'reviews.rating' columns", type=["csv"])

	if uploaded_file is not None:
	df = pd.read_csv(uploaded_file)

	if "reviews.text" not in df.columns or "reviews.rating" not in df.columns:
	st.error("❌ CSV must contain 'reviews.text' and 'reviews.rating' columns.")
	return

	# Clean the review texts
	df["cleaned_text"] = df["reviews.text"].astype(str).apply(clean_text)

	st.success("✅ Reviews loaded and cleaned!")

	# This will display the chart using your existing function
	fig = visualize_pca(df)
	st.pyplot(fig) # Streamlit captures the current figure