# modules/cluster.py

import sys, os

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

import streamlit as st
import pandas as pd
from scripts.visualize_pca import visualize_pca
from utils.text_cleaning import clean_text  # cleaning function


def run_clustering_interface():
    """
    Streamlit UI: Upload CSV → clean reviews → run PCA visualization. 
    """
    st.subheader("🔍 Cluster Reviews with PCA")

    uploaded_file = st.file_uploader("Upload a CSV file with 'reviews.text' and 'reviews.rating' columns", type=["csv"])

    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)

        if "reviews.text" not in df.columns or "reviews.rating" not in df.columns:
            st.error("❌ CSV must contain 'reviews.text' and 'reviews.rating' columns.")
            return

        # Clean the review texts
        df["cleaned_text"] = df["reviews.text"].astype(str).apply(clean_text)

        st.success("✅ Reviews loaded and cleaned!")

        # This will display the chart using your existing function
        fig = visualize_pca(df)
        st.pyplot(fig)  # Streamlit captures the current figure