"""Streamlit app that visualizes clustering models on the wine-quality data.

The red and white wine CSV files are combined, the numeric features are
standardized and projected to 2-D with t-SNE, and the cluster labels produced
by previously saved K-Means, DBSCAN and hierarchical models are drawn on top
of that projection.

Streamlit re-executes this script on every widget interaction, so everything
expensive (model loading, CSV parsing, scaling, t-SNE) is wrapped in cached
functions and therefore runs only once per server process.
"""
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

# Must be the first Streamlit call of the script; it is placed ahead of the
# cached loaders so nothing they render (e.g. a spinner) can precede it.
st.set_page_config(page_title="Wine Clustering Analysis", layout="wide")
st.title("🍷 Wine Clustering Analysis")


@st.cache_resource
def _load_models():
    """Load the three saved clustering models once per server process.

    NOTE: joblib files are pickle-based; only load model files from a
    trusted source.

    Returns:
        Tuple ``(kmeans, dbscan, hierarchical)`` of the unpickled models.
        The objects are shared between reruns and sessions.
    """
    return (
        joblib.load("kmeans_model.joblib"),
        joblib.load("dbscan_model.joblib"),
        joblib.load("hierarchical_model.joblib"),
    )


@st.cache_data
def _prepare_data():
    """Read both wine CSV files and compute the scaled and t-SNE features.

    Returns:
        Tuple ``(wine_data, scaled_features, tsne_features)`` where
        ``wine_data`` is the combined frame with an added ``type`` column,
        ``scaled_features`` is the standardized feature matrix and
        ``tsne_features`` is its 2-component t-SNE embedding.
    """
    red_wine = pd.read_csv("./wine+quality/winequality-red.csv", delimiter=";")
    white_wine = pd.read_csv("./wine+quality/winequality-white.csv", delimiter=";")

    # Tag each row with its origin before the two frames are merged.
    red_wine["type"] = "red"
    white_wine["type"] = "white"
    wine_data = pd.concat([red_wine, white_wine], ignore_index=True)

    # Cluster on the measurements only: drop 'quality' and the 'type' tag.
    wine_features = wine_data.drop(columns=["quality", "type"])

    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(wine_features)

    # Fixed random_state keeps the 2-D layout reproducible between restarts.
    tsne = TSNE(n_components=2, perplexity=30, random_state=42)
    tsne_features = tsne.fit_transform(scaled_features)

    return wine_data, scaled_features, tsne_features


def _show_cluster_plot(embedding, labels, title):
    """Render a t-SNE scatter plot colored by cluster label.

    Args:
        embedding: 2-column array of t-SNE coordinates (x, y).
        labels: Cluster label per row; a named Series gives the legend title.
        title: Title drawn above the axes.
    """
    fig, ax = plt.subplots()
    sns.scatterplot(
        x=embedding[:, 0],
        y=embedding[:, 1],
        hue=labels,
        palette="coolwarm",
        ax=ax,
    )
    # Set the title on the explicit axes instead of pyplot's global state.
    ax.set_title(title)
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; without this each rerun
    # would leak one figure.
    plt.close(fig)


kmeans, dbscan, hierarchical = _load_models()

# st.cache_data hands back a fresh copy on every rerun, so adding cluster
# columns to wine_data below never pollutes the cached value.
wine_data, scaled_features, tsne_features = _prepare_data()

# Sidebar Navigation
page = st.sidebar.selectbox(
    "Select a Page",
    [
        "Data Overview",
        "K-Means Clustering",
        "DBSCAN Clustering",
        "Hierarchical Clustering",
    ],
)

if page == "Data Overview":
    st.header("📊 Data Overview")
    st.write("First 5 Rows of the Dataset:")
    st.dataframe(wine_data.head())

    st.write("### Missing Values")
    missing_values = wine_data.isnull().sum()
    missing_values = missing_values[missing_values > 0]
    if missing_values.empty:
        # An empty table is easy to misread as a rendering failure.
        st.info("No missing values found.")
    else:
        st.dataframe(missing_values)

elif page == "K-Means Clustering":
    st.header("🔹 K-Means Clustering")
    # The saved K-Means model is used as-is: predict() does not refit it.
    wine_data["Cluster_KMeans"] = kmeans.predict(scaled_features)
    _show_cluster_plot(
        tsne_features,
        wine_data["Cluster_KMeans"],
        "K-Means Clustering Visualization (t-SNE)",
    )

elif page == "DBSCAN Clustering":
    st.header("🟢 DBSCAN Clustering")
    # fit_predict() refits the loaded estimator on the current data.
    wine_data["Cluster_DBSCAN"] = dbscan.fit_predict(scaled_features)
    _show_cluster_plot(
        tsne_features,
        wine_data["Cluster_DBSCAN"],
        "DBSCAN Clustering Visualization (t-SNE)",
    )

elif page == "Hierarchical Clustering":
    st.header("🔺 Hierarchical Clustering")
    # fit_predict() refits the loaded estimator on the current data.
    wine_data["Cluster_Hierarchical"] = hierarchical.fit_predict(scaled_features)
    _show_cluster_plot(
        tsne_features,
        wine_data["Cluster_Hierarchical"],
        "Hierarchical Clustering Visualization (t-SNE)",
    )

st.sidebar.markdown("Developed with ❤️ using Streamlit")