WineCluster / app.py
Ci-Dave's picture
Added files
3bed50f
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
# Load the pre-fitted clustering estimators from disk.
# NOTE(review): the K-Means model is later used with .predict() on features
# standardized by a scaler fitted in this script — presumably the model was
# trained on identically standardized data; confirm against the training code.
kmeans = joblib.load("kmeans_model.joblib")
dbscan = joblib.load("dbscan_model.joblib")
hierarchical = joblib.load("hierarchical_model.joblib")

# Load the red and white wine datasets (semicolon-separated CSV files).
red_wine = pd.read_csv("./wine+quality/winequality-red.csv", delimiter=";")
white_wine = pd.read_csv("./wine+quality/winequality-white.csv", delimiter=";")

# Tag every row with its origin so the two sets stay distinguishable
# after they are concatenated.
red_wine["type"] = "red"
white_wine["type"] = "white"

# Combine both datasets into a single frame with a fresh 0..n-1 index.
wine_data = pd.concat([red_wine, white_wine], ignore_index=True)

# Drop 'quality' and the string-valued 'type' column; the remaining columns
# are the inputs that get standardized and clustered.
wine_features = wine_data.drop(columns=["quality", "type"])

# Standardize the features.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(wine_features)

# 2-D t-SNE projection used only for plotting the clusters.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)


@st.cache_data(show_spinner=False)
def _compute_tsne_embedding(features: np.ndarray) -> np.ndarray:
    """Return the 2-D t-SNE embedding of *features*.

    Streamlit re-executes this script on every widget interaction, so an
    uncached embedding would be refitted on each page switch. Caching keys
    the result on the content of *features*, so the fit only runs again when
    the standardized data actually changes.

    ``show_spinner=False`` keeps this call from emitting any UI element,
    because it runs before ``st.set_page_config`` further down the script.
    """
    return tsne.fit_transform(features)


tsne_features = _compute_tsne_embedding(scaled_features)
# Streamlit app: configure the page (tab title, wide layout) and show the
# main heading.
st.set_page_config(page_title="Wine Clustering Analysis", layout="wide")
st.title("🍷 Wine Clustering Analysis")

# Sidebar navigation: the selected entry decides which page is rendered below.
page_options = [
    "Data Overview",
    "K-Means Clustering",
    "DBSCAN Clustering",
    "Hierarchical Clustering",
]
page = st.sidebar.selectbox("Select a Page", page_options)
def _show_cluster_plot(column, labels, title):
    """Store cluster *labels* in ``wine_data[column]`` and plot them.

    The points are drawn at their t-SNE coordinates (``tsne_features``) and
    coloured by cluster label.

    Args:
        column: Name of the ``wine_data`` column that receives the labels.
        labels: One cluster label per row of ``wine_data``.
        title: Title shown above the scatter plot.
    """
    wine_data[column] = labels
    fig, ax = plt.subplots()
    sns.scatterplot(
        x=tsne_features[:, 0],
        y=tsne_features[:, 1],
        hue=wine_data[column],
        palette="coolwarm",
        ax=ax,
    )
    # Set the title on this axes explicitly instead of relying on pyplot's
    # global "current axes" state.
    ax.set_title(title)
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; without
    # this, every script rerun would leave another figure in memory.
    plt.close(fig)


# Render the page chosen in the sidebar.
if page == "Data Overview":
    st.header("📊 Data Overview")
    st.write("First 5 Rows of the Dataset:")
    st.dataframe(wine_data.head())
    st.write("### Missing Values")
    # Only columns that actually contain missing values are listed.
    missing_values = wine_data.isnull().sum()
    st.dataframe(missing_values[missing_values > 0])
elif page == "K-Means Clustering":
    st.header("🔹 K-Means Clustering")
    # The loaded K-Means model assigns each row to a cluster via predict().
    _show_cluster_plot(
        "Cluster_KMeans",
        kmeans.predict(scaled_features),
        "K-Means Clustering Visualization (t-SNE)",
    )
elif page == "DBSCAN Clustering":
    st.header("🟢 DBSCAN Clustering")
    # fit_predict() refits the loaded estimator on the current data.
    _show_cluster_plot(
        "Cluster_DBSCAN",
        dbscan.fit_predict(scaled_features),
        "DBSCAN Clustering Visualization (t-SNE)",
    )
elif page == "Hierarchical Clustering":
    st.header("🔺 Hierarchical Clustering")
    # fit_predict() refits the loaded estimator on the current data.
    _show_cluster_plot(
        "Cluster_Hierarchical",
        hierarchical.fit_predict(scaled_features),
        "Hierarchical Clustering Visualization (t-SNE)",
    )

# Footer shown in the sidebar on every page.
st.sidebar.markdown("Developed with ❤️ using Streamlit")