Spaces:
Sleeping
Sleeping
File size: 4,317 Bytes
7fdfca3 f5b702a 7fdfca3 f5b702a 7fdfca3 f5b702a 7fdfca3 f5b702a 7fdfca3 f5b702a 7fdfca3 f5b702a 7fdfca3 f5b702a 7fdfca3 f5b702a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import streamlit as st
import joblib
import numpy as np
from sklearn.neighbors import NearestCentroid
# Page layout: two tabs, bound to `app` and `model_eval`, which the rest of
# the script enters as context managers to place widgets.
app, model_eval = st.tabs(["Application", "Model Evaluation"])

# Scaler object read from disk; the Application tab passes the user's inputs
# through it before prediction.
scaler = joblib.load("scaler.pkl")

# Display name of each clustering model -> pickle file it is loaded from.
# The key order is the order shown in the sidebar select box.
models = {
    "K-Means": "kmeans_model.pkl",
    "Gaussian Mixture": "gaussianMixture_model.pkl",
    "Hierarchical": "hierarchical_model.pkl",
}
with app:
    # Sidebar Model Selection
    selected_model = st.sidebar.selectbox(
        "Select Clustering Model", list(models.keys())
    )

    # Load Selected Model. joblib.load takes the path directly, matching how
    # scaler.pkl and clustered_data.pkl are loaded elsewhere in this script.
    model = joblib.load(models[selected_model])

    # Cluster Labels for Each Model: maps the integer cluster id produced by
    # each model to a human-readable customer segment. The ids differ per
    # model, so every model carries its own mapping.
    cluster_labels = {
        "K-Means": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Hierarchical": {
            2: "Balanced Consumer",
            1: "Premium Customer",
            3: "Impulsive Buyer",
            0: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Gaussian Mixture": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
    }

    # User Input
    st.title("Mall Customer Segmentation")
    # NOTE(review): the label asks for dollars, but the clustered dataset's
    # income column is named "Annual Income (k$)" — confirm which unit the
    # scaler was fitted on and align the label/input accordingly.
    income = st.number_input("Annual Income ($)", min_value=0, step=1)
    spending_score = st.slider(
        "Spending Score (1-100)", min_value=1, max_value=100
    )

    if st.button("Predict"):
        scaled_input = scaler.transform([[income, spending_score]])

        if selected_model in ["K-Means", "Gaussian Mixture"]:
            cluster = model.predict(scaled_input)[0]
        elif selected_model == "Hierarchical":
            # The hierarchical model is not asked to predict here; instead the
            # new point is assigned to the nearest centroid of the
            # precomputed hierarchical clusters.
            df_clustered = joblib.load("clustered_data.pkl")  # Ensure this file exists

            # Compute centroids for each cluster
            feature_columns = ["Annual Income (k$)", "Spending Score (1-100)"]
            centroids = (
                df_clustered.groupby("Cluster_Hierarchical")[feature_columns].mean()
            )

            # The query point has been passed through `scaler`, so the
            # centroids must be expressed in the same scaled space before
            # distances are compared; previously they were used as stored.
            # NOTE(review): this assumes clustered_data.pkl stores the
            # unscaled feature values — confirm against the training code.
            scaled_centroids = scaler.transform(centroids.to_numpy())

            # Use Nearest Centroid Classifier: one "sample" per cluster,
            # labelled with that cluster's id.
            clf = NearestCentroid()
            clf.fit(scaled_centroids, centroids.index)
            cluster = clf.predict(scaled_input)[0]

        # Display Prediction
        st.subheader("Customer Classification:")
        label = cluster_labels[selected_model].get(cluster)
        if label is None:
            # A cluster id without a label means the mapping above is out of
            # sync with the saved model; report it instead of raising KeyError.
            st.error(
                f"No segment label is defined for cluster {cluster} "
                f"of the {selected_model} model."
            )
        else:
            st.success(f"You are a: **{label}**")
with model_eval:
    # Introduction and dataset attribution.
    # The header emoji in this tab were mis-encoded in the source; the
    # original characters could not be recovered, so these are substitutes.
    st.header("📊 Model Evaluation")
    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")

    # Cluster segmentation plots, one image per algorithm.
    st.header("K Means Clustering ")
    st.image("KMeansClusteringSegmentation.png")
    st.header("Hierarchical Clustering ")
    st.image("HierarchicalClusteringSegmentation.png")
    st.header("Gaussian Mixture ")
    st.image("GaussianMixtureSegmentation.png")

    # EVALUATION METRICS
    st.subheader("📈 Evaluation Metrics")
    st.write("Silhouette and Davies-Bouldin Scores")

    # (section heading prefix, model name used in the sentence, metrics image)
    metric_sections = (
        ("K Means Clustering", "K Means Clustering", "kmeans_clustering_metrics.png"),
        ("Hierarchical Clustering", "Hierarchical Clustering", "hierarchical_clustering_metrics.png"),
        ("Gaussian Mixture", "Gaussian Mixture Clustering", "gmm_evaluation_metrics.png"),
    )
    for heading, model_name, image_path in metric_sections:
        st.header(f"{heading} Evaluation Metrics")
        st.write(f"The image below represents the **Silhouette and Davies-Bouldin Scores** of the {model_name} model.")
        st.image(image_path)

    st.header("Comparison")
    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")
|