Spaces:

CristopherWVSU
/

CustomerClustering

Sleeping

App Files Files Community

CristopherWVSU committed on Mar 16, 2025

Commit

f5b702a

1 Parent(s): 54d2273

Added more Models

Browse files

Files changed (11) hide show

GaussianMixtureSegmentation.png +0 -0
HierarchicalClusteringSegmentation.png +0 -0
KMeansClusteringSegmentation.png +0 -0
app.py +103 -27
clustered_data.pkl +3 -0
gaussianMixture_model.pkl +3 -0
gmm_evaluation_metrics.png +0 -0
hierarchical_clustering_metrics.png +0 -0
hierarchical_model.pkl +3 -0
kmeans_clustering_metrics.png +0 -0
main.ipynb +0 -0

GaussianMixtureSegmentation.png ADDED Viewed

HierarchicalClusteringSegmentation.png ADDED Viewed

KMeansClusteringSegmentation.png ADDED Viewed

app.py CHANGED Viewed

@@ -1,37 +1,113 @@
 import streamlit as st
 import joblib
-# Load Pretrained K-Means Model and Scaler
-with open("kmeans_model.pkl", "rb") as file:
-    kmeans = joblib.load(file)
-with open("scaler.pkl", "rb") as file:
-    scaler = joblib.load(file)
-# Mapping Cluster Numbers to Descriptive Labels
-cluster_labels = {
-    0: "Balanced Consumer",
-    1: "Premium Customer",
-    2: "Impulsive Buyer",
-    3: "Cautious Buyer",
-    4: "Budget-Conscious Customer"
 }
-# Streamlit UI
-st.title("Mall Customer Segmentation")
-st.write("Enter your details to classify your customer segment.")
-# User Input
-income = st.number_input("Annual Income ($)", min_value=0, step=1)
-spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)
-if st.button("Predict"):
-    # Scale Input
-    scaled_input = scaler.transform([[income, spending_score]])
-    # Predict Cluster
-    cluster = kmeans.predict(scaled_input)[0]
-    # Display Result
-    st.subheader("Customer Classification:")
-    st.success(f"You are a: **{cluster_labels[cluster]}**")

 import streamlit as st
 import joblib
+import numpy as np
+from sklearn.neighbors import NearestCentroid
+# Two top-level Streamlit tabs: the interactive predictor and the static
+# model-evaluation report. Each is filled in by a `with` block further down.
+app, model_eval = st.tabs(["Application", "Model Evaluation"])
+# Display name -> pickle path for each selectable clustering model.
+# Nothing is loaded here; the chosen file is loaded inside the Application tab.
+models = {
+    "K-Means": "kmeans_model.pkl",
+    "Gaussian Mixture": "gaussianMixture_model.pkl",
+    "Hierarchical": "hierarchical_model.pkl"
 }
+# Feature scaler applied to the user's input before prediction.
+scaler = joblib.load("scaler.pkl")
+with app:
+    # Sidebar Model Selection
+    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models.keys()))
+    # Load Selected Model
+    with open(models[selected_model], "rb") as file:
+        model = joblib.load(file)
+    # Cluster Labels for Each Model
+    cluster_labels = {
+        "K-Means": {
+            0: "Balanced Consumer",
+            1: "Premium Customer",
+            2: "Impulsive Buyer",
+            3: "Cautious Buyer",
+            4: "Budget-Conscious Customer"
+        },
+        "Hierarchical": {
+            2: "Balanced Consumer",
+            1: "Premium Customer",
+            3: "Impulsive Buyer",
+            0: "Cautious Buyer",
+            4: "Budget-Conscious Customer"
+        },
+        "Gaussian Mixture": {
+            0: "Balanced Consumer",
+            1: "Premium Customer",
+            2: "Impulsive Buyer",
+            3: "Cautious Buyer",
+            4: "Budget-Conscious Customer"
+        }
+    }
+    # User Input
+    st.title("Mall Customer Segmentation")
+    income = st.number_input("Annual Income ($)", min_value=0, step=1)
+    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)
+    if st.button("Predict"):
+        scaled_input = scaler.transform([[income, spending_score]])
+        if selected_model in ["K-Means", "Gaussian Mixture"]:
+            cluster = model.predict(scaled_input)[0]
+        elif selected_model == "Hierarchical":
+            # Load the dataset with assigned hierarchical clusters
+            # Load precomputed hierarchical clusters
+            df_clustered = joblib.load("clustered_data.pkl")  # Ensure this file exists
+            # Compute Centroids for Hierarchical Clustering
+            # Compute centroids for each cluster
+            centroids = df_clustered.groupby("Cluster_Hierarchical")[["Annual Income (k$)", "Spending Score (1-100)"]].mean()
+            # Use Nearest Centroid Classifier
+            clf = NearestCentroid()
+            clf.fit(centroids, centroids.index)
+            cluster = clf.predict(scaled_input)[0]
+        # Display Prediction
+        st.subheader("Customer Classification:")
+        st.success(f"You are a: **{cluster_labels[selected_model][cluster]}**")
+with model_eval:
+    st.header("📊 Model Evaluation")
+    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
+    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")
+    st.header("K Means Clustering ")
+    st.image("KMeansClusteringSegmentation.png")
+    st.header("Huerarchical Clustering ")
+    st.image("HierarchicalClusteringSegmentation.png")
+    st.header("Gaussian Mixture ")
+    st.image("GaussianMixtureSegmentation.png")
+    # EVALUATION METRICS
+    st.subheader("📌 Evaluation Metrics")
+    st.write("Silhouette and Davis Bouldin Scores")
+    st.header("K Means Clustering Evaluation Metrics")
+    st.write("The image below represents the **Silhouette and Davis Bouldin Scores** of the K Means Clustering model.")
+    st.image("kmeans_clustering_metrics.png")
+    st.header("Hierarchical Clustering Evaluation Metrics")
+    st.write("The image below represents the **Silhouette and Davis Bouldin Scores** of the Hierarchical Clustering model.")
+    st.image("hierarchical_clustering_metrics.png")
+    st.header("Gaussian Mixture Evaluation Metrics")
+    st.write("The image below represents the **Silhouette and Davis Bouldin Scores** of the Gaussian Mixture Clustering model.")
+    st.image("gmm_evaluation_metrics.png")
+    st.header("Comparison")
+    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")

clustered_data.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7422d83a5def3de89a70ee205d4815da23092ff83d0abc6d4d45dbbc89fb7d76
+size 6828

gaussianMixture_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a12ba1de6067855dad9e9fe7ab4ad25b18b18a26ef0ea0b1ee81b6cd657026a6
+size 1590

gmm_evaluation_metrics.png ADDED Viewed

hierarchical_clustering_metrics.png ADDED Viewed

hierarchical_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bd23592dd3652c7e40a83f1afd8a362ef64d40f93e7ab3a810c3d9d27c9d49d
+size 5447

kmeans_clustering_metrics.png ADDED Viewed

main.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff