CristopherWVSU committed on
Commit
f5b702a
·
1 Parent(s): 54d2273

Added more Models

Browse files
GaussianMixtureSegmentation.png ADDED
HierarchicalClusteringSegmentation.png ADDED
KMeansClusteringSegmentation.png ADDED
app.py CHANGED
@@ -1,37 +1,113 @@
1
  import streamlit as st
2
  import joblib
 
 
3
 
4
- # Load Pretrained K-Means Model and Scaler
5
- with open("kmeans_model.pkl", "rb") as file:
6
- kmeans = joblib.load(file)
7
 
8
- with open("scaler.pkl", "rb") as file:
9
- scaler = joblib.load(file)
10
-
11
- # Mapping Cluster Numbers to Descriptive Labels
12
- cluster_labels = {
13
- 0: "Balanced Consumer",
14
- 1: "Premium Customer",
15
- 2: "Impulsive Buyer",
16
- 3: "Cautious Buyer",
17
- 4: "Budget-Conscious Customer"
18
  }
19
 
20
- # Streamlit UI
21
- st.title("Mall Customer Segmentation")
22
- st.write("Enter your details to classify your customer segment.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # User Input
25
- income = st.number_input("Annual Income ($)", min_value=0, step=1)
26
- spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)
27
 
28
- if st.button("Predict"):
29
- # Scale Input
30
- scaled_input = scaler.transform([[income, spending_score]])
31
 
32
- # Predict Cluster
33
- cluster = kmeans.predict(scaled_input)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Display Result
36
- st.subheader("Customer Classification:")
37
- st.success(f"You are a: **{cluster_labels[cluster]}**")
 
1
  import streamlit as st
2
  import joblib
3
+ import numpy as np
4
+ from sklearn.neighbors import NearestCentroid
5
 
6
+ # STREAMLIT TABS
7
+ app, model_eval = st.tabs(["Application", "Model Evaluation"])
 
8
 
9
+ # Load Models
10
+ models = {
11
+ "K-Means": "kmeans_model.pkl",
12
+ "Gaussian Mixture": "gaussianMixture_model.pkl",
13
+ "Hierarchical": "hierarchical_model.pkl"
 
 
 
 
 
14
  }
15
 
16
+ scaler = joblib.load("scaler.pkl")
17
+
18
+
19
+ with app:
20
+ # Sidebar Model Selection
21
+ selected_model = st.sidebar.selectbox("Select Clustering Model", list(models.keys()))
22
+
23
+ # Load Selected Model
24
+ with open(models[selected_model], "rb") as file:
25
+ model = joblib.load(file)
26
+
27
+ # Cluster Labels for Each Model
28
+ cluster_labels = {
29
+ "K-Means": {
30
+ 0: "Balanced Consumer",
31
+ 1: "Premium Customer",
32
+ 2: "Impulsive Buyer",
33
+ 3: "Cautious Buyer",
34
+ 4: "Budget-Conscious Customer"
35
+ },
36
+ "Hierarchical": {
37
+ 2: "Balanced Consumer",
38
+ 1: "Premium Customer",
39
+ 3: "Impulsive Buyer",
40
+ 0: "Cautious Buyer",
41
+ 4: "Budget-Conscious Customer"
42
+ },
43
+ "Gaussian Mixture": {
44
+ 0: "Balanced Consumer",
45
+ 1: "Premium Customer",
46
+ 2: "Impulsive Buyer",
47
+ 3: "Cautious Buyer",
48
+ 4: "Budget-Conscious Customer"
49
+ }
50
+ }
51
+
52
+ # User Input
53
+ st.title("Mall Customer Segmentation")
54
+ income = st.number_input("Annual Income ($)", min_value=0, step=1)
55
+ spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)
56
+
57
+ if st.button("Predict"):
58
+ scaled_input = scaler.transform([[income, spending_score]])
59
+
60
+ if selected_model in ["K-Means", "Gaussian Mixture"]:
61
+ cluster = model.predict(scaled_input)[0]
62
+
63
+ elif selected_model == "Hierarchical":
64
+ # Load the dataset with assigned hierarchical clusters
65
+ # Load precomputed hierarchical clusters
66
+ df_clustered = joblib.load("clustered_data.pkl") # Ensure this file exists
67
+
68
+
69
+ # Compute Centroids for Hierarchical Clustering
70
+ # Compute centroids for each cluster
71
+ centroids = df_clustered.groupby("Cluster_Hierarchical")[["Annual Income (k$)", "Spending Score (1-100)"]].mean()
72
+
73
+ # Use Nearest Centroid Classifier
74
+ clf = NearestCentroid()
75
+ clf.fit(centroids, centroids.index)
76
+ cluster = clf.predict(scaled_input)[0]
77
+
78
+ # Display Prediction
79
+ st.subheader("Customer Classification:")
80
+ st.success(f"You are a: **{cluster_labels[selected_model][cluster]}**")
81
+ with model_eval:
82
+ st.header("📊 Model Evaluation")
83
+ st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
84
+ st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")
85
+
86
+ st.header("K Means Clustering ")
87
+ st.image("KMeansClusteringSegmentation.png")
88
 
89
+ st.header("Hierarchical Clustering ")
90
+ st.image("HierarchicalClusteringSegmentation.png")
 
91
 
92
+ st.header("Gaussian Mixture ")
93
+ st.image("GaussianMixtureSegmentation.png")
 
94
 
95
+ # EVALUATION METRICS
96
+ st.subheader("📌 Evaluation Metrics")
97
+ st.write("Silhouette and Davies-Bouldin Scores")
98
+
99
+ st.header("K Means Clustering Evaluation Metrics")
100
+ st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the K Means Clustering model.")
101
+ st.image("kmeans_clustering_metrics.png")
102
+
103
+ st.header("Hierarchical Clustering Evaluation Metrics")
104
+ st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the Hierarchical Clustering model.")
105
+ st.image("hierarchical_clustering_metrics.png")
106
+
107
+ st.header("Gaussian Mixture Evaluation Metrics")
108
+ st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the Gaussian Mixture Clustering model.")
109
+ st.image("gmm_evaluation_metrics.png")
110
+
111
 
112
+ st.header("Comparison")
113
+ st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")
 
clustered_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7422d83a5def3de89a70ee205d4815da23092ff83d0abc6d4d45dbbc89fb7d76
3
+ size 6828
gaussianMixture_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a12ba1de6067855dad9e9fe7ab4ad25b18b18a26ef0ea0b1ee81b6cd657026a6
3
+ size 1590
gmm_evaluation_metrics.png ADDED
hierarchical_clustering_metrics.png ADDED
hierarchical_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bd23592dd3652c7e40a83f1afd8a362ef64d40f93e7ab3a810c3d9d27c9d49d
3
+ size 5447
kmeans_clustering_metrics.png ADDED
main.ipynb CHANGED
The diff for this file is too large to render. See raw diff