aikanava committed on
Commit
0e9897a
·
1 Parent(s): 8e3a8f4

uploaded files

Browse files
Files changed (3) hide show
  1. Mall_Customers.csv +201 -0
  2. app.py +109 -0
  3. requirements.txt +5 -0
Mall_Customers.csv ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
2
+ 1,Male,19,15,39
3
+ 2,Male,21,15,81
4
+ 3,Female,20,16,6
5
+ 4,Female,23,16,77
6
+ 5,Female,31,17,40
7
+ 6,Female,22,17,76
8
+ 7,Female,35,18,6
9
+ 8,Female,23,18,94
10
+ 9,Male,64,19,3
11
+ 10,Female,30,19,72
12
+ 11,Male,67,19,14
13
+ 12,Female,35,19,99
14
+ 13,Female,58,20,15
15
+ 14,Female,24,20,77
16
+ 15,Male,37,20,13
17
+ 16,Male,22,20,79
18
+ 17,Female,35,21,35
19
+ 18,Male,20,21,66
20
+ 19,Male,52,23,29
21
+ 20,Female,35,23,98
22
+ 21,Male,35,24,35
23
+ 22,Male,25,24,73
24
+ 23,Female,46,25,5
25
+ 24,Male,31,25,73
26
+ 25,Female,54,28,14
27
+ 26,Male,29,28,82
28
+ 27,Female,45,28,32
29
+ 28,Male,35,28,61
30
+ 29,Female,40,29,31
31
+ 30,Female,23,29,87
32
+ 31,Male,60,30,4
33
+ 32,Female,21,30,73
34
+ 33,Male,53,33,4
35
+ 34,Male,18,33,92
36
+ 35,Female,49,33,14
37
+ 36,Female,21,33,81
38
+ 37,Female,42,34,17
39
+ 38,Female,30,34,73
40
+ 39,Female,36,37,26
41
+ 40,Female,20,37,75
42
+ 41,Female,65,38,35
43
+ 42,Male,24,38,92
44
+ 43,Male,48,39,36
45
+ 44,Female,31,39,61
46
+ 45,Female,49,39,28
47
+ 46,Female,24,39,65
48
+ 47,Female,50,40,55
49
+ 48,Female,27,40,47
50
+ 49,Female,29,40,42
51
+ 50,Female,31,40,42
52
+ 51,Female,49,42,52
53
+ 52,Male,33,42,60
54
+ 53,Female,31,43,54
55
+ 54,Male,59,43,60
56
+ 55,Female,50,43,45
57
+ 56,Male,47,43,41
58
+ 57,Female,51,44,50
59
+ 58,Male,69,44,46
60
+ 59,Female,27,46,51
61
+ 60,Male,53,46,46
62
+ 61,Male,70,46,56
63
+ 62,Male,19,46,55
64
+ 63,Female,67,47,52
65
+ 64,Female,54,47,59
66
+ 65,Male,63,48,51
67
+ 66,Male,18,48,59
68
+ 67,Female,43,48,50
69
+ 68,Female,68,48,48
70
+ 69,Male,19,48,59
71
+ 70,Female,32,48,47
72
+ 71,Male,70,49,55
73
+ 72,Female,47,49,42
74
+ 73,Female,60,50,49
75
+ 74,Female,60,50,56
76
+ 75,Male,59,54,47
77
+ 76,Male,26,54,54
78
+ 77,Female,45,54,53
79
+ 78,Male,40,54,48
80
+ 79,Female,23,54,52
81
+ 80,Female,49,54,42
82
+ 81,Male,57,54,51
83
+ 82,Male,38,54,55
84
+ 83,Male,67,54,41
85
+ 84,Female,46,54,44
86
+ 85,Female,21,54,57
87
+ 86,Male,48,54,46
88
+ 87,Female,55,57,58
89
+ 88,Female,22,57,55
90
+ 89,Female,34,58,60
91
+ 90,Female,50,58,46
92
+ 91,Female,68,59,55
93
+ 92,Male,18,59,41
94
+ 93,Male,48,60,49
95
+ 94,Female,40,60,40
96
+ 95,Female,32,60,42
97
+ 96,Male,24,60,52
98
+ 97,Female,47,60,47
99
+ 98,Female,27,60,50
100
+ 99,Male,48,61,42
101
+ 100,Male,20,61,49
102
+ 101,Female,23,62,41
103
+ 102,Female,49,62,48
104
+ 103,Male,67,62,59
105
+ 104,Male,26,62,55
106
+ 105,Male,49,62,56
107
+ 106,Female,21,62,42
108
+ 107,Female,66,63,50
109
+ 108,Male,54,63,46
110
+ 109,Male,68,63,43
111
+ 110,Male,66,63,48
112
+ 111,Male,65,63,52
113
+ 112,Female,19,63,54
114
+ 113,Female,38,64,42
115
+ 114,Male,19,64,46
116
+ 115,Female,18,65,48
117
+ 116,Female,19,65,50
118
+ 117,Female,63,65,43
119
+ 118,Female,49,65,59
120
+ 119,Female,51,67,43
121
+ 120,Female,50,67,57
122
+ 121,Male,27,67,56
123
+ 122,Female,38,67,40
124
+ 123,Female,40,69,58
125
+ 124,Male,39,69,91
126
+ 125,Female,23,70,29
127
+ 126,Female,31,70,77
128
+ 127,Male,43,71,35
129
+ 128,Male,40,71,95
130
+ 129,Male,59,71,11
131
+ 130,Male,38,71,75
132
+ 131,Male,47,71,9
133
+ 132,Male,39,71,75
134
+ 133,Female,25,72,34
135
+ 134,Female,31,72,71
136
+ 135,Male,20,73,5
137
+ 136,Female,29,73,88
138
+ 137,Female,44,73,7
139
+ 138,Male,32,73,73
140
+ 139,Male,19,74,10
141
+ 140,Female,35,74,72
142
+ 141,Female,57,75,5
143
+ 142,Male,32,75,93
144
+ 143,Female,28,76,40
145
+ 144,Female,32,76,87
146
+ 145,Male,25,77,12
147
+ 146,Male,28,77,97
148
+ 147,Male,48,77,36
149
+ 148,Female,32,77,74
150
+ 149,Female,34,78,22
151
+ 150,Male,34,78,90
152
+ 151,Male,43,78,17
153
+ 152,Male,39,78,88
154
+ 153,Female,44,78,20
155
+ 154,Female,38,78,76
156
+ 155,Female,47,78,16
157
+ 156,Female,27,78,89
158
+ 157,Male,37,78,1
159
+ 158,Female,30,78,78
160
+ 159,Male,34,78,1
161
+ 160,Female,30,78,73
162
+ 161,Female,56,79,35
163
+ 162,Female,29,79,83
164
+ 163,Male,19,81,5
165
+ 164,Female,31,81,93
166
+ 165,Male,50,85,26
167
+ 166,Female,36,85,75
168
+ 167,Male,42,86,20
169
+ 168,Female,33,86,95
170
+ 169,Female,36,87,27
171
+ 170,Male,32,87,63
172
+ 171,Male,40,87,13
173
+ 172,Male,28,87,75
174
+ 173,Male,36,87,10
175
+ 174,Male,36,87,92
176
+ 175,Female,52,88,13
177
+ 176,Female,30,88,86
178
+ 177,Male,58,88,15
179
+ 178,Male,27,88,69
180
+ 179,Male,59,93,14
181
+ 180,Male,35,93,90
182
+ 181,Female,37,97,32
183
+ 182,Female,32,97,86
184
+ 183,Male,46,98,15
185
+ 184,Female,29,98,88
186
+ 185,Female,41,99,39
187
+ 186,Male,30,99,97
188
+ 187,Female,54,101,24
189
+ 188,Male,28,101,68
190
+ 189,Female,41,103,17
191
+ 190,Female,36,103,85
192
+ 191,Female,34,103,23
193
+ 192,Female,32,103,69
194
+ 193,Male,33,113,8
195
+ 194,Female,38,113,91
196
+ 195,Female,47,120,16
197
+ 196,Female,35,120,79
198
+ 197,Female,45,126,28
199
+ 198,Male,32,126,74
200
+ 199,Male,32,137,18
201
+ 200,Male,30,137,83
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.cluster import KMeans
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+
10
# Streamlit page settings.
# The page icon must be a real emoji; the previous value was a mis-encoded
# byte sequence of the robot emoji, so it is restored here.
st.set_page_config(
    page_title="K-Means Clustering App",
    page_icon="🤖",
    layout="wide",
)

# Title and one-line description shown at the top of the page.
st.title("🤖 K-Means Clustering Explorer")
st.write("This app performs **K-Means Clustering** on a customer segmentation dataset.")
16
+
17
# Load dataset (local file)
@st.cache_data
def load_data() -> pd.DataFrame:
    """Load the mall-customer dataset that ships next to this script.

    The CSV is expected to live in the same folder as this file, so the path
    is resolved relative to the script rather than the current working
    directory. This keeps the app working when it is launched from another
    directory.

    Returns:
        The contents of ``Mall_Customers.csv`` as a DataFrame.
    """
    from pathlib import Path

    csv_path = Path(__file__).resolve().parent / "Mall_Customers.csv"
    return pd.read_csv(csv_path)
22
+
23
data = load_data()

# Select features: clustering uses only income and spending score.
features = data[['Annual Income (k$)', 'Spending Score (1-100)']]

# Sidebar
st.sidebar.header("Settings")
k = st.sidebar.slider("Select number of clusters (K)", 1, 10, 3)

# Perform KMeans clustering.
# n_init is set explicitly: its default changed between scikit-learn
# releases, so pinning it keeps results consistent whatever version the
# unpinned requirements resolve to.
kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
clusters = kmeans.fit_predict(features)
data['Cluster'] = clusters


@st.cache_data
def _compute_wcss(feature_frame, max_k=10):
    """Return the within-cluster sum of squares for K = 1..max_k.

    The sweep does not depend on the K chosen in the sidebar, so it is cached
    instead of refitting ``max_k`` models on every rerun.

    Args:
        feature_frame: DataFrame of numeric features to cluster.
        max_k: Largest number of clusters to evaluate (inclusive).

    Returns:
        A list of inertia values, one per K, in increasing order of K.
    """
    return [
        KMeans(n_clusters=n, init='k-means++', n_init=10, random_state=42)
        .fit(feature_frame)
        .inertia_
        for n in range(1, max_k + 1)
    ]


# Calculate Elbow Method data
wcss = _compute_wcss(features)

# Analyze clusters: mean income and spending score per cluster.
cluster_summary = data.groupby('Cluster')[['Annual Income (k$)', 'Spending Score (1-100)']].mean()
46
+
47
def interpret_cluster(income: float, spending: float) -> str:
    """Return a human-readable segment label for a cluster's average profile.

    Args:
        income: Average annual income of the cluster, in thousands of dollars.
        spending: Average spending score of the cluster (1-100 scale).

    Returns:
        One of five labels. Profiles that fall between the high/low
        thresholds (for example a mid-range income) are reported as
        "Standard Customers".
    """
    high_income = income >= 70
    low_income = income <= 40

    # The emoji prefixes below were mis-encoded byte sequences in the previous
    # version; they are restored to the intended characters.
    if high_income and spending >= 50:
        return "💎 Premium Customers (High Income, High Spending)"
    if low_income and spending >= 60:
        return "🔔 Potential Risk Customers (Low Income, High Spending)"
    if high_income and spending <= 40:
        return "💼 Careful Spenders (High Income, Low Spending)"
    if low_income and spending <= 40:
        return "🛒 Budget Customers (Low Income, Low Spending)"
    return "🧩 Standard Customers"
58
+
59
# Create Tabs
# The tab icons were mis-encoded byte sequences and are restored here.
# NOTE(review): the last icon lost a byte in the garbling; the memo emoji is
# assumed -- confirm against the intended design.
tab1, tab2, tab3, tab4 = st.tabs([
    "📄 Raw Dataset",
    "📈 Elbow Method",
    "🎯 Clustered Customers",
    "📝 Cluster Explanations",
])

with tab1:
    st.subheader("🧹 Raw Dataset")
    # `data` has already gained the derived 'Cluster' column by this point;
    # drop it so this tab shows only the columns read from the CSV.
    st.dataframe(data.drop(columns=['Cluster'], errors='ignore').head())
65
+
66
with tab2:
    st.subheader("📈 Elbow Method (to find optimal K)")
    fig, ax = plt.subplots()
    # The x axis is derived from the number of WCSS values so the two always match.
    ax.plot(range(1, len(wcss) + 1), wcss, marker='o')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS (Within Cluster Sum of Squares)')
    ax.set_title('The Elbow Method')
    st.pyplot(fig)
    # Figures made with plt.subplots() stay registered with pyplot until they
    # are closed; close this one so reruns do not accumulate open figures.
    plt.close(fig)
74
+
75
with tab3:
    st.subheader("🎯 Clustered Customers")
    fig2, ax2 = plt.subplots()
    # One distinct colour per cluster.
    palette = sns.color_palette("bright", k)
    sns.scatterplot(
        x='Annual Income (k$)',
        y='Spending Score (1-100)',
        hue='Cluster',
        palette=palette,
        data=data,
        ax=ax2,
        s=100,
    )
    # Overlay the fitted centroids; columns 0 and 1 follow the feature order
    # used for fitting (income, then spending score).
    ax2.scatter(
        kmeans.cluster_centers_[:, 0],
        kmeans.cluster_centers_[:, 1],
        s=300,
        c='black',
        marker='X',
        label='Centroids',
    )
    ax2.legend()
    ax2.set_title('Customer Segments')
    st.pyplot(fig2)
    # Close the figure after rendering so reruns do not accumulate open figures.
    plt.close(fig2)
99
+
100
with tab4:
    # NOTE(review): the heading icon lost a byte when it was mis-encoded; the
    # memo emoji is assumed -- confirm against the intended design.
    st.subheader("📝 Cluster Explanations")
    # One line per cluster, labelled from its average income and spending score.
    for cluster_num, row in cluster_summary.iterrows():
        explanation = interpret_cluster(row['Annual Income (k$)'], row['Spending Score (1-100)'])
        st.markdown(f"**Cluster {cluster_num}:** {explanation}")
    # Per-cluster means, with the highest value in each column highlighted.
    st.dataframe(cluster_summary.style.highlight_max(axis=0))
106
+
107
# Footer: horizontal rule followed by a caption.
st.markdown("---")
# The heart emoji was a mis-encoded byte sequence and is restored here.
st.caption("Made with ❤️ using Streamlit")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ scikit-learn
4
+ matplotlib
5
+ seaborn