XPMaster committed on
Commit
62b26f7
·
1 Parent(s): 246739d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -44
app.py CHANGED
@@ -10,6 +10,7 @@ import plotly.figure_factory as ff
10
  import plotly.graph_objects as go
11
  from scipy.spatial import ConvexHull
12
  from scipy.spatial import distance
 
13
 
14
  st.set_page_config(layout="wide")
15
 
@@ -72,42 +73,70 @@ st.markdown("""
72
  """, unsafe_allow_html=True)
73
 
74
  with tab1:
75
- st.write("""
76
- ### What is Clustering?
77
- ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
78
- """)
79
- # K-Means Algorithm
80
- kmeans = KMeans(n_clusters=n_clusters_advanced)
81
- y_kmeans = kmeans.fit_predict(X)
82
- predicted_cluster = kmeans.predict([user_features])
83
-
84
- # Create a DataFrame for easier plotting with plotly
85
- df = pd.DataFrame(X, columns=iris.feature_names)
86
- df['cluster'] = y_kmeans
87
 
88
- # fig = px.scatter(df, x=df.columns[0], y=df.columns[1], color='cluster',
89
- # title='Visualizing Groups with K-Means Clustering',
90
- # labels={df.columns[0]: 'Feature 1', df.columns[1]: 'Feature 2'},
91
- # color_continuous_scale=px.colors.qualitative.Set1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- # # Remove the legend
94
- # #fig.update_layout(showlegend=False)
95
- # fig.update_layout(width=1200, height=500)
96
  # # Add user input as a star marker
97
- # fig.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
98
- # fig.update_coloraxes(showscale=False)
99
- # #fig.update_traces(visible='legendonly')
100
- # # Add annotation for user input
101
- # fig.add_annotation(
102
- # x=user_features[0],
103
- # y=user_features[1],
104
- # xshift=10,
105
- # text="Your Flower ("+dmojis[(predicted_cluster[0]+1)]+")",
106
- # font=dict(color='white', size=30),
107
- # arrowhead=2,
108
- # ax=10,
109
- # ay=-40
110
- # )
111
 
112
  # # Add centroids with group numbers
113
  # for i, coord in enumerate(kmeans.cluster_centers_):
@@ -118,24 +147,69 @@ with tab1:
118
  # showarrow=True,
119
  # font=dict(color='white', size=30)
120
  # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  # For tab1
123
  fig = go.Figure()
124
 
125
  # Add shaded regions using convex hull
126
  for cluster in np.unique(y_kmeans):
127
- cluster_data = df[df['cluster'] == cluster]
128
- x_data = cluster_data[df.columns[0]].values
129
- y_data = cluster_data[df.columns[1]].values
130
  if len(cluster_data) > 2: # ConvexHull requires at least 3 points
131
- hull = ConvexHull(cluster_data[[df.columns[0], df.columns[1]]])
132
  fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
133
 
134
  # Add scatter plot
135
- fig.add_trace(go.Scatter(x=df[df.columns[0]], y=df[df.columns[1]], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
136
 
137
  # Add user input as a star marker
138
- fig.add_trace(go.Scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
139
 
140
  # Add centroids with group numbers
141
  for i, coord in enumerate(kmeans.cluster_centers_):
@@ -149,19 +223,19 @@ with tab1:
149
 
150
  # Update layout
151
  fig.update_layout(width=1200, height=500)
152
-
153
  st.write("""
154
  ### Visualizing Groups
155
  ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
156
  """)
157
- #st.pyplot(fig)
158
  st.plotly_chart(fig)
 
159
  # Predict Cluster for User Input
160
- dist_to_group1 = distance.euclidean(user_features, kmeans.cluster_centers_[0])
161
- dist_to_group2 = distance.euclidean(user_features, kmeans.cluster_centers_[1])
162
  st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
163
  st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
164
-
165
  st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
166
  st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
167
 
 
10
  import plotly.graph_objects as go
11
  from scipy.spatial import ConvexHull
12
  from scipy.spatial import distance
13
+ from sklearn.decomposition import PCA
14
 
15
  st.set_page_config(layout="wide")
16
 
 
73
  """, unsafe_allow_html=True)
74
 
75
  with tab1:
76
+ # st.write("""
77
+ # ### What is Clustering?
78
+ # ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
79
+ # """)
80
+ # # K-Means Algorithm
81
+ # kmeans = KMeans(n_clusters=n_clusters_advanced)
82
+ # y_kmeans = kmeans.fit_predict(X)
83
+ # predicted_cluster = kmeans.predict([user_features])
84
+
85
+ # # Create a DataFrame for easier plotting with plotly
86
+ # df = pd.DataFrame(X, columns=iris.feature_names)
87
+ # df['cluster'] = y_kmeans
88
 
89
+ # # fig = px.scatter(df, x=df.columns[0], y=df.columns[1], color='cluster',
90
+ # # title='Visualizing Groups with K-Means Clustering',
91
+ # # labels={df.columns[0]: 'Feature 1', df.columns[1]: 'Feature 2'},
92
+ # # color_continuous_scale=px.colors.qualitative.Set1)
93
+
94
+ # # # Remove the legend
95
+ # # #fig.update_layout(showlegend=False)
96
+ # # fig.update_layout(width=1200, height=500)
97
+ # # # Add user input as a star marker
98
+ # # fig.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
99
+ # # fig.update_coloraxes(showscale=False)
100
+ # # #fig.update_traces(visible='legendonly')
101
+ # # # Add annotation for user input
102
+ # # fig.add_annotation(
103
+ # # x=user_features[0],
104
+ # # y=user_features[1],
105
+ # # xshift=10,
106
+ # # text="Your Flower ("+dmojis[(predicted_cluster[0]+1)]+")",
107
+ # # font=dict(color='white', size=30),
108
+ # # arrowhead=2,
109
+ # # ax=10,
110
+ # # ay=-40
111
+ # # )
112
+
113
+ # # # Add centroids with group numbers
114
+ # # for i, coord in enumerate(kmeans.cluster_centers_):
115
+ # # fig.add_annotation(
116
+ # # x=coord[0],
117
+ # # y=coord[1],
118
+ # # text=dmojis[i+1],
119
+ # # showarrow=True,
120
+ # # font=dict(color='white', size=30)
121
+ # # )
122
+
123
+ # # For tab1
124
+ # fig = go.Figure()
125
+
126
+ # # Add shaded regions using convex hull
127
+ # for cluster in np.unique(y_kmeans):
128
+ # cluster_data = df[df['cluster'] == cluster]
129
+ # x_data = cluster_data[df.columns[0]].values
130
+ # y_data = cluster_data[df.columns[1]].values
131
+ # if len(cluster_data) > 2: # ConvexHull requires at least 3 points
132
+ # hull = ConvexHull(cluster_data[[df.columns[0], df.columns[1]]])
133
+ # fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
134
+
135
+ # # Add scatter plot
136
+ # fig.add_trace(go.Scatter(x=df[df.columns[0]], y=df[df.columns[1]], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
137
 
 
 
 
138
  # # Add user input as a star marker
139
+ # fig.add_trace(go.Scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  # # Add centroids with group numbers
142
  # for i, coord in enumerate(kmeans.cluster_centers_):
 
147
  # showarrow=True,
148
  # font=dict(color='white', size=30)
149
  # )
150
+
151
+ # # Update layout
152
+ # fig.update_layout(width=1200, height=500)
153
+
154
+ # st.write("""
155
+ # ### Visualizing Groups
156
+ # ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
157
+ # """)
158
+ # #st.pyplot(fig)
159
+ # st.plotly_chart(fig)
160
+ # # Predict Cluster for User Input
161
+ # dist_to_group1 = distance.euclidean(user_features, kmeans.cluster_centers_[0])
162
+ # dist_to_group2 = distance.euclidean(user_features, kmeans.cluster_centers_[1])
163
+ # st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
164
+ # st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
165
+
166
+ # st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
167
+ # st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
168
+
169
+ # # Closing Note
170
+ # st.write("""
171
+ # ### Wrap Up
172
+ # ##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
173
+ # """)
174
 
175
+ st.write("""
176
+ ### What is Clustering?
177
+ ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
178
+ """)
179
+
180
+ # Apply PCA for dimensionality reduction
181
+ pca = PCA(n_components=2)
182
+ X_pca = pca.fit_transform(X)
183
+ user_features_pca = pca.transform([user_features])[0]
184
+
185
+ # K-Means Algorithm
186
+ kmeans = KMeans(n_clusters=n_clusters_advanced)
187
+ y_kmeans = kmeans.fit_predict(X_pca)
188
+
189
+ # Predict the cluster for the user input in the PCA-transformed space
190
+ predicted_cluster = kmeans.predict([user_features_pca])
191
+
192
+ # Create a DataFrame for easier plotting with plotly
193
+ df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
194
+ df_pca['cluster'] = y_kmeans
195
+
196
  # For tab1
197
  fig = go.Figure()
198
 
199
  # Add shaded regions using convex hull
200
  for cluster in np.unique(y_kmeans):
201
+ cluster_data = df_pca[df_pca['cluster'] == cluster]
202
+ x_data = cluster_data['PCA1'].values
203
+ y_data = cluster_data['PCA2'].values
204
  if len(cluster_data) > 2: # ConvexHull requires at least 3 points
205
+ hull = ConvexHull(cluster_data[['PCA1', 'PCA2']])
206
  fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
207
 
208
  # Add scatter plot
209
+ fig.add_trace(go.Scatter(x=df_pca['PCA1'], y=df_pca['PCA2'], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
210
 
211
  # Add user input as a star marker
212
+ fig.add_trace(go.Scatter(x=[user_features_pca[0]], y=[user_features_pca[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
213
 
214
  # Add centroids with group numbers
215
  for i, coord in enumerate(kmeans.cluster_centers_):
 
223
 
224
  # Update layout
225
  fig.update_layout(width=1200, height=500)
226
+
227
  st.write("""
228
  ### Visualizing Groups
229
  ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
230
  """)
 
231
  st.plotly_chart(fig)
232
+
233
  # Predict Cluster for User Input
234
+ dist_to_group1 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[0])
235
+ dist_to_group2 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[1])
236
  st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
237
  st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
238
+
239
  st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
240
  st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
241