XPMaster committed on
Commit
e43973a
·
1 Parent(s): 62b26f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -17
app.py CHANGED
@@ -177,39 +177,50 @@ with tab1:
177
  ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
178
  """)
179
 
180
- # Apply PCA for dimensionality reduction
181
- pca = PCA(n_components=2)
182
- X_pca = pca.fit_transform(X)
183
- user_features_pca = pca.transform([user_features])[0]
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  # K-Means Algorithm
186
  kmeans = KMeans(n_clusters=n_clusters_advanced)
187
- y_kmeans = kmeans.fit_predict(X_pca)
188
 
189
- # Predict the cluster for the user input in the PCA-transformed space
190
- predicted_cluster = kmeans.predict([user_features_pca])
191
 
192
  # Create a DataFrame for easier plotting with plotly
193
- df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
194
- df_pca['cluster'] = y_kmeans
195
 
196
  # For tab1
197
  fig = go.Figure()
198
 
199
  # Add shaded regions using convex hull
200
  for cluster in np.unique(y_kmeans):
201
- cluster_data = df_pca[df_pca['cluster'] == cluster]
202
- x_data = cluster_data['PCA1'].values
203
- y_data = cluster_data['PCA2'].values
204
  if len(cluster_data) > 2: # ConvexHull requires at least 3 points
205
- hull = ConvexHull(cluster_data[['PCA1', 'PCA2']])
206
  fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
207
 
208
  # Add scatter plot
209
- fig.add_trace(go.Scatter(x=df_pca['PCA1'], y=df_pca['PCA2'], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
210
 
211
  # Add user input as a star marker
212
- fig.add_trace(go.Scatter(x=[user_features_pca[0]], y=[user_features_pca[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
213
 
214
  # Add centroids with group numbers
215
  for i, coord in enumerate(kmeans.cluster_centers_):
@@ -231,8 +242,8 @@ with tab1:
231
  st.plotly_chart(fig)
232
 
233
  # Predict Cluster for User Input
234
- dist_to_group1 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[0])
235
- dist_to_group2 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[1])
236
  st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
237
  st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
238
 
@@ -245,6 +256,7 @@ with tab1:
245
  ##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
246
  """)
247
 
 
248
  with tab2:
249
  st.write("""
250
  ## Advanced Overview of Clustering
 
177
  ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
178
  """)
179
 
180
+ # Option to toggle PCA
181
+ use_pca = st.checkbox('Use PCA for Visualization', value=True)
182
+
183
+ if use_pca:
184
+ st.write("""
185
+ ##### 🧠 PCA (Principal Component Analysis) is like looking at a messy room from the best angle to see the most mess. It helps us see our data more clearly!
186
+ """)
187
+
188
+ # Apply PCA for dimensionality reduction
189
+ pca = PCA(n_components=2)
190
+ X_transformed = pca.fit_transform(X)
191
+ user_features_transformed = pca.transform([user_features])[0]
192
+ else:
193
+ X_transformed = X[:, :2] # Just use the first two features for visualization
194
+ user_features_transformed = user_features[:2]
195
 
196
  # K-Means Algorithm
197
  kmeans = KMeans(n_clusters=n_clusters_advanced)
198
+ y_kmeans = kmeans.fit_predict(X_transformed)
199
 
200
+ # Predict the cluster for the user input in the transformed space
201
+ predicted_cluster = kmeans.predict([user_features_transformed])
202
 
203
  # Create a DataFrame for easier plotting with plotly
204
+ df_transformed = pd.DataFrame(X_transformed, columns=['Feature1', 'Feature2'])
205
+ df_transformed['cluster'] = y_kmeans
206
 
207
  # For tab1
208
  fig = go.Figure()
209
 
210
  # Add shaded regions using convex hull
211
  for cluster in np.unique(y_kmeans):
212
+ cluster_data = df_transformed[df_transformed['cluster'] == cluster]
213
+ x_data = cluster_data['Feature1'].values
214
+ y_data = cluster_data['Feature2'].values
215
  if len(cluster_data) > 2: # ConvexHull requires at least 3 points
216
+ hull = ConvexHull(cluster_data[['Feature1', 'Feature2']])
217
  fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
218
 
219
  # Add scatter plot
220
+ fig.add_trace(go.Scatter(x=df_transformed['Feature1'], y=df_transformed['Feature2'], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
221
 
222
  # Add user input as a star marker
223
+ fig.add_trace(go.Scatter(x=[user_features_transformed[0]], y=[user_features_transformed[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
224
 
225
  # Add centroids with group numbers
226
  for i, coord in enumerate(kmeans.cluster_centers_):
 
242
  st.plotly_chart(fig)
243
 
244
  # Predict Cluster for User Input
245
+ dist_to_group1 = distance.euclidean(user_features_transformed, kmeans.cluster_centers_[0])
246
+ dist_to_group2 = distance.euclidean(user_features_transformed, kmeans.cluster_centers_[1])
247
  st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
248
  st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
249
 
 
256
  ##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
257
  """)
258
 
259
+
260
  with tab2:
261
  st.write("""
262
  ## Advanced Overview of Clustering