XPMaster committed on
Commit
5d5343f
·
1 Parent(s): 7888962

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -46
app.py CHANGED
@@ -196,64 +196,50 @@ with tab2:
196
  & x \text{ is a data point in cluster } C_i.
197
  \end{align*}
198
  ''')
199
-
200
  st.write("""
201
  The K-Means algorithm tries to find the best centroids such that the \( \mathrm{WCSS} \) is minimized.
202
 
203
- ### Let's Visualize!
 
 
 
 
 
 
204
 
205
- Here, we've plotted the iris dataset using the first two features. You can adjust the number of clusters using the sidebar.
206
  """)
207
- # Sidebar for Advanced
208
- # st.sidebar.header('K-Means Parameters')
209
- # n_clusters_advanced = st.sidebar.slider('Number of Clusters (K)', 1, 10, 3)
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  # K-Means Algorithm for Advanced Tab
212
  kmeans_advanced = KMeans(n_clusters=n_clusters_advanced)
213
- y_kmeans_advanced = kmeans_advanced.fit_predict(X)
214
-
215
- # Create a DataFrame for easier plotting with plotly
216
- df_advanced = pd.DataFrame(X, columns=iris.feature_names)
217
- df_advanced['cluster'] = y_kmeans_advanced
218
 
219
- fig_advanced = px.scatter(df_advanced, x=df_advanced.columns[0], y=df_advanced.columns[1], color='cluster',
220
- title='K-Means Clustering for Advanced',
221
- labels={df_advanced.columns[0]: 'Feature 1', df_advanced.columns[1]: 'Feature 2'},
222
- color_continuous_scale=px.colors.qualitative.Set1)
223
 
224
- # Remove the legend
225
- fig_advanced.update_layout(showlegend=False)
226
-
227
- # Increase the size of the plot
228
- fig_advanced.update_layout(width=1200, height=500)
229
- fig_advanced.update_coloraxes(showscale=False)
230
- # Add user input as a star marker
231
- fig_advanced.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
232
-
233
- # Add annotation for user input
234
- fig_advanced.add_annotation(
235
- x=user_features[0],
236
- y=user_features[1],
237
- xshift=10,
238
- text="Your Flower",
239
- font=dict(color='white', size=30),
240
- arrowhead=2,
241
- ax=10,
242
- ay=-40
243
- )
244
-
245
- # Add centroids with group numbers
246
- for i, coord in enumerate(kmeans.cluster_centers_):
247
- fig_advanced.add_annotation(
248
- x=coord[0],
249
- y=coord[1],
250
- text="Group "+str(i+1),
251
- showarrow=True,
252
- font=dict(color='white', size=25)
253
- )
254
 
 
 
 
255
 
256
- st.plotly_chart(fig_advanced)
257
 
258
  st.write("""
259
  ### Interpretation
 
196
  & x \text{ is a data point in cluster } C_i.
197
  \end{align*}
198
  ''')
199
+
200
  st.write("""
201
  The K-Means algorithm tries to find the best centroids such that the \( \mathrm{WCSS} \) is minimized.
202
 
203
+ ### Principal Component Analysis (PCA)
204
+
205
+ PCA is a dimensionality reduction technique that transforms high-dimensional data into a lower-dimensional form, while retaining as much of the original variance as possible. It achieves this by identifying the 'directions' (or principal components) that maximize variance.
206
+
207
+ Mathematically, PCA seeks to find the eigenvectors and eigenvalues of the data's covariance matrix. These eigenvectors, ordered by decreasing eigenvalue (i.e. by the amount of variance each one explains), form the new 'axes' of the reduced space.
208
+
209
+ Using PCA for visualization helps in projecting the data onto the first two principal components, making it easier to spot patterns and clusters.
210
 
211
+ ### Let's Visualize!
212
  """)
213
+
214
+ # Check if 'use_pca' is already in the session state
215
+ if 'use_pca' not in st.session_state:
216
+ st.session_state.use_pca = True
217
+
218
+ if st.session_state.use_pca:
219
+ # Apply PCA for dimensionality reduction
220
+ pca = PCA(n_components=2)
221
+ X_transformed = pca.fit_transform(X)
222
+ user_features_transformed = pca.transform([user_features])[0]
223
+ else:
224
+ X_transformed = X[:, :2] # Just use the first two features for visualization
225
+ user_features_transformed = user_features[:2]
226
+
227
+ # Create a DataFrame for easier plotting with plotly
228
+ df_transformed = pd.DataFrame(X_transformed, columns=['Feature1', 'Feature2'])
229
 
230
  # K-Means Algorithm for Advanced Tab
231
  kmeans_advanced = KMeans(n_clusters=n_clusters_advanced)
232
+ y_kmeans_advanced = kmeans_advanced.fit_predict(X_transformed)
233
+ df_transformed['cluster'] = y_kmeans_advanced
 
 
 
234
 
235
+ # ... [rest of the visualization code]
 
 
 
236
 
237
+ st.plotly_chart(fig_advanced)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
+ # Button to toggle PCA
240
+ if st.button('Toggle PCA for Visualization'):
241
+ st.session_state.use_pca = not st.session_state.use_pca
242
 
 
243
 
244
  st.write("""
245
  ### Interpretation