XPMaster committed on
Commit
4795f03
·
1 Parent(s): 5d5343f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -202,12 +202,12 @@ with tab2:
202
 
203
  ### Principal Component Analysis (PCA)
204
 
205
- PCA is a dimensionality reduction technique that transforms high-dimensional data into a lower-dimensional form, while retaining as much of the original variance as possible. It achieves this by identifying the 'directions' (or principal components) that maximize variance.
206
 
207
- Mathematically, PCA seeks to find the eigenvectors and eigenvalues of the data's covariance matrix. These eigenvectors, ordered by their corresponding eigenvalues, form the new 'axes' of the reduced space.
 
 
208
 
209
- Using PCA for visualization helps in projecting the data onto the first two principal components, making it easier to spot patterns and clusters.
210
-
211
  ### Let's Visualize!
212
  """)
213
 
@@ -224,15 +224,36 @@ with tab2:
224
  X_transformed = X[:, :2] # Just use the first two features for visualization
225
  user_features_transformed = user_features[:2]
226
 
227
- # Create a DataFrame for easier plotting with plotly
228
- df_transformed = pd.DataFrame(X_transformed, columns=['Feature1', 'Feature2'])
229
-
230
  # K-Means Algorithm for Advanced Tab
231
  kmeans_advanced = KMeans(n_clusters=n_clusters_advanced)
232
  y_kmeans_advanced = kmeans_advanced.fit_predict(X_transformed)
 
 
 
233
  df_transformed['cluster'] = y_kmeans_advanced
234
 
235
- # ... [rest of the visualization code]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
  st.plotly_chart(fig_advanced)
238
 
@@ -240,7 +261,6 @@ with tab2:
240
  if st.button('Toggle PCA for Visualization'):
241
  st.session_state.use_pca = not st.session_state.use_pca
242
 
243
-
244
  st.write("""
245
  ### Interpretation
246
 
@@ -250,6 +270,8 @@ with tab2:
250
 
251
  **Feel free to adjust the number of clusters to see how data points get re-grouped!**
252
  """)
 
 
253
  with about:
254
  st.title("About")
255
  st.markdown("""
 
202
 
203
  ### Principal Component Analysis (PCA)
204
 
205
+ PCA is a dimensionality reduction technique that identifies the axes (principal components) in the dataset that maximize variance. It's like finding the best angle to view data so that differences between data points are most apparent. Mathematically, PCA aims to find orthogonal vectors in the original feature space that capture the most variance in the data.
206
 
207
+ The first principal component captures the most variance, the second principal component (which is orthogonal to the first) captures the second most, and so on.
208
+
209
+ Using PCA can help in visualizing high-dimensional data in a 2D or 3D space, making patterns more discernible.
210
 
 
 
211
  ### Let's Visualize!
212
  """)
213
 
 
224
  X_transformed = X[:, :2] # Just use the first two features for visualization
225
  user_features_transformed = user_features[:2]
226
 
 
 
 
227
  # K-Means Algorithm for Advanced Tab
228
  kmeans_advanced = KMeans(n_clusters=n_clusters_advanced)
229
  y_kmeans_advanced = kmeans_advanced.fit_predict(X_transformed)
230
+
231
+ # Create a DataFrame for easier plotting with plotly
232
+ df_transformed = pd.DataFrame(X_transformed, columns=['Feature1', 'Feature2'])
233
  df_transformed['cluster'] = y_kmeans_advanced
234
 
235
+ fig_advanced = px.scatter(df_transformed, x='Feature1', y='Feature2', color='cluster',
236
+ title='K-Means Clustering for Advanced',
237
+ color_continuous_scale=px.colors.qualitative.Set1)
238
+
239
+ # Remove the legend
240
+ fig_advanced.update_layout(showlegend=False)
241
+
242
+ # Increase the size of the plot
243
+ fig_advanced.update_layout(width=1200, height=500)
244
+ fig_advanced.update_coloraxes(showscale=False)
245
+ # Add user input as a star marker
246
+ fig_advanced.add_scatter(x=[user_features_transformed[0]], y=[user_features_transformed[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
247
+
248
+ # Add centroids with group numbers
249
+ for i, coord in enumerate(kmeans_advanced.cluster_centers_):
250
+ fig_advanced.add_annotation(
251
+ x=coord[0],
252
+ y=coord[1],
253
+ text="Group "+str(i+1),
254
+ showarrow=True,
255
+ font=dict(color='white', size=25)
256
+ )
257
 
258
  st.plotly_chart(fig_advanced)
259
 
 
261
  if st.button('Toggle PCA for Visualization'):
262
  st.session_state.use_pca = not st.session_state.use_pca
263
 
 
264
  st.write("""
265
  ### Interpretation
266
 
 
270
 
271
  **Feel free to adjust the number of clusters to see how data points get re-grouped!**
272
  """)
273
+
274
+
275
  with about:
276
  st.title("About")
277
  st.markdown("""