Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -196,64 +196,50 @@ with tab2:
|
|
| 196 |
& x \text{ is a data point in cluster } C_i.
|
| 197 |
\end{align*}
|
| 198 |
''')
|
| 199 |
-
|
| 200 |
st.write("""
|
| 201 |
The K-Means algorithm tries to find the best centroids such that the \( \mathrm{WCSS} \) is minimized.
|
| 202 |
|
| 203 |
-
###
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
-
|
| 206 |
""")
|
| 207 |
-
|
| 208 |
-
#
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
# K-Means Algorithm for Advanced Tab
|
| 212 |
kmeans_advanced = KMeans(n_clusters=n_clusters_advanced)
|
| 213 |
-
y_kmeans_advanced = kmeans_advanced.fit_predict(
|
| 214 |
-
|
| 215 |
-
# Create a DataFrame for easier plotting with plotly
|
| 216 |
-
df_advanced = pd.DataFrame(X, columns=iris.feature_names)
|
| 217 |
-
df_advanced['cluster'] = y_kmeans_advanced
|
| 218 |
|
| 219 |
-
|
| 220 |
-
title='K-Means Clustering for Advanced',
|
| 221 |
-
labels={df_advanced.columns[0]: 'Feature 1', df_advanced.columns[1]: 'Feature 2'},
|
| 222 |
-
color_continuous_scale=px.colors.qualitative.Set1)
|
| 223 |
|
| 224 |
-
|
| 225 |
-
fig_advanced.update_layout(showlegend=False)
|
| 226 |
-
|
| 227 |
-
# Increase the size of the plot
|
| 228 |
-
fig_advanced.update_layout(width=1200, height=500)
|
| 229 |
-
fig_advanced.update_coloraxes(showscale=False)
|
| 230 |
-
# Add user input as a star marker
|
| 231 |
-
fig_advanced.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
|
| 232 |
-
|
| 233 |
-
# Add annotation for user input
|
| 234 |
-
fig_advanced.add_annotation(
|
| 235 |
-
x=user_features[0],
|
| 236 |
-
y=user_features[1],
|
| 237 |
-
xshift=10,
|
| 238 |
-
text="Your Flower",
|
| 239 |
-
font=dict(color='white', size=30),
|
| 240 |
-
arrowhead=2,
|
| 241 |
-
ax=10,
|
| 242 |
-
ay=-40
|
| 243 |
-
)
|
| 244 |
-
|
| 245 |
-
# Add centroids with group numbers
|
| 246 |
-
for i, coord in enumerate(kmeans.cluster_centers_):
|
| 247 |
-
fig_advanced.add_annotation(
|
| 248 |
-
x=coord[0],
|
| 249 |
-
y=coord[1],
|
| 250 |
-
text="Group "+str(i+1),
|
| 251 |
-
showarrow=True,
|
| 252 |
-
font=dict(color='white', size=25)
|
| 253 |
-
)
|
| 254 |
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
-
st.plotly_chart(fig_advanced)
|
| 257 |
|
| 258 |
st.write("""
|
| 259 |
### Interpretation
|
|
|
|
| 196 |
& x \text{ is a data point in cluster } C_i.
|
| 197 |
\end{align*}
|
| 198 |
''')
|
| 199 |
+
|
| 200 |
st.write("""
|
| 201 |
The K-Means algorithm tries to find the best centroids such that the \( \mathrm{WCSS} \) is minimized.
|
| 202 |
|
| 203 |
+
### Principal Component Analysis (PCA)
|
| 204 |
+
|
| 205 |
+
PCA is a dimensionality reduction technique that transforms high-dimensional data into a lower-dimensional form, while retaining as much of the original variance as possible. It achieves this by identifying the 'directions' (or principal components) that maximize variance.
|
| 206 |
+
|
| 207 |
+
Mathematically, PCA seeks to find the eigenvectors and eigenvalues of the data's covariance matrix. These eigenvectors, ordered by their corresponding eigenvalues, form the new 'axes' of the reduced space.
|
| 208 |
+
|
| 209 |
+
Using PCA for visualization helps in projecting the data onto the first two principal components, making it easier to spot patterns and clusters.
|
| 210 |
|
| 211 |
+
### Let's Visualize!
|
| 212 |
""")
|
| 213 |
+
|
| 214 |
+
# Check if 'use_pca' is already in the session state
|
| 215 |
+
if 'use_pca' not in st.session_state:
|
| 216 |
+
st.session_state.use_pca = True
|
| 217 |
+
|
| 218 |
+
if st.session_state.use_pca:
|
| 219 |
+
# Apply PCA for dimensionality reduction
|
| 220 |
+
pca = PCA(n_components=2)
|
| 221 |
+
X_transformed = pca.fit_transform(X)
|
| 222 |
+
user_features_transformed = pca.transform([user_features])[0]
|
| 223 |
+
else:
|
| 224 |
+
X_transformed = X[:, :2] # Just use the first two features for visualization
|
| 225 |
+
user_features_transformed = user_features[:2]
|
| 226 |
+
|
| 227 |
+
# Create a DataFrame for easier plotting with plotly
|
| 228 |
+
df_transformed = pd.DataFrame(X_transformed, columns=['Feature1', 'Feature2'])
|
| 229 |
|
| 230 |
# K-Means Algorithm for Advanced Tab
|
| 231 |
kmeans_advanced = KMeans(n_clusters=n_clusters_advanced)
|
| 232 |
+
y_kmeans_advanced = kmeans_advanced.fit_predict(X_transformed)
|
| 233 |
+
df_transformed['cluster'] = y_kmeans_advanced
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
+
# ... [rest of the visualization code]
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
+
st.plotly_chart(fig_advanced)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
+
# Button to toggle PCA
|
| 240 |
+
if st.button('Toggle PCA for Visualization'):
|
| 241 |
+
st.session_state.use_pca = not st.session_state.use_pca
|
| 242 |
|
|
|
|
| 243 |
|
| 244 |
st.write("""
|
| 245 |
### Interpretation
|