Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -177,39 +177,50 @@ with tab1:
|
|
| 177 |
##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
|
| 178 |
""")
|
| 179 |
|
| 180 |
-
#
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
# K-Means Algorithm
|
| 186 |
kmeans = KMeans(n_clusters=n_clusters_advanced)
|
| 187 |
-
y_kmeans = kmeans.fit_predict(
|
| 188 |
|
| 189 |
-
# Predict the cluster for the user input in the
|
| 190 |
-
predicted_cluster = kmeans.predict([
|
| 191 |
|
| 192 |
# Create a DataFrame for easier plotting with plotly
|
| 193 |
-
|
| 194 |
-
|
| 195 |
|
| 196 |
# For tab1
|
| 197 |
fig = go.Figure()
|
| 198 |
|
| 199 |
# Add shaded regions using convex hull
|
| 200 |
for cluster in np.unique(y_kmeans):
|
| 201 |
-
cluster_data =
|
| 202 |
-
x_data = cluster_data['
|
| 203 |
-
y_data = cluster_data['
|
| 204 |
if len(cluster_data) > 2: # ConvexHull requires at least 3 points
|
| 205 |
-
hull = ConvexHull(cluster_data[['
|
| 206 |
fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
|
| 207 |
|
| 208 |
# Add scatter plot
|
| 209 |
-
fig.add_trace(go.Scatter(x=
|
| 210 |
|
| 211 |
# Add user input as a star marker
|
| 212 |
-
fig.add_trace(go.Scatter(x=[
|
| 213 |
|
| 214 |
# Add centroids with group numbers
|
| 215 |
for i, coord in enumerate(kmeans.cluster_centers_):
|
|
@@ -231,8 +242,8 @@ with tab1:
|
|
| 231 |
st.plotly_chart(fig)
|
| 232 |
|
| 233 |
# Predict Cluster for User Input
|
| 234 |
-
dist_to_group1 = distance.euclidean(
|
| 235 |
-
dist_to_group2 = distance.euclidean(
|
| 236 |
st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
|
| 237 |
st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
|
| 238 |
|
|
@@ -245,6 +256,7 @@ with tab1:
|
|
| 245 |
##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
|
| 246 |
""")
|
| 247 |
|
|
|
|
| 248 |
with tab2:
|
| 249 |
st.write("""
|
| 250 |
## Advanced Overview of Clustering
|
|
|
|
| 177 |
##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
|
| 178 |
""")
|
| 179 |
|
| 180 |
+
# Option to toggle PCA
|
| 181 |
+
use_pca = st.checkbox('Use PCA for Visualization', value=True)
|
| 182 |
+
|
| 183 |
+
if use_pca:
|
| 184 |
+
st.write("""
|
| 185 |
+
##### 🧠 PCA (Principal Component Analysis) is like looking at a messy room from the best angle to see the most mess. It helps us see our data more clearly!
|
| 186 |
+
""")
|
| 187 |
+
|
| 188 |
+
# Apply PCA for dimensionality reduction
|
| 189 |
+
pca = PCA(n_components=2)
|
| 190 |
+
X_transformed = pca.fit_transform(X)
|
| 191 |
+
user_features_transformed = pca.transform([user_features])[0]
|
| 192 |
+
else:
|
| 193 |
+
X_transformed = X[:, :2] # Just use the first two features for visualization
|
| 194 |
+
user_features_transformed = user_features[:2]
|
| 195 |
|
| 196 |
# K-Means Algorithm
|
| 197 |
kmeans = KMeans(n_clusters=n_clusters_advanced)
|
| 198 |
+
y_kmeans = kmeans.fit_predict(X_transformed)
|
| 199 |
|
| 200 |
+
# Predict the cluster for the user input in the transformed space
|
| 201 |
+
predicted_cluster = kmeans.predict([user_features_transformed])
|
| 202 |
|
| 203 |
# Create a DataFrame for easier plotting with plotly
|
| 204 |
+
df_transformed = pd.DataFrame(X_transformed, columns=['Feature1', 'Feature2'])
|
| 205 |
+
df_transformed['cluster'] = y_kmeans
|
| 206 |
|
| 207 |
# For tab1
|
| 208 |
fig = go.Figure()
|
| 209 |
|
| 210 |
# Add shaded regions using convex hull
|
| 211 |
for cluster in np.unique(y_kmeans):
|
| 212 |
+
cluster_data = df_transformed[df_transformed['cluster'] == cluster]
|
| 213 |
+
x_data = cluster_data['Feature1'].values
|
| 214 |
+
y_data = cluster_data['Feature2'].values
|
| 215 |
if len(cluster_data) > 2: # ConvexHull requires at least 3 points
|
| 216 |
+
hull = ConvexHull(cluster_data[['Feature1', 'Feature2']])
|
| 217 |
fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
|
| 218 |
|
| 219 |
# Add scatter plot
|
| 220 |
+
fig.add_trace(go.Scatter(x=df_transformed['Feature1'], y=df_transformed['Feature2'], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
|
| 221 |
|
| 222 |
# Add user input as a star marker
|
| 223 |
+
fig.add_trace(go.Scatter(x=[user_features_transformed[0]], y=[user_features_transformed[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
|
| 224 |
|
| 225 |
# Add centroids with group numbers
|
| 226 |
for i, coord in enumerate(kmeans.cluster_centers_):
|
|
|
|
| 242 |
st.plotly_chart(fig)
|
| 243 |
|
| 244 |
# Predict Cluster for User Input
|
| 245 |
+
dist_to_group1 = distance.euclidean(user_features_transformed, kmeans.cluster_centers_[0])
|
| 246 |
+
dist_to_group2 = distance.euclidean(user_features_transformed, kmeans.cluster_centers_[1])
|
| 247 |
st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
|
| 248 |
st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
|
| 249 |
|
|
|
|
| 256 |
##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
|
| 257 |
""")
|
| 258 |
|
| 259 |
+
|
| 260 |
with tab2:
|
| 261 |
st.write("""
|
| 262 |
## Advanced Overview of Clustering
|