Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import plotly.figure_factory as ff
|
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
from scipy.spatial import ConvexHull
|
| 12 |
from scipy.spatial import distance
|
|
|
|
| 13 |
|
| 14 |
st.set_page_config(layout="wide")
|
| 15 |
|
|
@@ -72,42 +73,70 @@ st.markdown("""
|
|
| 72 |
""", unsafe_allow_html=True)
|
| 73 |
|
| 74 |
with tab1:
|
| 75 |
-
st.write("""
|
| 76 |
-
### What is Clustering?
|
| 77 |
-
##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
|
| 78 |
-
""")
|
| 79 |
-
# K-Means Algorithm
|
| 80 |
-
kmeans = KMeans(n_clusters=n_clusters_advanced)
|
| 81 |
-
y_kmeans = kmeans.fit_predict(X)
|
| 82 |
-
predicted_cluster = kmeans.predict([user_features])
|
| 83 |
-
|
| 84 |
-
# Create a DataFrame for easier plotting with plotly
|
| 85 |
-
df = pd.DataFrame(X, columns=iris.feature_names)
|
| 86 |
-
df['cluster'] = y_kmeans
|
| 87 |
|
| 88 |
-
# fig = px.scatter(df, x=df.columns[0], y=df.columns[1], color='cluster',
|
| 89 |
-
# title='Visualizing Groups with K-Means Clustering',
|
| 90 |
-
# labels={df.columns[0]: 'Feature 1', df.columns[1]: 'Feature 2'},
|
| 91 |
-
# color_continuous_scale=px.colors.qualitative.Set1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
# # Remove the legend
|
| 94 |
-
# #fig.update_layout(showlegend=False)
|
| 95 |
-
# fig.update_layout(width=1200, height=500)
|
| 96 |
# # Add user input as a star marker
|
| 97 |
-
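# fig.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))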
|
| 98 |
-
# fig.update_coloraxes(showscale=False)
|
| 99 |
-
# #fig.update_traces(visible='legendonly')
|
| 100 |
-
# # Add annotation for user input
|
| 101 |
-
# fig.add_annotation(
|
| 102 |
-
# x=user_features[0],
|
| 103 |
-
# y=user_features[1],
|
| 104 |
-
# xshift=10,
|
| 105 |
-
# text="Your Flower ("+dmojis[(predicted_cluster[0]+1)]+")",
|
| 106 |
-
# font=dict(color='white', size=30),
|
| 107 |
-
# arrowhead=2,
|
| 108 |
-
# ax=10,
|
| 109 |
-
# ay=-40
|
| 110 |
-
# )
|
| 111 |
|
| 112 |
# # Add centroids with group numbers
|
| 113 |
# for i, coord in enumerate(kmeans.cluster_centers_):
|
|
@@ -118,24 +147,69 @@ with tab1:
|
|
| 118 |
# showarrow=True,
|
| 119 |
# font=dict(color='white', size=30)
|
| 120 |
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# For tab1
|
| 123 |
fig = go.Figure()
|
| 124 |
|
| 125 |
# Add shaded regions using convex hull
|
| 126 |
for cluster in np.unique(y_kmeans):
|
| 127 |
-
cluster_data = df[df['cluster'] == cluster]
|
| 128 |
-
x_data = cluster_data[df.columns[0]].values
|
| 129 |
-
y_data = cluster_data[df.columns[1]].values
|
| 130 |
if len(cluster_data) > 2: # ConvexHull requires at least 3 points
|
| 131 |
-
hull = ConvexHull(cluster_data[[df.columns[0], df.columns[1]]])
|
| 132 |
fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
|
| 133 |
|
| 134 |
# Add scatter plot
|
| 135 |
-
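fig.add_trace(go.Scatter(x=df[df.columns[0]], y=df[df.columns[1]], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))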
|
| 136 |
|
| 137 |
# Add user input as a star marker
|
| 138 |
-
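fig.add_trace(go.Scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))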
|
| 139 |
|
| 140 |
# Add centroids with group numbers
|
| 141 |
for i, coord in enumerate(kmeans.cluster_centers_):
|
|
@@ -149,19 +223,19 @@ with tab1:
|
|
| 149 |
|
| 150 |
# Update layout
|
| 151 |
fig.update_layout(width=1200, height=500)
|
| 152 |
-
|
| 153 |
st.write("""
|
| 154 |
### Visualizing Groups
|
| 155 |
##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
|
| 156 |
""")
|
| 157 |
-
#st.pyplot(fig)
|
| 158 |
st.plotly_chart(fig)
|
|
|
|
| 159 |
# Predict Cluster for User Input
|
| 160 |
-
dist_to_group1 = distance.euclidean(user_features, kmeans.cluster_centers_[0])
|
| 161 |
-
dist_to_group2 = distance.euclidean(user_features, kmeans.cluster_centers_[1])
|
| 162 |
st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
|
| 163 |
st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
|
| 164 |
-
|
| 165 |
st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
|
| 166 |
st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
|
| 167 |
|
|
|
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
from scipy.spatial import ConvexHull
|
| 12 |
from scipy.spatial import distance
|
| 13 |
+
from sklearn.decomposition import PCA
|
| 14 |
|
| 15 |
st.set_page_config(layout="wide")
|
| 16 |
|
|
|
|
| 73 |
""", unsafe_allow_html=True)
|
| 74 |
|
| 75 |
with tab1:
|
| 76 |
+
# st.write("""
|
| 77 |
+
# ### What is Clustering?
|
| 78 |
+
# ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
|
| 79 |
+
# """)
|
| 80 |
+
# # K-Means Algorithm
|
| 81 |
+
# kmeans = KMeans(n_clusters=n_clusters_advanced)
|
| 82 |
+
# y_kmeans = kmeans.fit_predict(X)
|
| 83 |
+
# predicted_cluster = kmeans.predict([user_features])
|
| 84 |
+
|
| 85 |
+
# # Create a DataFrame for easier plotting with plotly
|
| 86 |
+
# df = pd.DataFrame(X, columns=iris.feature_names)
|
| 87 |
+
# df['cluster'] = y_kmeans
|
| 88 |
|
| 89 |
+
# # fig = px.scatter(df, x=df.columns[0], y=df.columns[1], color='cluster',
|
| 90 |
+
# # title='Visualizing Groups with K-Means Clustering',
|
| 91 |
+
# # labels={df.columns[0]: 'Feature 1', df.columns[1]: 'Feature 2'},
|
| 92 |
+
# # color_continuous_scale=px.colors.qualitative.Set1)
|
| 93 |
+
|
| 94 |
+
# # # Remove the legend
|
| 95 |
+
# # #fig.update_layout(showlegend=False)
|
| 96 |
+
# # fig.update_layout(width=1200, height=500)
|
| 97 |
+
# # # Add user input as a star marker
|
| 98 |
+
# # fig.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
|
| 99 |
+
# # fig.update_coloraxes(showscale=False)
|
| 100 |
+
# # #fig.update_traces(visible='legendonly')
|
| 101 |
+
# # # Add annotation for user input
|
| 102 |
+
# # fig.add_annotation(
|
| 103 |
+
# # x=user_features[0],
|
| 104 |
+
# # y=user_features[1],
|
| 105 |
+
# # xshift=10,
|
| 106 |
+
# # text="Your Flower ("+dmojis[(predicted_cluster[0]+1)]+")",
|
| 107 |
+
# # font=dict(color='white', size=30),
|
| 108 |
+
# # arrowhead=2,
|
| 109 |
+
# # ax=10,
|
| 110 |
+
# # ay=-40
|
| 111 |
+
# # )
|
| 112 |
+
|
| 113 |
+
# # # Add centroids with group numbers
|
| 114 |
+
# # for i, coord in enumerate(kmeans.cluster_centers_):
|
| 115 |
+
# # fig.add_annotation(
|
| 116 |
+
# # x=coord[0],
|
| 117 |
+
# # y=coord[1],
|
| 118 |
+
# # text=dmojis[i+1],
|
| 119 |
+
# # showarrow=True,
|
| 120 |
+
# # font=dict(color='white', size=30)
|
| 121 |
+
# # )
|
| 122 |
+
|
| 123 |
+
# # For tab1
|
| 124 |
+
# fig = go.Figure()
|
| 125 |
+
|
| 126 |
+
# # Add shaded regions using convex hull
|
| 127 |
+
# for cluster in np.unique(y_kmeans):
|
| 128 |
+
# cluster_data = df[df['cluster'] == cluster]
|
| 129 |
+
# x_data = cluster_data[df.columns[0]].values
|
| 130 |
+
# y_data = cluster_data[df.columns[1]].values
|
| 131 |
+
# if len(cluster_data) > 2: # ConvexHull requires at least 3 points
|
| 132 |
+
# hull = ConvexHull(cluster_data[[df.columns[0], df.columns[1]]])
|
| 133 |
+
# fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
|
| 134 |
+
|
| 135 |
+
# # Add scatter plot
|
| 136 |
+
# fig.add_trace(go.Scatter(x=df[df.columns[0]], y=df[df.columns[1]], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
|
| 137 |
|
|
|
|
|
|
|
|
|
|
| 138 |
# # Add user input as a star marker
|
| 139 |
+
# fig.add_trace(go.Scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
# # Add centroids with group numbers
|
| 142 |
# for i, coord in enumerate(kmeans.cluster_centers_):
|
|
|
|
| 147 |
# showarrow=True,
|
| 148 |
# font=dict(color='white', size=30)
|
| 149 |
# )
|
| 150 |
+
|
| 151 |
+
# # Update layout
|
| 152 |
+
# fig.update_layout(width=1200, height=500)
|
| 153 |
+
|
| 154 |
+
# st.write("""
|
| 155 |
+
# ### Visualizing Groups
|
| 156 |
+
# ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
|
| 157 |
+
# """)
|
| 158 |
+
# #st.pyplot(fig)
|
| 159 |
+
# st.plotly_chart(fig)
|
| 160 |
+
# # Predict Cluster for User Input
|
| 161 |
+
# dist_to_group1 = distance.euclidean(user_features, kmeans.cluster_centers_[0])
|
| 162 |
+
# dist_to_group2 = distance.euclidean(user_features, kmeans.cluster_centers_[1])
|
| 163 |
+
# st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
|
| 164 |
+
# st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
|
| 165 |
+
|
| 166 |
+
# st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
|
| 167 |
+
# st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
|
| 168 |
+
|
| 169 |
+
# # Closing Note
|
| 170 |
+
# st.write("""
|
| 171 |
+
# ### Wrap Up
|
| 172 |
+
# ##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
|
| 173 |
+
# """)
|
| 174 |
|
| 175 |
+
st.write("""
|
| 176 |
+
### What is Clustering?
|
| 177 |
+
##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
|
| 178 |
+
""")
|
| 179 |
+
|
| 180 |
+
# Apply PCA for dimensionality reduction
|
| 181 |
+
pca = PCA(n_components=2)
|
| 182 |
+
X_pca = pca.fit_transform(X)
|
| 183 |
+
user_features_pca = pca.transform([user_features])[0]
|
| 184 |
+
|
| 185 |
+
# K-Means Algorithm
|
| 186 |
+
kmeans = KMeans(n_clusters=n_clusters_advanced)
|
| 187 |
+
y_kmeans = kmeans.fit_predict(X_pca)
|
| 188 |
+
|
| 189 |
+
# Predict the cluster for the user input in the PCA-transformed space
|
| 190 |
+
predicted_cluster = kmeans.predict([user_features_pca])
|
| 191 |
+
|
| 192 |
+
# Create a DataFrame for easier plotting with plotly
|
| 193 |
+
df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
|
| 194 |
+
df_pca['cluster'] = y_kmeans
|
| 195 |
+
|
| 196 |
# For tab1
|
| 197 |
fig = go.Figure()
|
| 198 |
|
| 199 |
# Add shaded regions using convex hull
|
| 200 |
for cluster in np.unique(y_kmeans):
|
| 201 |
+
cluster_data = df_pca[df_pca['cluster'] == cluster]
|
| 202 |
+
x_data = cluster_data['PCA1'].values
|
| 203 |
+
y_data = cluster_data['PCA2'].values
|
| 204 |
if len(cluster_data) > 2: # ConvexHull requires at least 3 points
|
| 205 |
+
hull = ConvexHull(cluster_data[['PCA1', 'PCA2']])
|
| 206 |
fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
|
| 207 |
|
| 208 |
# Add scatter plot
|
| 209 |
+
fig.add_trace(go.Scatter(x=df_pca['PCA1'], y=df_pca['PCA2'], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
|
| 210 |
|
| 211 |
# Add user input as a star marker
|
| 212 |
+
fig.add_trace(go.Scatter(x=[user_features_pca[0]], y=[user_features_pca[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
|
| 213 |
|
| 214 |
# Add centroids with group numbers
|
| 215 |
for i, coord in enumerate(kmeans.cluster_centers_):
|
|
|
|
| 223 |
|
| 224 |
# Update layout
|
| 225 |
fig.update_layout(width=1200, height=500)
|
| 226 |
+
|
| 227 |
st.write("""
|
| 228 |
### Visualizing Groups
|
| 229 |
##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
|
| 230 |
""")
|
|
|
|
| 231 |
st.plotly_chart(fig)
|
| 232 |
+
|
| 233 |
# Predict Cluster for User Input
|
| 234 |
+
dist_to_group1 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[0])
|
| 235 |
+
dist_to_group2 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[1])
|
| 236 |
st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
|
| 237 |
st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
|
| 238 |
+
|
| 239 |
st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
|
| 240 |
st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
|
| 241 |
|