Spaces:

XPMaster
/

clustering_ed

Build error

App Files Community

XPMaster committed on Aug 23, 2023

Commit

62b26f7

1 Parent(s): 246739d

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -44

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import plotly.figure_factory as ff
 import plotly.graph_objects as go
 from scipy.spatial import ConvexHull
 from scipy.spatial import distance
 st.set_page_config(layout="wide")
@@ -72,42 +73,70 @@ st.markdown("""
 """, unsafe_allow_html=True)
 with tab1:
-    st.write("""
-    ### What is Clustering?
-    ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
-    """)
-    # K-Means Algorithm
-    kmeans = KMeans(n_clusters=n_clusters_advanced)
-    y_kmeans = kmeans.fit_predict(X)
-    predicted_cluster = kmeans.predict([user_features])
-    # Create a DataFrame for easier plotting with plotly
-    df = pd.DataFrame(X, columns=iris.feature_names)
-    df['cluster'] = y_kmeans
-    # fig = px.scatter(df, x=df.columns[0], y=df.columns[1], color='cluster',
-    #                  title='Visualizing Groups with K-Means Clustering',
-    #                  labels={df.columns[0]: 'Feature 1', df.columns[1]: 'Feature 2'},
-    #                  color_continuous_scale=px.colors.qualitative.Set1)
-    # # Remove the legend
-    # #fig.update_layout(showlegend=False)
-    # fig.update_layout(width=1200, height=500)
     # # Add user input as a star marker
-    # fig.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
-    # fig.update_coloraxes(showscale=False)
-    # #fig.update_traces(visible='legendonly')
-    # # Add annotation for user input
-    # fig.add_annotation(
-    #     x=user_features[0],
-    #     y=user_features[1],
-    #     xshift=10,
-    #     text="Your Flower ("+dmojis[(predicted_cluster[0]+1)]+")",
-    #     font=dict(color='white', size=30),
-    #     arrowhead=2,
-    #     ax=10,
-    #     ay=-40
-    # )
     # # Add centroids with group numbers
     # for i, coord in enumerate(kmeans.cluster_centers_):
@@ -118,24 +147,69 @@ with tab1:
     #         showarrow=True,
     #         font=dict(color='white', size=30)
     #     )
     # For tab1
     fig = go.Figure()
     # Add shaded regions using convex hull
     for cluster in np.unique(y_kmeans):
-        cluster_data = df[df['cluster'] == cluster]
-        x_data = cluster_data[df.columns[0]].values
-        y_data = cluster_data[df.columns[1]].values
         if len(cluster_data) > 2:  # ConvexHull requires at least 3 points
-            hull = ConvexHull(cluster_data[[df.columns[0], df.columns[1]]])
             fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
     # Add scatter plot
-    fig.add_trace(go.Scatter(x=df[df.columns[0]], y=df[df.columns[1]], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
     # Add user input as a star marker
-    fig.add_trace(go.Scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
     # Add centroids with group numbers
     for i, coord in enumerate(kmeans.cluster_centers_):
@@ -149,19 +223,19 @@ with tab1:
     # Update layout
     fig.update_layout(width=1200, height=500)
     st.write("""
     ### Visualizing Groups
     ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
     """)
-    #st.pyplot(fig)
     st.plotly_chart(fig)
     # Predict Cluster for User Input
-    dist_to_group1 = distance.euclidean(user_features, kmeans.cluster_centers_[0])
-    dist_to_group2 = distance.euclidean(user_features, kmeans.cluster_centers_[1])
     st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
     st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
     st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
     st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")

 import plotly.graph_objects as go
 from scipy.spatial import ConvexHull
 from scipy.spatial import distance
+from sklearn.decomposition import PCA
 st.set_page_config(layout="wide")
 """, unsafe_allow_html=True)
 with tab1:
+    # st.write("""
+    # ### What is Clustering?
+    # ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
+    # """)
+    # # K-Means Algorithm
+    # kmeans = KMeans(n_clusters=n_clusters_advanced)
+    # y_kmeans = kmeans.fit_predict(X)
+    # predicted_cluster = kmeans.predict([user_features])
+    # # Create a DataFrame for easier plotting with plotly
+    # df = pd.DataFrame(X, columns=iris.feature_names)
+    # df['cluster'] = y_kmeans
+    # # fig = px.scatter(df, x=df.columns[0], y=df.columns[1], color='cluster',
+    # #                  title='Visualizing Groups with K-Means Clustering',
+    # #                  labels={df.columns[0]: 'Feature 1', df.columns[1]: 'Feature 2'},
+    # #                  color_continuous_scale=px.colors.qualitative.Set1)
+    # # # Remove the legend
+    # # #fig.update_layout(showlegend=False)
+    # # fig.update_layout(width=1200, height=500)
+    # # # Add user input as a star marker
+    # # fig.add_scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white'))
+    # # fig.update_coloraxes(showscale=False)
+    # # #fig.update_traces(visible='legendonly')
+    # # # Add annotation for user input
+    # # fig.add_annotation(
+    # #     x=user_features[0],
+    # #     y=user_features[1],
+    # #     xshift=10,
+    # #     text="Your Flower ("+dmojis[(predicted_cluster[0]+1)]+")",
+    # #     font=dict(color='white', size=30),
+    # #     arrowhead=2,
+    # #     ax=10,
+    # #     ay=-40
+    # # )
+    # # # Add centroids with group numbers
+    # # for i, coord in enumerate(kmeans.cluster_centers_):
+    # #     fig.add_annotation(
+    # #         x=coord[0],
+    # #         y=coord[1],
+    # #         text=dmojis[i+1],
+    # #         showarrow=True,
+    # #         font=dict(color='white', size=30)
+    # #     )
+    # # For tab1
+    # fig = go.Figure()
+    # # Add shaded regions using convex hull
+    # for cluster in np.unique(y_kmeans):
+    #     cluster_data = df[df['cluster'] == cluster]
+    #     x_data = cluster_data[df.columns[0]].values
+    #     y_data = cluster_data[df.columns[1]].values
+    #     if len(cluster_data) > 2:  # ConvexHull requires at least 3 points
+    #         hull = ConvexHull(cluster_data[[df.columns[0], df.columns[1]]])
+    #         fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
+    # # Add scatter plot
+    # fig.add_trace(go.Scatter(x=df[df.columns[0]], y=df[df.columns[1]], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
     # # Add user input as a star marker
+    # fig.add_trace(go.Scatter(x=[user_features[0]], y=[user_features[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
     # # Add centroids with group numbers
     # for i, coord in enumerate(kmeans.cluster_centers_):
     #         showarrow=True,
     #         font=dict(color='white', size=30)
     #     )
+    # # Update layout
+    # fig.update_layout(width=1200, height=500)
+    # st.write("""
+    # ### Visualizing Groups
+    # ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
+    # """)
+    # #st.pyplot(fig)
+    # st.plotly_chart(fig)
+    # # Predict Cluster for User Input
+    # dist_to_group1 = distance.euclidean(user_features, kmeans.cluster_centers_[0])
+    # dist_to_group2 = distance.euclidean(user_features, kmeans.cluster_centers_[1])
+    # st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
+    # st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
+    # st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
+    # st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")
+    # # Closing Note
+    # st.write("""
+    # ### Wrap Up
+    # ##### Just as sorting toys in a room, we group flowers by features; adjust the data to pick a flower and set how many boxes (groups) you want to use.
+    # """)
+    st.write("""
+    ### What is Clustering?
+    ##### Clustering with K-Means is a machine learning concept like tidying a messy room by grouping similar items, but for data instead of physical objects.
+    """)
+    # Apply PCA for dimensionality reduction
+    pca = PCA(n_components=2)
+    X_pca = pca.fit_transform(X)
+    user_features_pca = pca.transform([user_features])[0]
+    # K-Means Algorithm
+    kmeans = KMeans(n_clusters=n_clusters_advanced)
+    y_kmeans = kmeans.fit_predict(X_pca)
+    # Predict the cluster for the user input in the PCA-transformed space
+    predicted_cluster = kmeans.predict([user_features_pca])
+    # Create a DataFrame for easier plotting with plotly
+    df_pca = pd.DataFrame(X_pca, columns=['PCA1', 'PCA2'])
+    df_pca['cluster'] = y_kmeans
     # For tab1
     fig = go.Figure()
     # Add shaded regions using convex hull
     for cluster in np.unique(y_kmeans):
+        cluster_data = df_pca[df_pca['cluster'] == cluster]
+        x_data = cluster_data['PCA1'].values
+        y_data = cluster_data['PCA2'].values
         if len(cluster_data) > 2:  # ConvexHull requires at least 3 points
+            hull = ConvexHull(cluster_data[['PCA1', 'PCA2']])
             fig.add_trace(go.Scatter(x=x_data[hull.vertices], y=y_data[hull.vertices], fill='toself', fillcolor=px.colors.qualitative.Set1[cluster], opacity=0.5, line=dict(width=0), showlegend=False))
     # Add scatter plot
+    fig.add_trace(go.Scatter(x=df_pca['PCA1'], y=df_pca['PCA2'], mode='markers', marker=dict(color=y_kmeans, colorscale=px.colors.qualitative.Set1), showlegend=False))
     # Add user input as a star marker
+    fig.add_trace(go.Scatter(x=[user_features_pca[0]], y=[user_features_pca[1]], mode='markers', marker=dict(symbol='star', size=30, color='white')))
     # Add centroids with group numbers
     for i, coord in enumerate(kmeans.cluster_centers_):
     # Update layout
     fig.update_layout(width=1200, height=500)
     st.write("""
     ### Visualizing Groups
     ##### Here are the groups from our tidying method. Each color has a number at its center, representing its group.
     """)
     st.plotly_chart(fig)
     # Predict Cluster for User Input
+    dist_to_group1 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[0])
+    dist_to_group2 = distance.euclidean(user_features_pca, kmeans.cluster_centers_[1])
     st.write(f"Distance to Group 1 centroid: {dist_to_group1}")
     st.write(f"Distance to Group 2 centroid: {dist_to_group2}")
     st.write(f"##### Overlapping clusters mean some flowers are very similar and hard to tell apart just by looking at these features.")
     st.write(f"# Based on your flower data (⭐), it likely belongs to **Group {dmojis[predicted_cluster[0]+1]}**")