Spaces:
Build error
Build error
| from sklearn.cluster import KMeans | |
| from sklearn.metrics import silhouette_score | |
| from sklearn.preprocessing import StandardScaler | |
| import streamlit as st | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import pandas as pd | |
| from sklearn.decomposition import PCA | |
def summarize_cluster_characteristics(clustered_data, labels, cluster_number):
    """Return the per-column mean of the rows that belong to one cluster.

    Args:
        clustered_data: DataFrame with one row per sample.
        labels: Cluster label of each row, in row order. Any sequence works
            (list, NumPy array, Series); it is matched to the rows by
            position, not by index.
        cluster_number: Label of the cluster to summarize.

    Returns:
        A dict mapping each column name to the mean of that column over the
        rows whose label equals ``cluster_number``. The means are NaN when
        no row carries that label.
    """
    # Build a positional boolean mask. Comparing a plain list with `==`
    # yields a single bool, and a Series would be aligned on its index,
    # which breaks once rows have been dropped from the frame.
    in_cluster = pd.Series(labels).eq(cluster_number).to_numpy()
    return clustered_data[in_cluster].mean().to_dict()
def perform_clustering(df, n_clusters):
    """Cluster the rows of ``df`` with K-Means on standardized features.

    Rows containing missing values are dropped, every remaining column is
    standardized with ``StandardScaler``, and K-Means is fitted on the
    scaled matrix. The frame passed in by the caller is not modified.

    Args:
        df: DataFrame of features; all of its columns are fed to the scaler.
        n_clusters: Number of clusters to fit.

    Returns:
        A 5-tuple ``(clustered, score, scaled, labels, model)``:
        the cleaned frame with an added ``'Cluster'`` column, the silhouette
        score of the labelling, the scaled feature matrix, the label of each
        remaining row, and the fitted ``KMeans`` model.

    Raises:
        ValueError: If no rows remain after dropping missing values, or if
            ``n_clusters`` is smaller than 2 or larger than the number of
            remaining rows. The silhouette computation may still raise
            ``ValueError`` for labellings it cannot score.
    """
    clean = df.dropna()
    n_samples = len(clean)

    # Fail early with a readable message; otherwise these cases surface
    # later as a ValueError from inside scikit-learn.
    if n_samples == 0:
        raise ValueError(
            "No rows left to cluster after dropping missing values."
        )
    if not 2 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 2 and the number of rows "
            f"({n_samples}); got {n_clusters}."
        )

    scaler = StandardScaler()
    df_value_scaled = scaler.fit_transform(clean)

    # Apply KMeans with the selected number of clusters. n_init is set
    # explicitly so the result does not depend on the library's default.
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    labels = model.fit_predict(df_value_scaled)
    score = silhouette_score(df_value_scaled, labels)

    # assign() returns a new frame, so neither the caller's frame nor the
    # dropna() result is written to in place.
    clustered = clean.assign(Cluster=labels)
    return clustered, score, df_value_scaled, labels, model
def plot_clusters(df_value_scaled, labels, new_data_point=None):
    """Render a 2-D PCA scatter plot of the clusters in the Streamlit app.

    The scaled feature matrix is reduced to two principal components, each
    sample is drawn coloured by its cluster label, and the figure is sent
    to Streamlit with ``st.pyplot``.

    Args:
        df_value_scaled: Scaled feature matrix, one row per sample.
        labels: Cluster label of each row of ``df_value_scaled``.
        new_data_point: Optional 2-D array of extra points to highlight in
            red. Points with as many columns as the feature matrix are
            projected with the PCA fitted here. Points that already have
            two columns are plotted as given.

    Returns:
        None. The plot is emitted through Streamlit.
    """
    pca = PCA(n_components=2)
    components = pca.fit_transform(df_value_scaled)
    df_components = pd.DataFrame(data=components, columns=['PC1', 'PC2'])
    df_components['Cluster'] = labels

    # Draw on an explicit Figure/Axes instead of pyplot's implicit global
    # figure, so the figure can be handed to Streamlit and then released.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.scatterplot(
            x='PC1',
            y='PC2',
            hue='Cluster',
            data=df_components,
            palette='viridis',
            s=100,
            alpha=0.7,
            ax=ax,
        )

        # Plot new data point if provided
        if new_data_point is not None:
            point_2d = new_data_point
            n_features = pca.components_.shape[1]
            # A point given in the full feature space has to go through the
            # same projection as the samples; its first two raw columns are
            # not PC1/PC2 coordinates.
            # NOTE(review): a 2-column point is plotted unchanged because it
            # may already be projected by the caller - confirm with callers.
            if n_features != 2 and new_data_point.shape[1] == n_features:
                point_2d = pca.transform(new_data_point)
            ax.scatter(
                point_2d[:, 0],
                point_2d[:, 1],
                color='red',
                marker='o',
                s=100,
                label='New Data Point',
            )

        ax.set_title('Cluster Visualization')
        ax.set_xlabel('Principal Component 1')
        ax.set_ylabel('Principal Component 2')
        ax.legend(title='Cluster')
        st.pyplot(fig)
    finally:
        # Without this, every call leaves one more open figure behind.
        plt.close(fig)