Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from sklearn.cluster import KMeans | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.metrics import silhouette_score | |
| import seaborn as sns | |
# Page-level settings (title and wide layout), followed by the app heading.
st.set_page_config(layout="wide", page_title="Mall Customer Segmentation")
st.title("๐๏ธ Mall Customer Segmentation using K-Means")
| # ---------------------------------------- | |
| # Load dataset | |
| # ---------------------------------------- | |
def load_data(path: str = "Mall_Customers.csv") -> pd.DataFrame:
    """Read the customer dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to ``"Mall_Customers.csv"``,
            which is resolved relative to the current working directory.

    Returns:
        The file contents as a DataFrame, parsed by ``pandas.read_csv`` with
        its default options.

    Raises:
        FileNotFoundError: If no file exists at ``path`` (propagated from
            ``pandas.read_csv``).
    """
    return pd.read_csv(path)
# Load the customer table; the sections below all read from `df`.
df = load_data()
# Render the heading followed by the first five rows of the table.
st.write("### Dataset Preview", df.head(n=5))
| # ---------------------------------------- | |
| # Feature selection | |
| # ---------------------------------------- | |
# Columns shown in the exploratory pairplot.
selected_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']
st.write("### Pairplot (Age, Income, Spending Score)")
# sns.pairplot returns a PairGrid wrapper rather than a Matplotlib figure, so
# hand Streamlit the underlying figure, which is what st.pyplot documents.
fig1 = sns.pairplot(df[selected_columns])
st.pyplot(fig1.figure)
# pyplot keeps every figure it creates alive until it is closed, and Streamlit
# re-runs this script on each interaction; close it so figures do not pile up.
plt.close(fig1.figure)
| # ---------------------------------------- | |
| # Prepare data for clustering | |
| # ---------------------------------------- | |
# Clustering uses two features only: age and spending score.
X = df.loc[:, ['Age', 'Spending Score (1-100)']]
# Learn the per-feature scaling parameters, then apply them to the same data.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
| # ---------------------------------------- | |
| # Elbow Method | |
| # ---------------------------------------- | |
st.write("### Elbow Method to Find Optimal k")
# Candidate cluster counts to evaluate.
K = range(1, 11)
# Fit one K-Means model per candidate and record its inertia. The fixed
# random_state keeps the curve identical between reruns.
inertia = [
    KMeans(n_clusters=n_clusters, random_state=42).fit(X_scaled).inertia_
    for n_clusters in K
]
fig2, ax2 = plt.subplots()
ax2.plot(K, inertia, 'bo-')
ax2.set_xlabel('Number of clusters (k)')
ax2.set_ylabel('Inertia')
ax2.set_title('Elbow Method For Optimal k')
st.pyplot(fig2)
# pyplot keeps every figure it creates alive until it is closed, and Streamlit
# re-runs this script on each interaction; close it so figures do not pile up.
plt.close(fig2)
| # ---------------------------------------- | |
| # Choose k interactively | |
| # ---------------------------------------- | |
st.sidebar.header("๐ข Select number of clusters")
# Sidebar slider: k ranges from 2 to 10 and starts at 5.
k = st.sidebar.slider(
    "Choose k (clusters)",
    min_value=2,
    max_value=10,
    value=5,
)
# Fit the final model with the chosen k and store each row's cluster label
# in a new 'Cluster' column of `df`.
kmeans = KMeans(random_state=42, n_clusters=k)
df["Cluster"] = kmeans.fit_predict(X_scaled)
| # ---------------------------------------- | |
| # Cluster visualization | |
| # ---------------------------------------- | |
st.write(f"### Cluster Visualization (k={k})")
fig3, ax3 = plt.subplots(figsize=(8, 6))
# Column 0 of X_scaled is the scaled age and column 1 the scaled spending
# score, matching the column order used to build X. Points are coloured by
# their assigned cluster label.
scatter = ax3.scatter(
    X_scaled[:, 0],
    X_scaled[:, 1],
    c=df['Cluster'],
    cmap='viridis',
)
ax3.set_xlabel('Age (scaled)')
ax3.set_ylabel('Spending Score (scaled)')
ax3.set_title('Customer Segments (K-Means Clustering)')
# Attach the colorbar through the figure object instead of pyplot's implicit
# "current figure" state.
fig3.colorbar(scatter, ax=ax3, label='Cluster')
st.pyplot(fig3)
# pyplot keeps every figure it creates alive until it is closed, and Streamlit
# re-runs this script on each interaction; close it so figures do not pile up.
plt.close(fig3)
| # ---------------------------------------- | |
| # Silhouette score | |
| # ---------------------------------------- | |
# Score the clustering for the chosen k against the scaled features.
score = silhouette_score(X_scaled, labels=df["Cluster"])
st.success(f"Silhouette Score for k={k}: **{score:.3f}**")
# Show the first rows of the table, which now include the 'Cluster' column.
st.write("### Clustered Data Sample")
st.dataframe(df.head())