# Streamlit app: segments mall customers with K-Means on the standardized
# Age and Spending Score columns, with an elbow plot to guide the choice of k.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# set_page_config has to run before any other Streamlit call.
st.set_page_config(page_title="Mall Customer Segmentation", layout="wide")
st.title("🛍️ Mall Customer Segmentation using K-Means")

# Shared K-Means settings. n_init is pinned explicitly because its default
# differs between scikit-learn releases; pinning keeps the clustering
# consistent regardless of the installed version.
RANDOM_STATE = 42
N_INIT = 10


# ----------------------------------------
# Load dataset
# ----------------------------------------
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read ``Mall_Customers.csv`` from the working directory.

    The result is cached by Streamlit so the file is not re-read on every
    script rerun.

    Returns:
        The raw customer table as a DataFrame.
    """
    return pd.read_csv("Mall_Customers.csv")


df = load_data()
st.write("### Dataset Preview", df.head())

# ----------------------------------------
# Feature selection
# ----------------------------------------
selected_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']

st.write("### Pairplot (Age, Income, Spending Score)")
fig1 = sns.pairplot(df[selected_columns])
# pairplot returns a PairGrid; hand Streamlit the underlying Matplotlib figure.
st.pyplot(fig1.figure)
# Streamlit reruns this script on every interaction, so release each figure
# once it has been rendered; otherwise pyplot keeps them all alive.
plt.close(fig1.figure)

# ----------------------------------------
# Prepare data for clustering
# ----------------------------------------
# Only Age and Spending Score are clustered; both are standardized so that
# neither dominates the Euclidean distance used by K-Means.
X = df[['Age', 'Spending Score (1-100)']]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ----------------------------------------
# Elbow Method
# ----------------------------------------
st.write("### Elbow Method to Find Optimal k")
K = range(1, 11)
# Inertia (within-cluster sum of squares) for each candidate cluster count.
inertia = [
    KMeans(n_clusters=n_clusters, n_init=N_INIT, random_state=RANDOM_STATE)
    .fit(X_scaled)
    .inertia_
    for n_clusters in K
]

fig2, ax2 = plt.subplots()
ax2.plot(K, inertia, 'bo-')
ax2.set_xlabel('Number of clusters (k)')
ax2.set_ylabel('Inertia')
ax2.set_title('Elbow Method For Optimal k')
st.pyplot(fig2)
plt.close(fig2)

# ----------------------------------------
# Choose k interactively
# ----------------------------------------
st.sidebar.header("🔢 Select number of clusters")
k = st.sidebar.slider("Choose k (clusters)", min_value=2, max_value=10, value=5)

kmeans = KMeans(n_clusters=k, n_init=N_INIT, random_state=RANDOM_STATE)
df['Cluster'] = kmeans.fit_predict(X_scaled)

# ----------------------------------------
# Cluster visualization
# ----------------------------------------
# Scatter the two standardized clustering features (column 0 = Age,
# column 1 = Spending Score), colored by the assigned cluster label.
st.write(f"### Cluster Visualization (k={k})")
fig3, ax3 = plt.subplots(figsize=(8, 6))
scatter = ax3.scatter(
    X_scaled[:, 0],
    X_scaled[:, 1],
    c=df['Cluster'],
    cmap='viridis',
)
ax3.set_xlabel('Age (scaled)')
ax3.set_ylabel('Spending Score (scaled)')
ax3.set_title('Customer Segments (K-Means Clustering)')
plt.colorbar(scatter, ax=ax3, label='Cluster')
st.pyplot(fig3)
# Streamlit reruns the script on every widget change; close the figure after
# rendering so pyplot does not accumulate one open figure per rerun.
plt.close(fig3)

# ----------------------------------------
# Silhouette score
# ----------------------------------------
# Scored on the same standardized features that were used for clustering.
score = silhouette_score(X_scaled, df['Cluster'])
st.success(f"Silhouette Score for k={k}: **{score:.3f}**")

st.write("### Clustered Data Sample")
st.dataframe(df.head())