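# NOTE(review): the next three lines look like hosting-page status text that was
# captured together with the file; kept as comments so the module still parses.
# Spaces:
# Sleeping
# Sleeping
# app.py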
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.cluster import KMeans
# Streamlit page settings
# NOTE(review): the emoji in the strings below look mis-decoded (UTF-8 bytes
# rendered through a single-byte code page). They are reproduced exactly as
# found because the intended characters cannot be confirmed from this file.
st.set_page_config(
    page_title="K-Means Clustering App",
    page_icon="π€",
    layout="wide",
)

# Title
st.title("π€ K-Means Clustering Explorer")
st.write("This app performs **K-Means Clustering** on a customer segmentation dataset.")
# Load dataset (local file)
def load_data(path: str = "Mall_Customers.csv") -> pd.DataFrame:
    """Read the customer dataset from a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. The default is the relative name
            ``Mall_Customers.csv``, so when called with no argument the file
            must be present in the directory the app is started from.

    Returns:
        The parsed file, exactly as produced by ``pandas.read_csv``.
    """
    return pd.read_csv(path)
data = load_data()

# Select features: the two numeric columns used as clustering inputs.
features = data[['Annual Income (k$)', 'Spending Score (1-100)']]

# Sidebar
st.sidebar.header("Settings")
# K can be chosen between 1 and 10; the slider starts at 3.
k = st.sidebar.slider(
    "Select number of clusters (K)",
    min_value=1,
    max_value=10,
    value=3,
)

# Perform KMeans clustering
# A fixed random_state keeps the centroid initialisation the same on every rerun.
kmeans = KMeans(n_clusters=k, init='k-means++', random_state=42)
clusters = kmeans.fit_predict(features)
# Store each row's label on the frame; later sections group and colour by it.
data['Cluster'] = clusters
# Calculate Elbow Method data
# Fit one model per candidate K in 1..10 and keep its inertia_ value, which the
# elbow plot labels as the within-cluster sum of squares.
wcss = [
    KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
    .fit(features)
    .inertia_
    for n_clusters in range(1, 11)
]

# Analyze clusters
# Per-cluster mean of both features: one row per cluster label.
cluster_summary = (
    data.groupby('Cluster')[['Annual Income (k$)', 'Spending Score (1-100)']].mean()
)
def interpret_cluster(income: float, spending: float) -> str:
    """Map a cluster's average income and spending score to a segment label.

    Args:
        income: Average value of the income feature for the cluster. It is
            "high" at 70 or above and "low" at 40 or below.
        spending: Average spending score for the cluster, compared against
            the cut-offs 40, 50 and 60 depending on the income band.

    Returns:
        One of five label strings. Any combination that matches none of the
        four named segments (for example a mid-range income) is labelled
        "Standard Customers".
    """
    # NOTE(review): the leading emoji in the labels look mis-decoded; they are
    # reproduced as found because the intended characters cannot be confirmed.
    high_income = income >= 70
    low_income = income <= 40

    if high_income and spending >= 50:
        return "π Premium Customers (High Income, High Spending)"
    # Low earners need a higher score (60) than high earners (50) to count as
    # high spenders.
    if low_income and spending >= 60:
        return "π Potential Risk Customers (Low Income, High Spending)"
    if high_income and spending <= 40:
        return "πΌ Careful Spenders (High Income, Low Spending)"
    if low_income and spending <= 40:
        return "π Budget Customers (Low Income, Low Spending)"
    return "π§© Standard Customers"
# Create Tabs
# NOTE(review): the emoji in these labels look mis-decoded; reproduced as found.
tab1, tab2, tab3, tab4 = st.tabs([
    "π Raw Dataset",
    "π Elbow Method",
    "π― Clustered Customers",
    "π Cluster Explanations",
])

with tab1:
    st.subheader("π§Ή Raw Dataset")
    # By this point the frame also carries the derived 'Cluster' column; hide it
    # here so the tab shows only what was loaded from the file.
    st.dataframe(data.drop(columns='Cluster', errors='ignore').head())
with tab2:
    st.subheader("π Elbow Method (to find optimal K)")
    fig, ax = plt.subplots()
    # wcss holds one value per candidate K, starting at K=1.
    ax.plot(range(1, len(wcss) + 1), wcss, marker='o')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS (Within Cluster Sum of Squares)')
    ax.set_title('The Elbow Method')
    st.pyplot(fig)
    # The script is re-executed on every interaction; close the figure so
    # pyplot does not keep one more open figure per rerun.
    plt.close(fig)
with tab3:
    st.subheader("π― Clustered Customers")
    fig2, ax2 = plt.subplots()

    # One colour per cluster so the palette length always matches K.
    palette = sns.color_palette("bright", k)
    sns.scatterplot(
        x='Annual Income (k$)',
        y='Spending Score (1-100)',
        hue='Cluster',
        palette=palette,
        data=data,
        ax=ax2,
        s=100,
    )

    # Centroid columns follow the feature order used for fitting:
    # column 0 is income, column 1 is spending score.
    centroids = kmeans.cluster_centers_
    ax2.scatter(
        centroids[:, 0],
        centroids[:, 1],
        s=300,
        c='black',
        marker='X',
        label='Centroids',
    )

    ax2.legend()
    ax2.set_title('Customer Segments')
    st.pyplot(fig2)
    # Close after rendering so figures do not accumulate across reruns.
    plt.close(fig2)
with tab4:
    st.subheader("π Cluster Explanations")
    # One line of text per cluster, derived from that cluster's feature means.
    for cluster_num, row in cluster_summary.iterrows():
        explanation = interpret_cluster(
            row['Annual Income (k$)'],
            row['Spending Score (1-100)'],
        )
        st.markdown(f"**Cluster {cluster_num}:** {explanation}")
    # Show the underlying means, highlighting the largest value in each column.
    st.dataframe(cluster_summary.style.highlight_max(axis=0))
# Footer
st.markdown("---")
# NOTE(review): the symbol in this caption looks mis-decoded; reproduced as found.
st.caption("Made with β€οΈ using Streamlit")