# Source: Hugging Face Space aikanava/customer_segmentation (status: Sleeping)
# File size: 3,327 bytes
# Revision: 0e9897a

# app.py

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

# Configure the browser tab (title, icon) and use the full page width.
st.set_page_config(
    page_title="K-Means Clustering App",
    page_icon="🤖",
    layout="wide",
)

# Page heading followed by a one-line description of what the app does.
st.title("🤖 K-Means Clustering Explorer")
st.write(
    "This app performs **K-Means Clustering** on a customer segmentation dataset."
)

# Load dataset (local file)
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the customer dataset from ``Mall_Customers.csv``.

    The path is relative, so the CSV must be reachable from the directory the
    app is launched in. The result is cached by ``st.cache_data``.

    Returns:
        The CSV contents as a DataFrame.
    """
    return pd.read_csv("Mall_Customers.csv")

# Customer table used by everything below.
data = load_data()

# The two numeric columns that serve as clustering features.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
features = data[feature_columns]

# Sidebar control: number of clusters, 1 to 10, default 3.
st.sidebar.header("Settings")
k = st.sidebar.slider("Select number of clusters (K)", 1, 10, 3)

# Fit K-Means with the chosen K (fixed seed for reproducible segments) and
# store each customer's cluster label next to the original columns.
kmeans = KMeans(n_clusters=k, init='k-means++', random_state=42)
kmeans.fit(features)
clusters = kmeans.labels_
data['Cluster'] = clusters

# Calculate Elbow Method data
@st.cache_data
def compute_wcss(feature_frame, max_k=10):
    """Return the K-Means inertia (WCSS) for every K from 1 to ``max_k``.

    The elbow curve does not depend on the K chosen in the sidebar, yet the
    script is re-executed on every widget interaction. Caching the result
    avoids refitting ``max_k`` models each time the slider moves.

    Args:
        feature_frame: Feature table passed to ``KMeans.fit``.
        max_k: Largest number of clusters to evaluate (inclusive).

    Returns:
        A list whose i-th entry is the inertia of the model with ``i + 1``
        clusters.
    """
    return [
        KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
        .fit(feature_frame)
        .inertia_
        for n_clusters in range(1, max_k + 1)
    ]


wcss = compute_wcss(features)

# Per-cluster averages of the two clustering features; one row per cluster.
cluster_summary = (
    data
    .groupby('Cluster')[['Annual Income (k$)', 'Spending Score (1-100)']]
    .mean()
)

def interpret_cluster(income, spending):
    """Map a cluster's average income and spending score to a segment label.

    Income counts as high at 70 or more and low at 40 or less. Checks run in
    order and the first match wins; anything that matches no rule falls
    through to the "Standard" label.

    Args:
        income: Average annual income of the cluster (k$).
        spending: Average spending score of the cluster (1-100).

    Returns:
        A human-readable segment description.
    """
    high_income = income >= 70
    low_income = income <= 40

    if high_income and spending >= 50:
        return "💎 Premium Customers (High Income, High Spending)"
    if low_income and spending >= 60:
        return "🔔 Potential Risk Customers (Low Income, High Spending)"
    if high_income and spending <= 40:
        return "💼 Careful Spenders (High Income, Low Spending)"
    if low_income and spending <= 40:
        return "🛒 Budget Customers (Low Income, Low Spending)"
    return "🧩 Standard Customers"

# Create the four top-level tabs; the labels are shown in this order.
tab_labels = [
    "📄 Raw Dataset",
    "📈 Elbow Method",
    "🎯 Clustered Customers",
    "📝 Cluster Explanations",
]
tab1, tab2, tab3, tab4 = st.tabs(tab_labels)

with tab1:
    st.subheader("🧹 Raw Dataset")
    # By this point `data` also carries the computed 'Cluster' column. Drop it
    # so the preview shows only the columns that came from the CSV.
    # errors='ignore' keeps this safe if the column is absent.
    raw_preview = data.drop(columns='Cluster', errors='ignore').head()
    st.dataframe(raw_preview)

with tab2:
    st.subheader("📈 Elbow Method (to find optimal K)")
    fig, ax = plt.subplots()
    # Derive the K axis from the WCSS list so x and y always have equal length.
    ax.plot(range(1, len(wcss) + 1), wcss, marker='o')
    ax.set_xlabel('Number of Clusters (K)')
    ax.set_ylabel('WCSS (Within Cluster Sum of Squares)')
    ax.set_title('The Elbow Method')
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed. Without
    # this, each script rerun would leave another figure in memory.
    plt.close(fig)

with tab3:
    st.subheader("🎯 Clustered Customers")
    fig2, ax2 = plt.subplots()
    # One distinct colour per cluster.
    palette = sns.color_palette("bright", k)
    sns.scatterplot(
        x='Annual Income (k$)',
        y='Spending Score (1-100)',
        hue='Cluster',
        palette=palette,
        data=data,
        ax=ax2,
        s=100,
    )
    # Overlay the fitted centroids; column 0 is income and column 1 is
    # spending score, matching the order of the feature columns.
    ax2.scatter(
        kmeans.cluster_centers_[:, 0],
        kmeans.cluster_centers_[:, 1],
        s=300,
        c='black',
        marker='X',
        label='Centroids',
    )
    ax2.legend()
    ax2.set_title('Customer Segments')
    st.pyplot(fig2)
    # pyplot keeps every figure it creates alive until it is closed. Without
    # this, each script rerun would leave another figure in memory.
    plt.close(fig2)

with tab4:
    st.subheader("📝 Cluster Explanations")
    # Walk the summary one cluster at a time and label each from its averages.
    avg_incomes = cluster_summary['Annual Income (k$)']
    avg_scores = cluster_summary['Spending Score (1-100)']
    for cluster_id, avg_income, avg_score in zip(
        cluster_summary.index, avg_incomes, avg_scores
    ):
        segment_label = interpret_cluster(avg_income, avg_score)
        st.markdown(f"**Cluster {cluster_id}:** {segment_label}")

    # Show the averages table with the maximum of each column highlighted.
    styled_summary = cluster_summary.style.highlight_max(axis=0)
    st.dataframe(styled_summary)

# Footer
st.markdown("---")
st.caption("Made with ❤️ using Streamlit")