Spaces:

CristopherWVSU
/

CustomerClustering

Sleeping

File size: 4,317 Bytes

7fdfca3
 
f5b702a
 
7fdfca3
f5b702a
 
7fdfca3
f5b702a
 
 
 
 
7fdfca3
 
f5b702a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fdfca3
f5b702a
 
7fdfca3
f5b702a
 
7fdfca3
f5b702a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fdfca3
f5b702a

import streamlit as st
import joblib
import numpy as np
from sklearn.neighbors import NearestCentroid

# Page layout: one tab for the interactive predictor, one for the static
# evaluation report. Both handles are used as context managers further down.
app, model_eval = st.tabs(
    ["Application", "Model Evaluation"]
)

# Display name shown in the model picker -> file the fitted model is loaded from.
models = dict(
    [
        ("K-Means", "kmeans_model.pkl"),
        ("Gaussian Mixture", "gaussianMixture_model.pkl"),
        ("Hierarchical", "hierarchical_model.pkl"),
    ]
)

# Scaler applied to the user's input before it is handed to a model.
scaler = joblib.load("scaler.pkl")


with app:
    # The model picker is rendered in the sidebar; its options are the display
    # names that key the module-level `models` mapping.
    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models))

    # Load the estimator for the chosen model. The "Hierarchical" branch below
    # never calls this object; it classifies by nearest centroid instead
    # (presumably because the saved hierarchical model cannot predict on new
    # points -- TODO confirm).
    with open(models[selected_model], "rb") as file:
        model = joblib.load(file)

    # Segment name for each cluster id, per model. The id -> name assignment
    # differs between models, so the lookup is keyed by the model's display
    # name first and by the predicted cluster id second.
    cluster_labels = {
        "K-Means": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Hierarchical": {
            2: "Balanced Consumer",
            1: "Premium Customer",
            3: "Impulsive Buyer",
            0: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Gaussian Mixture": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
    }

    # User input.
    # NOTE(review): the label says "($)" while the clustered dataframe column
    # used below is named "Annual Income (k$)" -- confirm which unit the scaler
    # expects.
    st.title("Mall Customer Segmentation")
    income = st.number_input("Annual Income ($)", min_value=0, step=1)
    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)

    if st.button("Predict"):
        # Single-row input passed through the scaler loaded at module level.
        scaled_input = scaler.transform([[income, spending_score]])

        if selected_model in ["K-Means", "Gaussian Mixture"]:
            cluster = model.predict(scaled_input)[0]

        elif selected_model == "Hierarchical":
            # Dataframe carrying a precomputed "Cluster_Hierarchical" label per
            # row; the file must be present next to the app.
            df_clustered = joblib.load("clustered_data.pkl")

            # One centroid per hierarchical cluster: the mean of the two
            # feature columns over the rows assigned to that cluster.
            feature_columns = ["Annual Income (k$)", "Spending Score (1-100)"]
            centroids = df_clustered.groupby("Cluster_Hierarchical")[feature_columns].mean()

            # NOTE(review): `scaled_input` has been scaler-transformed, but the
            # centroids are averaged from the columns exactly as stored in
            # clustered_data.pkl. If that file holds unscaled values, the
            # distances below mix two scales and the centroids should be passed
            # through the same scaler first -- confirm against the training
            # notebook.

            # Fit on plain arrays rather than the DataFrame: the query point is
            # an ndarray without column names, and fitting with named columns
            # makes scikit-learn warn about mismatched feature names on predict.
            clf = NearestCentroid()
            clf.fit(centroids.to_numpy(), centroids.index.to_numpy())
            cluster = clf.predict(scaled_input)[0]

        # Display Prediction
        st.subheader("Customer Classification:")
        st.success(f"You are a: **{cluster_labels[selected_model][cluster]}**")
with model_eval:
    # Static report: dataset credit, one segmentation plot per model, one
    # metrics image per model, then a closing comparison. All images are read
    # from files next to the app.
    st.header("📊 Model Evaluation")
    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")

    # (section heading, segmentation plot file) for each model, in display order.
    segmentation_plots = (
        ("K Means Clustering ", "KMeansClusteringSegmentation.png"),
        ("Hierarchical Clustering ", "HierarchicalClusteringSegmentation.png"),
        ("Gaussian Mixture ", "GaussianMixtureSegmentation.png"),
    )
    for heading, plot_file in segmentation_plots:
        st.header(heading)
        st.image(plot_file)

    # EVALUATION METRICS
    st.subheader("📌 Evaluation Metrics")
    st.write("Silhouette and Davies-Bouldin Scores")

    # (heading prefix, model name used in the sentence, metrics image file).
    # The sentence wording for the Gaussian Mixture model differs from its
    # heading, hence the separate second field.
    metric_sections = (
        ("K Means Clustering", "K Means Clustering", "kmeans_clustering_metrics.png"),
        ("Hierarchical Clustering", "Hierarchical Clustering", "hierarchical_clustering_metrics.png"),
        ("Gaussian Mixture", "Gaussian Mixture Clustering", "gmm_evaluation_metrics.png"),
    )
    for heading, model_phrase, metrics_file in metric_sections:
        st.header(f"{heading} Evaluation Metrics")
        st.write(f"The image below represents the **Silhouette and Davies-Bouldin Scores** of the {model_phrase} model.")
        st.image(metrics_file)

    st.header("Comparison")
    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")