File size: 4,317 Bytes
7fdfca3
 
f5b702a
 
7fdfca3
f5b702a
 
7fdfca3
f5b702a
 
 
 
 
7fdfca3
 
f5b702a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fdfca3
f5b702a
 
7fdfca3
f5b702a
 
7fdfca3
f5b702a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fdfca3
f5b702a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import streamlit as st
import joblib
import numpy as np
from sklearn.neighbors import NearestCentroid

# STREAMLIT TABS
# Two top-level tabs: the interactive predictor and the evaluation report.
tab_names = ["Application", "Model Evaluation"]
app, model_eval = st.tabs(tab_names)

# Display name -> file path of the serialized model. The insertion order is
# the order in which the names are listed in the sidebar select box.
models = dict(
    [
        ("K-Means", "kmeans_model.pkl"),
        ("Gaussian Mixture", "gaussianMixture_model.pkl"),
        ("Hierarchical", "hierarchical_model.pkl"),
    ]
)

# Scaler applied to the user's input before it is handed to a model.
scaler = joblib.load("scaler.pkl")


with app:
    # Sidebar Model Selection
    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models.keys()))

    # Load Selected Model (read from disk again on every script rerun)
    with open(models[selected_model], "rb") as file:
        model = joblib.load(file)

    # Human-readable segment name for each cluster id, per model.
    # The Hierarchical mapping uses a different id -> name assignment than
    # the other two models.
    cluster_labels = {
        "K-Means": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer"
        },
        "Hierarchical": {
            2: "Balanced Consumer",
            1: "Premium Customer",
            3: "Impulsive Buyer",
            0: "Cautious Buyer",
            4: "Budget-Conscious Customer"
        },
        "Gaussian Mixture": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer"
        }
    }

    # User Input
    st.title("Mall Customer Segmentation")
    # NOTE(review): the label asks for dollars, but the hierarchical branch
    # below reads a column named "Annual Income (k$)". If `scaler` was fitted
    # on thousands of dollars, this value must be divided by 1000 (or the
    # label changed) -- confirm against the training code.
    income = st.number_input("Annual Income ($)", min_value=0, step=1)
    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)

    if st.button("Predict"):
        scaled_input = scaler.transform([[income, spending_score]])

        if selected_model in ["K-Means", "Gaussian Mixture"]:
            cluster = model.predict(scaled_input)[0]

        elif selected_model == "Hierarchical":
            # The loaded hierarchical model is not used to predict here;
            # instead the new point is assigned to the nearest centroid of
            # the precomputed cluster assignments.
            try:
                df_clustered = joblib.load("clustered_data.pkl")
            except FileNotFoundError:
                st.error("Precomputed hierarchical clusters (clustered_data.pkl) were not found.")
                st.stop()

            # One row per cluster id: the mean of the two feature columns.
            # NOTE(review): these means are taken from the columns as stored
            # in clustered_data.pkl, while the query point has been passed
            # through `scaler`. If the stored columns hold unscaled values,
            # the two live in different feature spaces and the nearest
            # centroid is not meaningful -- confirm, and if so transform the
            # centroids with `scaler` before fitting.
            centroids = df_clustered.groupby("Cluster_Hierarchical")[
                ["Annual Income (k$)", "Spending Score (1-100)"]
            ].mean()

            # Fit on plain arrays: fitting on a DataFrame and predicting on
            # an array makes scikit-learn warn about missing feature names.
            clf = NearestCentroid()
            clf.fit(centroids.to_numpy(), centroids.index.to_numpy())
            cluster = clf.predict(scaled_input)[0]

        else:
            # Guards against a model being added to `models` without a
            # prediction branch, which would leave `cluster` unbound.
            st.error(f"No prediction routine is defined for model '{selected_model}'.")
            st.stop()

        # Display Prediction
        cluster_id = int(cluster)
        # Fall back to the raw id if the model returns a cluster that has no
        # name in the mapping, rather than failing with a KeyError.
        segment = cluster_labels[selected_model].get(cluster_id, f"Cluster {cluster_id}")
        st.subheader("Customer Classification:")
        st.success(f"You are a: **{segment}**")
with model_eval:
    # Static report: dataset attribution, one segmentation plot per model,
    # one metrics image per model, and a closing comparison.
    # Emoji are written as escapes so the source stays ASCII-only and cannot
    # be corrupted by a wrong file encoding (U+1F4CA bar chart, U+1F4CC pushpin).
    st.header("\U0001F4CA Model Evaluation")
    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")
    
    st.header("K Means Clustering ")
    st.image("KMeansClusteringSegmentation.png")

    st.header("Hierarchical Clustering ")
    st.image("HierarchicalClusteringSegmentation.png")

    st.header("Gaussian Mixture ")
    st.image("GaussianMixtureSegmentation.png")

    # EVALUATION METRICS
    st.subheader("\U0001F4CC Evaluation Metrics")
    st.write("Silhouette and Davies-Bouldin Scores")
    
    st.header("K Means Clustering Evaluation Metrics")
    st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the K Means Clustering model.")
    st.image("kmeans_clustering_metrics.png")
    
    st.header("Hierarchical Clustering Evaluation Metrics")
    st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the Hierarchical Clustering model.")
    st.image("hierarchical_clustering_metrics.png")
    
    st.header("Gaussian Mixture Evaluation Metrics")
    st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the Gaussian Mixture Clustering model.")
    st.image("gmm_evaluation_metrics.png")
    

    st.header("Comparison")
    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")