File size: 3,085 Bytes
de75509
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from sklearn import preprocessing
import streamlit as st
import pandas as pd
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Load the pre-trained model
filename = 'final_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))

# Load the clustered dataset
df = pd.read_csv("Clustered_Data.csv")

# Streamlit app setup
st.markdown('<style>body{background-color: Blue;}</style>', unsafe_allow_html=True)
st.title("Customer Segmentaion")

# Input form
with st.form("my_form"):
    balance = st.number_input(label='Balance', step=0.001, format="%.6f")
    balance_frequency = st.number_input(label='Balance Frequency', step=0.001, format="%.6f")
    purchases = st.number_input(label='Purchases', step=0.01, format="%.2f")
    oneoff_purchases = st.number_input(label='OneOff Purchases', step=0.01, format="%.2f")
    installments_purchases = st.number_input(label='Installments Purchases', step=0.01, format="%.2f")
    cash_advance = st.number_input(label='Cash Advance', step=0.01, format="%.6f")
    purchases_frequency = st.number_input(label='Purchases Frequency', step=0.01, format="%.6f")
    oneoff_purchases_frequency = st.number_input(label='OneOff Purchases Frequency', step=0.1, format="%.6f")
    purchases_installment_frequency = st.number_input(label='Purchases Installments Frequency', step=0.1, format="%.6f")
    cash_advance_frequency = st.number_input(label='Cash Advance Frequency', step=0.1, format="%.6f")
    cash_advance_trx = st.number_input(label='Cash Advance Trx', step=1)
    purchases_trx = st.number_input(label='Purchases TRX', step=1)
    credit_limit = st.number_input(label='Credit Limit', step=0.1, format="%.1f")
    payments = st.number_input(label='Payments', step=0.01, format="%.6f")
    minimum_payments = st.number_input(label='Minimum Payments', step=0.01, format="%.6f")
    prc_full_payment = st.number_input(label='PRC Full Payment', step=0.01, format="%.6f")
    tenure = st.number_input(label='Tenure', step=1)

    # Create a data structure for prediction
    data = [[
        balance, balance_frequency, purchases, oneoff_purchases, installments_purchases, cash_advance,
        purchases_frequency, oneoff_purchases_frequency, purchases_installment_frequency, cash_advance_frequency,
        cash_advance_trx, purchases_trx, credit_limit, payments, minimum_payments, prc_full_payment, tenure
    ]]

    submitted = st.form_submit_button("Submit")

# Prediction and visualization
if submitted:
    clust = loaded_model.predict(data)[0]
    st.write(f'Data belongs to Cluster {clust}')

    # Filter the cluster data
    cluster_df1 = df[df['Cluster'] == clust]

    # Plot the histograms for each feature in the cluster
    st.write(f"Feature Distributions for Cluster {clust}")
    for c in cluster_df1.drop(['Cluster'], axis=1):
        fig, ax = plt.subplots()
        sns.histplot(data=cluster_df1, x=c, kde=True, ax=ax)
        plt.title(f'Distribution of {c} in Cluster {clust}')
        st.pyplot(fig)