File size: 5,300 Bytes
ba27b3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Configure the Streamlit page: browser-tab title, wide layout, and a sidebar
# that starts expanded.
# NOTE(review): the title reads "Laptop Price Regression" while the rest of this
# file plots a customer-churn dataset — confirm whether the title is intended.
st.set_page_config(
    page_title="Laptop Price Regression",
    layout="wide",
    initial_sidebar_state="expanded",
)

# dataset: the churn CSV is downloaded at import time into the module-level
# DataFrame `data`, which the plotting functions in this file read directly.
dataset = "https://raw.githubusercontent.com/ediashta/p2-ftds020-rmt-m1/main/churn.csv"
data = pd.read_csv(dataset)


def distribution():
    """Render the customer distribution page.

    Lays out two rows of two columns. Each cell holds a "Choose Table"
    selectbox followed by the plot for the selected feature: histograms in
    the first row, count plots in the second. The churn risk score chart is
    drawn full-width underneath.
    """
    st.title("HaiMeds Customer Distribution")

    # Options offered by the left and right selectbox of each row.
    histogram_options = (
        ("Age", "Last Login (Days)", "Avg. Time Spent"),
        ("Avg. Transaction", "Avg. Login Frequency (Days)", "Points"),
    )
    count_plot_options = (
        ("Gender", "Region", "Membership", "Referral", "Preferred Offer", "Devices"),
        (
            "Internet",
            "Used Discount",
            "Offer Application Preference",
            "Past Complaint",
            "Complaint Status",
            "Feedback",
        ),
    )

    # Row 1: numeric features as histograms.
    for column, options in zip(st.columns(2), histogram_options):
        selected = column.selectbox("Choose Table", options)
        hist_plot(selected, column)

    # Row 2: categorical features as count plots.
    for column, options in zip(st.columns(2), count_plot_options):
        selected = column.selectbox("Choose Table", options)
        bar_plot(selected, column)

    st.subheader("Churn Risk Score Distribution")
    churn_score()


def corr_matrix():
    """Render the feature correlation page.

    The wider left column shows a Spearman correlation heatmap computed over
    the numeric columns of the module-level ``data`` frame (including
    ``churn_risk_score``). The right column shows a static, hard-coded list
    of feature-importance values as markdown.
    """
    st.title("Features Correlation")
    col1, col2 = st.columns([7, 5])

    # correlation for numerical
    numeric_columns = [
        "age",
        "days_since_last_login",
        "avg_time_spent",
        "avg_transaction_value",
        "avg_frequency_login_days",
        "points_in_wallet",
        "churn_risk_score",
    ]
    correlations = data[numeric_columns].corr(method="spearman")

    # Draw on an explicit Axes instead of pyplot's implicit "current" figure.
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(correlations, annot=True, cmap="mako", square=True, ax=ax)
    ax.tick_params(axis="both", labelrotation=45)
    col1.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; Streamlit re-runs the
    # script on each interaction, so release the figure once it is rendered.
    plt.close(fig)

    feature_importance_info = """
        **Feature Importance:**

        - **gender:** 0.0
        - **region_category:** 0.0223
        - **membership_category:** 0.7859
        - **joining_date:** 0.0
        - **joined_through_referral:** 0.0355
        - **preferred_offer_types:** 0.0434
        - **medium_of_operation:** 0.0218
        - **internet_option:** 0.0025
        - **last_visit_time:** 0.0604
        - **used_special_discount:** 0.0092
        - **offer_application_preference:** 0.0179
        - **past_complaint:** 0.0072
        - **complaint_status:** 0.0054
        - **feedback:** 0.4561
        """
    col2.markdown(feature_importance_info)


# Display label shown in the selectbox -> column name in `data`.
_BAR_PLOT_COLUMNS = {
    "Gender": "gender",
    "Region": "region_category",
    "Membership": "membership_category",
    "Referral": "joined_through_referral",
    "Preferred Offer": "preferred_offer_types",
    "Devices": "medium_of_operation",
    "Internet": "internet_option",
    "Used Discount": "used_special_discount",
    "Offer Application Preference": "offer_application_preference",
    "Past Complaint": "past_complaint",
    "Complaint Status": "complaint_status",
    "Feedback": "feedback",
}


def bar_plot(var, col):
    """Draw a count plot of one categorical feature into a Streamlit column.

    Args:
        var: Display label of the feature (e.g. ``"Gender"``). Labels that are
            not in the lookup table are used as the column name unchanged.
        col: Streamlit container (column) that receives the caption and plot.
    """
    col.write("Distribusi " + var + " terbanyak")
    column_name = _BAR_PLOT_COLUMNS.get(var, var)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.countplot(data=data, x=column_name, palette="mako", ax=ax)
    ax.set_xlabel(var)

    # Label each bar with its own height. The previous code took labels from
    # value_counts(), which is sorted by frequency and therefore did not line
    # up with the order in which countplot draws the bars. Looping over all
    # containers also covers seaborn versions that create one container per
    # category when a palette is supplied.
    for container in ax.containers:
        ax.bar_label(container, fmt="%d")

    col.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across reruns.
    plt.close(fig)


# Display label shown in the selectbox -> column name in `data`.
_HIST_PLOT_COLUMNS = {
    "Age": "age",
    "Last Login (Days)": "days_since_last_login",
    "Avg. Time Spent": "avg_time_spent",
    "Avg. Transaction": "avg_transaction_value",
    "Avg. Login Frequency (Days)": "avg_frequency_login_days",
    "Points": "points_in_wallet",
}


def hist_plot(var, col, bins=50):
    """Draw a histogram with a KDE curve for one numeric feature.

    Args:
        var: Display label of the feature (e.g. ``"Age"``). Labels that are
            not in the lookup table are used as the column name unchanged.
        col: Streamlit container (column) that receives the caption and plot.
        bins: Number of histogram bins; also the number of gradient colours
            generated for the bars. Defaults to 50, the previously fixed value.
    """
    col.write("Distribusi " + var)
    column_name = _HIST_PLOT_COLUMNS.get(var, var)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(data=data, x=column_name, kde=True, bins=bins, color="teal", ax=ax)
    ax.set_xlabel(var)

    # Recolour the bars left to right with a reversed "mako" gradient, one
    # shade per bin.
    shades = sns.color_palette("mako_r", bins)
    for patch, shade in zip(ax.patches, shades):
        patch.set_facecolor(shade)

    col.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across reruns.
    plt.close(fig)


def churn_score():
    """Draw a horizontal count plot of ``churn_risk_score`` on the page.

    Bars are ordered by score, highest first, and each bar is annotated with
    its row count just past the bar end.
    """
    # Counts indexed by score, sorted by score descending. This one series
    # drives both the bar order and the labels so the two cannot drift apart.
    counts_by_score = (
        data["churn_risk_score"].value_counts().sort_index(ascending=False)
    )

    fig, ax = plt.subplots(figsize=(20, 5))
    sns.countplot(
        data=data,
        y="churn_risk_score",
        palette="mako",
        order=counts_by_score.index,
        ax=ax,
    )
    # Set after plotting so the label is not replaced by the raw column name.
    ax.set_ylabel("Churn Risk Score")

    # The bar at position i corresponds to counts_by_score.iloc[i]. The old
    # code enumerated an unsorted value_counts() (frequency order), which
    # could put a count next to the wrong bar.
    for position, count in enumerate(counts_by_score):
        ax.text(count + 5, position, str(count), va="center")

    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across reruns.
    plt.close(fig)


# Script entry point: render the distribution page when this file is executed
# directly. corr_matrix() is defined in this file but is not called from here.
if __name__ == "__main__":
    distribution()