File size: 5,300 Bytes
ba27b3c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Page-level Streamlit settings: browser-tab title, wide layout and an open
# sidebar. The title names what this app actually shows (HaiMeds customer
# churn data) rather than an unrelated "Laptop Price Regression" project.
st.set_page_config(
    page_title="HaiMeds Customer Churn",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Dataset: the churn CSV is read from its remote URL when this module is
# executed. The resulting module-level `data` frame is what every plotting
# function below reads from.
dataset = "https://raw.githubusercontent.com/ediashta/p2-ftds020-rmt-m1/main/churn.csv"
data = pd.read_csv(dataset)
def distribution():
    """Render the customer distribution page.

    The page has two rows of two columns. The first row shows histograms of
    numerical features, the second shows count plots of categorical features.
    Every column has its own select box for choosing the feature to display.
    The churn risk score count plot is rendered below both rows.
    """
    # Select-box options per column, left then right.
    histogram_choices = (
        ("Age", "Last Login (Days)", "Avg. Time Spent"),
        ("Avg. Transaction", "Avg. Login Frequency (Days)", "Points"),
    )
    count_plot_choices = (
        ("Gender", "Region", "Membership", "Referral", "Preferred Offer", "Devices"),
        (
            "Internet",
            "Used Discount",
            "Offer Application Preference",
            "Past Complaint",
            "Complaint Status",
            "Feedback",
        ),
    )

    st.title("HaiMeds Customer Distribution")

    # Row 1: one histogram per column, driven by that column's select box.
    for column, options in zip(st.columns(2), histogram_choices):
        chosen = column.selectbox("Choose Table", options)
        hist_plot(chosen, column)

    # Row 2: one count plot per column, driven by that column's select box.
    for column, options in zip(st.columns(2), count_plot_choices):
        chosen = column.selectbox("Choose Table", options)
        bar_plot(chosen, column)

    st.subheader("Churn Risk Score Distribution")
    churn_score()
def corr_matrix():
    """Render the feature correlation page.

    The wider left column shows a Spearman correlation heatmap of the
    numerical columns of the module-level ``data`` frame (including
    ``churn_risk_score``). The right column shows a fixed markdown list of
    feature-importance values for the remaining columns.
    """
    st.title("Features Correlation")
    heatmap_col, info_col = st.columns([7, 5])

    # Numerical columns that take part in the correlation matrix.
    numeric_columns = [
        "age",
        "days_since_last_login",
        "avg_time_spent",
        "avg_transaction_value",
        "avg_frequency_login_days",
        "points_in_wallet",
        "churn_risk_score",
    ]
    # Named `correlations` so the local does not shadow this function's name.
    correlations = data[numeric_columns].corr(method="spearman")

    fig = plt.figure(figsize=(10, 10))
    try:
        sns.heatmap(correlations, annot=True, cmap="mako", square=True)
        plt.xticks(rotation=45)
        plt.yticks(rotation=45)
        heatmap_col.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates; close it so
        # figures do not accumulate across Streamlit reruns.
        plt.close(fig)

    feature_importance_info = """
**Feature Importance:**
- **gender:** 0.0
- **region_category:** 0.0223
- **membership_category:** 0.7859
- **joining_date:** 0.0
- **joined_through_referral:** 0.0355
- **preferred_offer_types:** 0.0434
- **medium_of_operation:** 0.0218
- **internet_option:** 0.0025
- **last_visit_time:** 0.0604
- **used_special_discount:** 0.0092
- **offer_application_preference:** 0.0179
- **past_complaint:** 0.0072
- **complaint_status:** 0.0054
- **feedback:** 0.4561
"""
    info_col.markdown(feature_importance_info)
def bar_plot(var, col):
    """Draw a count plot of one categorical feature into a Streamlit column.

    Args:
        var: Display label chosen in the UI (e.g. ``"Gender"``). Labels that
            are not in the lookup table are used as the column name as-is.
        col: Streamlit container (column) that receives the caption and plot.
    """
    # Display label -> column name in the module-level `data` frame.
    column_by_label = {
        "Gender": "gender",
        "Region": "region_category",
        "Membership": "membership_category",
        "Referral": "joined_through_referral",
        "Preferred Offer": "preferred_offer_types",
        "Devices": "medium_of_operation",
        "Internet": "internet_option",
        "Used Discount": "used_special_discount",
        "Offer Application Preference": "offer_application_preference",
        "Past Complaint": "past_complaint",
        "Complaint Status": "complaint_status",
        "Feedback": "feedback",
    }

    col.write("Distribusi " + var + " terbanyak")
    column = column_by_label.get(var, var)

    fig = plt.figure(figsize=(10, 5))
    try:
        ax = sns.countplot(
            data=data,
            x=column,
            palette="mako",
        )
        ax.set_xlabel(var)
        # Label each bar with its own height. Taking labels from
        # value_counts() would pair count-sorted numbers with bars drawn in
        # category order, putting counts on the wrong bars. Iterating over
        # all containers also covers seaborn versions that create one
        # container per bar.
        for container in ax.containers:
            ax.bar_label(container, fmt="%.0f")
        col.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates; close it so
        # figures do not accumulate across Streamlit reruns.
        plt.close(fig)
def hist_plot(var, col):
    """Draw a histogram with KDE of one numerical feature into a column.

    Args:
        var: Display label chosen in the UI (e.g. ``"Age"``). Labels that are
            not in the lookup table are used as the column name as-is.
        col: Streamlit container (column) that receives the caption and plot.
    """
    # Display label -> column name in the module-level `data` frame.
    column_by_label = {
        "Age": "age",
        "Last Login (Days)": "days_since_last_login",
        "Avg. Time Spent": "avg_time_spent",
        "Avg. Transaction": "avg_transaction_value",
        "Avg. Login Frequency (Days)": "avg_frequency_login_days",
        "Points": "points_in_wallet",
    }
    # One palette colour per histogram bin, so both use the same count.
    n_bins = 50

    col.write("Distribusi " + var)
    column = column_by_label.get(var, var)

    fig = plt.figure(figsize=(10, 5))
    try:
        ax = sns.histplot(data=data, x=column, kde=True, bins=n_bins, color="teal")
        # Show the human-readable label instead of the raw column name.
        ax.set_xlabel(var)
        # Recolour the bars left to right with a reversed "mako" gradient.
        bin_colors = sns.color_palette("mako_r", n_bins)
        for patch, bin_color in zip(ax.patches, bin_colors):
            patch.set_facecolor(bin_color)
        col.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates; close it so
        # figures do not accumulate across Streamlit reruns.
        plt.close(fig)
def churn_score():
    """Draw a horizontal count plot of ``churn_risk_score`` on the page.

    Bars are ordered by descending score, and each bar is annotated with its
    row count just past the end of the bar.
    """
    # Counts indexed by score, highest score first. This one series drives
    # both the bar order and the labels so the two cannot get out of step.
    counts_by_score = (
        data["churn_risk_score"].value_counts().sort_index(ascending=False)
    )

    fig = plt.figure(figsize=(20, 5))
    try:
        ax = sns.countplot(
            data=data,
            y="churn_risk_score",
            palette="mako",
            order=counts_by_score.index,
        )
        # Set after plotting so seaborn's own axis labelling cannot replace it.
        ax.set_ylabel("Churn Risk Score")
        # Bar positions follow `order`, so position i belongs to the i-th
        # entry of counts_by_score (not of a count-sorted value_counts()).
        for position, count in enumerate(counts_by_score):
            ax.text(count + 5, position, str(count), va="center")
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates; close it so
        # figures do not accumulate across Streamlit reruns.
        plt.close(fig)
# When this file is executed as the main script (not imported), render the
# distribution page.
if __name__ == "__main__":
    distribution()
|