3v324v23's picture
Commit Deployment Final
ba27b3c
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Page-wide Streamlit settings: browser-tab title, wide layout and a sidebar
# that starts expanded.
# NOTE(review): the tab title refers to laptop prices although this module
# plots customer churn data -- confirm the title is intentional.
_PAGE_SETTINGS = {
    "page_title": "Laptop Price Regression",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Churn CSV loaded at module level; every plotting function below reads `data`.
dataset = "https://raw.githubusercontent.com/ediashta/p2-ftds020-rmt-m1/main/churn.csv"
data = pd.read_csv(filepath_or_buffer=dataset)
def distribution():
    """Render the customer distribution page.

    The page has two rows of two columns. In the first row each column pairs
    a select box with a histogram drawn by ``hist_plot``; in the second row
    each column pairs a select box with a count plot drawn by ``bar_plot``.
    The churn risk score chart from ``churn_score`` is rendered underneath.
    """
    # Select-box choices, one tuple per column (left, right).
    histogram_choices = (
        ("Age", "Last Login (Days)", "Avg. Time Spent"),
        ("Avg. Transaction", "Avg. Login Frequency (Days)", "Points"),
    )
    count_plot_choices = (
        ("Gender", "Region", "Membership", "Referral", "Preferred Offer", "Devices"),
        (
            "Internet",
            "Used Discount",
            "Offer Application Preference",
            "Past Complaint",
            "Complaint Status",
            "Feedback",
        ),
    )

    st.title("HaiMeds Customer Distribution")

    # First row: histograms.
    for column, choices in zip(st.columns(2), histogram_choices):
        selected = column.selectbox("Choose Table", choices)
        hist_plot(selected, column)

    # Second row: count plots.
    for column, choices in zip(st.columns(2), count_plot_choices):
        selected = column.selectbox("Choose Table", choices)
        bar_plot(selected, column)

    st.subheader("Churn Risk Score Distribution")
    churn_score()
def corr_matrix():
    """Render the feature correlation page.

    The wider left column shows a Spearman correlation heatmap computed over
    a fixed list of columns of the module-level ``data`` frame (including
    ``churn_risk_score``). The right column shows a hard-coded list of
    feature importance values as Markdown.
    """
    st.title("Features Correlation")
    col1, col2 = st.columns([7, 5])

    heatmap_columns = [
        "age",
        "days_since_last_login",
        "avg_time_spent",
        "avg_transaction_value",
        "avg_frequency_login_days",
        "points_in_wallet",
        "churn_risk_score",
    ]
    # Named `correlations` so the local does not shadow this function's name.
    correlations = data[heatmap_columns].corr(method="spearman")

    # Draw on an explicit Axes instead of pyplot's implicit "current" axes.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        sns.heatmap(correlations, annot=True, cmap="mako", square=True, ax=ax)
        plt.setp(ax.get_xticklabels(), rotation=45)
        plt.setp(ax.get_yticklabels(), rotation=45)
        col1.pyplot(fig)
    finally:
        # pyplot keeps every figure it created alive until it is closed, so
        # close it here to avoid accumulating figures across reruns.
        plt.close(fig)

    feature_importance_info = """
**Feature Importance:**
- **gender:** 0.0
- **region_category:** 0.0223
- **membership_category:** 0.7859
- **joining_date:** 0.0
- **joined_through_referral:** 0.0355
- **preferred_offer_types:** 0.0434
- **medium_of_operation:** 0.0218
- **internet_option:** 0.0025
- **last_visit_time:** 0.0604
- **used_special_discount:** 0.0092
- **offer_application_preference:** 0.0179
- **past_complaint:** 0.0072
- **complaint_status:** 0.0054
- **feedback:** 0.4561
"""
    col2.markdown(feature_importance_info)
def bar_plot(var, col):
    """Draw a count plot of one column of ``data`` inside *col*.

    Args:
        var: Display label chosen in the UI (for example ``"Gender"``).
            Known labels are translated to their column name in ``data``;
            any other value is used as the column name unchanged.
        col: Streamlit container that receives the caption and the figure.
    """
    column_by_label = {
        "Gender": "gender",
        "Region": "region_category",
        "Membership": "membership_category",
        "Referral": "joined_through_referral",
        "Preferred Offer": "preferred_offer_types",
        "Devices": "medium_of_operation",
        "Internet": "internet_option",
        "Used Discount": "used_special_discount",
        "Offer Application Preference": "offer_application_preference",
        "Past Complaint": "past_complaint",
        "Complaint Status": "complaint_status",
        "Feedback": "feedback",
    }
    column = column_by_label.get(var, var)

    col.write("Distribusi " + var + " terbanyak")

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        sns.countplot(data=data, x=column, palette="mako", ax=ax)
        ax.set_xlabel(var)
        # Label every bar with its own height. Taking the labels from
        # value_counts() would order them by frequency, which does not match
        # the order in which the bars are drawn. Looping over all containers
        # also covers the case where the bars are split across several.
        for container in ax.containers:
            ax.bar_label(container)
        col.pyplot(fig)
    finally:
        # pyplot keeps every figure it created alive until it is closed, so
        # close it here to avoid accumulating figures across reruns.
        plt.close(fig)
def hist_plot(var, col):
    """Draw a 50-bin histogram with a KDE curve for one column of ``data``.

    Args:
        var: Display label chosen in the UI (for example ``"Age"``). Known
            labels are translated to their column name in ``data``; any
            other value is used as the column name unchanged.
        col: Streamlit container that receives the caption and the figure.
    """
    column_by_label = {
        "Age": "age",
        "Last Login (Days)": "days_since_last_login",
        "Avg. Time Spent": "avg_time_spent",
        "Avg. Transaction": "avg_transaction_value",
        "Avg. Login Frequency (Days)": "avg_frequency_login_days",
        "Points": "points_in_wallet",
    }
    column = column_by_label.get(var, var)
    # Shared by the histogram and the palette so each bar gets its own colour.
    n_bins = 50

    col.write("Distribusi " + var)

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        sns.histplot(
            data=data, x=column, kde=True, bins=n_bins, color="teal", ax=ax
        )
        ax.set_xlabel(var)
        # Recolour the bars along the reversed "mako" palette, one per bin.
        for bar, shade in zip(ax.patches, sns.color_palette("mako_r", n_bins)):
            bar.set_facecolor(shade)
        col.pyplot(fig)
    finally:
        # pyplot keeps every figure it created alive until it is closed, so
        # close it here to avoid accumulating figures across reruns.
        plt.close(fig)
def churn_score():
    """Render a horizontal count plot of ``churn_risk_score`` with counts.

    Bars are ordered from the highest score to the lowest, and the count of
    each score is written just past the end of its bar.
    """
    # One count per score, highest score first. The same series drives both
    # the bar order and the labels so each count sits next to its own bar.
    counts_by_score = (
        data["churn_risk_score"].value_counts().sort_index(ascending=False)
    )

    fig, ax = plt.subplots(figsize=(20, 5))
    try:
        sns.countplot(
            data=data,
            y="churn_risk_score",
            palette="mako",
            order=counts_by_score.index,
            ax=ax,
        )
        ax.set_ylabel("Churn Risk Score")
        # Bar i of the plot corresponds to entry i of `counts_by_score`.
        for position, count in enumerate(counts_by_score):
            ax.text(count + 5, position, str(count), va="center")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it created alive until it is closed, so
        # close it here to avoid accumulating figures across reruns.
        plt.close(fig)
# Render the distribution page only when this file is executed as the main
# script, not when it is imported by another module.
if __name__ == "__main__":
    distribution()