import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# NOTE(review): the page title mentions laptop prices, but every chart below
# is built from the churn dataset — confirm the intended title.
st.set_page_config(
    page_title="Laptop Price Regression",
    layout="wide",
    initial_sidebar_state="expanded",
)

# dataset
dataset = "https://raw.githubusercontent.com/ediashta/p2-ftds020-rmt-m1/main/churn.csv"
data = pd.read_csv(dataset)

# Display label -> dataframe column for the histogram select boxes.
_NUMERIC_COLUMNS = {
    "Age": "age",
    "Last Login (Days)": "days_since_last_login",
    "Avg. Time Spent": "avg_time_spent",
    "Avg. Transaction": "avg_transaction_value",
    "Avg. Login Frequency (Days)": "avg_frequency_login_days",
    "Points": "points_in_wallet",
}

# Display label -> dataframe column for the count-plot select boxes.
_CATEGORICAL_COLUMNS = {
    "Gender": "gender",
    "Region": "region_category",
    "Membership": "membership_category",
    "Referral": "joined_through_referral",
    "Preferred Offer": "preferred_offer_types",
    "Devices": "medium_of_operation",
    "Internet": "internet_option",
    "Used Discount": "used_special_discount",
    "Offer Application Preference": "offer_application_preference",
    "Past Complaint": "past_complaint",
    "Complaint Status": "complaint_status",
    "Feedback": "feedback",
}

# Columns included in the Spearman correlation heatmap.
_CORRELATION_COLUMNS = [
    "age",
    "days_since_last_login",
    "avg_time_spent",
    "avg_transaction_value",
    "avg_frequency_login_days",
    "points_in_wallet",
    "churn_risk_score",
]

# Hard-coded scores shown next to the heatmap (not computed in this file).
_FEATURE_IMPORTANCE = {
    "gender": 0.0,
    "region_category": 0.0223,
    "membership_category": 0.7859,
    "joining_date": 0.0,
    "joined_through_referral": 0.0355,
    "preferred_offer_types": 0.0434,
    "medium_of_operation": 0.0218,
    "internet_option": 0.0025,
    "last_visit_time": 0.0604,
    "used_special_discount": 0.0092,
    "offer_application_preference": 0.0179,
    "past_complaint": 0.0072,
    "complaint_status": 0.0054,
    "feedback": 0.4561,
}


def _feature_importance_markdown() -> str:
    """Return the feature-importance table as a markdown bullet list."""
    lines = ["**Feature Importance:**", ""]
    lines.extend(
        f"- **{name}:** {score}" for name, score in _FEATURE_IMPORTANCE.items()
    )
    return "\n".join(lines)


def distribution() -> None:
    """Render the distribution page.

    Shows two selectable histograms, two selectable count plots and the
    churn-risk-score count plot.
    """
    st.title("HaiMeds Customer Distribution")

    # Numerical features: one histogram per column.
    col1, col2 = st.columns(2)
    hist_plot_1 = col1.selectbox(
        "Choose Table",
        ("Age", "Last Login (Days)", "Avg. Time Spent"),
    )
    hist_plot(hist_plot_1, col1)

    hist_plot_2 = col2.selectbox(
        "Choose Table",
        ("Avg. Transaction", "Avg. Login Frequency (Days)", "Points"),
    )
    hist_plot(hist_plot_2, col2)

    # Categorical features: one count plot per column.
    col1, col2 = st.columns(2)
    bar_plot_1 = col1.selectbox(
        "Choose Table",
        ("Gender", "Region", "Membership", "Referral", "Preferred Offer", "Devices"),
    )
    bar_plot(bar_plot_1, col1)

    bar_plot_2 = col2.selectbox(
        "Choose Table",
        (
            "Internet",
            "Used Discount",
            "Offer Application Preference",
            "Past Complaint",
            "Complaint Status",
            "Feedback",
        ),
    )
    bar_plot(bar_plot_2, col2)

    st.subheader("Churn Risk Score Distribution")
    churn_score()


def corr_matrix() -> None:
    """Render the correlation page.

    Draws a Spearman correlation heatmap of the numerical columns and, next
    to it, the hard-coded feature-importance list.
    """
    st.title("Features Correlation")
    col1, col2 = st.columns([7, 5])

    # correlation for numerical
    fig = plt.figure(figsize=(10, 10))
    # Named differently from the function so the local does not shadow it.
    correlations = data[_CORRELATION_COLUMNS].corr(method="spearman")
    sns.heatmap(correlations, annot=True, cmap="mako", square=True)
    plt.xticks(rotation=45)
    plt.yticks(rotation=45)
    col1.pyplot(fig)
    # Figures created through pyplot stay registered until closed; release
    # this one so reruns of the script do not accumulate open figures.
    plt.close(fig)

    col2.markdown(_feature_importance_markdown())


def bar_plot(var: str, col) -> None:
    """Draw a count plot of a categorical column into a Streamlit container.

    Args:
        var: Display label chosen in the select box. Labels not present in
            the label-to-column mapping are used as the column name as-is.
        col: Streamlit container (e.g. a column) that receives the caption
            and the figure.
    """
    col.write("Distribusi " + var + " terbanyak")

    column = _CATEGORICAL_COLUMNS.get(var, var)

    fig = plt.figure(figsize=(10, 5))
    ax = sns.countplot(
        data=data,
        x=column,
        palette="mako",
    )
    ax.set_xlabel(var)
    # Label every bar with its own height. The previous code passed
    # value_counts() (sorted by count) as labels, which does not follow the
    # order in which the bars are drawn, and only touched the first container.
    for container in ax.containers:
        ax.bar_label(container, fmt="%d")
    col.pyplot(fig)
    plt.close(fig)


def hist_plot(var: str, col) -> None:
    """Draw a 50-bin histogram with KDE of a numerical column.

    Args:
        var: Display label chosen in the select box. Labels not present in
            the label-to-column mapping are used as the column name as-is.
        col: Streamlit container (e.g. a column) that receives the caption
            and the figure.
    """
    col.write("Distribusi " + var)

    column = _NUMERIC_COLUMNS.get(var, var)

    fig = plt.figure(figsize=(10, 5))
    palette = sns.color_palette("mako_r", 50)
    ax = sns.histplot(data=data, x=column, kde=True, bins=50, color="teal")
    ax.set_xlabel(var)
    # Recolour the bars one by one to get a gradient across the 50 bins.
    for patch, colour in zip(ax.patches, palette):
        patch.set_facecolor(colour)
    col.pyplot(fig)
    plt.close(fig)


def churn_score() -> None:
    """Draw a horizontal count plot of ``churn_risk_score`` with count labels.

    Bars are ordered from the highest score to the lowest.
    """
    fig = plt.figure(figsize=(20, 5))

    # Counts indexed by score, highest score first; this is also the order
    # in which the bars are drawn.
    sorted_scores = data["churn_risk_score"].value_counts().sort_index(ascending=False)
    ax = sns.countplot(
        data=data, y="churn_risk_score", palette="mako", order=sorted_scores.index
    )
    # Set after plotting so the seaborn call cannot replace the label.
    ax.set_ylabel("Churn Risk Score")

    # Write each count just past the end of its bar. Iterating sorted_scores
    # keeps the labels aligned with the bar order given via ``order``.
    for idx, count in enumerate(sorted_scores):
        ax.text(count + 5, idx, str(count), va="center")

    st.pyplot(fig)
    plt.close(fig)


if __name__ == "__main__":
    distribution()