Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| import os | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import json | |
| def run(): | |
| # Load Model Classification | |
| with open('adaboost_logreg_best.pkl', 'rb') as file_1: | |
| # with open('adaboost_logreg_10_features.pkl', 'rb') as file_1: | |
| classification_model = pickle.load(file_1) | |
| # Load Model Clustering | |
| with open('kp.pkl','rb') as file_2: | |
| clustering_model = pickle.load(file_2) | |
| # Load Clustering Scaler | |
| with open('scaler.pkl','rb') as file_3: | |
| scaler = pickle.load(file_3) | |
| # Load Clustering Numerical | |
| with open('num_col.txt','rb') as file_4: | |
| num_col = pickle.load(file_4) | |
| # Load Clustering Categorical | |
| with open('cat_col.txt','rb') as file_5: | |
| cat_col = pickle.load(file_5) | |
| # Choice of input: Upload or Manual Input | |
| inputType = st.selectbox("How would you like to input data ?", ["Upload Excel or CSV File", "Manual Input"]) | |
| st.markdown('---') | |
| # Create Function for Prediction | |
| def predictData(df): | |
| totalCustomer = len(df) | |
| if totalCustomer < 1: | |
| st.write('## There is no Customer on this data, please check again.') | |
| else: | |
| # Classification prediction | |
| y_pred_uploaded = classification_model.predict(df) | |
| df['churn'] = y_pred_uploaded | |
| # st.dataframe(df) | |
| # Filter the DataFrame for Predicted Churn (1) | |
| df_churn = df[df['churn'] == 1] | |
| churnCustomer = len(df_churn) | |
| if churnCustomer == 0: | |
| st.write('## There is no Customer predicted as Churn from this Data!') | |
| else: | |
| # Clustering prediction for Predicted Churn (1) | |
| ## Split Numerical and Categorical for K-Prototype | |
| data_cluster_num = df_churn[num_col] | |
| data_cluster_cat = df_churn[cat_col] | |
| ## Scale Numerical column | |
| num_scaled = scaler.transform(data_cluster_num) | |
| ## Merge Scaled Numerical + Categorical | |
| data_cluster_final = np.concatenate([num_scaled, data_cluster_cat], axis=1) | |
| data_cluster_final = pd.DataFrame(data_cluster_final, columns=['tenure', 'monthly_charges'] + cat_col) | |
| data_cluster_final = data_cluster_final.infer_objects() | |
| ## Mark Categorical Column | |
| index_cat_columns = [data_cluster_final.columns.get_loc(col) for col in cat_col] | |
| ## Predict Cluster | |
| y_cluster = clustering_model.predict(data_cluster_final, categorical=index_cat_columns) | |
| # y_cluster = [] | |
| #for rd in range(0, len(df_churn)): y_cluster.append(random.randint(0, 2)) # Random Generator for testing | |
| df_churn['cluster'] = y_cluster | |
| temp_cols = df_churn.columns.tolist() | |
| new_cols = temp_cols[0:1] + temp_cols[-2:] + temp_cols[1:-2] | |
| df_churn = df_churn[new_cols] | |
| df_churn = df_churn.sort_values(by=['cluster'], ascending=True) | |
| # Saving Result to Excel | |
| df_churn.to_excel('model_result.xlsx', index=False) | |
| # Split Data into 3 Cluster DataFrames | |
| df_cluster_0 = df_churn[df_churn['cluster'] == 0] | |
| df_cluster_1 = df_churn[df_churn['cluster'] == 1] | |
| df_cluster_2 = df_churn[df_churn['cluster'] == 2] | |
| st.write(f'## Result : `{churnCustomer} customer` from total {totalCustomer} customer ({int((churnCustomer/totalCustomer)*100)}%) are predicted as churn!') | |
| st.write('##### Here are some suggestion to minimalize churn potential for each customer depend on their cluster') | |
| c0, c1, c2 = '', '', '' | |
| for x in df_cluster_0['name']: c0 += str(x) + ', ' | |
| for y in df_cluster_1['name']: c1 += str(y) + ', ' | |
| for z in df_cluster_2['name']: c2 += str(z) + ', ' | |
| cluster_0 = ''' | |
| - Most of them are senior citizen | |
| - Having partner and dependents | |
| - High monthly charges | |
| ''' | |
| # suggestion_0 = ''' | |
| # - Offers packages with additional speed for 3 months for those who have subscribed for more than 3 years | |
| # - Open all TV channels during big holiday events such as Eid, Christmas and others | |
| # - Provide special offers to increase internet speed to them | |
| # ''' | |
| suggestion_0 = ''' | |
| - Offers long term packages | |
| - Give limited time offer | |
| - Maintain good communication with this customer | |
| ''' | |
| cluster_1 = ''' | |
| - Mix of senior citizan and youngster | |
| - Having partner and dependents | |
| - Low monthly charges | |
| ''' | |
| suggestion_1 = ''' | |
| - Provides offers with many benefits if they subscribe for the long term | |
| - Offers annual DSL internet packages at affordable prices | |
| - New customer onboarding and orientation | |
| ''' | |
| cluster_2 = ''' | |
| - Most of them are young people | |
| - Most of them have no partner and dependents | |
| - Moderate monthly charges | |
| ''' | |
| # suggestion_2 = ''' | |
| # Providing special packages with the following criteria: | |
| # - High speed internet but lower bandwidth at a cheaper price than normal packages | |
| # - Low speed internet but large bandwidth so the connection is much more stable at a cheaper price than normal packages | |
| # ''' | |
| suggestion_2 = ''' | |
| - Make an affordable internet package prices for this cluster | |
| - Provides variation in Payment Method | |
| ''' | |
| if c0 != '': | |
| st.write(f'##### Cluster 1 - Elder Group - {len(df_cluster_0)} customer ({((len(df_cluster_0)/churnCustomer)*100):.1f}%)') | |
| st.write(cluster_0) | |
| st.write('Suggestion for `', c0[0:-2], '` is') | |
| st.write(suggestion_0) | |
| st.markdown('---') | |
| if c1 != '': | |
| st.write(f'##### Cluster 2 - Mixage - {len(df_cluster_1)} customer ({((len(df_cluster_1)/churnCustomer)*100):.1f}%)') | |
| st.write(cluster_1) | |
| st.write('Suggestion for `', c1[0:-2], '` is') | |
| st.write(suggestion_1) | |
| st.markdown('---') | |
| if c2 != '': | |
| st.write(f'##### Cluster 3 - Young Blood - {len(df_cluster_2)} customer ({((len(df_cluster_2)/churnCustomer)*100):.1f}%)') | |
| st.write(cluster_2) | |
| st.write('Suggestion for `', c2[0:-2], '` is') | |
| st.write(suggestion_2) | |
| st.markdown('---') | |
| # Create Bar Plot for Analyze Cluster | |
| num_agg_df = df_churn.groupby(['cluster']).agg({'tenure': 'mean', 'monthly_charges': 'mean'}) | |
| num_agg_df = np.round(num_agg_df, decimals=2) | |
| fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(9, 5)) | |
| # Loop through each subplot to populate it | |
| for ax, column in zip(axes, num_agg_df.columns): | |
| sns.barplot(ax=ax, data=num_agg_df, x=num_agg_df.index, y=column, orient='v') | |
| ax.set_title(f'Average {column} by Cluster') | |
| ax.set_xlabel('Cluster') | |
| ax.set_ylabel(f'Average {column}') | |
| ax.bar_label(ax.containers[0]) | |
| # plt.style.use('dark_background') | |
| plt.tight_layout() | |
| st.pyplot(fig) | |
| # plt.style.use('default') | |
| with open('model_result.xlsx', 'rb') as file: | |
| st.download_button( | |
| label='💾 Download Prediction Result', | |
| data=file, | |
| file_name='model_result.xlsx', | |
| mime='application/vnd.ms-excel' | |
| ) | |
| def tenureMonthToYear(): | |
| year = st.session_state.tenurem % 12 | |
| if year == 0: | |
| st.session_state.tenurey = int((st.session_state.tenurem / 12)) | |
| else: | |
| st.session_state.tenurey = int((st.session_state.tenurem / 12) + 1) | |
| def calculateChargesAndCategory(): | |
| st.session_state.tcharges = int((st.session_state.mcharges * st.session_state.tenurem)) | |
| if st.session_state.mcharges <= 30: | |
| st.session_state.catcharges = 'Low Expense' | |
| elif st.session_state.mcharges <= 60: | |
| st.session_state.catcharges = 'Medium Expense' | |
| elif st.session_state.mcharges <= 90: | |
| st.session_state.catcharges = 'Medium High Expense' | |
| else: | |
| st.session_state.catcharges = 'High Expense' | |
| # A. For CSV | |
| if inputType == "Upload Excel or CSV File": | |
| dl_1, dl_2, dl_3 = st.columns([3, 3, 3]) | |
| with open('telco_data_test.xlsx', 'rb') as file: | |
| dl_1.download_button( | |
| label='💾 Download Data Example', | |
| data=file, | |
| file_name='telco_example.xlsx', | |
| mime='application/vnd.ms-excel' | |
| ) | |
| with open('telco_data_template.xlsx', 'rb') as file: | |
| dl_2.download_button( | |
| label='💾 Download Template Excel', | |
| data=file, | |
| file_name='telco_template.xlsx', | |
| mime='application/vnd.ms-excel' | |
| ) | |
| uploaded_file = st.file_uploader("Choose Excel or CSV file", type=["csv", "xlsx"], accept_multiple_files=False) | |
| if uploaded_file is not None: | |
| split_file_name = os.path.splitext(uploaded_file.name) | |
| # file_name = split_file_name[0] | |
| file_extension = split_file_name[1] | |
| if file_extension == '.csv': | |
| df = pd.read_csv(uploaded_file) | |
| else: | |
| df = pd.read_excel(uploaded_file) | |
| # st.dataframe(df.head()) | |
| predictData(df) | |
| # B. For Manual | |
| else: | |
| # Create Form | |
| # with st.form(key='Form Parameters'): | |
| name = st.text_input('Name', value='', help='Customer Name') | |
| col_left, col_mid, col_right = st.columns([3, 2, 2]) | |
| gender = col_left.selectbox('Gender', ('Male', 'Female'), index=0) | |
| with col_mid: | |
| tenure = st.number_input('Tenure (Month)', min_value=1, max_value=999, step=1, help='Month', key='tenurem', on_change=tenureMonthToYear) | |
| with col_right: | |
| tenure_year = st.number_input('Tenure (Year)', min_value=1, max_value=999, step=1, disabled=True, key='tenurey') | |
| col1, col2, col3 = st.columns([1, 1, 1]) | |
| senior_citizen = col1.radio(label='Senior Citizen?', options=['Yes', 'No'], help='Choose \'Yes\' for 61 years old above') | |
| partner = col2.radio(label='Having a partner?', options=['Yes', 'No']) | |
| dependents = col3.radio(label='Having a dependents?', options=['Yes', 'No'], help='For example : children') | |
| # col4, col5 = st.columns([1, 1]) | |
| # internet_service = col4.selectbox('Internet Service', ('DSL', 'Fiber optic', 'No'), index=0) | |
| col4, col5, col6 = st.columns([1, 1, 1]) | |
| internet_service = col4.radio(label='Subs for Internet service?', options=['DSL', 'Fiber optic', 'No']) | |
| phone_service = col5.radio(label='Subs for Phone service?', options=['Yes', 'No']) | |
| multiple_lines = col6.radio(label='Subs for Multiple Lines?', options=['Yes', 'No', 'No Phone Services']) | |
| col7, col8, col9 = st.columns([1, 1, 1]) | |
| online_security = col7.radio(label='Subs for Online Security?', options=['Yes', 'No', 'No Internet Services']) | |
| online_backup = col8.radio(label='Subs for Online Backup?', options=['Yes', 'No', 'No Internet Services']) | |
| device_protection = col9.radio(label='Having Device Protections?', options=['Yes', 'No', 'No Internet Services']) | |
| tech_support = col7.radio(label='Having Tech Support service?', options=['Yes', 'No', 'No Internet Services']) | |
| streaming_tv = col8.radio(label='Subs for TV Streaming?', options=['Yes', 'No', 'No Internet Services']) | |
| streaming_movies = col9.radio(label='Subs for Movie Streaming?', options=['Yes', 'No', 'No Internet Services']) | |
| col_pm1, col_pm2, col_pm3 = st.columns([3, 3, 2]) | |
| contract = col_pm1.selectbox('Contract', ('Month-to-month', 'One year', 'Two year'), index=0) | |
| payment_method = col_pm2.selectbox('Payment Method', ('Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'), index=0) | |
| paperless_billing = col_pm3.selectbox('Paperless billing?', ('Yes', 'No'), index=0) | |
| col_charges1, col_charges2, col_charges3 = st.columns([1, 1, 2]) | |
| monthly_charges = col_charges1.number_input('Monthly Charges', min_value=1, max_value=999, step=1, help='Amount to paid per month', key='mcharges', on_change=calculateChargesAndCategory) | |
| total_charges = col_charges2.number_input('Total Charges', min_value=1, max_value=999999, step=1, disabled=True, key='tcharges') | |
| charges_cat = col_charges3.text_input('Charges Category', disabled=True, key='catcharges') | |
| # st.button('Predict', on_click=predict) | |
| data_inf = { | |
| 'name': name, | |
| 'gender': gender, | |
| 'senior_citizen': senior_citizen, | |
| 'partner': partner, | |
| 'dependents': dependents, | |
| 'tenure': tenure, | |
| 'phone_service': phone_service, | |
| 'multiple_lines': multiple_lines, | |
| 'internet_service': internet_service, | |
| 'online_security': online_security, | |
| 'online_backup': online_backup, | |
| 'device_protection': device_protection, | |
| 'tech_support': tech_support, | |
| 'streaming_tv': streaming_tv, | |
| 'streaming_movies': streaming_movies, | |
| 'contract': contract, | |
| 'paperless_billing': paperless_billing, | |
| 'payment_method': payment_method, | |
| 'monthly_charges': monthly_charges, | |
| 'total_charges': total_charges, | |
| 'monthly_charges_cat': charges_cat, | |
| 'tenure_year': tenure_year | |
| } | |
| if st.button('Predict'): | |
| data_inf = pd.DataFrame([data_inf]) | |
| # st.dataframe(data_inf.head()) | |
| predictData(data_inf) | |
| if __name__ == '__main__': | |
| run() |