gilangw committed on
Commit
d6b6b64
·
1 Parent(s): 83fdb04

Upload 12 files

Browse files
Files changed (5) hide show
  1. app.py +2 -7
  2. eda.py +63 -62
  3. home.py +2 -1
  4. prediction.py +49 -15
  5. telco_data_test.xlsx +0 -0
app.py CHANGED
@@ -17,13 +17,11 @@ from PIL import Image
17
 
18
  # Create sidebar navigation
19
 
20
- ## Churn Guardian, Yosef Project
21
-
22
  # st.markdown(
23
  # f"""
24
  # <style>
25
  # [data-testid="stSidebar"] {{
26
- # background-image: url(https://raw.githubusercontent.com/FTDS-assignment-bay/FTDS-007-HCK-group-002/main/assets/ChurnGuardian-Logo-Transparants.png);
27
  # background-repeat: no-repeat;
28
  # padding-top: 20px;
29
  # background-position: 10px 50px;
@@ -34,9 +32,6 @@ from PIL import Image
34
  # unsafe_allow_html=True,
35
  # )
36
 
37
- # selected_page = st.sidebar.radio('Select Page', ('📋 Home Page', '📊 Exploratory Data Analysis', '💻 Model'))
38
- ## Churn Guardian, Yosef Project
39
-
40
  st.set_page_config(
41
  page_title='Telco Customer Churn and Segmentation',
42
  layout='centered', #wide
@@ -57,7 +52,7 @@ selected = option_menu(None, ["About", "EDA", "Predict"],
57
  menu_icon="cast", default_index=0, orientation="horizontal",
58
  styles={
59
  "icon": {"color": "orange", "font-size": "15px"},
60
- "nav-link": {"font-size": "15px", "text-align": "left", "margin":"1px", "--hover-color": "#eee"},
61
  "nav-link-selected": {"background-color": "grey"},
62
  }
63
  )
 
17
 
18
  # Create sidebar navigation
19
 
 
 
20
  # st.markdown(
21
  # f"""
22
  # <style>
23
  # [data-testid="stSidebar"] {{
24
+ # background-image: url(https://raw.githubusercontent.com/FTDS-assignment-bay/main/assets/ChurnGuardian-Logo-Transparants.png);
25
  # background-repeat: no-repeat;
26
  # padding-top: 20px;
27
  # background-position: 10px 50px;
 
32
  # unsafe_allow_html=True,
33
  # )
34
 
 
 
 
35
  st.set_page_config(
36
  page_title='Telco Customer Churn and Segmentation',
37
  layout='centered', #wide
 
52
  menu_icon="cast", default_index=0, orientation="horizontal",
53
  styles={
54
  "icon": {"color": "orange", "font-size": "15px"},
55
+ "nav-link": {"font-size": "15px", "text-align": "left", "margin":"1px", "--hover-color": "#eee"},
56
  "nav-link-selected": {"background-color": "grey"},
57
  }
58
  )
eda.py CHANGED
@@ -6,7 +6,7 @@ import plotly.express as px
6
  from PIL import Image
7
 
8
  def run():
9
- #Show dataframe
10
  # st.title('Data Overview')
11
  df = pd.read_csv('telco_data_clean.csv')
12
  # st.dataframe(df.head())
@@ -14,76 +14,77 @@ def run():
14
  st.title('Exploratory Data Analysis')
15
  plot_selection = st.selectbox(label='Choose',
16
  options=['Customer Distribution',
17
- 'Top Total Churn City',
18
- 'Customers reasons for churning',
19
- 'Churn Reason',
20
- 'Age Distribution Churn vs Stayed',
21
- 'Gender Distribution Churn vs Stayed'])
22
 
23
  # Plot 1
24
  def plot_1():
25
  st.write('#### Pie Chart for Customer Status Distribution')
26
- # fig_1 = plt.figure()
27
- # customer_status_count = df['Customer Status'].value_counts()
28
- # fig_1, ax = plt.subplots()
29
- # ax.pie(customer_status_count, labels=customer_status_count.index, autopct='%1.1f%%')
30
- # ax.set_title('Customer Status Distribution')
31
- # st.pyplot(fig_1)
32
- # with st.expander('Explanation'):
33
- # st.text('''
34
- # The data frame indicates that 26.5% of customers have churned.
35
- # The "Joined" Category row will be removed, as it doesn't offer
36
- # any useful insights into the churn rate.
37
- # ''')
38
- st.text('''
39
- The data frame indicates that 26.5% of customers have churned.
40
- The "Joined" Category row will be removed, as it doesn't offer
41
- any useful insights into the churn rate.
42
-
43
- Sisa ne nyusul
44
- ''')
45
-
46
- # st.write('## Histogram Limit Balance')
47
- # fig = plt.figure(figsize=(15,5))
48
- # sns.histplot(df['limit_balance'], bins=20, kde=True).set(title='limit_balance')
49
- # st.pyplot(fig)
50
- # st.write('Based on histogram, column _limit\_balance_ skewness is positive, meaning the data distribution is not normal.')
51
- # st.markdown('---')
52
-
53
- # st.write('## Average Amount of Bill Statement')
54
- # df_amt = pd.DataFrame()
55
- # bill_amt = []
56
- # pay_amt = []
57
- # for i in range(1, 7):
58
- # bill_amt.append(df['bill_amt_' + str(i)].mean())
59
- # pay_amt.append(df['pay_amt_' + str(i)].mean())
60
- # df_amt['bill_amt'] = bill_amt
61
- # df_amt['pay_amt'] = pay_amt
62
- # fig, ax = plt.subplots(ncols=2, figsize=(10, 5))
63
- # axis_label = sns.barplot(ax=ax[0], data=df_amt, x=df_amt['bill_amt'].index, y=bill_amt, orient='v')
64
- # ax[0].set_title('Bill Statement')
65
- # axis_label = sns.barplot(ax=ax[1], data=df_amt, x=df_amt['pay_amt'].index, y=pay_amt, orient='v')
66
- # ax[1].set_title('Payment')
67
- # st.pyplot(fig)
68
- # st.write('Average amount of bill statement is decrease every month, showing people using their credit card less during this period.')
69
- # st.markdown('---')
70
 
71
- # st.write('## Barplot Sex')
72
- # fig = plt.figure(figsize=(10,5))
73
- # sns.countplot(x='sex', data=df)
74
- # st.pyplot(fig)
75
- # st.write('Most of the bank customer is female')
76
- # st.markdown('---')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- # st.write('## Barplot Marital Status')
79
- # fig = plt.figure(figsize=(15,5))
80
- # sns.countplot(x='marital_status', data=df)
81
- # st.pyplot(fig)
82
- # st.write('Most of the bank customer is married')
83
- # st.markdown('---')
84
 
85
  if plot_selection == "Customer Distribution":
86
  plot_1()
 
 
 
 
 
 
87
 
88
  if __name__ == '__main__':
89
  run()
 
6
  from PIL import Image
7
 
8
  def run():
9
+ # Show dataframe
10
  # st.title('Data Overview')
11
  df = pd.read_csv('telco_data_clean.csv')
12
  # st.dataframe(df.head())
 
14
  st.title('Exploratory Data Analysis')
15
  plot_selection = st.selectbox(label='Choose',
16
  options=['Customer Distribution',
17
+ 'Churn by Monthly Charge',
18
+ 'Churn by Tenure',
19
+ 'Churn by Internet Service'])
 
 
20
 
21
  # Plot 1
22
  def plot_1():
23
  st.write('#### Pie Chart for Customer Status Distribution')
24
+ target = df["churn"].value_counts().reset_index()
25
+ persen = df["churn"].value_counts(normalize=True).reset_index()
26
+ target["percentage"] = persen["churn"]
27
+
28
+ fig_1 = plt.figure()
29
+ fig_1, ax = plt.subplots(ncols=1, figsize=(5, 5))
30
+ ax.pie(target["percentage"], labels = target["index"], autopct='%.0f%%')
31
+ ax.set_title("Customer Status Distribution")
32
+ st.pyplot(fig_1)
33
+ st.write('''
34
+ From the plot above, it is found that of the total number of customers who churn
35
+ is 27% (1869 customers) and customers who is not churn / stay is 73% (5163 customers).
36
+ ''')
37
+ st.markdown('---')
38
+
39
+ # Plot 2
40
+ def plot_2():
41
+ df_churn_by_mcharges = df.groupby(['monthly_charges_cat']).agg(total=('monthly_charges_cat', 'count')).sort_values(by=['total'], ascending=True)
42
+ fig_2 = plt.figure(figsize=(7, 5))
43
+ ax = sns.barplot(data=df_churn_by_mcharges, x=df_churn_by_mcharges.index.to_list(), y='total', orient='v')
44
+ ax.bar_label(ax.containers[0])
45
+ ax.set(title='Churn by Monthly Charges')
46
+ st.pyplot(fig_2)
47
+ st.write('''
48
+ From the bar plot above we can see that Medium-High Expenses and High Expense
49
+ have the highest churn rate.
50
+ ''')
51
+ st.markdown('---')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ # Plot 3
54
+ def plot_3():
55
+ df_churn_by_mcharge = df.groupby(['tenure_year', 'churn']).agg(total=('churn', 'count'))
56
+ fig_3 = plt.figure(figsize=(7, 5))
57
+ ax = sns.lineplot(data=df_churn_by_mcharge, x="tenure_year", y="total", hue="churn")
58
+ # ax.bar_label(ax.containers[0])
59
+ ax.set(title='Churn by Monthly Charges')
60
+ st.pyplot(fig_3)
61
+ st.write('''
62
+ From the bar plot above we can see that as tenure increases, the churn rate tends
63
+ to decrease. Customers with a longer usage period tend to be more loyal.
64
+ ''')
65
+ st.markdown('---')
66
+
67
+ # Plot 4
68
+ def plot_4():
69
+ fig_4 = plt.figure(figsize=(7, 5))
70
+ ax = sns.countplot(data=df, x='internet_service', hue='churn')
71
+ ax.bar_label(ax.containers[0])
72
+ ax.bar_label(ax.containers[1])
73
+ ax.set(title='Churn by Internet Service')
74
+ st.pyplot(fig_4)
75
+ st.write('There are more customers with Fiber Optic services who churn (1297) than DSL customers who churn (459). The ratio of churn to total customers appears to be higher for customers with Fiber Optic services (41.9%) compared to DSL customers (19.0%).')
76
+ st.write('The solution that needs to be done is to evaluate and update Fiber Optic services to improve quality and customer satisfaction, such as focusing on improving speed, stability and ease of use. In addition, there needs to be an adjustment to the marketing strategy to emphasize the advantages and benefits of Fiber Optic services that can meet customer needs by identifying market segments that are more likely to be interested in this service.')
77
+ st.markdown('---')
78
 
 
 
 
 
 
 
79
 
80
  if plot_selection == "Customer Distribution":
81
  plot_1()
82
+ elif plot_selection == "Churn by Monthly Charge":
83
+ plot_2()
84
+ elif plot_selection == "Churn by Tenure":
85
+ plot_3()
86
+ elif plot_selection == "Churn by Internet Service":
87
+ plot_4()
88
 
89
  if __name__ == '__main__':
90
  run()
home.py CHANGED
@@ -9,7 +9,8 @@ def run():
9
 
10
  st.write('## Background :')
11
  st.markdown('''
12
- Telco tambahono rek .....
 
13
  ''')
14
 
15
  st.write('## Project Objective :')
 
9
 
10
  st.write('## Background :')
11
  st.markdown('''
12
+ The telecommunications industry, being highly competitive, faces challenges in retaining customers.
13
+ Churn or customer attrition, is a critical metric that directly impacts the revenue and growth of a Telco company.
14
  ''')
15
 
16
  st.write('## Project Objective :')
prediction.py CHANGED
@@ -28,12 +28,14 @@ def run():
28
 
29
  # Choice of input: Upload or Manual Input
30
  inputType = st.selectbox("How would you like to input data ?", ["Upload Excel or CSV File", "Manual Input"])
 
31
 
32
  # Create Function for Prediction
33
  def predictData(df):
34
  # Classification prediction
35
  y_pred_uploaded = classification_model.predict(df)
36
  df['churn'] = y_pred_uploaded
 
37
 
38
  # Filter the DataFrame for Predicted Churn (1)
39
  df_churn = df[df['churn'] == 1]
@@ -70,41 +72,65 @@ def run():
70
  df_cluster_1 = df_churn[df_churn['cluster'] == 1]
71
  df_cluster_2 = df_churn[df_churn['cluster'] == 2]
72
 
73
- st.write('## Result')
74
- st.write('##### Here are some suggestion to minimalize churn potential for each customer')
75
  c0, c1, c2 = '', '', ''
76
  for x in df_cluster_0['name']: c0 += str(x) + ', '
77
  for y in df_cluster_1['name']: c1 += str(y) + ', '
78
  for z in df_cluster_2['name']: c2 += str(z) + ', '
79
 
 
 
 
 
 
 
80
  suggestion_0 = '''
81
- - Menawarkan paket dengan tambahan kecepatan selama 3 bulan bagi yang telah berlangganan di atas 3 tahun
82
- - Membuka seluruh channel TV saat event hari besar seperti lebaran, natal dan lain lain
83
- - Memberikan penawaran khusus untuk meningkatkan kecepatan internet kepada mereka
 
 
 
 
 
 
84
  '''
85
 
86
  suggestion_1 = '''
87
- - Memberikan penawaran dengan banyak keuntungan jika berlangganan untuk jangka panjang
88
- - Menawarkan paket internet DSL tahunan dengan harga yang terjangkau
 
 
 
 
 
 
89
  '''
90
 
91
  suggestion_2 = '''
92
- Memberikan paket khusus dengan kriteria sebagai berikut :
93
- - Kecepatan tinggi tetapi banwidth lebih rendah dengan harga yang lebih murah dari paket normal
94
- - Kecepatan rendah tetapi banwidth besar sehingga koneksi jauh lebih stabil dengan harga yang lebih murah dari paket normal
95
  '''
96
 
97
  if c0 != '':
 
 
98
  st.write('Suggestion for `', c0[0:-2], '` is')
99
  st.write(suggestion_0)
100
  st.markdown('---')
101
 
102
  if c1 != '':
 
 
103
  st.write('Suggestion for `', c1[0:-2], '` is')
104
  st.write(suggestion_1)
105
  st.markdown('---')
106
 
107
  if c2 != '':
 
 
108
  st.write('Suggestion for `', c2[0:-2], '` is')
109
  st.write(suggestion_2)
110
  st.markdown('---')
@@ -129,7 +155,15 @@ def run():
129
 
130
  # A. For CSV
131
  if inputType == "Upload Excel or CSV File":
132
- uploaded_file = st.file_uploader("Choose a Excel or CSV file", type=["csv", "xlsx"], accept_multiple_files=False)
 
 
 
 
 
 
 
 
133
  if uploaded_file is not None:
134
  split_file_name = os.path.splitext(uploaded_file.name)
135
  # file_name = split_file_name[0]
@@ -163,7 +197,7 @@ def run():
163
  # internet_service = col4.selectbox('Internet Service', ('DSL', 'Fiber optic', 'No'), index=0)
164
 
165
  col4, col5, col6 = st.columns([1, 1, 1])
166
- internet_service = col4.radio(label='Subs for Phone service?', options=['DSL', 'Fiber optic', 'No'])
167
  phone_service = col5.radio(label='Subs for Phone service?', options=['Yes', 'No'])
168
  multiple_lines = col6.radio(label='Subs for Multiple Lines?', options=['Yes', 'No', 'No Phone Services'])
169
 
@@ -183,7 +217,7 @@ def run():
183
  col_charges1, col_charges2, col_charges3 = st.columns([1, 1, 2])
184
  monthly_charges = col_charges1.number_input('Monthly Charges', min_value=1, max_value=999, step=1, help='Amount to paid per month', key='mcharges', on_change=calculateChargesAndCategory)
185
  total_charges = col_charges2.number_input('Total Charges', min_value=1, max_value=999999, step=1, disabled=True, key='tcharges')
186
- charges_cat = col_charges3.text_input('Chargest Category', disabled=True, key='catcharges')
187
 
188
  # st.button('Predict', on_click=predict)
189
  data_inf = {
@@ -192,7 +226,7 @@ def run():
192
  'senior_citizen': senior_citizen,
193
  'partner': partner,
194
  'dependents': dependents,
195
- 'tenure': int(tenure),
196
  'phone_service': phone_service,
197
  'multiple_lines': multiple_lines,
198
  'internet_service': internet_service,
@@ -206,7 +240,7 @@ def run():
206
  'paperless_billing': paperless_billing,
207
  'payment_method': payment_method,
208
  'monthly_charges': monthly_charges,
209
- 'total_charges': int(total_charges),
210
  'monthly_charges_cat': charges_cat,
211
  'tenure_year': tenure_year
212
  }
 
28
 
29
  # Choice of input: Upload or Manual Input
30
  inputType = st.selectbox("How would you like to input data ?", ["Upload Excel or CSV File", "Manual Input"])
31
+ st.markdown('---')
32
 
33
  # Create Function for Prediction
34
  def predictData(df):
35
  # Classification prediction
36
  y_pred_uploaded = classification_model.predict(df)
37
  df['churn'] = y_pred_uploaded
38
+ # st.dataframe(df)
39
 
40
  # Filter the DataFrame for Predicted Churn (1)
41
  df_churn = df[df['churn'] == 1]
 
72
  df_cluster_1 = df_churn[df_churn['cluster'] == 1]
73
  df_cluster_2 = df_churn[df_churn['cluster'] == 2]
74
 
75
+ st.write(f'## Result : `{churnCustomer} customer` are predicted as churn!')
76
+ st.write('##### Here are some suggestion to minimalize churn potential for each customer depend on their cluster')
77
  c0, c1, c2 = '', '', ''
78
  for x in df_cluster_0['name']: c0 += str(x) + ', '
79
  for y in df_cluster_1['name']: c1 += str(y) + ', '
80
  for z in df_cluster_2['name']: c2 += str(z) + ', '
81
 
82
+ cluster_0 = '''
83
+ - Most of them are senior citizen
84
+ - Having partner and dependents
85
+ - High monthly charges
86
+ '''
87
+
88
  suggestion_0 = '''
89
+ - Offers packages with additional speed for 3 months for those who have subscribed for more than 3 years
90
+ - Open all TV channels during big holiday events such as Eid, Christmas and others
91
+ - Provide special offers to increase internet speed to them
92
+ '''
93
+
94
+ cluster_1 = '''
95
+ - Mix of senior citizan and youngster
96
+ - Having partner and dependents
97
+ - Low monthly charges
98
  '''
99
 
100
  suggestion_1 = '''
101
+ - Provides offers with many benefits if they subscribe for the long term
102
+ - Offers annual DSL internet packages at affordable prices
103
+ '''
104
+
105
+ cluster_2 = '''
106
+ - Most of them are young people
107
+ - Most of them have no partner and dependents
108
+ - Moderate monthly charges
109
  '''
110
 
111
  suggestion_2 = '''
112
+ Providing special packages with the following criteria:
113
+ - High speed internet but lower bandwidth at a cheaper price than normal packages
114
+ - Low speed internet but large bandwidth so the connection is much more stable at a cheaper price than normal packages
115
  '''
116
 
117
  if c0 != '':
118
+ st.write('##### Cluster 1')
119
+ st.write(cluster_0)
120
  st.write('Suggestion for `', c0[0:-2], '` is')
121
  st.write(suggestion_0)
122
  st.markdown('---')
123
 
124
  if c1 != '':
125
+ st.write('##### Cluster 2')
126
+ st.write(cluster_1)
127
  st.write('Suggestion for `', c1[0:-2], '` is')
128
  st.write(suggestion_1)
129
  st.markdown('---')
130
 
131
  if c2 != '':
132
+ st.write('##### Cluster 3')
133
+ st.write(cluster_2)
134
  st.write('Suggestion for `', c2[0:-2], '` is')
135
  st.write(suggestion_2)
136
  st.markdown('---')
 
155
 
156
  # A. For CSV
157
  if inputType == "Upload Excel or CSV File":
158
+ with open('telco_data_test.xlsx', 'rb') as file:
159
+ st.download_button(
160
+ label='💾 Download Template Excel',
161
+ data=file,
162
+ file_name='telco_example.xlsx',
163
+ mime='application/vnd.ms-excel'
164
+ )
165
+
166
+ uploaded_file = st.file_uploader("Choose Excel or CSV file", type=["csv", "xlsx"], accept_multiple_files=False)
167
  if uploaded_file is not None:
168
  split_file_name = os.path.splitext(uploaded_file.name)
169
  # file_name = split_file_name[0]
 
197
  # internet_service = col4.selectbox('Internet Service', ('DSL', 'Fiber optic', 'No'), index=0)
198
 
199
  col4, col5, col6 = st.columns([1, 1, 1])
200
+ internet_service = col4.radio(label='Subs for Internet service?', options=['DSL', 'Fiber optic', 'No'])
201
  phone_service = col5.radio(label='Subs for Phone service?', options=['Yes', 'No'])
202
  multiple_lines = col6.radio(label='Subs for Multiple Lines?', options=['Yes', 'No', 'No Phone Services'])
203
 
 
217
  col_charges1, col_charges2, col_charges3 = st.columns([1, 1, 2])
218
  monthly_charges = col_charges1.number_input('Monthly Charges', min_value=1, max_value=999, step=1, help='Amount to paid per month', key='mcharges', on_change=calculateChargesAndCategory)
219
  total_charges = col_charges2.number_input('Total Charges', min_value=1, max_value=999999, step=1, disabled=True, key='tcharges')
220
+ charges_cat = col_charges3.text_input('Charges Category', disabled=True, key='catcharges')
221
 
222
  # st.button('Predict', on_click=predict)
223
  data_inf = {
 
226
  'senior_citizen': senior_citizen,
227
  'partner': partner,
228
  'dependents': dependents,
229
+ 'tenure': tenure,
230
  'phone_service': phone_service,
231
  'multiple_lines': multiple_lines,
232
  'internet_service': internet_service,
 
240
  'paperless_billing': paperless_billing,
241
  'payment_method': payment_method,
242
  'monthly_charges': monthly_charges,
243
+ 'total_charges': total_charges,
244
  'monthly_charges_cat': charges_cat,
245
  'tenure_year': tenure_year
246
  }
telco_data_test.xlsx ADDED
Binary file (14 kB). View file