amariayudha committed on
Commit
e36103e
·
verified ·
1 Parent(s): 386c5fb

Upload 3 files

Browse files
Files changed (2) hide show
  1. app.py +26 -17
  2. eda.py +151 -157
app.py CHANGED
@@ -1,21 +1,30 @@
1
- # -- IMPORT LIBRARIES --
2
  import streamlit as st
3
- import time
4
 
5
- # -- SET UP CONFIG --
6
- st.set_page_config(page_title='Predictix',
7
- layout='wide',
8
- page_icon='👋🏻')
9
 
10
- def main():
11
- # Navigation sidebar
12
- st.sidebar.title("🧭 Navigation")
13
- page = st.sidebar.selectbox("Go to", ["🏠 Home", "📊 EDA", "🔍 Prediction"])
14
 
15
- if page == "🏠 Home":
16
- # Sidebar content for Home page
17
- st.sidebar.markdown("---")
18
- st.sidebar.subheader("📊 About the Model")
19
- recall_classification = 0.82
20
- st.sidebar.write("🎯 Model Sentiment Analysis:")
21
- st.sidebar.progress(recall_classification)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
 
3
+ # Import the other scripts
4
+ import predict
5
+ import eda
 
6
 
7
+ # Set the title of the app
8
+ st.title("Customer Churn Analysis App")
 
 
9
 
10
+ # Create a sidebar for navigation
11
+ st.sidebar.title("Navigation")
12
+ page = st.sidebar.radio("Go to", ["Home", "Predict Churn", "EDA"])
13
+
14
+ # Define the home page
15
+ if page == "Home":
16
+ st.write("## Welcome to the Customer Churn Analysis App")
17
+ st.write("""
18
+ This application allows you to:
19
+ - Predict customer churn based on input data.
20
+ - Perform exploratory data analysis (EDA) on customer churn data.
21
+ Use the sidebar to navigate between the pages.
22
+ """)
23
+
24
+ # Navigate to the Predict Churn page
25
+ elif page == "Predict Churn":
26
+ predict.run()
27
+
28
+ # Navigate to the EDA page
29
+ elif page == "EDA":
30
+ eda.run()
eda.py CHANGED
@@ -4,160 +4,154 @@ import pandas as pd
4
  import plotly.express as px
5
  import numpy as np
6
 
7
-
8
- # def eda():
9
- # Set the title of the Streamlit app
10
- st.title('📊 Exploratory Data Analysis')
11
- st.write('---')
12
-
13
- # Load the dataset from a CSV filez
14
- df = pd.read_csv('florist_customer_churn_raw_fix_cleaned.csv')
15
-
16
- # Display the first few rows of the dataset
17
- st.subheader('📂 Dataset Overview: ')
18
- st.dataframe(df.head(10))
19
-
20
- # -- CONTAINER --
21
- # Creating container for home-page description
22
- ins_total_churn = st.container(border=True)
23
- ins_total_churn.markdown('<h1 style="font-size: 30px;">🧠 Quick to Know about Dataset: </h1>', unsafe_allow_html=True)
24
- ins_total_churn.write("The dataset contains various customer behavior indicators that may be associated with **customer churn**. From this data, our team will provide a classification based on the sentiment from the `feedback` to predict whether a customer will churn or not.")
25
- st.write('---')
26
-
27
- # -- DATA EXPLORATION --
28
- st.subheader('🗺️ Data Exploration')
29
-
30
- # Display the number of rows and columns in the dataset
31
-
32
- # --1. CHURN PIE CHART VIZ ---
33
- # Count the number of True/False in 'churn' column
34
- churn_count = df['churn'].value_counts()
35
-
36
- # Create a pie chart using Plotly with a purple color palette
37
- fig = px.pie(values=churn_count.values,
38
- names=churn_count.index,
39
- title="Total Churn Pie Chart Distribution",
40
- color_discrete_sequence=px.colors.sequential.Purples_r)
41
-
42
- # Show the chart in Streamlit
43
- st.subheader("🏃 Total Churn")
44
- st.plotly_chart(fig)
45
-
46
- # -- INSIGHT TOTAL CHURN --
47
- ins_total_churn = st.container(border=True)
48
- ins_total_churn.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
49
- ins_total_churn.write("""
50
- - A nearly equal split of `true` and `false` churn indicates that about half of customers remain `loyal` and `the other half churn`.
51
- - Churn ratio approaching `50-50` indicates that there is a significant risk of losing customers.
52
- - This indicates that we should focus on customer retention strategies and service improvements to `reduce true churn and maintain customer loyalty`."""
53
- )
54
-
55
- # -- 2. POSITIF FEEDBACK --
56
- # Membuat chart menggunakan Plotly Express
57
- # Filter for rows where sentiment is 'positive'
58
- positive_df = df[df['sentiment'] == 'positive']
59
-
60
- # Group the data by 'topic' and count the occurrences
61
- positive_topic_counts = positive_df['topic'].value_counts().reset_index()
62
- positive_topic_counts.columns = ['Topic', 'Count of Sentiment']
63
-
64
- # Create the bar chart using Plotly
65
- fig = px.bar(positive_topic_counts,
66
- x='Count of Sentiment',
67
- y='Topic',
68
- orientation='h',
69
- color_discrete_sequence=['#8a2be2'], # Purple color
70
- title="Positive Sentiment by Topic")
71
-
72
- # Display the plot in Streamlit
73
- st.subheader("🗣️💬 Positive Sentiment by Topic")
74
- st.plotly_chart(fig)
75
-
76
- # -- INSIGHT POSITIF FEEDBACK BY TOPIC --
77
- ins_positive_feedback = st.container(border=True)
78
- ins_positive_feedback.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
79
- ins_positive_feedback.write("""
80
- - `Product Quality` (Kualitas Produk) receives the most attention in positive sentiment, with more than 100 people expressing satisfaction with the product.
81
- - `General Feedback` is also quite high, indicating that many customers provide good general feedback regarding the service or product.
82
- - `Bouquet Preferences` also has a substantial amount of positive sentiment, indicating that customers are quite satisfied with the available flower arrangement options.
83
- - `Customer Service` receives positive sentiment, although not as high as some other topics, but it shows that customer service is still fairly appreciated.
84
- - `Price Appreciation` (Apresiasi Harga) shows that some customers feel the offered prices are quite reasonable.
85
- - `Delivery Quality` (Kualitas Pengiriman) and `Delivery Issues` (Masalah Pengiriman) are relatively low in positive sentiment, meaning the delivery aspect is not a major strength.
86
- """)
87
- st.write('---')
88
-
89
-
90
- # -- 3. NEGATIF FEEDBACK --
91
- # Filter for rows where sentiment is 'negative'
92
- negative_df = df[df['sentiment'] == 'negative']
93
-
94
- # Group the data by 'topic' and count the occurrences
95
- negative_topic_counts = negative_df['topic'].value_counts().reset_index()
96
- negative_topic_counts.columns = ['Topic', 'Count of Sentiment']
97
-
98
- # Create the bar chart using Plotly
99
- fig = px.bar(negative_topic_counts,
100
- x='Count of Sentiment',
101
- y='Topic',
102
- orientation='h',
103
- color_discrete_sequence=['#8a2be2'], # Purple color
104
- title="Negative Feedback by Topic")
105
-
106
- # Display the plot in Streamlit
107
- st.subheader("🗣️💬 Negative Sentiment by Topic")
108
- st.plotly_chart(fig)
109
-
110
- # -- INSIGHT NEGATIF FEEDBACK BY TOPIC --
111
- ins_negative_feedback = st.container(border=True)
112
- ins_negative_feedback.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
113
- ins_negative_feedback.write("""
114
- - `Product Quality` is also a major topic in negative sentiment, with more than 140 negative comments. This indicates that while there is a lot of praise, there are also significant complaints about product quality.
115
- - `Price Complaints` is a major negative topic, meaning many customers feel that the prices offered are too high or not meeting their expectations.
116
- - `Delivery Issues` is also a major problem in negative sentiment, showing that delivery is a primary source of complaints.
117
- - `Bouquet Preferences` also has a fair amount of negative sentiment, indicating that while many are satisfied, there are also those who feel the flower arrangements do not meet their expectations.
118
- - `Customer Service` has received some negative sentiment, though it is not as prominent as other topics.
119
- - `Delivery Quality` has very minimal negative sentiment, indicating that the quality of delivery is less frequently complained about compared to delivery issues overall.
120
- """)
121
- st.write('---')
122
-
123
- # -- 3. CHURN RATE --
124
- # Group the data by 'churn' and 'contract' and count the occurrences
125
- # Map the churn column to categorical values: False -> 'Not Churned', True -> 'Churned'
126
- df['churn_category'] = df['churn'].map({False: 'Not Churned', True: 'Churned'})
127
-
128
- # Group the data by 'churn_category' and 'contract' and count the occurrences
129
- churn_contract_counts = df.groupby(['churn_category', 'contract']).size().reset_index(name='Count of Churn')
130
-
131
- # Create the bar chart using Plotly
132
- fig = px.bar(churn_contract_counts,
133
- x='Count of Churn',
134
- y='contract',
135
- color='churn_category', # Use the new categorical churn column
136
- barmode='group',
137
- orientation='h',
138
- color_discrete_sequence=['#8a2be2', '#c8a2c8'], # Purple color shades
139
- title="Churn Rate by Contract Type")
140
-
141
- # Display the plot in Streamlit
142
- st.subheader("🏃 or 🙆 by Contract Type")
143
- st.plotly_chart(fig)
144
-
145
- # -- INSIGHT CHURN RATE BY CONTRACT TYPE --
146
- ins_churn_rate = st.container(border=True)
147
- ins_churn_rate.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
148
- ins_churn_rate.write("""
149
- - `Short-term (monthly)` contracts have a very high churn rate, indicating that customers tend to leave the service more easily if they are not tied to a long-term contract
150
- - `Long-term contracts` (one and two years) are more effective in retaining customers than short-term contracts.
151
- """)
152
- st.write('---')
153
-
154
- st.write(
155
- '<p style="font-size: 15px; text-align: center;">All Rights Reserved | Made by ❀️</p>',
156
- unsafe_allow_html=True
157
- )
158
-
159
-
160
-
161
-
162
-
163
-
 
4
  import plotly.express as px
5
  import numpy as np
6
 
7
+ def run():
8
+ # Set the title of the Streamlit app
9
+ st.title('📊 Exploratory Data Analysis')
10
+ st.write('---')
11
+
12
+ # Load the dataset from a CSV file
13
+ df = pd.read_csv('florist_customer_churn_raw_fix_cleaned.csv')
14
+
15
+ # Display the first few rows of the dataset
16
+ st.subheader('📂 Dataset Overview: ')
17
+ st.dataframe(df.head(10))
18
+
19
+ # -- CONTAINER --
20
+ # Creating container for home-page description
21
+ ins_total_churn = st.container()
22
+ ins_total_churn.markdown('<h1 style="font-size: 30px;">🧠 Quick to Know about Dataset: </h1>', unsafe_allow_html=True)
23
+ ins_total_churn.write("The dataset contains various customer behavior indicators that may be associated with **customer churn**. From this data, our team will provide a classification based on the sentiment from the `feedback` to predict whether a customer will churn or not.")
24
+ st.write('---')
25
+
26
+ # -- DATA EXPLORATION --
27
+ st.subheader('🗺️ Data Exploration')
28
+
29
+ # --1. CHURN PIE CHART VIZ ---
30
+ # Count the number of True/False in 'churn' column
31
+ churn_count = df['churn'].value_counts()
32
+
33
+ # Create a pie chart using Plotly with a purple color palette
34
+ fig = px.pie(values=churn_count.values,
35
+ names=churn_count.index,
36
+ title="Total Churn Pie Chart Distribution",
37
+ color_discrete_sequence=px.colors.sequential.Purples_r)
38
+
39
+ # Show the chart in Streamlit
40
+ st.subheader("🏃 Total Churn")
41
+ st.plotly_chart(fig)
42
+
43
+ # -- INSIGHT TOTAL CHURN --
44
+ ins_total_churn = st.container()
45
+ ins_total_churn.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
46
+ ins_total_churn.write("""
47
+ - A nearly equal split of `true` and `false` churn indicates that about half of customers remain `loyal` and `the other half churn`.
48
+ - Churn ratio approaching `50-50` indicates that there is a significant risk of losing customers.
49
+ - This indicates that we should focus on customer retention strategies and service improvements to `reduce true churn and maintain customer loyalty`."""
50
+ )
51
+
52
+ # -- 2. POSITIF FEEDBACK --
53
+ # Membuat chart menggunakan Plotly Express
54
+ # Filter for rows where sentiment is 'positive'
55
+ positive_df = df[df['sentiment'] == 'positive']
56
+
57
+ # Group the data by 'topic' and count the occurrences
58
+ positive_topic_counts = positive_df['topic'].value_counts().reset_index()
59
+ positive_topic_counts.columns = ['Topic', 'Count of Sentiment']
60
+
61
+ # Create the bar chart using Plotly
62
+ fig = px.bar(positive_topic_counts,
63
+ x='Count of Sentiment',
64
+ y='Topic',
65
+ orientation='h',
66
+ color_discrete_sequence=['#8a2be2'], # Purple color
67
+ title="Positive Sentiment by Topic")
68
+
69
+ # Display the plot in Streamlit
70
+ st.subheader("🗣️💬 Positive Sentiment by Topic")
71
+ st.plotly_chart(fig)
72
+
73
+ # -- INSIGHT POSITIF FEEDBACK BY TOPIC --
74
+ ins_positive_feedback = st.container()
75
+ ins_positive_feedback.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
76
+ ins_positive_feedback.write("""
77
+ - `Product Quality` (Kualitas Produk) receives the most attention in positive sentiment, with more than 100 people expressing satisfaction with the product.
78
+ - `General Feedback` is also quite high, indicating that many customers provide good general feedback regarding the service or product.
79
+ - `Bouquet Preferences` also has a substantial amount of positive sentiment, indicating that customers are quite satisfied with the available flower arrangement options.
80
+ - `Customer Service` receives positive sentiment, although not as high as some other topics, but it shows that customer service is still fairly appreciated.
81
+ - `Price Appreciation` (Apresiasi Harga) shows that some customers feel the offered prices are quite reasonable.
82
+ - `Delivery Quality` (Kualitas Pengiriman) and `Delivery Issues` (Masalah Pengiriman) are relatively low in positive sentiment, meaning the delivery aspect is not a major strength.
83
+ """)
84
+ st.write('---')
85
+
86
+
87
+ # -- 3. NEGATIF FEEDBACK --
88
+ # Filter for rows where sentiment is 'negative'
89
+ negative_df = df[df['sentiment'] == 'negative']
90
+
91
+ # Group the data by 'topic' and count the occurrences
92
+ negative_topic_counts = negative_df['topic'].value_counts().reset_index()
93
+ negative_topic_counts.columns = ['Topic', 'Count of Sentiment']
94
+
95
+ # Create the bar chart using Plotly
96
+ fig = px.bar(negative_topic_counts,
97
+ x='Count of Sentiment',
98
+ y='Topic',
99
+ orientation='h',
100
+ color_discrete_sequence=['#8a2be2'], # Purple color
101
+ title="Negative Sentiment by Topic")
102
+
103
+ # Display the plot in Streamlit
104
+ st.subheader("🗣️💬 Negative Sentiment by Topic")
105
+ st.plotly_chart(fig)
106
+
107
+ # -- INSIGHT NEGATIF FEEDBACK BY TOPIC --
108
+ ins_negative_feedback = st.container()
109
+ ins_negative_feedback.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
110
+ ins_negative_feedback.write("""
111
+ - `Product Quality` is also a major topic in negative sentiment, with more than 140 negative comments. This indicates that while there is a lot of praise, there are also significant complaints about product quality.
112
+ - `Price Complaints` is a major negative topic, meaning many customers feel that the prices offered are too high or not meeting their expectations.
113
+ - `Delivery Issues` is also a major problem in negative sentiment, showing that delivery is a primary source of complaints.
114
+ - `Bouquet Preferences` also has a fair amount of negative sentiment, indicating that while many are satisfied, there are also those who feel the flower arrangements do not meet their expectations.
115
+ - `Customer Service` has received some negative sentiment, though it is not as prominent as other topics.
116
+ - `Delivery Quality` has very minimal negative sentiment, indicating that the quality of delivery is less frequently complained about compared to delivery issues overall.
117
+ """)
118
+ st.write('---')
119
+
120
+ # -- 4. CHURN RATE --
121
+ # Group the data by 'churn' and 'contract' and count the occurrences
122
+ # Map the churn column to categorical values: False -> 'Not Churned', True -> 'Churned'
123
+ df['churn_category'] = df['churn'].map({False: 'Not Churned', True: 'Churned'})
124
+
125
+ # Group the data by 'churn_category' and 'contract' and count the occurrences
126
+ churn_contract_counts = df.groupby(['churn_category', 'contract']).size().reset_index(name='Count of Churn')
127
+
128
+ # Create the bar chart using Plotly
129
+ fig = px.bar(churn_contract_counts,
130
+ x='Count of Churn',
131
+ y='contract',
132
+ color='churn_category', # Use the new categorical churn column
133
+ barmode='group',
134
+ orientation='h',
135
+ color_discrete_sequence=['#8a2be2', '#c8a2c8'], # Purple color shades
136
+ title="Churn Rate by Contract Type")
137
+
138
+ # Display the plot in Streamlit
139
+ st.subheader("🏃 or 🙆 by Contract Type")
140
+ st.plotly_chart(fig)
141
+
142
+ # -- INSIGHT CHURN RATE BY CONTRACT TYPE --
143
+ ins_churn_rate = st.container()
144
+ ins_churn_rate.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
145
+ ins_churn_rate.write("""
146
+ - `Short-term (monthly)` contracts have a very high churn rate, indicating that customers tend to leave the service more easily if they are not tied to a long-term contract
147
+ - `Long-term contracts` (one and two years) are more effective in retaining customers than short-term contracts.
148
+ """)
149
+ st.write('---')
150
+
151
+ st.write(
152
+ '<p style="font-size: 15px; text-align: center;">All Rights Reserved | Made by ❀️</p>',
153
+ unsafe_allow_html=True
154
+ )
155
+
156
+ if __name__ == "__main__":
157
+ run()