amariayudha committed on
Commit
25f227d
·
verified ·
1 Parent(s): 149605f

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +87 -26
  2. eda.py +182 -134
  3. wordcloud_negative.png +0 -0
  4. wordcloud_positive.png +0 -0
app.py CHANGED
@@ -2,6 +2,7 @@
2
  import streamlit as st
3
  import eda
4
  import predict
 
5
 
6
  # Set up the Streamlit page configuration
7
  st.set_page_config(
@@ -19,62 +20,122 @@ def main():
19
  if page == "🏠 Home":
20
  # Sidebar content for Home page
21
  st.sidebar.markdown("---")
22
- st.sidebar.subheader("📊 About the Model")
23
- recall = 0.89 # You may want to update this value based on your model's performance
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  st.sidebar.write("🎯 Model Recall:")
25
- st.sidebar.progress(recall)
26
- st.sidebar.write(f"{recall:.2%}")
27
- st.sidebar.write("**🤔 What is Recall?**")
28
- st.sidebar.write("Recall measures how well our model identifies customers who are likely to churn.")
29
  st.sidebar.write("**💡 What does this mean?**")
30
- st.sidebar.write("Out of all the customers who are likely to churn, our model correctly identifies 89% of them.")
31
- st.sidebar.write("This helps us catch most cases, *reducing the chance of missing someone who needs attention*")
32
 
33
  st.sidebar.markdown("---")
34
  st.sidebar.subheader("📚 Fun Fact")
35
  st.sidebar.info("It costs 5-25 times more to acquire a new customer than it does to retain an existing one.")
36
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Main content for Home page
38
  st.title("🏃 Welcome to Customer Churn Prediction Tool")
39
- st.write("""
40
- This application provides functionalities for Exploratory Data Analysis and
41
- Prediction regarding customer churn risk. Use the navigation pane on the left to
42
- select the module you wish to utilize.
43
- """)
44
-
45
  # Display image
46
  col1, col2, col3 = st.columns([1,2,1])
47
  with col2:
48
- st.image("predictix.jpg", caption="Customer Churn Prediction", use_column_width=True)
49
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  st.markdown("---")
51
 
52
  # Dataset information
53
  st.write("#### 📊 Dataset")
54
  st.info("""
55
- The dataset contains customer information including tenure, contract type, payment method,
56
- monthly charges, total charges, and feedback. It's used to predict customer churn.
 
 
 
57
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- # Problem Statement
60
  st.write("#### ⚠️ Problem Statement")
61
  st.warning("""
 
 
 
 
 
62
  Customer churn is a significant challenge for businesses, leading to revenue loss and increased
63
  acquisition costs. Early identification of customers likely to churn is crucial for implementing
64
  effective retention strategies. As a data scientist, your task is to develop a machine learning
65
- model that can predict customer churn based on historical data and customer behavior patterns.
66
 
67
- The goal is to develop a model with high recall to identify potential churners, allowing the
68
- business to take proactive measures to retain these customers.
69
- """)
70
 
71
  # Project Objective
72
  st.write("#### 🎯 Objective")
73
  st.success("""
74
- This project aims to create a classification model to predict customer churn by evaluating
75
- various algorithms. Model performance will be primarily assessed using Recall to measure
76
- effectiveness in identifying potential churners, minimizing the risk of missing customers
77
- who are likely to leave.
 
 
 
78
  """)
79
 
80
  elif page == "📊 EDA":
 
2
  import streamlit as st
3
  import eda
4
  import predict
5
+ import pandas as pd
6
 
7
  # Set up the Streamlit page configuration
8
  st.set_page_config(
 
20
  if page == "🏠 Home":
21
  # Sidebar content for Home page
22
  st.sidebar.markdown("---")
23
+ st.sidebar.subheader("📊 About the Models")
24
+
25
+ # Sentiment Analysis Model
26
+ st.sidebar.write("**Sentiment Analysis Model (BERT-based)**")
27
+ sentiment_accuracy = 0.89
28
+ st.sidebar.write("🎯 Model Accuracy:")
29
+ col1, col2 = st.sidebar.columns(2)
30
+ col1.metric("Accuracy", f"{sentiment_accuracy:.2%}")
31
+ col2.metric("Error Rate", f"{1-sentiment_accuracy:.2%}")
32
+ st.sidebar.write("Analyzes customer feedback to predict sentiment.")
33
+ st.sidebar.write("**💡 What does this mean?**")
34
+ st.sidebar.write("The model correctly classifies the sentiment of customer feedback 89% of the time. This high accuracy ensures that we can reliably interpret customer opinions and make informed decisions based on their feedback.")
35
+
36
+ st.sidebar.markdown("---")
37
+
38
+ # Churn Prediction Model
39
+ st.sidebar.write("**Churn Prediction Model (SVC)**")
40
+ churn_recall = 0.89
41
  st.sidebar.write("🎯 Model Recall:")
42
+ st.sidebar.progress(churn_recall)
43
+ st.sidebar.write(f"{churn_recall:.2%}")
 
 
44
  st.sidebar.write("**💡 What does this mean?**")
45
+ st.sidebar.write("The model correctly identifies 89% of actual churning customers. This high recall minimizes false negatives, ensuring we catch most at-risk customers and can take proactive retention measures.")
 
46
 
47
  st.sidebar.markdown("---")
48
  st.sidebar.subheader("📚 Fun Fact")
49
  st.sidebar.info("It costs 5-25 times more to acquire a new customer than it does to retain an existing one.")
50
 
51
+ st.sidebar.markdown("---")
52
+ st.sidebar.subheader("🛠️ Tools Utilized")
53
+ st.sidebar.write("""
54
+ - `Streamlit` for web app development
55
+ - `Pandas` for data manipulation
56
+ - `Plotly Express` for interactive visualizations
57
+ - `PyTorch` and `Transformers` for sentiment analysis (BERT)
58
+ - `Scikit-learn` for machine learning models (SVC)
59
+ - `Pickle` for model serialization
60
+ """)
61
+
62
  # Main content for Home page
63
  st.title("🏃 Welcome to Customer Churn Prediction Tool")
64
+ st.write("Empowering businesses with data-driven insights to retain customers and boost growth.")
 
 
 
 
 
65
  # Display image
66
  col1, col2, col3 = st.columns([1,2,1])
67
  with col2:
68
+ st.image("predictix.jpg", caption="Predictix: Customer Churn Prediction", use_column_width=True)
69
 
70
+ st.write("""
71
+ `Predictix` is an **innovative app** designed to help businesses **understand and predict customer churn risk**.
72
+ Our application combines **powerful Exploratory Data Analysis (EDA)** with **advanced prediction capabilities**,
73
+ utilizing a **sophisticated two-step approach**. First, we employ a **BERT-based model** for **sentiment analysis**,
74
+ which accurately predicts sentiment from customer feedback. This sentiment data is then combined with other
75
+ customer information and fed into a **Support Vector Classifier (SVC)** to predict the **likelihood of churn**.
76
+ This **comprehensive approach** allows businesses to gain **deep insights** into customer behavior and take
77
+ **proactive measures** to improve retention. Whether you're looking to **explore your data** or **make predictions**,
78
+ Predictix has you covered. Simply use the **navigation pane** on the left to access the different modules and
79
+ start leveraging the power of **data-driven decision making** for your business.
80
+ """)
81
+
82
  st.markdown("---")
83
 
84
  # Dataset information
85
  st.write("#### 📊 Dataset")
86
  st.info("""
87
+ The dataset contains customer feedback used to predict sentiment, and then combines this sentiment analysis with customer information to predict customer churn.
88
+
89
+ This two-step approach allows for a more nuanced understanding of customer behavior and improved churn prediction.
90
+
91
+ Dataset source: [Florist Customer Churn](https://huggingface.co/datasets/iammkb2002/florist_customer_churn)
92
  """)
93
+
94
+ # Checkbox to show/hide dataset column description
95
+ if st.checkbox("Show dataset column description", value=True):
96
+ st.table(pd.DataFrame({
97
+ "Column Name": ["customer_id", "churn", "tenure", "monthly_charges", "total_charges", "contract", "payment_method", "feedback", "sentiment", "topic"],
98
+ "Description": [
99
+ "Unique identifier for each customer",
100
+ "Indicates whether the customer has left (True/False)",
101
+ "Number of months the customer has been with the company",
102
+ "Amount charged to the customer monthly (in local currency)",
103
+ "Total amount charged to the customer over their tenure",
104
+ "Type of contract the customer has (e.g., one year, month-to-month, two year)",
105
+ "Payment method used by the customer (e.g., credit card, electronic check)",
106
+ "Customer feedback comments regarding the service or product",
107
+ "Sentiment of the feedback (positive/negative) - predicted by our BERT model",
108
+ "Topic category of the feedback (e.g., bouquet preferences, delivery issues, general feedback)"
109
+ ]
110
+ }))
111
 
112
+ # Problem Statement
113
  st.write("#### ⚠️ Problem Statement")
114
  st.warning("""
115
+ In today's competitive market, understanding customer sentiment and predicting churn are crucial for business success.
116
+ However, manually analyzing large volumes of customer feedback and identifying potential churners is time-consuming
117
+ and prone to human error. Predictix addresses these challenges by automating both the sentiment analysis process
118
+ and churn prediction, allowing businesses to respond promptly to customer needs and preferences.
119
+
120
  Customer churn is a significant challenge for businesses, leading to revenue loss and increased
121
  acquisition costs. Early identification of customers likely to churn is crucial for implementing
122
  effective retention strategies. As a data scientist, your task is to develop a machine learning
123
+ model that can predict customer churn based on historical data, customer behavior patterns, and sentiment analysis.
124
 
125
+ The goal is to develop a two-step model approach with high accuracy and recall to identify potential churners,
126
+ allowing the business to take proactive measures to retain these customers.
127
+ """)
128
 
129
  # Project Objective
130
  st.write("#### 🎯 Objective")
131
  st.success("""
132
+ This project aims to create a two-step classification model to predict customer churn:
133
+ 1. Use a BERT-based model to analyze customer feedback and predict sentiment.
134
+ 2. Use an SVC model to predict customer churn based on the predicted sentiment and other customer information.
135
+
136
+ Model performance will be primarily assessed using Accuracy for the sentiment analysis model and
137
+ Recall for the churn prediction model to measure effectiveness in identifying potential churners,
138
+ minimizing the risk of missing customers who are likely to leave.
139
  """)
140
 
141
  elif page == "📊 EDA":
eda.py CHANGED
@@ -12,142 +12,190 @@ def run():
12
  # Load the dataset from a CSV file
13
  df = pd.read_csv('florist_customer_churn_raw_fix_cleaned.csv')
14
 
15
- # Display the first few rows of the dataset
16
- st.subheader('📂 Dataset Overview: ')
17
- st.dataframe(df.head(10))
18
-
19
- # -- CONTAINER --
20
- # Creating container for home-page description
21
- ins_total_churn = st.container()
22
- ins_total_churn.markdown('<h1 style="font-size: 30px;">🧠 Quick to Know about Dataset: </h1>', unsafe_allow_html=True)
23
- ins_total_churn.write("The dataset contains various customer behavior indicators that may be associated with **customer churn**. From this data, our team will provide a classification based on the sentiment from the `feedback` to predict whether a customer will churn or not.")
24
- st.write('---')
25
-
26
- # -- DATA EXPLORATION --
27
- st.subheader('🗺️ Data Exploration')
28
-
29
- # --1. CHURN PIE CHART VIZ ---
30
- # Count the number of True/False in 'churn' column
31
- churn_count = df['churn'].value_counts()
32
-
33
- # Create a pie chart using Plotly with a purple color palette
34
- fig = px.pie(values=churn_count.values,
35
- names=churn_count.index,
36
- title="Total Churn Pie Chart Distribution",
37
- color_discrete_sequence=px.colors.sequential.Purples_r)
38
-
39
- # Show the chart in Streamlit
40
- st.subheader("🏃 Total Churn")
41
- st.plotly_chart(fig)
42
-
43
- # -- INSIGHT TOTAL CHURN --
44
- ins_total_churn = st.container()
45
- ins_total_churn.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
46
- ins_total_churn.write("""
47
- - A nearly equal split of `true` and `false` churn indicates that about half of customers remain `loyal` and `the other half churn`.
48
- - Churn ratio approaching `50-50` indicates that there is a significant risk of losing customers.
49
- - This indicates that we should focus on customer retention strategies and service improvements to `reduce true churn and maintain customer loyalty`."""
50
- )
51
-
52
- # -- 2. POSITIF FEEDBACK --
53
- # Membuat chart menggunakan Plotly Express
54
- # Filter for rows where sentiment is 'positive'
55
- positive_df = df[df['sentiment'] == 'positive']
56
-
57
- # Group the data by 'topic' and count the occurrences
58
- positive_topic_counts = positive_df['topic'].value_counts().reset_index()
59
- positive_topic_counts.columns = ['Topic', 'Count of Sentiment']
60
-
61
- # Create the bar chart using Plotly
62
- fig = px.bar(positive_topic_counts,
63
- x='Count of Sentiment',
64
- y='Topic',
65
- orientation='h',
66
- color_discrete_sequence=['#8a2be2'], # Purple color
67
- title="Positive Sentiment by Topic")
68
-
69
- # Display the plot in Streamlit
70
- st.subheader("🗣️💬 Positive Sentiment by Topic")
71
- st.plotly_chart(fig)
72
-
73
- # -- INSIGHT POSITIF FEEDBACK BY TOPIC --
74
- ins_positive_feedback = st.container()
75
- ins_positive_feedback.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
76
- ins_positive_feedback.write("""
77
- - `Product Quality` (Kualitas Produk) receives the most attention in positive sentiment, with more than 100 people expressing satisfaction with the product.
78
- - `General Feedback` is also quite high, indicating that many customers provide good general feedback regarding the service or product.
79
- - `Bouquet Preferences` also has a substantial amount of positive sentiment, indicating that customers are quite satisfied with the available flower arrangement options.
80
- - `Customer Service` receives positive sentiment, although not as high as some other topics, but it shows that customer service is still fairly appreciated.
81
- - `Price Appreciation` (Apresiasi Harga) shows that some customers feel the offered prices are quite reasonable.
82
- - `Delivery Quality` (Kualitas Pengiriman) and `Delivery Issues` (Masalah Pengiriman) are relatively low in positive sentiment, meaning the delivery aspect is not a major strength.
83
- """)
84
- st.write('---')
85
-
86
-
87
- # -- 3. NEGATIF FEEDBACK --
88
- # Filter for rows where sentiment is 'negative'
89
- negative_df = df[df['sentiment'] == 'negative']
90
-
91
- # Group the data by 'topic' and count the occurrences
92
- negative_topic_counts = negative_df['topic'].value_counts().reset_index()
93
- negative_topic_counts.columns = ['Topic', 'Count of Sentiment']
94
-
95
- # Create the bar chart using Plotly
96
- fig = px.bar(negative_topic_counts,
97
- x='Count of Sentiment',
98
- y='Topic',
99
- orientation='h',
100
- color_discrete_sequence=['#8a2be2'], # Purple color
101
- title="Negative Sentiment by Topic")
102
-
103
- # Display the plot in Streamlit
104
- st.subheader("🗣️💬 Negative Sentiment by Topic")
105
- st.plotly_chart(fig)
106
-
107
- # -- INSIGHT NEGATIF FEEDBACK BY TOPIC --
108
- ins_negative_feedback = st.container()
109
- ins_negative_feedback.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
110
- ins_negative_feedback.write("""
111
- - `Product Quality` is also a major topic in negative sentiment, with more than 140 negative comments. This indicates that while there is a lot of praise, there are also significant complaints about product quality.
112
- - `Price Complaints` is a major negative topic, meaning many customers feel that the prices offered are too high or not meeting their expectations.
113
- - `Delivery Issues` is also a major problem in negative sentiment, showing that delivery is a primary source of complaints.
114
- - `Bouquet Preferences` also has a fair amount of negative sentiment, indicating that while many are satisfied, there are also those who feel the flower arrangements do not meet their expectations.
115
- - `Customer Service` has received some negative sentiment, though it is not as prominent as other topics.
116
- - `Delivery Quality` has very minimal negative sentiment, indicating that the quality of delivery is less frequently complained about compared to delivery issues overall.
117
- """)
118
- st.write('---')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- # -- 4. CHURN RATE --
121
- # Group the data by 'churn' and 'contract' and count the occurrences
122
- # Map the churn column to categorical values: False -> 'Not Churned', True -> 'Churned'
123
- df['churn_category'] = df['churn'].map({False: 'Not Churned', True: 'Churned'})
124
-
125
- # Group the data by 'churn_category' and 'contract' and count the occurrences
126
- churn_contract_counts = df.groupby(['churn_category', 'contract']).size().reset_index(name='Count of Churn')
127
-
128
- # Create the bar chart using Plotly
129
- fig = px.bar(churn_contract_counts,
130
- x='Count of Churn',
131
- y='contract',
132
- color='churn_category', # Use the new categorical churn column
133
- barmode='group',
134
- orientation='h',
135
- color_discrete_sequence=['#8a2be2', '#c8a2c8'], # Purple color shades
136
- title="Churn Rate by Contract Type")
137
-
138
- # Display the plot in Streamlit
139
- st.subheader("🏃 or 🙆 by Contract Type")
140
- st.plotly_chart(fig)
141
-
142
- # -- INSIGHT CHURN RATE BY CONTRACT TYPE --
143
- ins_churn_rate = st.container()
144
- ins_churn_rate.markdown('<h1 style="font-size: 30px;">💭 Insight: </h1>', unsafe_allow_html=True)
145
- ins_churn_rate.write("""
146
- - `Short-term (monthly)` contracts have a very high churn rate, indicating that customers tend to leave the service more easily if they are not tied to a long-term contract
147
- - `Long-term contracts` (one and two years) are more effective in retaining customers than short-term contracts.
148
- """)
149
  st.write('---')
150
-
151
  st.write(
152
  '<p style="font-size: 15px; text-align: center;">All Rights Reserved | Made by ❀️</p>',
153
  unsafe_allow_html=True
 
12
  # Load the dataset from a CSV file
13
  df = pd.read_csv('florist_customer_churn_raw_fix_cleaned.csv')
14
 
15
+ # Sidebar content
16
+ st.sidebar.title("EDA Options")
17
+
18
+ # Add a selectbox for choosing analysis type
19
+ analysis_option = st.sidebar.selectbox(
20
+ "Select Analysis Type",
21
+ ["Dataset Overview", "Churn Distribution", "Sentiment Analysis", "Contract Analysis", "Word Cloud"]
22
+ )
23
+
24
+ # Add a slider for sample size in Dataset Overview
25
+ if analysis_option == "Dataset Overview":
26
+ sample_size = st.sidebar.slider("Sample size", min_value=5, max_value=50, value=10, step=5)
27
+
28
+ # Add radio buttons for sentiment type
29
+ if analysis_option == "Sentiment Analysis":
30
+ sentiment_option = st.sidebar.radio("Choose sentiment to display:", ("Positive", "Negative"))
31
+
32
+ # Add radio buttons for word cloud type
33
+ if analysis_option == "Word Cloud":
34
+ wordcloud_option = st.sidebar.radio("Choose word cloud to display:", ("Positive Sentiment", "Negative Sentiment"))
35
+
36
+ # Add checkbox for showing statistics in Feature Explorer
37
+ show_stats = st.sidebar.checkbox("Show feature statistics", value=True)
38
+
39
+ # Add more content to sidebar
40
+ st.sidebar.markdown("---")
41
+ st.sidebar.subheader("🔑 Key Features")
42
+ st.sidebar.write("""
43
+ - Interactive visualizations
44
+ - Sentiment analysis insights
45
+ - Churn distribution analysis
46
+ - Contract type impact on churn
47
+ - Word cloud for sentiment analysis
48
+ """)
49
+
50
+ st.sidebar.markdown("---")
51
+ st.sidebar.subheader("🛠️ Tools Utilized")
52
+ st.sidebar.write("""
53
+ - `Streamlit` for web app development
54
+ - `Pandas` for data manipulation
55
+ - `Plotly Express` for interactive visualizations
56
+ - `NumPy` for numerical operations
57
+ """)
58
+
59
+ st.sidebar.markdown("---")
60
+ st.sidebar.info("Explore different aspects of the customer churn data using the options above.")
61
+
62
+ # Main page content
63
+ st.write("Welcome to the EDA page. Choose an analysis to explore:")
64
+
65
+ if analysis_option == "Dataset Overview":
66
+ st.subheader('📂 Dataset Overview: ')
67
+
68
+ # Move multi-select for choosing columns to display to main page
69
+ columns_to_display = st.multiselect(
70
+ "Select columns to display",
71
+ options=list(df.columns),
72
+ default=list(df.columns)
73
+ )
74
+
75
+ st.dataframe(df[columns_to_display].head(sample_size))
76
+
77
+ st.markdown('<h3 style="font-size: 24px;">🧠 Quick to Know about Dataset: </h3>', unsafe_allow_html=True)
78
+ st.write("The dataset contains various customer behavior indicators that may be associated with **customer churn**. From this data, our team will provide a classification based on the sentiment from the `feedback` to predict whether a customer will churn or not.")
79
+
80
+ elif analysis_option == "Churn Distribution":
81
+ st.subheader("🏃 Total Churn")
82
+ churn_count = df['churn'].value_counts()
83
+ fig = px.pie(values=churn_count.values,
84
+ names=churn_count.index,
85
+ title="Total Churn Pie Chart Distribution",
86
+ color_discrete_sequence=px.colors.sequential.Purples_r)
87
+ st.plotly_chart(fig)
88
+
89
+ st.markdown('<h3 style="font-size: 24px;">💭 Insight: </h3>', unsafe_allow_html=True)
90
+ st.write("""
91
+ - A nearly equal split of `true` and `false` churn indicates that about half of customers remain `loyal` and `the other half churn`.
92
+ - Churn ratio approaching `50-50` indicates that there is a significant risk of losing customers.
93
+ - This indicates that we should focus on customer retention strategies and service improvements to `reduce true churn and maintain customer loyalty`."""
94
+ )
95
+
96
+ elif analysis_option == "Sentiment Analysis":
97
+ st.subheader("🗣️💬 Sentiment Analysis")
98
+
99
+ if sentiment_option == "Positive":
100
+ positive_df = df[df['sentiment'] == 'positive']
101
+ positive_topic_counts = positive_df['topic'].value_counts().reset_index()
102
+ positive_topic_counts.columns = ['Topic', 'Count of Sentiment']
103
+ fig = px.bar(positive_topic_counts,
104
+ x='Count of Sentiment',
105
+ y='Topic',
106
+ orientation='h',
107
+ color_discrete_sequence=['#8a2be2'],
108
+ title="Positive Sentiment by Topic")
109
+ st.plotly_chart(fig)
110
+ else:
111
+ negative_df = df[df['sentiment'] == 'negative']
112
+ negative_topic_counts = negative_df['topic'].value_counts().reset_index()
113
+ negative_topic_counts.columns = ['Topic', 'Count of Sentiment']
114
+ fig = px.bar(negative_topic_counts,
115
+ x='Count of Sentiment',
116
+ y='Topic',
117
+ orientation='h',
118
+ color_discrete_sequence=['#8a2be2'],
119
+ title="Negative Sentiment by Topic")
120
+ st.plotly_chart(fig)
121
+
122
+ st.markdown('<h3 style="font-size: 24px;">💭 Insight: </h3>', unsafe_allow_html=True)
123
+ if sentiment_option == "Positive":
124
+ st.write("""
125
+ - `Product Quality` receives the most attention in positive sentiment, with more than 100 people expressing satisfaction with the product.
126
+ - `General Feedback` is also quite high, indicating that many customers provide good general feedback regarding the service or product.
127
+ - `Bouquet Preferences` also has a substantial amount of positive sentiment, indicating that customers are quite satisfied with the available flower arrangement options.
128
+ - `Customer Service` receives positive sentiment, although not as high as some other topics, but it shows that customer service is still fairly appreciated.
129
+ - `Price Appreciation` shows that some customers feel the offered prices are quite reasonable.
130
+ - `Delivery Quality` and `Delivery Issues` are relatively low in positive sentiment, meaning the delivery aspect is not a major strength.
131
+ """)
132
+ else:
133
+ st.write("""
134
+ - `Product Quality` is also a major topic in negative sentiment, with more than 140 negative comments. This indicates that while there is a lot of praise, there are also significant complaints about product quality.
135
+ - `Price Complaints` is a major negative topic, meaning many customers feel that the prices offered are too high or not meeting their expectations.
136
+ - `Delivery Issues` is also a major problem in negative sentiment, showing that delivery is a primary source of complaints.
137
+ - `Bouquet Preferences` also has a fair amount of negative sentiment, indicating that while many are satisfied, there are also those who feel the flower arrangements do not meet their expectations.
138
+ - `Customer Service` has received some negative sentiment, though it is not as prominent as other topics.
139
+ - `Delivery Quality` has very minimal negative sentiment, indicating that the quality of delivery is less frequently complained about compared to delivery issues overall.
140
+ """)
141
+
142
+ elif analysis_option == "Contract Analysis":
143
+ st.subheader("🏃 or 🙆 by Contract Type")
144
+ df['churn_category'] = df['churn'].map({False: 'Not Churned', True: 'Churned'})
145
+ churn_contract_counts = df.groupby(['churn_category', 'contract']).size().reset_index(name='Count of Churn')
146
+ fig = px.bar(churn_contract_counts,
147
+ x='Count of Churn',
148
+ y='contract',
149
+ color='churn_category',
150
+ barmode='group',
151
+ orientation='h',
152
+ color_discrete_sequence=['#8a2be2', '#c8a2c8'],
153
+ title="Churn Rate by Contract Type")
154
+ st.plotly_chart(fig)
155
+
156
+ st.markdown('<h3 style="font-size: 24px;">💭 Insight: </h3>', unsafe_allow_html=True)
157
+ st.write("""
158
+ - `Short-term (monthly)` contracts have a very high churn rate, indicating that customers tend to leave the service more easily if they are not tied to a long-term contract
159
+ - `Long-term contracts` (one and two years) are more effective in retaining customers than short-term contracts.
160
+ """)
161
+
162
+ elif analysis_option == "Word Cloud":
163
+ st.subheader("☁️ Word Cloud")
164
+ if wordcloud_option == "Positive Sentiment":
165
+ st.image("wordcloud_positive.png", caption="Word Cloud for Positive Sentiment", use_column_width=True, width=150)
166
+ st.markdown('<h3 style="font-size: 24px;">💭 Insight: </h3>', unsafe_allow_html=True)
167
+ st.write("""
168
+ 1. **Frequent Mention of `Bouquet` and `Flowers`**: The words `bouquet` and `flowers` are prominently featured, indicating that customers often appreciate the quality and variety of the floral arrangements provided.
169
+ 2. **Emphasis on `Service`**: The word `service` appears frequently, suggesting that customers are generally satisfied with the level of service they receive.
170
+ 3. **Positive Adjectives**: Words like `always,` `happy,` `satisfied,` `quality,` and `great` are commonly used, reflecting a high level of customer satisfaction and positive experiences.
171
+ 4. **Subscription Model**: The word `subscription` is also notable, indicating that customers value the subscription service offered, which likely contributes to their positive feedback.
172
+ 5. **Consistency and Reliability**: Terms such as `always,` `every month,` and `arrive` suggest that customers appreciate the consistency and reliability of the service.
173
+
174
+ Overall, the word cloud highlights the aspects of the service that customers find most appealing, such as the quality of the bouquets, the reliability of the service, and the positive experiences associated with the subscription model. These insights can help the company understand what they are doing well and continue to focus on these strengths.
175
+ """)
176
+ else:
177
+ st.image("wordcloud_negative.png", caption="Word Cloud for Negative Sentiment", use_column_width=True, width=150)
178
+ st.markdown('<h3 style="font-size: 24px;">💭 Insight: </h3>', unsafe_allow_html=True)
179
+ st.write("""
180
+ 1. **Concerns About `Quality` and `Flowers`**: The words `quality` and `flowers` are prominently featured, indicating that many customers have concerns about the quality of the flowers they receive.
181
+ 2. **Issues with `Delivery`**: The word `delivery` appears frequently, suggesting that delivery-related issues are a common source of dissatisfaction among customers.
182
+ 3. **`Expensive` and `Costly`**: Terms like `expensive` and `costly` are notable, indicating that some customers feel the service or products are overpriced.
183
+ 4. **`Bouquet Size` and `Variety`**: Words such as `bouquet size` and `variety` suggest that customers are not satisfied with the size of the bouquets or the variety of flowers offered.
184
+ 5. **`Disappointed` and `Expected Better`**: The presence of words like `disappointed` and `expected better` reflects unmet expectations and general dissatisfaction with the service or product.
185
+ 6. **Mixed Sentiment on `Satisfied`**: Interestingly, the word `satisfied` appears in the negative feedback, possibly indicating that some customers are expressing conditional satisfaction or comparing their current experience to previous, more positive experiences.
186
+
187
+ Overall, the word cloud highlights several areas for improvement, including flower quality, delivery service, pricing, bouquet size, and variety. Addressing these issues can help the company enhance customer satisfaction and reduce negative feedback.
188
+ """)
189
+
190
+ # Feature Explorer
191
+ if show_stats:
192
+ st.subheader("🔍 Feature Explorer")
193
+ selected_feature = st.selectbox("Select a feature to explore", df.columns)
194
+ if selected_feature:
195
+ st.write(f"Statistics for {selected_feature}:")
196
+ st.write(df[selected_feature].describe())
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  st.write('---')
 
199
  st.write(
200
  '<p style="font-size: 15px; text-align: center;">All Rights Reserved | Made by ❀️</p>',
201
  unsafe_allow_html=True
wordcloud_negative.png ADDED
wordcloud_positive.png ADDED