Spaces:

RICHARDMENSAH
/

Churn_Expresso_Prediction_Streamlit_App

Runtime error

App Files Files Community

Churn_Expresso_Prediction_Streamlit_App / app.py

RICHARDMENSAH

Update app.py

073ec53 over 2 years ago

raw

history blame contribute delete

19.5 kB

	import csv
	import time
	from io import BytesIO

	import joblib
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import plotly.graph_objects as go
	import qrcode
	import streamlit as st
	from PIL import Image

	# Deserialize the fitted imputers, encoder, scaler and both classifiers
	# from their joblib files (paths are relative to the working directory).
	# The files are loaded in the same order as the names they are bound to.
	(
	    num_imputer,
	    cat_imputer,
	    encoder,
	    scaler,
	    model1,
	    model2,
	) = (
	    joblib.load(artifact_path)
	    for artifact_path in (
	        'numerical_imputer.joblib',
	        'cat_imputer.joblib',
	        'encoder.joblib',
	        'scaler.joblib',
	        'lr_model_vif_smote.joblib',
	        'gb_model_vif_smote.joblib',
	    )
	)


	def preprocess_input(input_data):
	    """Convert one customer's raw inputs into the feature frame given to the model.

	    Args:
	        input_data: Mapping of column name to a single scalar value; it is
	            turned into a one-row DataFrame.

	    Returns:
	        A one-row DataFrame whose columns are exactly
	        ``original_feature_names`` (in that order); any listed column that
	        the transformers did not produce is filled with 0.
	    """
	    raw_frame = pd.DataFrame(input_data, index=[0])

	    # Object-dtype columns go down the categorical path, all others down
	    # the numeric path.
	    categorical_names = []
	    numeric_names = []
	    for column_name in raw_frame.columns:
	        if raw_frame[column_name].dtype == 'object':
	            categorical_names.append(column_name)
	        else:
	            numeric_names.append(column_name)

	    # Impute each group with its own fitted imputer.
	    imputed_categorical = cat_imputer.transform(raw_frame[categorical_names])
	    imputed_numeric = num_imputer.transform(raw_frame[numeric_names])

	    # Encode the categorical values and name the resulting columns.
	    encoded_matrix = encoder.transform(imputed_categorical).toarray()
	    encoded_frame = pd.DataFrame(
	        encoded_matrix,
	        columns=encoder.get_feature_names_out(categorical_names),
	    )

	    # Scale the numeric values, keeping their original column names.
	    scaled_frame = pd.DataFrame(
	        scaler.transform(imputed_numeric),
	        columns=numeric_names,
	    )

	    # Join both parts side by side, then force the expected column layout.
	    combined = pd.concat([encoded_frame, scaled_frame], axis=1)
	    return combined.reindex(columns=original_feature_names, fill_value=0)

	# Column layout that preprocess_input() reindexes its output to: the
	# numeric features first, then the encoded REGION, TENURE and TOP_PACK
	# columns, each named "<FEATURE>_<level>".
	_NUMERIC_FEATURE_NAMES = [
	    'MONTANT', 'FREQUENCE_RECH', 'REVENUE', 'ARPU_SEGMENT', 'FREQUENCE',
	    'DATA_VOLUME', 'ON_NET', 'ORANGE', 'TIGO', 'ZONE1', 'ZONE2',
	    'REGULARITY', 'FREQ_TOP_PACK',
	]
	_REGION_LEVELS = [
	    'DAKAR', 'DIOURBEL', 'FATICK', 'KAFFRINE', 'KAOLACK', 'KEDOUGOU',
	    'KOLDA', 'LOUGA', 'MATAM', 'SAINT-LOUIS', 'SEDHIOU', 'TAMBACOUNDA',
	    'THIES', 'ZIGUINCHOR',
	]
	_TENURE_LEVELS = [
	    'Long-term', 'Medium-term', 'Mid-term', 'Short-term', 'Very short-term',
	]
	_TOP_PACK_LEVELS = [
	    'data', 'international', 'messaging', 'other_services', 'social_media',
	    'value_added_services', 'voice',
	]

	original_feature_names = (
	    list(_NUMERIC_FEATURE_NAMES)
	    + [f'REGION_{level}' for level in _REGION_LEVELS]
	    + [f'TENURE_{level}' for level in _TENURE_LEVELS]
	    + [f'TOP_PACK_{level}' for level in _TOP_PACK_LEVELS]
	)

	# Use the wide page layout for the whole app.
	st.set_page_config(layout="wide")

	# Landing section of the main page: title, banner image and a short
	# description of what the app does.
	st.title('📞 EXPRESSO TELECOM CUSTOMER CHURN PREDICTION APP 📞')
	st.image("banner.png", use_column_width=True)
	st.markdown("This app predicts whether a customer will leave your company ❌ or not 🎉. Enter the details of the customer on the left sidebar to see the result")

	# Usage instructions, emitted as one markdown element per numbered step.
	st.title('How to Use')
	for usage_step in (
	    '1. Select your model of choice on the left sidebar.',
	    '2. Adjust the input parameters based on customer details',
	    '3. Click the "Predict" button to initiate the prediction.',
	    '4. The app will simulate a prediction process with a progress bar.',
	    '5. Once the prediction is complete, the results will be displayed below.',
	):
	    st.markdown(usage_step)

	import csv
	import streamlit as st

	# Sidebar feedback box: a short welcome note, a free-text field and a
	# "Submit" button that appends the text as one row to suggestions.csv.
	st.sidebar.markdown('Welcome!')
	st.sidebar.markdown('This is a work in progress, and we would love to hear your suggestions on how to improve the user experience. Please feel free to provide your feedback in the suggestion box below.')

	# Create the sidebar with a text input field for suggestions
	correction_text = st.sidebar.text_input('Enter your suggestion')

	# Button to submit the suggestion
	if st.sidebar.button('Submit'):
	    suggestion = correction_text.strip()
	    if not suggestion:
	        # A blank submission would only append an empty row to the file,
	        # so ask for some text instead of reporting success.
	        st.sidebar.warning('Please enter a suggestion before submitting.')
	    else:
	        # Append mode keeps earlier suggestions. newline='' is what the csv
	        # module expects, and the explicit UTF-8 encoding keeps non-ASCII
	        # text from depending on the platform's default encoding.
	        with open('suggestions.csv', 'a', newline='', encoding='utf-8') as file:
	            csv.writer(file).writerow([suggestion])
	        st.sidebar.info('Suggestion submitted successfully')

	# Registry of the selectable classifiers, keyed by the name shown in the
	# sidebar. Each entry holds the loaded estimator under 'model' and a short
	# identifier under 'type' (read later by predict_churn via model_type).
	models = {
	    display_name: {'model': estimator, 'type': type_key}
	    for display_name, estimator, type_key in (
	        ('Logistic Regression', model1, 'logistic_regression'),
	        ('Gradient Boosting', model2, 'gradient_boosting'),
	    )
	}

	# Let the user pick one of the registered models in the sidebar.
	st.sidebar.title('Select Model')
	model_name = st.sidebar.selectbox('Choose a model', list(models))

	# Unpack the chosen entry into the module-level names used further down.
	selected_entry = models[model_name]
	model, model_type = selected_entry['model'], selected_entry['type']


	# Collect input from the user
	st.sidebar.title('Enter Customer Details')

	# Categories offered in the three select boxes. original_feature_names has
	# encoded columns for REGION_DAKAR, TENURE_Long-term and
	# TOP_PACK_other_services, but the menus used to omit those levels, so such
	# customers could not be entered. They are appended at the end so the
	# default (first) choice of each menu stays the same.
	region_options = [
	    'SAINT-LOUIS', 'THIES', 'LOUGA', 'MATAM', 'FATICK', 'KAOLACK',
	    'DIOURBEL', 'TAMBACOUNDA', 'ZIGUINCHOR', 'KOLDA', 'KAFFRINE', 'SEDHIOU',
	    'KEDOUGOU', 'DAKAR',
	]
	tenure_options = [
	    'Short-term', 'Mid-term', 'Medium-term', 'Very short-term', 'Long-term',
	]
	top_pack_options = [
	    'data', 'international', 'messaging', 'social_media',
	    'value_added_services', 'voice', 'other_services',
	]

	# One sidebar widget per raw feature. The dict keys are the raw column
	# names handed to preprocess_input(); widgets appear in dict order.
	input_features = {
	    'MONTANT': st.sidebar.number_input('Top-up Amount (MONTANT)'),
	    'FREQUENCE_RECH': st.sidebar.number_input('Number of Times the Customer Refilled (FREQUENCE_RECH)'),
	    'REVENUE': st.sidebar.number_input('Monthly income of the client (REVENUE)'),
	    'ARPU_SEGMENT': st.sidebar.number_input('Income over 90 days / 3 (ARPU_SEGMENT)'),
	    'FREQUENCE': st.sidebar.number_input('Number of times the client has made an income (FREQUENCE)'),
	    'DATA_VOLUME': st.sidebar.number_input('Number of Connections (DATA_VOLUME)'),
	    'ON_NET': st.sidebar.number_input('Inter Expresso Call (ON_NET)'),
	    'ORANGE': st.sidebar.number_input('Call to Orange (ORANGE)'),
	    'TIGO': st.sidebar.number_input('Call to Tigo (TIGO)'),
	    'ZONE1': st.sidebar.number_input('Call to Zone 1 (ZONE1)'),
	    'ZONE2': st.sidebar.number_input('Call to Zone 2 (ZONE2)'),
	    'REGULARITY': st.sidebar.number_input('Number of Times the Client is Active for 90 Days (REGULARITY)'),
	    'FREQ_TOP_PACK': st.sidebar.number_input('Number of Times the Client has Activated the Top Packs (FREQ_TOP_PACK)'),
	    'REGION': st.sidebar.selectbox('Location of Each Client (REGION)', region_options),
	    'TENURE': st.sidebar.selectbox('Duration in the Network (TENURE)', tenure_options),
	    'TOP_PACK': st.sidebar.selectbox('Most Active Pack (TOP_PACK)', top_pack_options),
	}

	# Input validation: check every numeric input against an inclusive
	# [minimum, maximum] range. Features without an entry in numeric_ranges
	# (the categorical ones) are not checked.
	numeric_ranges = {
	    'MONTANT': [0, 1000000],
	    'FREQUENCE_RECH': [0, 100],
	    'REVENUE': [0, 1000000],
	    'ARPU_SEGMENT': [0, 100000],
	    'FREQUENCE': [0, 100],
	    'DATA_VOLUME': [0, 100000],
	    'ON_NET': [0, 100000],
	    'ORANGE': [0, 100000],
	    'TIGO': [0, 100000],
	    'ZONE1': [0, 100000],
	    'ZONE2': [0, 100000],
	    'REGULARITY': [0, 100],
	    'FREQ_TOP_PACK': [0, 100]
	}

	# Collect one message per out-of-range value, in input order.
	error_messages = []
	for feature, value in input_features.items():
	    bounds = numeric_ranges.get(feature)
	    if bounds is None:
	        continue
	    range_min, range_max = bounds
	    if not (range_min <= value <= range_max):
	        error_messages.append(f"{feature} should be between {range_min} and {range_max}.")

	# The input is valid exactly when no message was collected.
	valid_input = not error_messages

	#Churn Prediction

	def predict_churn(input_data, model):
	    """Preprocess one customer's inputs and return the model's class probabilities.

	    Args:
	        input_data: Mapping of raw feature name to value, passed to
	            ``preprocess_input``.
	        model: Fitted classifier exposing ``predict_proba``.

	    Returns:
	        A 4-tuple ``(churn_labels, churn_probability, churn_indices, churn_index)``:
	        the two label strings in probability-column order, the probability
	        row for the single sample, a label -> column-index dict, and the
	        index of the most probable class.

	    Raises:
	        ValueError: If the module-level ``model_type`` is not a known type.
	    """
	    # Preprocess the input data
	    preprocessed_data = preprocess_input(input_data)

	    # Calculate churn probabilities using the model
	    probabilities = model.predict_proba(preprocessed_data)

	    # Label order depends on the module-level model_type of the selected model.
	    # NOTE(review): predict_proba columns normally follow model.classes_;
	    # confirm that the reversed order used for gradient boosting matches how
	    # that model's target was encoded.
	    if model_type == "logistic_regression":
	        churn_labels = ["No Churn", "Churn"]
	    elif model_type == "gradient_boosting":
	        churn_labels = ["Churn", "No Churn"]
	    else:
	        # Previously this fell through and failed later with an
	        # UnboundLocalError on churn_labels; fail with a clear message.
	        raise ValueError(f"Unsupported model type: {model_type!r}")

	    # Extract churn probability for the first sample
	    churn_probability = probabilities[0]

	    # Create a dictionary mapping churn labels to their indices
	    churn_indices = {label: idx for idx, label in enumerate(churn_labels)}

	    # Determine the index with the highest churn probability
	    churn_index = np.argmax(churn_probability)

	    # Return churn labels, churn probabilities, churn indices, and churn index
	    return churn_labels, churn_probability, churn_indices, churn_index


	# Predict churn based on user input.
	#
	# The rendering is split into small private helpers; the button handler at
	# the bottom validates the inputs, runs the prediction and calls them.

	# Overall churn rate (in percent) used as the comparison baseline.
	# Placeholder figure: replace with the actual value for your data.
	AVERAGE_CHURN_RATE_PERCENT = 19


	def _describe_against_average(probability, average):
	    """Return 'higher than', 'lower than' or 'equal to' for probability vs. average."""
	    if probability > average:
	        return "higher than"
	    if probability < average:
	        return "lower than"
	    return "equal to"


	def _show_main_result(churn_labels, churn_probability, churn_index):
	    """Show the predicted class with its probability, an image and a list of tips."""
	    churn_prob = churn_probability[churn_index]
	    col1, col2 = st.columns(2)

	    if churn_labels[churn_index] == "Churn":
	        with col1:
	            st.error(f"Beware!!! This customer is likely to churn with a probability of {churn_prob * 100:.2f}% 😢")
	            resized_churn_image = Image.open('Churn.png').resize((350, 300))
	            st.image(resized_churn_image)
	        # Suggestions for customers predicted to churn
	        with col2:
	            st.info("Suggestions for retaining churned customers in this customer group:\n"
	                    "- Offer personalized discounts or promotions\n"
	                    "- Provide exceptional customer service\n"
	                    "- Introduce loyalty programs\n"
	                    "- Send targeted re-engagement emails\n"
	                    "- Provide a dedicated account manager\n"
	                    "- Offer extended trial periods\n"
	                    "- Conduct exit surveys to understand reasons for churn\n"
	                    "- Implement a customer win-back campaign\n"
	                    "- Provide incentives for referrals\n"
	                    "- Improve product or service offerings based on customer feedback")
	    else:
	        with col1:
	            st.success(f"This customer is not likely to churn with a probability of {churn_prob * 100:.2f}% 😀")
	            resized_not_churn_image = Image.open('NotChurn.jpg').resize((350, 300))
	            st.image(resized_not_churn_image)
	        # Suggestions for customers predicted to stay
	        with col2:
	            st.info("Suggestions for retaining non-churned customers in this customer group:\n"
	                    "- Provide personalized product recommendations\n"
	                    "- Offer exclusive features or upgrades\n"
	                    "- Implement proactive customer support\n"
	                    "- Conduct customer satisfaction surveys\n"
	                    "- Recognize and reward loyal customers\n"
	                    "- Organize customer appreciation events\n"
	                    "- Offer early access to new features or products\n"
	                    "- Provide educational resources or tutorials\n"
	                    "- Implement a customer loyalty program\n"
	                    "- Offer flexible billing or pricing options")


	def _show_probability_donut(churn_labels, churn_probability):
	    """Draw a donut chart of the two class probabilities."""
	    fig = go.Figure(data=[go.Pie(
	        labels=churn_labels,
	        values=churn_probability,
	        hole=0.5,
	        textinfo='label+percent',
	        marker=dict(colors=['#FFA07A', '#6495ED', '#FFD700', '#32CD32', '#FF69B4', '#8B008B']))])

	    fig.update_traces(
	        hoverinfo='label+percent',
	        textfont_size=12,
	        textposition='inside',
	        # `percent` is a fraction, so it needs the d3 percent format; the
	        # previous ':.2f' plus a literal '%' rendered e.g. "0.80%".
	        texttemplate='%{label}: %{percent:.2%}'
	    )

	    fig.update_layout(
	        title='Churn Probability',
	        title_x=0.5,
	        showlegend=False,
	        width=500,
	        height=500
	    )

	    st.plotly_chart(fig, use_container_width=True)


	def _show_average_comparison(churn_labels, churn_probability, churn_indices, churn_index,
	                             average_churn_rate=AVERAGE_CHURN_RATE_PERCENT):
	    """Compare the customer's churn probability with the average churn rate.

	    The bar always shows the probability of the "Churn" class, whichever
	    class was predicted, so the label and the explanation describe the same
	    number that is plotted.
	    """
	    # Convert the overall churn rate to churn probability
	    main_data_churn_probability = average_churn_rate / 100

	    # Probability of the "Churn" class, looked up by label, not by argmax.
	    customer_churn_prob = churn_probability[churn_indices["Churn"]]
	    churn_comparison = _describe_against_average(customer_churn_prob, main_data_churn_probability)

	    labels = ['Churn Probability', 'Average Churn Probability']
	    values = [customer_churn_prob, main_data_churn_probability]

	    fig = go.Figure(data=[go.Bar(x=labels, y=values)])
	    fig.update_layout(
	        xaxis_title='Churn Probability',
	        yaxis_title='Probability',
	        title='Comparison with Average Churn Rate',
	        yaxis=dict(range=[0, 1])  # Set the y-axis limits between 0 and 1
	    )

	    if churn_labels[churn_index] == "Churn":
	        explanation = (
	            "This bar chart compares the churn probability of the selected customer "
	            "with the average churn rate of all customers. It provides insights into how the "
	            f"individual customer's churn likelihood ({customer_churn_prob:.2f}) compares to the "
	            "overall trend. The 'Churn Probability' represents the likelihood of churn "
	            "for the selected customer, while the 'Average Churn Rate' represents the average "
	            f"churn rate across all customers ({main_data_churn_probability:.2f}).\n\n"
	            f"The customer's churn rate is {churn_comparison} the average churn rate."
	        )
	    else:
	        # The comparison wording is computed instead of hard-coded as
	        # "lower": a customer predicted to stay can still have a churn
	        # probability above the average rate.
	        explanation = (
	            "This bar chart compares the churn probability of the selected customer "
	            "with the average churn rate of all customers. It provides insights into how the "
	            f"individual customer's likelihood of churn ({customer_churn_prob:.2f}) compares to the "
	            "overall trend. A lower churn probability indicates that the customer is less likely to churn. "
	            f"The chart shows that the churn probability ({customer_churn_prob:.2f}) is {churn_comparison} the "
	            f"average churn probability ({main_data_churn_probability:.2f}). The model predicts that the customer "
	            "will stay with the company. Keep in mind that the prediction is based on the "
	            "available data and the applied model, and there might still be some uncertainty in the result."
	        )

	    st.plotly_chart(fig)
	    st.write(explanation)


	def _show_feature_importance(model):
	    """Plot the model's coefficients or feature importances, if it exposes either."""
	    # `coef_` is checked first, then `feature_importances_`.
	    if hasattr(model, 'coef_'):
	        feature_importances = model.coef_[0]
	        importance_type = 'Coef'
	    elif hasattr(model, 'feature_importances_'):
	        feature_importances = model.feature_importances_
	        importance_type = 'Importance'
	    else:
	        st.write('Feature importance is not available for this model.')
	        return

	    # Pair each value with its feature name and sort, largest first.
	    importance_df = pd.DataFrame({'Feature': original_feature_names, importance_type: feature_importances})
	    importance_df = importance_df.sort_values(importance_type, ascending=False)

	    # Determine color for each bar based on positive or negative importance
	    colors = ['green' if importance > 0 else 'red' for importance in importance_df[importance_type]]

	    # Create a horizontal bar chart using Plotly
	    fig = go.Figure(go.Bar(
	        x=importance_df[importance_type],
	        y=importance_df['Feature'],
	        orientation='h',
	        marker=dict(color=colors),
	        text=importance_df[importance_type].apply(lambda x: f'{x:.2f}'),
	        textposition='inside'))

	    # Configure the layout of the bar chart
	    fig.update_layout(
	        title='Feature Importance',
	        xaxis_title='Importance',
	        yaxis_title='Feature',
	        bargap=0.1,
	        width=600,
	        height=800)

	    # Display the bar chart using Plotly chart in Streamlit
	    st.plotly_chart(fig)

	    # Explanation of feature importance
	    importance_explanation = (
	        "The feature importance plot shows the relative importance of each feature "
	        "for predicting churn. The importance is calculated based on the "
	        f"{importance_type} value of each feature in the model. "
	        f"A higher {importance_type} value indicates a stronger influence "
	        "of the corresponding feature on the prediction of churn.\n\n"
	        f"For logistic regression, positive {importance_type} values indicate "
	        "features that positively contribute to predicting churn, "
	        f"while negative {importance_type} values indicate features that "
	        "negatively contribute to predicting churn.\n\n"
	        f"For gradient boosting, higher {importance_type} values "
	        "indicate features that have a greater importance in predicting churn.\n\n"
	        "Please note that the feature importance values may vary depending on the model "
	        "and the data used for training."
	    )
	    st.write(importance_explanation)


	if st.sidebar.button('Predict Churn'):
	    if not valid_input:
	        # The range checks above were computed but never consulted; refuse
	        # to predict on out-of-range inputs and show what needs fixing.
	        for message in error_messages:
	            st.error(message)
	    else:
	        # Top-level boundary of the app: any failure while predicting or
	        # rendering is reported in the page instead of crashing the script.
	        try:
	            with st.spinner("Predicting..."):
	                # Simulate a long-running process
	                progress_bar = st.progress(0)
	                step = 20  # A big step will reduce the execution time
	                for i in range(0, 100, step):
	                    time.sleep(0.1)
	                    progress_bar.progress(i + step)

	                churn_labels, churn_probability, churn_indices, churn_index = predict_churn(input_features, model)

	            st.subheader('Main Results')
	            _show_main_result(churn_labels, churn_probability, churn_index)

	            st.subheader('Churn Probability')
	            _show_probability_donut(churn_labels, churn_probability)

	            st.subheader('Customer Churn Probability Comparison')
	            _show_average_comparison(churn_labels, churn_probability, churn_indices, churn_index)

	            st.subheader('Feature Importance')
	            _show_feature_importance(model)

	        except Exception as e:
	            st.error(f"An error occurred: {str(e)}")