Spaces:

mrciomnl
/

mall_customer_segmentation

Sleeping

App Files Files Community

mall_customer_segmentation / app.py

mrciomnl

initial commit

a487e3e 11 months ago

raw

history blame contribute delete

4.61 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.cluster import KMeans
	from sklearn.preprocessing import StandardScaler

	# Load dataset
	@st.cache_data
	def load_data() -> pd.DataFrame:
	    """Read the mall-customer dataset from ``Mall_Customers.csv``.

	    The path is relative, so the file is resolved against the current
	    working directory of the running app. The function is wrapped in
	    ``st.cache_data`` so the CSV is not re-read on every script rerun.

	    Returns:
	        The CSV contents as parsed by ``pd.read_csv``.
	    """
	    return pd.read_csv("Mall_Customers.csv")

	# Dataset used by every page of the app.
	df = load_data()

	# Sidebar navigation: the selected page name is stored in `section`.
	st.sidebar.title("K-Means Clustering App")
	section = st.sidebar.radio(
	    "Go to",
	    ["Introduction", "Data Exploration", "K-Means Model", "Test Model"],
	)

	# Render the page selected in the sidebar. Each branch is one page of the app.
	if section == "Introduction":
	    st.title("Introduction to K-Means Clustering")

	    st.write("### About the Model")
	    st.write("K-Means Clustering is an unsupervised machine learning algorithm used for customer segmentation. It helps identify different groups of customers based on their spending behavior and income.")

	    st.write("### About the Dataset")
	    st.write("The dataset consists of customer information, including:")
	    st.markdown("- CustomerID: Unique identifier for each customer.")
	    st.markdown("- Gender: Male or Female.")
	    st.markdown("- Age: Age of the customer.")
	    st.markdown("- Annual Income (k$): Customer's yearly income.")
	    st.markdown("- Spending Score (1-100): A score assigned based on spending behavior.")

	    st.write("### How to Use the App")
	    st.markdown("1. Go to 'Data Exploration': Understand the dataset using statistics and visualizations.")
	    st.markdown("2. Go to 'K-Means Model': Train the model and visualize clusters.")
	    st.markdown("3. Go to 'Test Model': Input values to predict customer cluster.")

	    st.write("### Insights")
	    st.markdown("- Customers can be grouped into different segments based on their income and spending habits.")
	    st.markdown("- The Elbow Method helps determine the optimal number of clusters.")
	    st.markdown("- Businesses can use these insights to tailor marketing strategies and improve customer engagement.")

	elif section == "Data Exploration":
	    st.title("Data Exploration")
	    st.write("### First 5 rows of dataset")
	    st.dataframe(df.head())

	    st.write("### Summary Statistics")
	    st.write(df.describe())

	    # The identifier and the categorical column are left out of both plots.
	    numeric_df = df.drop(columns=["CustomerID", "Gender"])

	    st.write("### Pairplot")
	    # Pass the figure explicitly: a bare st.pyplot() falls back to
	    # matplotlib's global figure, a usage Streamlit has deprecated.
	    pair_grid = sns.pairplot(numeric_df, diag_kind="kde")
	    st.pyplot(pair_grid.figure)
	    # Close after rendering; otherwise pyplot keeps the figure alive and
	    # one more accumulates on every script rerun.
	    plt.close(pair_grid.figure)

	    st.write("### Correlation Heatmap")
	    heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
	    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=heatmap_ax)
	    st.pyplot(heatmap_fig)
	    plt.close(heatmap_fig)

	elif section == "K-Means Model":
	    st.title("K-Means Clustering")

	    # Cluster on income and spending score only, standardized so both
	    # features are on the same scale.
	    features = df[["Annual Income (k$)", "Spending Score (1-100)"]]
	    scaler = StandardScaler()
	    scaled_features = scaler.fit_transform(features)

	    # Elbow Method: fit one model per candidate k and record its inertia.
	    st.write("### Elbow Method")
	    cluster_counts = range(1, 11)
	    inertia = [
	        KMeans(n_clusters=n, random_state=42, n_init=10).fit(scaled_features).inertia_
	        for n in cluster_counts
	    ]

	    elbow_fig, elbow_ax = plt.subplots(figsize=(8, 5))
	    elbow_ax.plot(cluster_counts, inertia, marker='o')
	    elbow_ax.set_xlabel('Number of Clusters')
	    elbow_ax.set_ylabel('Inertia')
	    elbow_ax.set_title('Elbow Method for Optimal k')
	    st.pyplot(elbow_fig)
	    plt.close(elbow_fig)

	    # Train the final model with the user-selected number of clusters.
	    k = st.slider("Select Number of Clusters", 2, 10, 5)
	    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
	    df['Cluster'] = kmeans.fit_predict(scaled_features)

	    st.write("### Clustered Data")
	    st.dataframe(df)

	    # Scatter plot of the two clustering features, coloured by cluster.
	    cluster_fig, cluster_ax = plt.subplots(figsize=(8, 6))
	    sns.scatterplot(
	        x=df["Annual Income (k$)"],
	        y=df["Spending Score (1-100)"],
	        hue=df['Cluster'],
	        palette='viridis',
	        ax=cluster_ax,
	    )
	    cluster_ax.set_xlabel("Annual Income (k$)")
	    cluster_ax.set_ylabel("Spending Score (1-100)")
	    cluster_ax.set_title("Customer Segmentation using K-Means")
	    st.pyplot(cluster_fig)
	    plt.close(cluster_fig)

	    # Keep the fitted scaler and model in session state so the
	    # "Test Model" page can reuse them after the script reruns.
	    st.session_state['scaler'] = scaler
	    st.session_state['kmeans'] = kmeans

	elif section == "Test Model":
	    st.title("Test K-Means Model")

	    income = st.number_input("Enter Annual Income (k$)", min_value=0, max_value=200, value=50)
	    score = st.number_input("Enter Spending Score (1-100)", min_value=1, max_value=100, value=50)

	    if 'scaler' in st.session_state and 'kmeans' in st.session_state:
	        # The scaler was fitted on a DataFrame, so pass a one-row frame with
	        # the same column names; a bare list makes scikit-learn warn that
	        # the input has no valid feature names.
	        new_customer = pd.DataFrame(
	            [[income, score]],
	            columns=["Annual Income (k$)", "Spending Score (1-100)"],
	        )
	        input_data = st.session_state['scaler'].transform(new_customer)
	        prediction = st.session_state['kmeans'].predict(input_data)
	        st.write(f"### Predicted Cluster: {prediction[0]}")
	    else:
	        st.write("### Please run the K-Means Model section first.")