Spaces:

Markndrei
/

market-analysis

Build error

App Files Files Community

market-analysis / app.py

Markndrei

Update app.py

abb8bd6 verified about 1 year ago

raw

history blame contribute delete

9.04 kB

	import streamlit as st
	import pandas as pd
	import joblib
	import plotly.express as px
	from sklearn.preprocessing import StandardScaler
	from sklearn.cluster import KMeans

	# Page-level settings, gathered in one mapping and handed to Streamlit
	_page_settings = {
	    "page_title": "Market Basket Analysis & Customer Clustering",
	    "page_icon": "🛒",
	    "layout": "wide",
	}
	st.set_page_config(**_page_settings)

	# Dashboard heading followed by the work-in-progress notice
	st.title("🛒 Market Basket Analysis & Customer Segmentation Dashboard")
	st.write("🔬NOTE: STILL FIXING THE LIVE CLUSTERING GRAPH PROBLEM AS IT IS NOT UPDATING REAL-TIME🔬")

	# Introductory copy rendered inside the "About This Application" expander
	_about_markdown = """
	### Welcome to the Market Basket Analysis & Customer Segmentation Dashboard 🎉

	This application is designed to help businesses gain valuable insights from their sales data through two powerful analytical techniques:

	1. Market Basket Analysis 🧺
	This technique identifies relationships between products that customers tend to purchase together. By understanding these patterns, businesses can optimize product placement, create effective bundle offers, and improve recommendation systems.

	2. Customer Segmentation 👥
	This approach groups customers based on their purchasing behavior using the RFM model (Recency, Frequency, Monetary value). This helps in developing targeted marketing strategies for different customer segments.

	### How to Use This Application 🛠️

	1. Navigate through the two tabs to explore different aspects of your data:
	- Dataset Preview 📊: Examine your data structure and basic statistics.
	- Live Application 🚀: Perform live customer segmentation with customizable parameters.
	"""

	# The expander starts open so the description is visible on first load
	with st.expander("📋 About This Application", expanded=True):
	    st.markdown(_about_markdown)

	# Cluster profile (RFM) table, deserialized from a joblib pickle and cached
	@st.cache_data
	def load_rfm():
	    """Load the RFM cluster table from ``rfm_cluster.pkl``.

	    Returns:
	        Whatever ``joblib.load`` deserializes from the file. If loading
	        raises, the error is reported with ``st.error`` and an empty
	        ``pd.DataFrame`` is returned instead.
	    """
	    try:
	        return joblib.load("rfm_cluster.pkl")
	    except Exception as exc:
	        st.error(f"❌ Error loading rfm cluster data: {exc}")
	    # Only reached when loading failed: callers test `.empty` on the result
	    return pd.DataFrame()


	rfm_cluster = load_rfm()

	# Raw transaction sample shown in the preview tab.
	# A DataFrame is plain data, so it is cached with st.cache_data (the same
	# decorator load_rfm uses): every rerun gets its own copy, whereas
	# st.cache_resource would share one mutable object across reruns and sessions.
	@st.cache_data
	def load_sample_data():
	    """Read ``market_basket_data.csv`` (semicolon-separated) into a DataFrame.

	    Returns:
	        The parsed ``pd.DataFrame``. If reading raises, the error is reported
	        with ``st.error`` and an empty ``pd.DataFrame`` is returned so that
	        callers can test ``.empty`` instead of handling exceptions.
	    """
	    try:
	        sample_data = pd.read_csv("market_basket_data.csv", sep=";")
	    except Exception as e:
	        st.error(f"❌ Error loading sample data: {e}")
	        return pd.DataFrame()  # Empty frame signals "nothing to show"
	    return sample_data


	dataset = load_sample_data()
	# Create tabs
	tab1, tab2 = st.tabs(["📊 Dataset Preview", "🚀 Live Application"])

	# Explanatory copy shown at the top of the Dataset Preview tab
	_dataset_intro_markdown = """
	### Understanding Your Data 🧐
	This section provides an overview of your dataset structure, allowing you to examine its contents before diving into the analysis. A good understanding of your data is essential for interpreting the results in the subsequent tabs.
	"""

	# Tab 1: Dataset Preview
	with tab1:
	    st.header("📊 Dataset Preview")
	    st.markdown(_dataset_intro_markdown)

	    # Everything rendered in this tab comes from `dataset`, so the guard and
	    # the reported shape use `dataset` as well. Guarding on `rfm_cluster`
	    # let an empty `dataset` reach `describe()` and reported the shape of a
	    # different table than the one displayed.
	    if dataset.empty:
	        st.error("❌ No data available in the sample dataset.")
	    else:
	        n_rows, n_cols = dataset.shape
	        st.write(f"📐 Dataset shape: {n_rows} rows, {n_cols} columns")

	        # Data summary: sample rows on the left, describe() on the right
	        col1, col2 = st.columns(2)
	        with col1:
	            st.subheader("📄 Data Sample")
	            st.markdown("This table shows the first 10 rows of your data, allowing you to see the actual values and structure.")
	            st.dataframe(dataset.head(10))

	        with col2:
	            st.subheader("📈 Data Statistics")
	            st.markdown("This summary provides statistical measures for numerical columns in your dataset.")
	            st.dataframe(dataset.describe())

	        # Data columns info
	        st.subheader("📑 Column Information")
	        st.markdown("This table provides details about each column in your dataset.")

	        col_info = pd.DataFrame({
	            'Column': dataset.columns,
	            # dtype objects are converted to their names so the column holds
	            # plain strings when the table is displayed
	            'Type': dataset.dtypes.astype(str).values,
	            'Non-Null Count': dataset.count().values,
	            'Null Count': dataset.isnull().sum().values,
	            'Unique Values': dataset.nunique().values,
	        })
	        st.dataframe(col_info)

	# Explanatory copy shown at the top of the Live Application tab
	_rfm_intro_markdown = """
	### RFM Analysis & Customer Segmentation 👥
	This tab uses the RFM (Recency, Frequency, Monetary) model to segment customers based on their purchasing behavior. Understanding these segments helps in developing targeted marketing strategies.

	Key Concepts:
	- Recency: How recently a customer made a purchase (fewer days = better) ⏳
	- Frequency: How often a customer makes purchases (more purchases = better) 🔄
	- Monetary Value: How much money a customer spends (higher amount = better) 💰
	- Clustering: Grouping similar customers together based on their RFM values 🎯

	Use the controls in the sidebar to customize your analysis and see how different parameters affect customer segmentation.
	"""

	# Tab 2: Live Application
	with tab2:
	    st.header("🚀 Live Customer Clustering")
	    st.markdown(_rfm_intro_markdown)

	    # Columns the clustering reads from the cluster profile table
	    required_cols = ['recency', 'frequency', 'monetary']

	    if rfm_cluster.empty:
	        st.error("❌ No data available in the cluster profile.")
	    elif not all(col in rfm_cluster.columns for col in required_cols):
	        st.error(f"❌ Required columns for clustering not found. Ensure your dataset contains: {required_cols}")
	    else:
	        # Sidebar for clustering parameters
	        st.sidebar.markdown("---")
	        st.sidebar.subheader("⚙️ Clustering Parameters")

	        # RFM weight adjustment sliders (min 0.1, max 2.0, default 1.0, step 0.1)
	        recency_weight = st.sidebar.slider(
	            "⏳ Recency Importance",
	            0.1, 2.0, 1.0, 0.1,
	            help="Increase this value to give more importance to how recently customers purchased.",
	        )
	        frequency_weight = st.sidebar.slider(
	            "🔄 Frequency Importance",
	            0.1, 2.0, 1.0, 0.1,
	            help="Increase this value to give more importance to how often customers purchase.",
	        )
	        monetary_weight = st.sidebar.slider(
	            "💰 Monetary Importance",
	            0.1, 2.0, 1.0, 0.1,
	            help="Increase this value to give more importance to how much customers spend.",
	        )

	        # Number of clusters (1 to 3, default 3)
	        num_clusters = st.sidebar.slider(
	            "🎯 Number of Clusters",
	            1, 3, 3,
	            help="This determines how many customer segments to create. More segments means more granular groups, but they may be harder to interpret.",
	        )

	        # Standardize FIRST, then apply the weights. StandardScaler rescales
	        # every column to unit variance, so multiplying a column by a positive
	        # constant before scaling cancels out exactly and the sliders would
	        # never change the clustering. Weighting the standardized columns
	        # stretches each axis in the distance K-means uses.
	        features = rfm_cluster[required_cols]
	        scaled = StandardScaler().fit_transform(features)
	        weighted = scaled * [recency_weight, frequency_weight, monetary_weight]

	        # K-means cannot form more clusters than there are rows; n_init is
	        # pinned so the number of restarts does not depend on the installed
	        # scikit-learn version's default.
	        kmeans = KMeans(
	            n_clusters=min(num_clusters, len(features)),
	            n_init=10,
	            random_state=42,
	        )
	        labels = kmeans.fit_predict(weighted)

	        # Attach the labels to a new frame instead of mutating the
	        # module-level table in place
	        clustered = rfm_cluster.assign(Live_Cluster=labels)

	        # Display the clustering results
	        st.subheader("📊 Recency vs Frequency by Cluster")

	        # Cluster ids are categories, not magnitudes: plot them as strings so
	        # the qualitative palette is applied as discrete colors with a legend
	        # rather than being interpolated as a continuous color scale.
	        plot_df = clustered.assign(Live_Cluster=clustered["Live_Cluster"].astype(str))
	        fig_rfm = px.scatter(
	            plot_df,
	            x="recency",
	            y="frequency",
	            color="Live_Cluster",
	            size="monetary",
	            hover_data=["recency", "frequency", "monetary"],
	            title="Live RFM Clustering: Recency vs Frequency",
	            height=600,
	            color_discrete_sequence=px.colors.qualitative.G10,
	            # Keep legend entries and colors in numeric cluster order
	            category_orders={
	                "Live_Cluster": sorted(plot_df["Live_Cluster"].unique(), key=int)
	            },
	        )

	        fig_rfm.update_layout(
	            xaxis_title="Recency (days since last purchase)",
	            yaxis_title="Frequency (number of purchases)",
	        )

	        st.plotly_chart(fig_rfm, use_container_width=True)

	        # Cluster Analysis
	        st.subheader("📈 Live Cluster Analysis")

	        # Named aggregation ties every output column to its source column and
	        # statistic, instead of renaming the result columns by position.
	        cluster_stats = (
	            clustered.groupby("Live_Cluster")
	            .agg(**{
	                "Avg Recency": ("recency", "mean"),
	                "Min Recency": ("recency", "min"),
	                "Max Recency": ("recency", "max"),
	                "Avg Frequency": ("frequency", "mean"),
	                "Min Frequency": ("frequency", "min"),
	                "Max Frequency": ("frequency", "max"),
	                "Avg Monetary": ("monetary", "mean"),
	                "Min Monetary": ("monetary", "min"),
	                "Max Monetary": ("monetary", "max"),
	                # Rows per cluster
	                "Count": ("recency", "size"),
	            })
	            .reset_index()
	            .rename(columns={"Live_Cluster": "Cluster"})
	        )

	        st.dataframe(cluster_stats)