File size: 3,524 Bytes
6ba4689 f4f0c0c 5e451cb f6c40ec 5e451cb f6c40ec f4f0c0c f6c40ec 5e451cb 6ba4689 f6c40ec b5535f7 f6c40ec f4f0c0c f6c40ec f4f0c0c f6c40ec dfe582d f4f0c0c f6c40ec f4f0c0c f6c40ec f4f0c0c f6c40ec f4f0c0c b5535f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
# Page heading rendered above the tab bar.
st.title("🛍️ Customer Segmentation Tool")

# Create the three top-level tabs. The returned containers are entered with
# `with` further down the script to route output into the matching tab.
tab_labels = ["📖 About", "📊 Dataset Overview", "🧑🤝🧑 Customer Segmentation"]
tab1, tab2, tab3 = st.tabs(tab_labels)
# About Tab: static markdown describing what the app does and the four
# processing steps it walks through.
about_markdown = """
This app uses unsupervised learning techniques to segment customers based on their purchasing behavior.
The dataset is preloaded and contains online retail data.
### How It Works:
- **Step 1**: Load customer transaction data, including details like Quantity, UnitPrice, and CustomerID.
- **Step 2**: Process the data by calculating the total spent and aggregating the information by customer.
- **Step 3**: Apply **K-Means Clustering** to segment the customers into distinct groups.
- **Step 4**: Visualize the customer segments with a scatter plot.
"""

with tab1:
    st.write(about_markdown)
# Load preloaded dataset
@st.cache_data
def _read_retail_workbook(path, sheet_name):
    """Read one sheet of the retail Excel workbook into a DataFrame.

    Streamlit re-executes this script on every widget interaction, so the
    read is wrapped in ``st.cache_data`` to avoid re-parsing the workbook
    each time. The cache hands back a fresh copy per call, which keeps the
    cached frame safe from the in-place edits made later in the script.

    Args:
        path: Location of the ``.xlsx`` file.
        sheet_name: Name of the worksheet to load.

    Returns:
        The worksheet contents as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    return pd.read_excel(path, sheet_name=sheet_name)


file_path = "Online Retail.xlsx"
try:
    df = _read_retail_workbook(file_path, "Online Retail")
except FileNotFoundError:
    # Show a readable message in the app instead of a raw traceback, then
    # halt this run because nothing below can work without the data.
    st.error(f"Dataset file not found: {file_path}")
    st.stop()
# Dataset Overview Tab
def _clean_transactions(raw):
    """Return the transaction rows that can be attributed and priced.

    Rows without a ``CustomerID`` are dropped, ``Quantity`` and ``UnitPrice``
    are coerced to numeric (unparseable values become NaN), a ``TotalSpent``
    column (``Quantity * UnitPrice``) is added, and rows whose ``TotalSpent``
    is NaN are dropped.

    Args:
        raw: Transaction-level DataFrame with ``CustomerID``, ``Quantity``
            and ``UnitPrice`` columns.

    Returns:
        A new DataFrame; ``raw`` is not modified.
    """
    # Explicit copy: assigning columns onto the result of dropna() can raise
    # SettingWithCopyWarning and leaves it unclear which frame is mutated.
    cleaned = raw.dropna(subset=["CustomerID"]).copy()
    cleaned["Quantity"] = pd.to_numeric(cleaned["Quantity"], errors="coerce")
    cleaned["UnitPrice"] = pd.to_numeric(cleaned["UnitPrice"], errors="coerce")
    cleaned["TotalSpent"] = cleaned["Quantity"] * cleaned["UnitPrice"]
    return cleaned.dropna(subset=["TotalSpent"])


@st.cache_data
def _elbow_distortions(scaled_features, max_k):
    """Fit K-Means for k = 1..max_k and return the inertia of each fit.

    Cached because the fits depend only on the scaled features, yet the
    script is re-executed on every widget interaction.

    Args:
        scaled_features: Standardized per-customer feature table.
        max_k: Largest number of clusters to try (inclusive).

    Returns:
        A list of ``max_k`` inertia values, ordered by increasing k.
    """
    # n_init is pinned so results do not depend on the scikit-learn
    # version's default for this parameter.
    return [
        KMeans(n_clusters=k, random_state=42, n_init=10)
        .fit(scaled_features)
        .inertia_
        for k in range(1, max_k + 1)
    ]


with tab2:
    st.write("### Dataset Overview")
    st.write(df.head())

    # Preprocess data
    df = _clean_transactions(df)

    # Aggregate data by Customer.
    # NOTE: "NumTransactions" is the per-customer SUM of Quantity (units
    # bought), not a count of distinct invoices; the name is kept because
    # the segmentation tab reads this column.
    customer_data = (
        df.groupby("CustomerID")
        .agg({"TotalSpent": "sum", "Quantity": "sum", "UnitPrice": "mean"})
        .rename(columns={"Quantity": "NumTransactions", "UnitPrice": "AvgUnitPrice"})
    )

    # StandardScaler and KMeans both fail on an empty table; stop with a
    # readable message instead.
    if customer_data.empty:
        st.error("No usable customer rows remain after cleaning the dataset.")
        st.stop()

    st.write("### Processed Customer Data")
    st.write(customer_data.head())

    # Standardize the data so each feature contributes on the same scale.
    scaler = StandardScaler()
    customer_scaled = pd.DataFrame(
        scaler.fit_transform(customer_data),
        columns=customer_data.columns,
        index=customer_data.index,
    )

    # Elbow Method to determine optimal clusters
    st.write("### Elbow Method for Optimal Cluster Selection")
    K = range(1, 11)
    distortions = _elbow_distortions(customer_scaled, max(K))

    fig, ax = plt.subplots()
    ax.plot(K, distortions, marker='o')
    ax.set_xlabel("Number of Clusters")
    ax.set_ylabel("Distortion")
    ax.set_title("Elbow Method for Optimal k")
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this the
    # figures pile up across script reruns.
    plt.close(fig)
# Customer Segmentation Tab
with tab3:
    # User selects the number of clusters
    num_clusters = st.slider("Select Number of Clusters", min_value=2, max_value=10, value=3)

    # Apply K-Means clustering on the standardized features and store the
    # label of each customer next to the unscaled values.
    # n_init is pinned so results do not depend on the scikit-learn
    # version's default for this parameter.
    model = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
    customer_data["Cluster"] = model.fit_predict(customer_scaled)

    # Visualize the clusters
    # NOTE: "NumTransactions" is built earlier in this script as the
    # per-customer sum of Quantity, so the y axis shows units bought.
    st.write("### Clusters Visualization")
    fig, ax = plt.subplots()
    scatter = ax.scatter(
        customer_data["TotalSpent"],
        customer_data["NumTransactions"],
        c=customer_data["Cluster"],
        cmap='viridis',
    )
    ax.set_xlabel("Total Spent")
    ax.set_ylabel("Number of Transactions")
    ax.set_title("Customer Segments")
    # Attach the colorbar through the figure rather than pyplot's implicit
    # "current figure", and tick only the integer cluster labels.
    fig.colorbar(scatter, ax=ax, label="Cluster", ticks=list(range(num_clusters)))
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across reruns.
    plt.close(fig)

    # Show the segmented customer data: mean of each feature per cluster.
    st.write("### Customer Segments Data")
    st.write(
        customer_data.groupby("Cluster").agg(
            {"TotalSpent": "mean", "NumTransactions": "mean", "AvgUnitPrice": "mean"}
        )
    )
|