Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.cluster import KMeans | |
| from sklearn.preprocessing import StandardScaler | |
# Load dataset
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the mall-customers CSV into a DataFrame.

    Streamlit re-executes the script on every widget interaction, so the
    result is cached with ``st.cache_data`` instead of re-reading the file on
    each rerun. Each call hands back its own copy of the cached frame, so
    callers may add columns without affecting later reruns.

    Returns:
        The contents of ``Mall_Customers.csv`` (path relative to the working
        directory) as parsed by ``pandas.read_csv``.

    Raises:
        FileNotFoundError: If the CSV is not present in the working directory.
    """
    file_path = "Mall_Customers.csv"
    return pd.read_csv(file_path)


df = load_data()
# Columns the clustering model is trained on. The same names (in the same
# order) are reused when building the single-row input for prediction so the
# fitted scaler sees matching feature names.
FEATURE_COLUMNS = ["Annual Income (k$)", "Spending Score (1-100)"]

# Identifier / categorical columns excluded from the numeric exploration plots.
NON_NUMERIC_COLUMNS = ["CustomerID", "Gender"]


def _show_figure(fig):
    """Render a Matplotlib figure in the app, then close it.

    The figure is passed to ``st.pyplot`` explicitly rather than relying on
    pyplot's implicit "current figure". Closing it afterwards keeps figures
    from piling up in pyplot's registry across script reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_introduction():
    """Show the static introduction page: model, dataset, usage, insights."""
    st.title("Introduction to K-Means Clustering")

    st.write("### About the Model")
    st.write("K-Means Clustering is an unsupervised machine learning algorithm used for customer segmentation. It helps identify different groups of customers based on their spending behavior and income.")

    st.write("### About the Dataset")
    st.write("The dataset consists of customer information, including:")
    st.markdown("- **CustomerID**: Unique identifier for each customer.")
    st.markdown("- **Gender**: Male or Female.")
    st.markdown("- **Age**: Age of the customer.")
    st.markdown("- **Annual Income (k$)**: Customer's yearly income.")
    st.markdown("- **Spending Score (1-100)**: A score assigned based on spending behavior.")

    st.write("### How to Use the App")
    st.markdown("1. **Go to 'Data Exploration'**: Understand the dataset using statistics and visualizations.")
    st.markdown("2. **Go to 'K-Means Model'**: Train the model and visualize clusters.")
    st.markdown("3. **Go to 'Test Model'**: Input values to predict customer cluster.")

    st.write("### Insights")
    st.markdown("- Customers can be grouped into different segments based on their income and spending habits.")
    st.markdown("- The Elbow Method helps determine the optimal number of clusters.")
    st.markdown("- Businesses can use these insights to tailor marketing strategies and improve customer engagement.")


def _render_data_exploration(data):
    """Show head, summary statistics, pairplot and correlation heatmap.

    Args:
        data: The customer DataFrame; must contain the columns listed in
            ``NON_NUMERIC_COLUMNS``, which are dropped before plotting.
    """
    st.title("Data Exploration")

    st.write("### First 5 rows of dataset")
    st.dataframe(data.head())

    st.write("### Summary Statistics")
    st.write(data.describe())

    numeric = data.drop(columns=NON_NUMERIC_COLUMNS)

    st.write("### Pairplot")
    # pairplot builds its own figure; take it from the returned grid.
    pair_grid = sns.pairplot(numeric, diag_kind="kde")
    _show_figure(pair_grid.figure)

    st.write("### Correlation Heatmap")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(numeric.corr(), annot=True, cmap="coolwarm", ax=ax)
    _show_figure(fig)


def _render_kmeans_model(data):
    """Plot the elbow curve, fit K-Means for the chosen k and show clusters.

    The fitted scaler and model are stored in ``st.session_state`` under the
    keys ``'scaler'`` and ``'kmeans'`` so the "Test Model" page can use them.

    Args:
        data: The customer DataFrame; must contain ``FEATURE_COLUMNS``. It is
            not modified: cluster labels are added to a copy.
    """
    st.title("K-Means Clustering")

    # Selecting features for clustering
    features = data[FEATURE_COLUMNS]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    # Finding the optimal number of clusters using Elbow Method
    st.write("### Elbow Method")
    cluster_counts = range(1, 11)
    inertia = [
        KMeans(n_clusters=count, random_state=42, n_init=10)
        .fit(scaled_features)
        .inertia_
        for count in cluster_counts
    ]
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(cluster_counts, inertia, marker='o')
    ax.set_xlabel('Number of Clusters')
    ax.set_ylabel('Inertia')
    ax.set_title('Elbow Method for Optimal k')
    _show_figure(fig)

    # Train K-Means Model
    k = st.slider("Select Number of Clusters", 2, 10, 5)
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    clustered = data.assign(Cluster=kmeans.fit_predict(scaled_features))
    st.write("### Clustered Data")
    st.dataframe(clustered)

    # Visualization of clusters
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(
        data=clustered,
        x="Annual Income (k$)",
        y="Spending Score (1-100)",
        hue="Cluster",
        palette='viridis',
        ax=ax,
    )
    ax.set_xlabel("Annual Income (k$)")
    ax.set_ylabel("Spending Score (1-100)")
    ax.set_title("Customer Segmentation using K-Means")
    _show_figure(fig)

    # Store the model and scaler for the "Test Model" page
    st.session_state['scaler'] = scaler
    st.session_state['kmeans'] = kmeans


def _render_test_model():
    """Predict the cluster for a user-entered income / spending score.

    Requires that the "K-Means Model" page has been run in this session so
    that ``'scaler'`` and ``'kmeans'`` exist in ``st.session_state``; otherwise
    a prompt to run that page first is shown instead of a prediction.
    """
    st.title("Test K-Means Model")
    income = st.number_input("Enter Annual Income (k$)", min_value=0, max_value=200, value=50)
    score = st.number_input("Enter Spending Score (1-100)", min_value=1, max_value=100, value=50)

    if 'scaler' not in st.session_state or 'kmeans' not in st.session_state:
        st.write("### Please run the K-Means Model section first.")
        return

    # The scaler was fitted on a DataFrame, so give it a frame with the same
    # column names rather than a bare list.
    sample = pd.DataFrame([[income, score]], columns=FEATURE_COLUMNS)
    input_data = st.session_state['scaler'].transform(sample)
    prediction = st.session_state['kmeans'].predict(input_data)
    st.write(f"### Predicted Cluster: {prediction[0]}")


# Sidebar for navigation
st.sidebar.title("K-Means Clustering App")
section = st.sidebar.radio("Go to", ["Introduction", "Data Exploration", "K-Means Model", "Test Model"])

# Render the page selected in the sidebar.
if section == "Introduction":
    _render_introduction()
elif section == "Data Exploration":
    _render_data_exploration(df)
elif section == "K-Means Model":
    _render_kmeans_model(df)
elif section == "Test Model":
    _render_test_model()