{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Customer Segmentation\n",
    "\n",
    "Generates a synthetic customer table, standardizes three numeric features, groups the customers into segments with KMeans, trains a KNN classifier to reproduce those segment labels, pickles the fitted models, and plots the segments."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: every tunable value used below lives here.\n",
    "SEED = 42\n",
    "N_CUSTOMERS = 100\n",
    "N_CLUSTERS = 3\n",
    "N_NEIGHBORS = 5\n",
    "TEST_SIZE = 0.2\n",
    "FEATURES = ['Spending_Score', 'Annual_Income', 'Purchases']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data(n_customers=100, seed=42):\n",
    "    \"\"\"Build a synthetic customer table.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_customers : int, default 100\n",
    "        Number of rows to generate.\n",
    "    seed : int, default 42\n",
    "        Seed for the local random generator.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Columns CustomerID (1..n_customers), Spending_Score (1-100),\n",
    "        Annual_Income (15000-119999) and Purchases (1-49).\n",
    "    \"\"\"\n",
    "    # A local RandomState gives the same draws as np.random.seed(seed)\n",
    "    # followed by np.random.randint, without reseeding the global generator.\n",
    "    rng = np.random.RandomState(seed)\n",
    "    # Column order matters: the draws are consumed in this order.\n",
    "    return pd.DataFrame({\n",
    "        'CustomerID': range(1, n_customers + 1),\n",
    "        'Spending_Score': rng.randint(1, 101, n_customers),\n",
    "        'Annual_Income': rng.randint(15000, 120000, n_customers),\n",
    "        'Purchases': rng.randint(1, 50, n_customers),\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "customers = load_data(n_customers=N_CUSTOMERS, seed=SEED)\n",
    "assert len(customers) == N_CUSTOMERS\n",
    "assert customers['CustomerID'].is_unique"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scale features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardize the clustering features; the fitted scaler is pickled later.\n",
    "scaler = StandardScaler()\n",
    "X = customers[FEATURES]\n",
    "X_scaled = scaler.fit_transform(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Segment customers with KMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=SEED, n_init=10)\n",
    "# Re-running this cell overwrites the Segment column with the same labels.\n",
    "customers['Segment'] = kmeans.fit_predict(X_scaled)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train a KNN classifier on the segments\n",
    "\n",
    "The scaler and KMeans are fit on all customers before the split, so the reported accuracy measures how well KNN reproduces the KMeans assignments on the held-out rows."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X_scaled, customers['Segment'], test_size=TEST_SIZE, random_state=SEED\n",
    ")\n",
    "knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)\n",
    "knn.fit(X_train, y_train)\n",
    "accuracy = knn.score(X_test, y_test)\n",
    "print(f'KNN Model Accuracy: {accuracy:.2f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Written to the current working directory, in this order.\n",
    "artifacts = {\n",
    "    \"scaler.pkl\": scaler,\n",
    "    \"knn_model.pkl\": knn,\n",
    "    \"kmeans_model.pkl\": kmeans,\n",
    "}\n",
    "for filename, model in artifacts.items():\n",
    "    with open(filename, \"wb\") as f:\n",
    "        pickle.dump(model, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize the segments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "sns.scatterplot(\n",
    "    data=customers,\n",
    "    x='Annual_Income',\n",
    "    y='Spending_Score',\n",
    "    hue='Segment',\n",
    "    palette='viridis',\n",
    "    ax=ax,\n",
    ")\n",
    "ax.set(xlabel=\"Annual Income\", ylabel=\"Spending Score\", title=\"Customer Segmentation\")\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}