{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"
"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Packages"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.metrics import silhouette_score\n",
"from joblib import dump\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
| \n", " | ID | \n", "Sex | \n", "Marital status | \n", "Age | \n", "Education | \n", "Income | \n", "Occupation | \n", "Settlement size | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "100000001 | \n", "0 | \n", "0 | \n", "67 | \n", "2 | \n", "124670 | \n", "1 | \n", "2 | \n", "
| 1 | \n", "100000002 | \n", "1 | \n", "1 | \n", "22 | \n", "1 | \n", "150773 | \n", "1 | \n", "2 | \n", "
| 2 | \n", "100000003 | \n", "0 | \n", "0 | \n", "49 | \n", "1 | \n", "89210 | \n", "0 | \n", "0 | \n", "
| 3 | \n", "100000004 | \n", "0 | \n", "0 | \n", "45 | \n", "1 | \n", "171565 | \n", "1 | \n", "1 | \n", "
| 4 | \n", "100000005 | \n", "0 | \n", "0 | \n", "53 | \n", "1 | \n", "149031 | \n", "1 | \n", "1 | \n", "
| \n", " | Sex | \n", "Marital status | \n", "Age | \n", "Education | \n", "Income | \n", "Occupation | \n", "Settlement size | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "0 | \n", "67 | \n", "2 | \n", "124670 | \n", "1 | \n", "2 | \n", "
| 1 | \n", "1 | \n", "1 | \n", "22 | \n", "1 | \n", "150773 | \n", "1 | \n", "2 | \n", "
| 2 | \n", "0 | \n", "0 | \n", "49 | \n", "1 | \n", "89210 | \n", "0 | \n", "0 | \n", "
| 3 | \n", "0 | \n", "0 | \n", "45 | \n", "1 | \n", "171565 | \n", "1 | \n", "1 | \n", "
| 4 | \n", "0 | \n", "0 | \n", "53 | \n", "1 | \n", "149031 | \n", "1 | \n", "1 | \n", "
| \n", " | Sex | \n", "Marital status | \n", "Age | \n", "Education | \n", "Income | \n", "Occupation | \n", "Settlement size | \n", "
|---|---|---|---|---|---|---|---|
| 1 | \n", "1 | \n", "1 | \n", "22 | \n", "1 | \n", "150773 | \n", "1 | \n", "2 | \n", "
| 2 | \n", "0 | \n", "0 | \n", "49 | \n", "1 | \n", "89210 | \n", "0 | \n", "0 | \n", "
| 3 | \n", "0 | \n", "0 | \n", "45 | \n", "1 | \n", "171565 | \n", "1 | \n", "1 | \n", "
| 4 | \n", "0 | \n", "0 | \n", "53 | \n", "1 | \n", "149031 | \n", "1 | \n", "1 | \n", "
| 5 | \n", "0 | \n", "0 | \n", "35 | \n", "1 | \n", "144848 | \n", "0 | \n", "0 | \n", "
KMeans(n_clusters=3, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=3, random_state=42)