{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Analysis and Preprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Necessary package imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix\n", "from sklearn.neighbors import KNeighborsClassifier\n", "import graphviz\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "from imblearn.over_sampling import SMOTE\n", "from collections import Counter" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | id | \n", "gender | \n", "age | \n", "hypertension | \n", "heart_disease | \n", "ever_married | \n", "work_type | \n", "Residence_type | \n", "avg_glucose_level | \n", "bmi | \n", "smoking_status | \n", "stroke | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "9046 | \n", "Male | \n", "67.0 | \n", "0 | \n", "1 | \n", "Yes | \n", "Private | \n", "Urban | \n", "228.69 | \n", "36.6 | \n", "formerly smoked | \n", "1 | \n", "
| 1 | \n", "51676 | \n", "Female | \n", "61.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Self-employed | \n", "Rural | \n", "202.21 | \n", "NaN | \n", "never smoked | \n", "1 | \n", "
| 2 | \n", "31112 | \n", "Male | \n", "80.0 | \n", "0 | \n", "1 | \n", "Yes | \n", "Private | \n", "Rural | \n", "105.92 | \n", "32.5 | \n", "never smoked | \n", "1 | \n", "
| 3 | \n", "60182 | \n", "Female | \n", "49.0 | \n", "0 | \n", "0 | \n", "Yes | \n", "Private | \n", "Urban | \n", "171.23 | \n", "34.4 | \n", "smokes | \n", "1 | \n", "
| 4 | \n", "1665 | \n", "Female | \n", "79.0 | \n", "1 | \n", "0 | \n", "Yes | \n", "Self-employed | \n", "Rural | \n", "174.12 | \n", "24.0 | \n", "never smoked | \n", "1 | \n", "