{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0be446e2",
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2023-07-05T15:46:39.066571Z",
"iopub.status.busy": "2023-07-05T15:46:39.066235Z",
"iopub.status.idle": "2023-07-05T15:46:39.078599Z",
"shell.execute_reply": "2023-07-05T15:46:39.077214Z"
},
"papermill": {
"duration": 0.025649,
"end_time": "2023-07-05T15:46:39.080496",
"exception": false,
"start_time": "2023-07-05T15:46:39.054847",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\n",
"/kaggle/input/icr-identify-age-related-conditions/greeks.csv\n",
"/kaggle/input/icr-identify-age-related-conditions/train.csv\n",
"/kaggle/input/icr-identify-age-related-conditions/test.csv\n"
]
}
],
"source": [
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
"# For example, here are several helpful packages to load\n",
"\n",
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"# Walk the input directory tree and print the full path of every file found.\n",
"for dirname, _, filenames in os.walk('/kaggle/input'):\n",
"    for filename in filenames:\n",
"        print(os.path.join(dirname, filename))\n",
"\n",
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
]
},
{
"cell_type": "markdown",
"id": "06ae7cdd",
"metadata": {
"papermill": {
"duration": 0.007921,
"end_time": "2023-07-05T15:46:39.096094",
"exception": false,
"start_time": "2023-07-05T15:46:39.088173",
"status": "completed"
},
"tags": []
},
"source": [
"# 1. Import the Libraries"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "71f53a82",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:39.114025Z",
"iopub.status.busy": "2023-07-05T15:46:39.112905Z",
"iopub.status.idle": "2023-07-05T15:46:41.354651Z",
"shell.execute_reply": "2023-07-05T15:46:41.353299Z"
},
"papermill": {
"duration": 2.253388,
"end_time": "2023-07-05T15:46:41.357197",
"exception": false,
"start_time": "2023-07-05T15:46:39.103809",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "bbae2d8d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.375772Z",
"iopub.status.busy": "2023-07-05T15:46:41.374537Z",
"iopub.status.idle": "2023-07-05T15:46:41.379755Z",
"shell.execute_reply": "2023-07-05T15:46:41.378776Z"
},
"papermill": {
"duration": 0.01666,
"end_time": "2023-07-05T15:46:41.381967",
"exception": false,
"start_time": "2023-07-05T15:46:41.365307",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "markdown",
"id": "d467f1d3",
"metadata": {
"papermill": {
"duration": 0.007505,
"end_time": "2023-07-05T15:46:41.397582",
"exception": false,
"start_time": "2023-07-05T15:46:41.390077",
"status": "completed"
},
"tags": []
},
"source": [
"# 2. Load the Datasets"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2906de3f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.416422Z",
"iopub.status.busy": "2023-07-05T15:46:41.415815Z",
"iopub.status.idle": "2023-07-05T15:46:41.459024Z",
"shell.execute_reply": "2023-07-05T15:46:41.457487Z"
},
"papermill": {
"duration": 0.055237,
"end_time": "2023-07-05T15:46:41.461494",
"exception": false,
"start_time": "2023-07-05T15:46:41.406257",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"train_df = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/train.csv')\n",
"test_df = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/test.csv')"
]
},
{
"cell_type": "markdown",
"id": "64f9eb89",
"metadata": {
"papermill": {
"duration": 0.007285,
"end_time": "2023-07-05T15:46:41.476703",
"exception": false,
"start_time": "2023-07-05T15:46:41.469418",
"status": "completed"
},
"tags": []
},
"source": [
"# 3. View the Datasets"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "05a12538",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.495027Z",
"iopub.status.busy": "2023-07-05T15:46:41.493738Z",
"iopub.status.idle": "2023-07-05T15:46:41.555274Z",
"shell.execute_reply": "2023-07-05T15:46:41.554055Z"
},
"papermill": {
"duration": 0.073121,
"end_time": "2023-07-05T15:46:41.557466",
"exception": false,
"start_time": "2023-07-05T15:46:41.484345",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Id \n",
" AB \n",
" AF \n",
" AH \n",
" AM \n",
" AR \n",
" AX \n",
" AY \n",
" AZ \n",
" BC \n",
" BD \n",
" BN \n",
" BP \n",
" BQ \n",
" BR \n",
" BZ \n",
" CB \n",
" CC \n",
" CD \n",
" CF \n",
" CH \n",
" CL \n",
" CR \n",
" CS \n",
" CU \n",
" CW \n",
" DA \n",
" DE \n",
" DF \n",
" DH \n",
" DI \n",
" DL \n",
" DN \n",
" DU \n",
" DV \n",
" DY \n",
" EB \n",
" EE \n",
" EG \n",
" EH \n",
" EJ \n",
" EL \n",
" EP \n",
" EU \n",
" FC \n",
" FD \n",
" FE \n",
" FI \n",
" FL \n",
" FR \n",
" FS \n",
" GB \n",
" GE \n",
" GF \n",
" GH \n",
" GI \n",
" GL \n",
" Class \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 000ff2bfdfe9 \n",
" 0.209377 \n",
" 3109.03329 \n",
" 85.200147 \n",
" 22.394407 \n",
" 8.138688 \n",
" 0.699861 \n",
" 0.025578 \n",
" 9.812214 \n",
" 5.555634 \n",
" 4126.58731 \n",
" 22.5984 \n",
" 175.638726 \n",
" 152.707705 \n",
" 823.928241 \n",
" 257.432377 \n",
" 47.223358 \n",
" 0.563481 \n",
" 23.387600 \n",
" 4.851915 \n",
" 0.023482 \n",
" 1.050225 \n",
" 0.069225 \n",
" 13.784111 \n",
" 1.302012 \n",
" 36.205956 \n",
" 69.08340 \n",
" 295.570575 \n",
" 0.23868 \n",
" 0.284232 \n",
" 89.245560 \n",
" 84.31664 \n",
" 29.657104 \n",
" 5.310690 \n",
" 1.74307 \n",
" 23.187704 \n",
" 7.294176 \n",
" 1.987283 \n",
" 1433.166750 \n",
" 0.949104 \n",
" B \n",
" 30.879420 \n",
" 78.526968 \n",
" 3.828384 \n",
" 13.394640 \n",
" 10.265073 \n",
" 9028.291921 \n",
" 3.583450 \n",
" 7.298162 \n",
" 1.73855 \n",
" 0.094822 \n",
" 11.339138 \n",
" 72.611063 \n",
" 2003.810319 \n",
" 22.136229 \n",
" 69.834944 \n",
" 0.120343 \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" 007255e47698 \n",
" 0.145282 \n",
" 978.76416 \n",
" 85.200147 \n",
" 36.968889 \n",
" 8.138688 \n",
" 3.632190 \n",
" 0.025578 \n",
" 13.517790 \n",
" 1.229900 \n",
" 5496.92824 \n",
" 19.4205 \n",
" 155.868030 \n",
" 14.754720 \n",
" 51.216883 \n",
" 257.432377 \n",
" 30.284345 \n",
" 0.484710 \n",
" 50.628208 \n",
" 6.085041 \n",
" 0.031442 \n",
" 1.113875 \n",
" 1.117800 \n",
" 28.310953 \n",
" 1.357182 \n",
" 37.476568 \n",
" 70.79836 \n",
" 178.553100 \n",
" 0.23868 \n",
" 0.363489 \n",
" 110.581815 \n",
" 75.74548 \n",
" 37.532000 \n",
" 0.005518 \n",
" 1.74307 \n",
" 17.222328 \n",
" 4.926396 \n",
" 0.858603 \n",
" 1111.287150 \n",
" 0.003042 \n",
" A \n",
" 109.125159 \n",
" 95.415086 \n",
" 52.260480 \n",
" 17.175984 \n",
" 0.296850 \n",
" 6785.003474 \n",
" 10.358927 \n",
" 0.173229 \n",
" 0.49706 \n",
" 0.568932 \n",
" 9.292698 \n",
" 72.611063 \n",
" 27981.562750 \n",
" 29.135430 \n",
" 32.131996 \n",
" 21.978000 \n",
" 0 \n",
" \n",
" \n",
" 2 \n",
" 013f2bd269f5 \n",
" 0.470030 \n",
" 2635.10654 \n",
" 85.200147 \n",
" 32.360553 \n",
" 8.138688 \n",
" 6.732840 \n",
" 0.025578 \n",
" 12.824570 \n",
" 1.229900 \n",
" 5135.78024 \n",
" 26.4825 \n",
" 128.988531 \n",
" 219.320160 \n",
" 482.141594 \n",
" 257.432377 \n",
" 32.563713 \n",
" 0.495852 \n",
" 85.955376 \n",
" 5.376488 \n",
" 0.036218 \n",
" 1.050225 \n",
" 0.700350 \n",
" 39.364743 \n",
" 1.009611 \n",
" 21.459644 \n",
" 70.81970 \n",
" 321.426625 \n",
" 0.23868 \n",
" 0.210441 \n",
" 120.056438 \n",
" 65.46984 \n",
" 28.053464 \n",
" 1.289739 \n",
" 1.74307 \n",
" 36.861352 \n",
" 7.813674 \n",
" 8.146651 \n",
" 1494.076488 \n",
" 0.377208 \n",
" B \n",
" 109.125159 \n",
" 78.526968 \n",
" 5.390628 \n",
" 224.207424 \n",
" 8.745201 \n",
" 8338.906181 \n",
" 11.626917 \n",
" 7.709560 \n",
" 0.97556 \n",
" 1.198821 \n",
" 37.077772 \n",
" 88.609437 \n",
" 13676.957810 \n",
" 28.022851 \n",
" 35.192676 \n",
" 0.196941 \n",
" 0 \n",
" \n",
" \n",
" 3 \n",
" 043ac50845d5 \n",
" 0.252107 \n",
" 3819.65177 \n",
" 120.201618 \n",
" 77.112203 \n",
" 8.138688 \n",
" 3.685344 \n",
" 0.025578 \n",
" 11.053708 \n",
" 1.229900 \n",
" 4169.67738 \n",
" 23.6577 \n",
" 237.282264 \n",
" 11.050410 \n",
" 661.518640 \n",
" 257.432377 \n",
" 15.201914 \n",
" 0.717882 \n",
" 88.159360 \n",
" 2.347652 \n",
" 0.029054 \n",
" 1.400300 \n",
" 0.636075 \n",
" 41.116960 \n",
" 0.722727 \n",
" 21.530392 \n",
" 47.27586 \n",
" 196.607985 \n",
" 0.23868 \n",
" 0.292431 \n",
" 139.824570 \n",
" 71.57120 \n",
" 24.354856 \n",
" 2.655345 \n",
" 1.74307 \n",
" 52.003884 \n",
" 7.386060 \n",
" 3.813326 \n",
" 15691.552180 \n",
" 0.614484 \n",
" B \n",
" 31.674357 \n",
" 78.526968 \n",
" 31.323372 \n",
" 59.301984 \n",
" 7.884336 \n",
" 10965.766040 \n",
" 14.852022 \n",
" 6.122162 \n",
" 0.49706 \n",
" 0.284466 \n",
" 18.529584 \n",
" 82.416803 \n",
" 2094.262452 \n",
" 39.948656 \n",
" 90.493248 \n",
" 0.155829 \n",
" 0 \n",
" \n",
" \n",
" 4 \n",
" 044fb8a146ec \n",
" 0.380297 \n",
" 3733.04844 \n",
" 85.200147 \n",
" 14.103738 \n",
" 8.138688 \n",
" 3.942255 \n",
" 0.054810 \n",
" 3.396778 \n",
" 102.151980 \n",
" 5728.73412 \n",
" 24.0108 \n",
" 324.546318 \n",
" 149.717165 \n",
" 6074.859475 \n",
" 257.432377 \n",
" 82.213495 \n",
" 0.536467 \n",
" 72.644264 \n",
" 30.537722 \n",
" 0.025472 \n",
" 1.050225 \n",
" 0.693150 \n",
" 31.724726 \n",
" 0.827550 \n",
" 34.415360 \n",
" 74.06532 \n",
" 200.178160 \n",
" 0.23868 \n",
" 0.207708 \n",
" 97.920120 \n",
" 52.83888 \n",
" 26.019912 \n",
" 1.144902 \n",
" 1.74307 \n",
" 9.064856 \n",
" 7.350720 \n",
" 3.490846 \n",
" 1403.656300 \n",
" 0.164268 \n",
" B \n",
" 109.125159 \n",
" 91.994825 \n",
" 51.141336 \n",
" 29.102640 \n",
" 4.274640 \n",
" 16198.049590 \n",
" 13.666727 \n",
" 8.153058 \n",
" 48.50134 \n",
" 0.121914 \n",
" 16.408728 \n",
" 146.109943 \n",
" 8524.370502 \n",
" 45.381316 \n",
" 36.262628 \n",
" 0.096614 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id AB AF AH AM AR \\\n",
"0 000ff2bfdfe9 0.209377 3109.03329 85.200147 22.394407 8.138688 \n",
"1 007255e47698 0.145282 978.76416 85.200147 36.968889 8.138688 \n",
"2 013f2bd269f5 0.470030 2635.10654 85.200147 32.360553 8.138688 \n",
"3 043ac50845d5 0.252107 3819.65177 120.201618 77.112203 8.138688 \n",
"4 044fb8a146ec 0.380297 3733.04844 85.200147 14.103738 8.138688 \n",
"\n",
" AX AY AZ BC BD BN BP \\\n",
"0 0.699861 0.025578 9.812214 5.555634 4126.58731 22.5984 175.638726 \n",
"1 3.632190 0.025578 13.517790 1.229900 5496.92824 19.4205 155.868030 \n",
"2 6.732840 0.025578 12.824570 1.229900 5135.78024 26.4825 128.988531 \n",
"3 3.685344 0.025578 11.053708 1.229900 4169.67738 23.6577 237.282264 \n",
"4 3.942255 0.054810 3.396778 102.151980 5728.73412 24.0108 324.546318 \n",
"\n",
" BQ BR BZ CB CC CD \\\n",
"0 152.707705 823.928241 257.432377 47.223358 0.563481 23.387600 \n",
"1 14.754720 51.216883 257.432377 30.284345 0.484710 50.628208 \n",
"2 219.320160 482.141594 257.432377 32.563713 0.495852 85.955376 \n",
"3 11.050410 661.518640 257.432377 15.201914 0.717882 88.159360 \n",
"4 149.717165 6074.859475 257.432377 82.213495 0.536467 72.644264 \n",
"\n",
" CF CH CL CR CS CU CW \\\n",
"0 4.851915 0.023482 1.050225 0.069225 13.784111 1.302012 36.205956 \n",
"1 6.085041 0.031442 1.113875 1.117800 28.310953 1.357182 37.476568 \n",
"2 5.376488 0.036218 1.050225 0.700350 39.364743 1.009611 21.459644 \n",
"3 2.347652 0.029054 1.400300 0.636075 41.116960 0.722727 21.530392 \n",
"4 30.537722 0.025472 1.050225 0.693150 31.724726 0.827550 34.415360 \n",
"\n",
" DA DE DF DH DI DL DN \\\n",
"0 69.08340 295.570575 0.23868 0.284232 89.245560 84.31664 29.657104 \n",
"1 70.79836 178.553100 0.23868 0.363489 110.581815 75.74548 37.532000 \n",
"2 70.81970 321.426625 0.23868 0.210441 120.056438 65.46984 28.053464 \n",
"3 47.27586 196.607985 0.23868 0.292431 139.824570 71.57120 24.354856 \n",
"4 74.06532 200.178160 0.23868 0.207708 97.920120 52.83888 26.019912 \n",
"\n",
" DU DV DY EB EE EG EH \\\n",
"0 5.310690 1.74307 23.187704 7.294176 1.987283 1433.166750 0.949104 \n",
"1 0.005518 1.74307 17.222328 4.926396 0.858603 1111.287150 0.003042 \n",
"2 1.289739 1.74307 36.861352 7.813674 8.146651 1494.076488 0.377208 \n",
"3 2.655345 1.74307 52.003884 7.386060 3.813326 15691.552180 0.614484 \n",
"4 1.144902 1.74307 9.064856 7.350720 3.490846 1403.656300 0.164268 \n",
"\n",
" EJ EL EP EU FC FD FE \\\n",
"0 B 30.879420 78.526968 3.828384 13.394640 10.265073 9028.291921 \n",
"1 A 109.125159 95.415086 52.260480 17.175984 0.296850 6785.003474 \n",
"2 B 109.125159 78.526968 5.390628 224.207424 8.745201 8338.906181 \n",
"3 B 31.674357 78.526968 31.323372 59.301984 7.884336 10965.766040 \n",
"4 B 109.125159 91.994825 51.141336 29.102640 4.274640 16198.049590 \n",
"\n",
" FI FL FR FS GB GE \\\n",
"0 3.583450 7.298162 1.73855 0.094822 11.339138 72.611063 \n",
"1 10.358927 0.173229 0.49706 0.568932 9.292698 72.611063 \n",
"2 11.626917 7.709560 0.97556 1.198821 37.077772 88.609437 \n",
"3 14.852022 6.122162 0.49706 0.284466 18.529584 82.416803 \n",
"4 13.666727 8.153058 48.50134 0.121914 16.408728 146.109943 \n",
"\n",
" GF GH GI GL Class \n",
"0 2003.810319 22.136229 69.834944 0.120343 1 \n",
"1 27981.562750 29.135430 32.131996 21.978000 0 \n",
"2 13676.957810 28.022851 35.192676 0.196941 0 \n",
"3 2094.262452 39.948656 90.493248 0.155829 0 \n",
"4 8524.370502 45.381316 36.262628 0.096614 1 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fa624379",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.575511Z",
"iopub.status.busy": "2023-07-05T15:46:41.575168Z",
"iopub.status.idle": "2023-07-05T15:46:41.629223Z",
"shell.execute_reply": "2023-07-05T15:46:41.628091Z"
},
"papermill": {
"duration": 0.065299,
"end_time": "2023-07-05T15:46:41.631210",
"exception": false,
"start_time": "2023-07-05T15:46:41.565911",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Id \n",
" AB \n",
" AF \n",
" AH \n",
" AM \n",
" AR \n",
" AX \n",
" AY \n",
" AZ \n",
" BC \n",
" BD \n",
" BN \n",
" BP \n",
" BQ \n",
" BR \n",
" BZ \n",
" CB \n",
" CC \n",
" CD \n",
" CF \n",
" CH \n",
" CL \n",
" CR \n",
" CS \n",
" CU \n",
" CW \n",
" DA \n",
" DE \n",
" DF \n",
" DH \n",
" DI \n",
" DL \n",
" DN \n",
" DU \n",
" DV \n",
" DY \n",
" EB \n",
" EE \n",
" EG \n",
" EH \n",
" EJ \n",
" EL \n",
" EP \n",
" EU \n",
" FC \n",
" FD \n",
" FE \n",
" FI \n",
" FL \n",
" FR \n",
" FS \n",
" GB \n",
" GE \n",
" GF \n",
" GH \n",
" GI \n",
" GL \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 00eed32682bb \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" A \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" \n",
" \n",
" 1 \n",
" 010ebe33f668 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" A \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" \n",
" \n",
" 2 \n",
" 02fa521e1838 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" A \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" \n",
" \n",
" 3 \n",
" 040e15f562a2 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" A \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" \n",
" \n",
" 4 \n",
" 046e85c7cc7f \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" A \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" 0.0 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id AB AF AH AM AR AX AY AZ BC BD BN BP \\\n",
"0 00eed32682bb 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 010ebe33f668 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 02fa521e1838 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 040e15f562a2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 046e85c7cc7f 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" BQ BR BZ CB CC CD CF CH CL CR CS CU CW DA DE \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" DF DH DI DL DN DU DV DY EB EE EG EH EJ EL EP \\\n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 A 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 A 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 A 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 A 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 A 0.0 0.0 \n",
"\n",
" EU FC FD FE FI FL FR FS GB GE GF GH GI GL \n",
"0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a5dd9265",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.650350Z",
"iopub.status.busy": "2023-07-05T15:46:41.649982Z",
"iopub.status.idle": "2023-07-05T15:46:41.680166Z",
"shell.execute_reply": "2023-07-05T15:46:41.678193Z"
},
"papermill": {
"duration": 0.042405,
"end_time": "2023-07-05T15:46:41.682423",
"exception": false,
"start_time": "2023-07-05T15:46:41.640018",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 617 entries, 0 to 616\n",
"Data columns (total 58 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Id 617 non-null object \n",
" 1 AB 617 non-null float64\n",
" 2 AF 617 non-null float64\n",
" 3 AH 617 non-null float64\n",
" 4 AM 617 non-null float64\n",
" 5 AR 617 non-null float64\n",
" 6 AX 617 non-null float64\n",
" 7 AY 617 non-null float64\n",
" 8 AZ 617 non-null float64\n",
" 9 BC 617 non-null float64\n",
" 10 BD 617 non-null float64\n",
" 11 BN 617 non-null float64\n",
" 12 BP 617 non-null float64\n",
" 13 BQ 557 non-null float64\n",
" 14 BR 617 non-null float64\n",
" 15 BZ 617 non-null float64\n",
" 16 CB 615 non-null float64\n",
" 17 CC 614 non-null float64\n",
" 18 CD 617 non-null float64\n",
" 19 CF 617 non-null float64\n",
" 20 CH 617 non-null float64\n",
" 21 CL 617 non-null float64\n",
" 22 CR 617 non-null float64\n",
" 23 CS 617 non-null float64\n",
" 24 CU 617 non-null float64\n",
" 25 CW 617 non-null float64\n",
" 26 DA 617 non-null float64\n",
" 27 DE 617 non-null float64\n",
" 28 DF 617 non-null float64\n",
" 29 DH 617 non-null float64\n",
" 30 DI 617 non-null float64\n",
" 31 DL 617 non-null float64\n",
" 32 DN 617 non-null float64\n",
" 33 DU 616 non-null float64\n",
" 34 DV 617 non-null float64\n",
" 35 DY 617 non-null float64\n",
" 36 EB 617 non-null float64\n",
" 37 EE 617 non-null float64\n",
" 38 EG 617 non-null float64\n",
" 39 EH 617 non-null float64\n",
" 40 EJ 617 non-null object \n",
" 41 EL 557 non-null float64\n",
" 42 EP 617 non-null float64\n",
" 43 EU 617 non-null float64\n",
" 44 FC 616 non-null float64\n",
" 45 FD 617 non-null float64\n",
" 46 FE 617 non-null float64\n",
" 47 FI 617 non-null float64\n",
" 48 FL 616 non-null float64\n",
" 49 FR 617 non-null float64\n",
" 50 FS 615 non-null float64\n",
" 51 GB 617 non-null float64\n",
" 52 GE 617 non-null float64\n",
" 53 GF 617 non-null float64\n",
" 54 GH 617 non-null float64\n",
" 55 GI 617 non-null float64\n",
" 56 GL 616 non-null float64\n",
" 57 Class 617 non-null int64 \n",
"dtypes: float64(55), int64(1), object(2)\n",
"memory usage: 279.7+ KB\n"
]
}
],
"source": [
"train_df.info()"
]
},
{
"cell_type": "markdown",
"id": "b936471f",
"metadata": {
"papermill": {
"duration": 0.008422,
"end_time": "2023-07-05T15:46:41.699691",
"exception": false,
"start_time": "2023-07-05T15:46:41.691269",
"status": "completed"
},
"tags": []
},
"source": [
"## 3.1 Check for NaN Values"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a501be50",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.718572Z",
"iopub.status.busy": "2023-07-05T15:46:41.718215Z",
"iopub.status.idle": "2023-07-05T15:46:41.728098Z",
"shell.execute_reply": "2023-07-05T15:46:41.727105Z"
},
"papermill": {
"duration": 0.021586,
"end_time": "2023-07-05T15:46:41.730068",
"exception": false,
"start_time": "2023-07-05T15:46:41.708482",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Id 0\n",
"AB 0\n",
"AF 0\n",
"AH 0\n",
"AM 0\n",
"AR 0\n",
"AX 0\n",
"AY 0\n",
"AZ 0\n",
"BC 0\n",
"BD 0\n",
"BN 0\n",
"BP 0\n",
"BQ 60\n",
"BR 0\n",
"BZ 0\n",
"CB 2\n",
"CC 3\n",
"CD 0\n",
"CF 0\n",
"CH 0\n",
"CL 0\n",
"CR 0\n",
"CS 0\n",
"CU 0\n",
"CW 0\n",
"DA 0\n",
"DE 0\n",
"DF 0\n",
"DH 0\n",
"DI 0\n",
"DL 0\n",
"DN 0\n",
"DU 1\n",
"DV 0\n",
"DY 0\n",
"EB 0\n",
"EE 0\n",
"EG 0\n",
"EH 0\n",
"EJ 0\n",
"EL 60\n",
"EP 0\n",
"EU 0\n",
"FC 1\n",
"FD 0\n",
"FE 0\n",
"FI 0\n",
"FL 1\n",
"FR 0\n",
"FS 2\n",
"GB 0\n",
"GE 0\n",
"GF 0\n",
"GH 0\n",
"GI 0\n",
"GL 1\n",
"Class 0\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1a8490c9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.749859Z",
"iopub.status.busy": "2023-07-05T15:46:41.749147Z",
"iopub.status.idle": "2023-07-05T15:46:41.761153Z",
"shell.execute_reply": "2023-07-05T15:46:41.760480Z"
},
"papermill": {
"duration": 0.024553,
"end_time": "2023-07-05T15:46:41.763599",
"exception": false,
"start_time": "2023-07-05T15:46:41.739046",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 5 entries, 0 to 4\n",
"Data columns (total 57 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Id 5 non-null object \n",
" 1 AB 5 non-null float64\n",
" 2 AF 5 non-null float64\n",
" 3 AH 5 non-null float64\n",
" 4 AM 5 non-null float64\n",
" 5 AR 5 non-null float64\n",
" 6 AX 5 non-null float64\n",
" 7 AY 5 non-null float64\n",
" 8 AZ 5 non-null float64\n",
" 9 BC 5 non-null float64\n",
" 10 BD 5 non-null float64\n",
" 11 BN 5 non-null float64\n",
" 12 BP 5 non-null float64\n",
" 13 BQ 5 non-null float64\n",
" 14 BR 5 non-null float64\n",
" 15 BZ 5 non-null float64\n",
" 16 CB 5 non-null float64\n",
" 17 CC 5 non-null float64\n",
" 18 CD 5 non-null float64\n",
" 19 CF 5 non-null float64\n",
" 20 CH 5 non-null float64\n",
" 21 CL 5 non-null float64\n",
" 22 CR 5 non-null float64\n",
" 23 CS 5 non-null float64\n",
" 24 CU 5 non-null float64\n",
" 25 CW 5 non-null float64\n",
" 26 DA 5 non-null float64\n",
" 27 DE 5 non-null float64\n",
" 28 DF 5 non-null float64\n",
" 29 DH 5 non-null float64\n",
" 30 DI 5 non-null float64\n",
" 31 DL 5 non-null float64\n",
" 32 DN 5 non-null float64\n",
" 33 DU 5 non-null float64\n",
" 34 DV 5 non-null float64\n",
" 35 DY 5 non-null float64\n",
" 36 EB 5 non-null float64\n",
" 37 EE 5 non-null float64\n",
" 38 EG 5 non-null float64\n",
" 39 EH 5 non-null float64\n",
" 40 EJ 5 non-null object \n",
" 41 EL 5 non-null float64\n",
" 42 EP 5 non-null float64\n",
" 43 EU 5 non-null float64\n",
" 44 FC 5 non-null float64\n",
" 45 FD 5 non-null float64\n",
" 46 FE 5 non-null float64\n",
" 47 FI 5 non-null float64\n",
" 48 FL 5 non-null float64\n",
" 49 FR 5 non-null float64\n",
" 50 FS 5 non-null float64\n",
" 51 GB 5 non-null float64\n",
" 52 GE 5 non-null float64\n",
" 53 GF 5 non-null float64\n",
" 54 GH 5 non-null float64\n",
" 55 GI 5 non-null float64\n",
" 56 GL 5 non-null float64\n",
"dtypes: float64(55), object(2)\n",
"memory usage: 2.4+ KB\n"
]
}
],
"source": [
"test_df.info()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d3a74c2b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.783906Z",
"iopub.status.busy": "2023-07-05T15:46:41.783138Z",
"iopub.status.idle": "2023-07-05T15:46:41.790255Z",
"shell.execute_reply": "2023-07-05T15:46:41.789668Z"
},
"papermill": {
"duration": 0.01986,
"end_time": "2023-07-05T15:46:41.792648",
"exception": false,
"start_time": "2023-07-05T15:46:41.772788",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Id 0\n",
"AB 0\n",
"AF 0\n",
"AH 0\n",
"AM 0\n",
"AR 0\n",
"AX 0\n",
"AY 0\n",
"AZ 0\n",
"BC 0\n",
"BD 0\n",
"BN 0\n",
"BP 0\n",
"BQ 0\n",
"BR 0\n",
"BZ 0\n",
"CB 0\n",
"CC 0\n",
"CD 0\n",
"CF 0\n",
"CH 0\n",
"CL 0\n",
"CR 0\n",
"CS 0\n",
"CU 0\n",
"CW 0\n",
"DA 0\n",
"DE 0\n",
"DF 0\n",
"DH 0\n",
"DI 0\n",
"DL 0\n",
"DN 0\n",
"DU 0\n",
"DV 0\n",
"DY 0\n",
"EB 0\n",
"EE 0\n",
"EG 0\n",
"EH 0\n",
"EJ 0\n",
"EL 0\n",
"EP 0\n",
"EU 0\n",
"FC 0\n",
"FD 0\n",
"FE 0\n",
"FI 0\n",
"FL 0\n",
"FR 0\n",
"FS 0\n",
"GB 0\n",
"GE 0\n",
"GF 0\n",
"GH 0\n",
"GI 0\n",
"GL 0\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_df.isnull().sum()"
]
},
{
"cell_type": "markdown",
"id": "f33dae95",
"metadata": {
"papermill": {
"duration": 0.008786,
"end_time": "2023-07-05T15:46:41.810758",
"exception": false,
"start_time": "2023-07-05T15:46:41.801972",
"status": "completed"
},
"tags": []
},
"source": [
"# 4. Data Cleaning"
]
},
{
"cell_type": "markdown",
"id": "f3cf046a",
"metadata": {
"papermill": {
"duration": 0.008833,
"end_time": "2023-07-05T15:46:41.828833",
"exception": false,
"start_time": "2023-07-05T15:46:41.820000",
"status": "completed"
},
"tags": []
},
"source": [
"**Dropping the `Id` and `EJ` columns from the train dataset**"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "47b78dc7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.848731Z",
"iopub.status.busy": "2023-07-05T15:46:41.848407Z",
"iopub.status.idle": "2023-07-05T15:46:41.890814Z",
"shell.execute_reply": "2023-07-05T15:46:41.889544Z"
},
"papermill": {
"duration": 0.055327,
"end_time": "2023-07-05T15:46:41.893409",
"exception": false,
"start_time": "2023-07-05T15:46:41.838082",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" AB \n",
" AF \n",
" AH \n",
" AM \n",
" AR \n",
" AX \n",
" AY \n",
" AZ \n",
" BC \n",
" BD \n",
" BN \n",
" BP \n",
" BQ \n",
" BR \n",
" BZ \n",
" CB \n",
" CC \n",
" CD \n",
" CF \n",
" CH \n",
" CL \n",
" CR \n",
" CS \n",
" CU \n",
" CW \n",
" DA \n",
" DE \n",
" DF \n",
" DH \n",
" DI \n",
" DL \n",
" DN \n",
" DU \n",
" DV \n",
" DY \n",
" EB \n",
" EE \n",
" EG \n",
" EH \n",
" EL \n",
" EP \n",
" EU \n",
" FC \n",
" FD \n",
" FE \n",
" FI \n",
" FL \n",
" FR \n",
" FS \n",
" GB \n",
" GE \n",
" GF \n",
" GH \n",
" GI \n",
" GL \n",
" Class \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0.209377 \n",
" 3109.03329 \n",
" 85.200147 \n",
" 22.394407 \n",
" 8.138688 \n",
" 0.699861 \n",
" 0.025578 \n",
" 9.812214 \n",
" 5.555634 \n",
" 4126.58731 \n",
" 22.5984 \n",
" 175.638726 \n",
" 152.707705 \n",
" 823.928241 \n",
" 257.432377 \n",
" 47.223358 \n",
" 0.563481 \n",
" 23.387600 \n",
" 4.851915 \n",
" 0.023482 \n",
" 1.050225 \n",
" 0.069225 \n",
" 13.784111 \n",
" 1.302012 \n",
" 36.205956 \n",
" 69.08340 \n",
" 295.570575 \n",
" 0.23868 \n",
" 0.284232 \n",
" 89.245560 \n",
" 84.31664 \n",
" 29.657104 \n",
" 5.310690 \n",
" 1.74307 \n",
" 23.187704 \n",
" 7.294176 \n",
" 1.987283 \n",
" 1433.166750 \n",
" 0.949104 \n",
" 30.879420 \n",
" 78.526968 \n",
" 3.828384 \n",
" 13.394640 \n",
" 10.265073 \n",
" 9028.291921 \n",
" 3.583450 \n",
" 7.298162 \n",
" 1.73855 \n",
" 0.094822 \n",
" 11.339138 \n",
" 72.611063 \n",
" 2003.810319 \n",
" 22.136229 \n",
" 69.834944 \n",
" 0.120343 \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" 0.145282 \n",
" 978.76416 \n",
" 85.200147 \n",
" 36.968889 \n",
" 8.138688 \n",
" 3.632190 \n",
" 0.025578 \n",
" 13.517790 \n",
" 1.229900 \n",
" 5496.92824 \n",
" 19.4205 \n",
" 155.868030 \n",
" 14.754720 \n",
" 51.216883 \n",
" 257.432377 \n",
" 30.284345 \n",
" 0.484710 \n",
" 50.628208 \n",
" 6.085041 \n",
" 0.031442 \n",
" 1.113875 \n",
" 1.117800 \n",
" 28.310953 \n",
" 1.357182 \n",
" 37.476568 \n",
" 70.79836 \n",
" 178.553100 \n",
" 0.23868 \n",
" 0.363489 \n",
" 110.581815 \n",
" 75.74548 \n",
" 37.532000 \n",
" 0.005518 \n",
" 1.74307 \n",
" 17.222328 \n",
" 4.926396 \n",
" 0.858603 \n",
" 1111.287150 \n",
" 0.003042 \n",
" 109.125159 \n",
" 95.415086 \n",
" 52.260480 \n",
" 17.175984 \n",
" 0.296850 \n",
" 6785.003474 \n",
" 10.358927 \n",
" 0.173229 \n",
" 0.49706 \n",
" 0.568932 \n",
" 9.292698 \n",
" 72.611063 \n",
" 27981.562750 \n",
" 29.135430 \n",
" 32.131996 \n",
" 21.978000 \n",
" 0 \n",
" \n",
" \n",
" 2 \n",
" 0.470030 \n",
" 2635.10654 \n",
" 85.200147 \n",
" 32.360553 \n",
" 8.138688 \n",
" 6.732840 \n",
" 0.025578 \n",
" 12.824570 \n",
" 1.229900 \n",
" 5135.78024 \n",
" 26.4825 \n",
" 128.988531 \n",
" 219.320160 \n",
" 482.141594 \n",
" 257.432377 \n",
" 32.563713 \n",
" 0.495852 \n",
" 85.955376 \n",
" 5.376488 \n",
" 0.036218 \n",
" 1.050225 \n",
" 0.700350 \n",
" 39.364743 \n",
" 1.009611 \n",
" 21.459644 \n",
" 70.81970 \n",
" 321.426625 \n",
" 0.23868 \n",
" 0.210441 \n",
" 120.056438 \n",
" 65.46984 \n",
" 28.053464 \n",
" 1.289739 \n",
" 1.74307 \n",
" 36.861352 \n",
" 7.813674 \n",
" 8.146651 \n",
" 1494.076488 \n",
" 0.377208 \n",
" 109.125159 \n",
" 78.526968 \n",
" 5.390628 \n",
" 224.207424 \n",
" 8.745201 \n",
" 8338.906181 \n",
" 11.626917 \n",
" 7.709560 \n",
" 0.97556 \n",
" 1.198821 \n",
" 37.077772 \n",
" 88.609437 \n",
" 13676.957810 \n",
" 28.022851 \n",
" 35.192676 \n",
" 0.196941 \n",
" 0 \n",
" \n",
" \n",
" 3 \n",
" 0.252107 \n",
" 3819.65177 \n",
" 120.201618 \n",
" 77.112203 \n",
" 8.138688 \n",
" 3.685344 \n",
" 0.025578 \n",
" 11.053708 \n",
" 1.229900 \n",
" 4169.67738 \n",
" 23.6577 \n",
" 237.282264 \n",
" 11.050410 \n",
" 661.518640 \n",
" 257.432377 \n",
" 15.201914 \n",
" 0.717882 \n",
" 88.159360 \n",
" 2.347652 \n",
" 0.029054 \n",
" 1.400300 \n",
" 0.636075 \n",
" 41.116960 \n",
" 0.722727 \n",
" 21.530392 \n",
" 47.27586 \n",
" 196.607985 \n",
" 0.23868 \n",
" 0.292431 \n",
" 139.824570 \n",
" 71.57120 \n",
" 24.354856 \n",
" 2.655345 \n",
" 1.74307 \n",
" 52.003884 \n",
" 7.386060 \n",
" 3.813326 \n",
" 15691.552180 \n",
" 0.614484 \n",
" 31.674357 \n",
" 78.526968 \n",
" 31.323372 \n",
" 59.301984 \n",
" 7.884336 \n",
" 10965.766040 \n",
" 14.852022 \n",
" 6.122162 \n",
" 0.49706 \n",
" 0.284466 \n",
" 18.529584 \n",
" 82.416803 \n",
" 2094.262452 \n",
" 39.948656 \n",
" 90.493248 \n",
" 0.155829 \n",
" 0 \n",
" \n",
" \n",
" 4 \n",
" 0.380297 \n",
" 3733.04844 \n",
" 85.200147 \n",
" 14.103738 \n",
" 8.138688 \n",
" 3.942255 \n",
" 0.054810 \n",
" 3.396778 \n",
" 102.151980 \n",
" 5728.73412 \n",
" 24.0108 \n",
" 324.546318 \n",
" 149.717165 \n",
" 6074.859475 \n",
" 257.432377 \n",
" 82.213495 \n",
" 0.536467 \n",
" 72.644264 \n",
" 30.537722 \n",
" 0.025472 \n",
" 1.050225 \n",
" 0.693150 \n",
" 31.724726 \n",
" 0.827550 \n",
" 34.415360 \n",
" 74.06532 \n",
" 200.178160 \n",
" 0.23868 \n",
" 0.207708 \n",
" 97.920120 \n",
" 52.83888 \n",
" 26.019912 \n",
" 1.144902 \n",
" 1.74307 \n",
" 9.064856 \n",
" 7.350720 \n",
" 3.490846 \n",
" 1403.656300 \n",
" 0.164268 \n",
" 109.125159 \n",
" 91.994825 \n",
" 51.141336 \n",
" 29.102640 \n",
" 4.274640 \n",
" 16198.049590 \n",
" 13.666727 \n",
" 8.153058 \n",
" 48.50134 \n",
" 0.121914 \n",
" 16.408728 \n",
" 146.109943 \n",
" 8524.370502 \n",
" 45.381316 \n",
" 36.262628 \n",
" 0.096614 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" AB AF AH AM AR AX AY \\\n",
"0 0.209377 3109.03329 85.200147 22.394407 8.138688 0.699861 0.025578 \n",
"1 0.145282 978.76416 85.200147 36.968889 8.138688 3.632190 0.025578 \n",
"2 0.470030 2635.10654 85.200147 32.360553 8.138688 6.732840 0.025578 \n",
"3 0.252107 3819.65177 120.201618 77.112203 8.138688 3.685344 0.025578 \n",
"4 0.380297 3733.04844 85.200147 14.103738 8.138688 3.942255 0.054810 \n",
"\n",
" AZ BC BD BN BP BQ \\\n",
"0 9.812214 5.555634 4126.58731 22.5984 175.638726 152.707705 \n",
"1 13.517790 1.229900 5496.92824 19.4205 155.868030 14.754720 \n",
"2 12.824570 1.229900 5135.78024 26.4825 128.988531 219.320160 \n",
"3 11.053708 1.229900 4169.67738 23.6577 237.282264 11.050410 \n",
"4 3.396778 102.151980 5728.73412 24.0108 324.546318 149.717165 \n",
"\n",
" BR BZ CB CC CD CF \\\n",
"0 823.928241 257.432377 47.223358 0.563481 23.387600 4.851915 \n",
"1 51.216883 257.432377 30.284345 0.484710 50.628208 6.085041 \n",
"2 482.141594 257.432377 32.563713 0.495852 85.955376 5.376488 \n",
"3 661.518640 257.432377 15.201914 0.717882 88.159360 2.347652 \n",
"4 6074.859475 257.432377 82.213495 0.536467 72.644264 30.537722 \n",
"\n",
" CH CL CR CS CU CW DA \\\n",
"0 0.023482 1.050225 0.069225 13.784111 1.302012 36.205956 69.08340 \n",
"1 0.031442 1.113875 1.117800 28.310953 1.357182 37.476568 70.79836 \n",
"2 0.036218 1.050225 0.700350 39.364743 1.009611 21.459644 70.81970 \n",
"3 0.029054 1.400300 0.636075 41.116960 0.722727 21.530392 47.27586 \n",
"4 0.025472 1.050225 0.693150 31.724726 0.827550 34.415360 74.06532 \n",
"\n",
" DE DF DH DI DL DN DU \\\n",
"0 295.570575 0.23868 0.284232 89.245560 84.31664 29.657104 5.310690 \n",
"1 178.553100 0.23868 0.363489 110.581815 75.74548 37.532000 0.005518 \n",
"2 321.426625 0.23868 0.210441 120.056438 65.46984 28.053464 1.289739 \n",
"3 196.607985 0.23868 0.292431 139.824570 71.57120 24.354856 2.655345 \n",
"4 200.178160 0.23868 0.207708 97.920120 52.83888 26.019912 1.144902 \n",
"\n",
" DV DY EB EE EG EH EL \\\n",
"0 1.74307 23.187704 7.294176 1.987283 1433.166750 0.949104 30.879420 \n",
"1 1.74307 17.222328 4.926396 0.858603 1111.287150 0.003042 109.125159 \n",
"2 1.74307 36.861352 7.813674 8.146651 1494.076488 0.377208 109.125159 \n",
"3 1.74307 52.003884 7.386060 3.813326 15691.552180 0.614484 31.674357 \n",
"4 1.74307 9.064856 7.350720 3.490846 1403.656300 0.164268 109.125159 \n",
"\n",
" EP EU FC FD FE FI \\\n",
"0 78.526968 3.828384 13.394640 10.265073 9028.291921 3.583450 \n",
"1 95.415086 52.260480 17.175984 0.296850 6785.003474 10.358927 \n",
"2 78.526968 5.390628 224.207424 8.745201 8338.906181 11.626917 \n",
"3 78.526968 31.323372 59.301984 7.884336 10965.766040 14.852022 \n",
"4 91.994825 51.141336 29.102640 4.274640 16198.049590 13.666727 \n",
"\n",
" FL FR FS GB GE GF \\\n",
"0 7.298162 1.73855 0.094822 11.339138 72.611063 2003.810319 \n",
"1 0.173229 0.49706 0.568932 9.292698 72.611063 27981.562750 \n",
"2 7.709560 0.97556 1.198821 37.077772 88.609437 13676.957810 \n",
"3 6.122162 0.49706 0.284466 18.529584 82.416803 2094.262452 \n",
"4 8.153058 48.50134 0.121914 16.408728 146.109943 8524.370502 \n",
"\n",
" GH GI GL Class \n",
"0 22.136229 69.834944 0.120343 1 \n",
"1 29.135430 32.131996 21.978000 0 \n",
"2 28.022851 35.192676 0.196941 0 \n",
"3 39.948656 90.493248 0.155829 0 \n",
"4 45.381316 36.262628 0.096614 1 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df = train_df.drop([\"Id\",\"EJ\"],axis=1)\n",
"train_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "75202a79",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.915026Z",
"iopub.status.busy": "2023-07-05T15:46:41.913785Z",
"iopub.status.idle": "2023-07-05T15:46:41.954808Z",
"shell.execute_reply": "2023-07-05T15:46:41.953821Z"
},
"papermill": {
"duration": 0.053644,
"end_time": "2023-07-05T15:46:41.956887",
"exception": false,
"start_time": "2023-07-05T15:46:41.903243",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"N_train = train_df.select_dtypes(include=np.number)\n",
"N_test = test_df.select_dtypes(include=np.number)\n",
"train_df = N_train.fillna(N_train.median())\n",
"test_df = N_test.fillna(N_test.median())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "740c6cf7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:41.976788Z",
"iopub.status.busy": "2023-07-05T15:46:41.976475Z",
"iopub.status.idle": "2023-07-05T15:46:41.996910Z",
"shell.execute_reply": "2023-07-05T15:46:41.995979Z"
},
"papermill": {
"duration": 0.032929,
"end_time": "2023-07-05T15:46:41.999153",
"exception": false,
"start_time": "2023-07-05T15:46:41.966224",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 617 entries, 0 to 616\n",
"Data columns (total 56 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 AB 617 non-null float64\n",
" 1 AF 617 non-null float64\n",
" 2 AH 617 non-null float64\n",
" 3 AM 617 non-null float64\n",
" 4 AR 617 non-null float64\n",
" 5 AX 617 non-null float64\n",
" 6 AY 617 non-null float64\n",
" 7 AZ 617 non-null float64\n",
" 8 BC 617 non-null float64\n",
" 9 BD 617 non-null float64\n",
" 10 BN 617 non-null float64\n",
" 11 BP 617 non-null float64\n",
" 12 BQ 617 non-null float64\n",
" 13 BR 617 non-null float64\n",
" 14 BZ 617 non-null float64\n",
" 15 CB 617 non-null float64\n",
" 16 CC 617 non-null float64\n",
" 17 CD 617 non-null float64\n",
" 18 CF 617 non-null float64\n",
" 19 CH 617 non-null float64\n",
" 20 CL 617 non-null float64\n",
" 21 CR 617 non-null float64\n",
" 22 CS 617 non-null float64\n",
" 23 CU 617 non-null float64\n",
" 24 CW 617 non-null float64\n",
" 25 DA 617 non-null float64\n",
" 26 DE 617 non-null float64\n",
" 27 DF 617 non-null float64\n",
" 28 DH 617 non-null float64\n",
" 29 DI 617 non-null float64\n",
" 30 DL 617 non-null float64\n",
" 31 DN 617 non-null float64\n",
" 32 DU 617 non-null float64\n",
" 33 DV 617 non-null float64\n",
" 34 DY 617 non-null float64\n",
" 35 EB 617 non-null float64\n",
" 36 EE 617 non-null float64\n",
" 37 EG 617 non-null float64\n",
" 38 EH 617 non-null float64\n",
" 39 EL 617 non-null float64\n",
" 40 EP 617 non-null float64\n",
" 41 EU 617 non-null float64\n",
" 42 FC 617 non-null float64\n",
" 43 FD 617 non-null float64\n",
" 44 FE 617 non-null float64\n",
" 45 FI 617 non-null float64\n",
" 46 FL 617 non-null float64\n",
" 47 FR 617 non-null float64\n",
" 48 FS 617 non-null float64\n",
" 49 GB 617 non-null float64\n",
" 50 GE 617 non-null float64\n",
" 51 GF 617 non-null float64\n",
" 52 GH 617 non-null float64\n",
" 53 GI 617 non-null float64\n",
" 54 GL 617 non-null float64\n",
" 55 Class 617 non-null int64 \n",
"dtypes: float64(55), int64(1)\n",
"memory usage: 270.1 KB\n"
]
}
],
"source": [
"train_df.info()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "52261786",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.020415Z",
"iopub.status.busy": "2023-07-05T15:46:42.020019Z",
"iopub.status.idle": "2023-07-05T15:46:42.040268Z",
"shell.execute_reply": "2023-07-05T15:46:42.038368Z"
},
"papermill": {
"duration": 0.033429,
"end_time": "2023-07-05T15:46:42.042876",
"exception": false,
"start_time": "2023-07-05T15:46:42.009447",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 5 entries, 0 to 4\n",
"Data columns (total 55 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 AB 5 non-null float64\n",
" 1 AF 5 non-null float64\n",
" 2 AH 5 non-null float64\n",
" 3 AM 5 non-null float64\n",
" 4 AR 5 non-null float64\n",
" 5 AX 5 non-null float64\n",
" 6 AY 5 non-null float64\n",
" 7 AZ 5 non-null float64\n",
" 8 BC 5 non-null float64\n",
" 9 BD 5 non-null float64\n",
" 10 BN 5 non-null float64\n",
" 11 BP 5 non-null float64\n",
" 12 BQ 5 non-null float64\n",
" 13 BR 5 non-null float64\n",
" 14 BZ 5 non-null float64\n",
" 15 CB 5 non-null float64\n",
" 16 CC 5 non-null float64\n",
" 17 CD 5 non-null float64\n",
" 18 CF 5 non-null float64\n",
" 19 CH 5 non-null float64\n",
" 20 CL 5 non-null float64\n",
" 21 CR 5 non-null float64\n",
" 22 CS 5 non-null float64\n",
" 23 CU 5 non-null float64\n",
" 24 CW 5 non-null float64\n",
" 25 DA 5 non-null float64\n",
" 26 DE 5 non-null float64\n",
" 27 DF 5 non-null float64\n",
" 28 DH 5 non-null float64\n",
" 29 DI 5 non-null float64\n",
" 30 DL 5 non-null float64\n",
" 31 DN 5 non-null float64\n",
" 32 DU 5 non-null float64\n",
" 33 DV 5 non-null float64\n",
" 34 DY 5 non-null float64\n",
" 35 EB 5 non-null float64\n",
" 36 EE 5 non-null float64\n",
" 37 EG 5 non-null float64\n",
" 38 EH 5 non-null float64\n",
" 39 EL 5 non-null float64\n",
" 40 EP 5 non-null float64\n",
" 41 EU 5 non-null float64\n",
" 42 FC 5 non-null float64\n",
" 43 FD 5 non-null float64\n",
" 44 FE 5 non-null float64\n",
" 45 FI 5 non-null float64\n",
" 46 FL 5 non-null float64\n",
" 47 FR 5 non-null float64\n",
" 48 FS 5 non-null float64\n",
" 49 GB 5 non-null float64\n",
" 50 GE 5 non-null float64\n",
" 51 GF 5 non-null float64\n",
" 52 GH 5 non-null float64\n",
" 53 GI 5 non-null float64\n",
" 54 GL 5 non-null float64\n",
"dtypes: float64(55)\n",
"memory usage: 2.3 KB\n"
]
}
],
"source": [
"test_df.info()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ce457e80",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.064416Z",
"iopub.status.busy": "2023-07-05T15:46:42.064040Z",
"iopub.status.idle": "2023-07-05T15:46:42.084853Z",
"shell.execute_reply": "2023-07-05T15:46:42.083499Z"
},
"papermill": {
"duration": 0.033768,
"end_time": "2023-07-05T15:46:42.086645",
"exception": false,
"start_time": "2023-07-05T15:46:42.052877",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 617 entries, 0 to 616\n",
"Data columns (total 56 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 AB 617 non-null float64\n",
" 1 AF 617 non-null float64\n",
" 2 AH 617 non-null float64\n",
" 3 AM 617 non-null float64\n",
" 4 AR 617 non-null float64\n",
" 5 AX 617 non-null float64\n",
" 6 AY 617 non-null float64\n",
" 7 AZ 617 non-null float64\n",
" 8 BC 617 non-null float64\n",
" 9 BD 617 non-null float64\n",
" 10 BN 617 non-null float64\n",
" 11 BP 617 non-null float64\n",
" 12 BQ 617 non-null float64\n",
" 13 BR 617 non-null float64\n",
" 14 BZ 617 non-null float64\n",
" 15 CB 617 non-null float64\n",
" 16 CC 617 non-null float64\n",
" 17 CD 617 non-null float64\n",
" 18 CF 617 non-null float64\n",
" 19 CH 617 non-null float64\n",
" 20 CL 617 non-null float64\n",
" 21 CR 617 non-null float64\n",
" 22 CS 617 non-null float64\n",
" 23 CU 617 non-null float64\n",
" 24 CW 617 non-null float64\n",
" 25 DA 617 non-null float64\n",
" 26 DE 617 non-null float64\n",
" 27 DF 617 non-null float64\n",
" 28 DH 617 non-null float64\n",
" 29 DI 617 non-null float64\n",
" 30 DL 617 non-null float64\n",
" 31 DN 617 non-null float64\n",
" 32 DU 617 non-null float64\n",
" 33 DV 617 non-null float64\n",
" 34 DY 617 non-null float64\n",
" 35 EB 617 non-null float64\n",
" 36 EE 617 non-null float64\n",
" 37 EG 617 non-null float64\n",
" 38 EH 617 non-null float64\n",
" 39 EL 617 non-null float64\n",
" 40 EP 617 non-null float64\n",
" 41 EU 617 non-null float64\n",
" 42 FC 617 non-null float64\n",
" 43 FD 617 non-null float64\n",
" 44 FE 617 non-null float64\n",
" 45 FI 617 non-null float64\n",
" 46 FL 617 non-null float64\n",
" 47 FR 617 non-null float64\n",
" 48 FS 617 non-null float64\n",
" 49 GB 617 non-null float64\n",
" 50 GE 617 non-null float64\n",
" 51 GF 617 non-null float64\n",
" 52 GH 617 non-null float64\n",
" 53 GI 617 non-null float64\n",
" 54 GL 617 non-null float64\n",
" 55 Class 617 non-null int64 \n",
"dtypes: float64(55), int64(1)\n",
"memory usage: 270.1 KB\n"
]
}
],
"source": [
"train_df.info()"
]
},
{
"cell_type": "markdown",
"id": "53af5048",
"metadata": {
"papermill": {
"duration": 0.009615,
"end_time": "2023-07-05T15:46:42.106307",
"exception": false,
"start_time": "2023-07-05T15:46:42.096692",
"status": "completed"
},
"tags": []
},
"source": [
"# 5. Train the Model"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "24ab9237",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.127548Z",
"iopub.status.busy": "2023-07-05T15:46:42.127168Z",
"iopub.status.idle": "2023-07-05T15:46:42.134126Z",
"shell.execute_reply": "2023-07-05T15:46:42.133001Z"
},
"papermill": {
"duration": 0.020429,
"end_time": "2023-07-05T15:46:42.136536",
"exception": false,
"start_time": "2023-07-05T15:46:42.116107",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"X = train_df.drop('Class',axis=1)\n",
"y = train_df['Class']"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "9f2f79d8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.158760Z",
"iopub.status.busy": "2023-07-05T15:46:42.158114Z",
"iopub.status.idle": "2023-07-05T15:46:42.164813Z",
"shell.execute_reply": "2023-07-05T15:46:42.163728Z"
},
"papermill": {
"duration": 0.0198,
"end_time": "2023-07-05T15:46:42.166661",
"exception": false,
"start_time": "2023-07-05T15:46:42.146861",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "f79078e6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.189052Z",
"iopub.status.busy": "2023-07-05T15:46:42.187817Z",
"iopub.status.idle": "2023-07-05T15:46:42.193132Z",
"shell.execute_reply": "2023-07-05T15:46:42.192471Z"
},
"papermill": {
"duration": 0.018245,
"end_time": "2023-07-05T15:46:42.195083",
"exception": false,
"start_time": "2023-07-05T15:46:42.176838",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"param_grid = {'n_estimators':[50,100,200],\n",
" 'learning_rate':[0.001,0.1,0.05,0.2],\n",
" 'max_depth':[3,4,5,6]}"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "ce412f32",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.216680Z",
"iopub.status.busy": "2023-07-05T15:46:42.215833Z",
"iopub.status.idle": "2023-07-05T15:46:42.221501Z",
"shell.execute_reply": "2023-07-05T15:46:42.220118Z"
},
"papermill": {
"duration": 0.018266,
"end_time": "2023-07-05T15:46:42.223345",
"exception": false,
"start_time": "2023-07-05T15:46:42.205079",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"model = GradientBoostingClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6467bd48",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.244612Z",
"iopub.status.busy": "2023-07-05T15:46:42.244236Z",
"iopub.status.idle": "2023-07-05T15:46:42.248925Z",
"shell.execute_reply": "2023-07-05T15:46:42.247627Z"
},
"papermill": {
"duration": 0.018113,
"end_time": "2023-07-05T15:46:42.251243",
"exception": false,
"start_time": "2023-07-05T15:46:42.233130",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"grid = GridSearchCV(model,param_grid,error_score=\"raise\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f75ed97a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:46:42.274148Z",
"iopub.status.busy": "2023-07-05T15:46:42.272930Z",
"iopub.status.idle": "2023-07-05T15:49:40.114804Z",
"shell.execute_reply": "2023-07-05T15:49:40.113601Z"
},
"papermill": {
"duration": 177.864663,
"end_time": "2023-07-05T15:49:40.126470",
"exception": false,
"start_time": "2023-07-05T15:46:42.261807",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"GridSearchCV(error_score='raise', estimator=GradientBoostingClassifier(),\n",
" param_grid={'learning_rate': [0.001, 0.1, 0.05, 0.2],\n",
" 'max_depth': [3, 4, 5, 6],\n",
" 'n_estimators': [50, 100, 200]}) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. GridSearchCV GridSearchCV(error_score='raise', estimator=GradientBoostingClassifier(),\n",
" param_grid={'learning_rate': [0.001, 0.1, 0.05, 0.2],\n",
" 'max_depth': [3, 4, 5, 6],\n",
" 'n_estimators': [50, 100, 200]}) "
],
"text/plain": [
"GridSearchCV(error_score='raise', estimator=GradientBoostingClassifier(),\n",
" param_grid={'learning_rate': [0.001, 0.1, 0.05, 0.2],\n",
" 'max_depth': [3, 4, 5, 6],\n",
" 'n_estimators': [50, 100, 200]})"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.fit(X_train,y_train)"
]
},
{
"cell_type": "markdown",
"id": "cf6ab2ba",
"metadata": {
"papermill": {
"duration": 0.010273,
"end_time": "2023-07-05T15:49:40.147305",
"exception": false,
"start_time": "2023-07-05T15:49:40.137032",
"status": "completed"
},
"tags": []
},
"source": [
"# 6. Best Fits"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "36ef6964",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:40.170170Z",
"iopub.status.busy": "2023-07-05T15:49:40.169814Z",
"iopub.status.idle": "2023-07-05T15:49:40.178428Z",
"shell.execute_reply": "2023-07-05T15:49:40.177118Z"
},
"papermill": {
"duration": 0.022693,
"end_time": "2023-07-05T15:49:40.180899",
"exception": false,
"start_time": "2023-07-05T15:49:40.158206",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 50}"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "717d1424",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:40.205736Z",
"iopub.status.busy": "2023-07-05T15:49:40.205389Z",
"iopub.status.idle": "2023-07-05T15:49:40.210800Z",
"shell.execute_reply": "2023-07-05T15:49:40.209689Z"
},
"papermill": {
"duration": 0.02199,
"end_time": "2023-07-05T15:49:40.213517",
"exception": false,
"start_time": "2023-07-05T15:49:40.191527",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"model = GradientBoostingClassifier(learning_rate = 0.1, max_depth = 4, n_estimators = 200)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "4ee96a7b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:40.236929Z",
"iopub.status.busy": "2023-07-05T15:49:40.236544Z",
"iopub.status.idle": "2023-07-05T15:49:41.747169Z",
"shell.execute_reply": "2023-07-05T15:49:41.746147Z"
},
"papermill": {
"duration": 1.524483,
"end_time": "2023-07-05T15:49:41.749089",
"exception": false,
"start_time": "2023-07-05T15:49:40.224606",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"GradientBoostingClassifier(max_depth=4, n_estimators=200) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
],
"text/plain": [
"GradientBoostingClassifier(max_depth=4, n_estimators=200)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X_train,y_train)"
]
},
{
"cell_type": "markdown",
"id": "6c18124b",
"metadata": {
"papermill": {
"duration": 0.009762,
"end_time": "2023-07-05T15:49:41.769234",
"exception": false,
"start_time": "2023-07-05T15:49:41.759472",
"status": "completed"
},
"tags": []
},
"source": [
"# 7. Model Prediction"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "721f03ed",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:41.791577Z",
"iopub.status.busy": "2023-07-05T15:49:41.790410Z",
"iopub.status.idle": "2023-07-05T15:49:41.800711Z",
"shell.execute_reply": "2023-07-05T15:49:41.799705Z"
},
"papermill": {
"duration": 0.023652,
"end_time": "2023-07-05T15:49:41.802938",
"exception": false,
"start_time": "2023-07-05T15:49:41.779286",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,\n",
" 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,\n",
" 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n",
" 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,\n",
" 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0,\n",
" 0])"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred = model.predict(X_test)\n",
"pred"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "5b2e84a8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:41.825168Z",
"iopub.status.busy": "2023-07-05T15:49:41.824549Z",
"iopub.status.idle": "2023-07-05T15:49:41.833740Z",
"shell.execute_reply": "2023-07-05T15:49:41.833019Z"
},
"papermill": {
"duration": 0.021906,
"end_time": "2023-07-05T15:49:41.835395",
"exception": false,
"start_time": "2023-07-05T15:49:41.813489",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array([[4.29119504e-04, 9.99570880e-01],\n",
" [4.29119504e-04, 9.99570880e-01],\n",
" [4.29119504e-04, 9.99570880e-01],\n",
" [4.29119504e-04, 9.99570880e-01],\n",
" [4.29119504e-04, 9.99570880e-01]])"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pred = model.predict_proba(test_df)\n",
"pred"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "61e2f349",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:41.858384Z",
"iopub.status.busy": "2023-07-05T15:49:41.857801Z",
"iopub.status.idle": "2023-07-05T15:49:41.862687Z",
"shell.execute_reply": "2023-07-05T15:49:41.861652Z"
},
"papermill": {
"duration": 0.018109,
"end_time": "2023-07-05T15:49:41.864387",
"exception": false,
"start_time": "2023-07-05T15:49:41.846278",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"class_0 = pred[:, 0]\n",
"class_1 = pred[:, 1]"
]
},
{
"cell_type": "markdown",
"id": "f75a6510",
"metadata": {
"papermill": {
"duration": 0.009905,
"end_time": "2023-07-05T15:49:41.884950",
"exception": false,
"start_time": "2023-07-05T15:49:41.875045",
"status": "completed"
},
"tags": []
},
"source": [
"# 8. Sample_Submission File"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "6374cc63",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:41.907025Z",
"iopub.status.busy": "2023-07-05T15:49:41.906394Z",
"iopub.status.idle": "2023-07-05T15:49:41.923742Z",
"shell.execute_reply": "2023-07-05T15:49:41.922307Z"
},
"papermill": {
"duration": 0.03132,
"end_time": "2023-07-05T15:49:41.926489",
"exception": false,
"start_time": "2023-07-05T15:49:41.895169",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Id \n",
" class_0 \n",
" class_1 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 00eed32682bb \n",
" 0.5 \n",
" 0.5 \n",
" \n",
" \n",
" 1 \n",
" 010ebe33f668 \n",
" 0.5 \n",
" 0.5 \n",
" \n",
" \n",
" 2 \n",
" 02fa521e1838 \n",
" 0.5 \n",
" 0.5 \n",
" \n",
" \n",
" 3 \n",
" 040e15f562a2 \n",
" 0.5 \n",
" 0.5 \n",
" \n",
" \n",
" 4 \n",
" 046e85c7cc7f \n",
" 0.5 \n",
" 0.5 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id class_0 class_1\n",
"0 00eed32682bb 0.5 0.5\n",
"1 010ebe33f668 0.5 0.5\n",
"2 02fa521e1838 0.5 0.5\n",
"3 040e15f562a2 0.5 0.5\n",
"4 046e85c7cc7f 0.5 0.5"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv')\n",
"test.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "ab58623f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:41.950246Z",
"iopub.status.busy": "2023-07-05T15:49:41.949912Z",
"iopub.status.idle": "2023-07-05T15:49:41.956851Z",
"shell.execute_reply": "2023-07-05T15:49:41.955551Z"
},
"papermill": {
"duration": 0.021037,
"end_time": "2023-07-05T15:49:41.958753",
"exception": false,
"start_time": "2023-07-05T15:49:41.937716",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"test_id = test[\"Id\"]\n",
"submission = pd.DataFrame(pred, columns=[\"class_0\", \"class_1\"])\n",
"submission.insert(0, \"Id\", test_id)"
]
},
{
"cell_type": "markdown",
"id": "f540b1dd",
"metadata": {
"papermill": {
"duration": 0.010046,
"end_time": "2023-07-05T15:49:41.979728",
"exception": false,
"start_time": "2023-07-05T15:49:41.969682",
"status": "completed"
},
"tags": []
},
"source": [
"# 9. Final Submission"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "9bd265d9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:42.001818Z",
"iopub.status.busy": "2023-07-05T15:49:42.001479Z",
"iopub.status.idle": "2023-07-05T15:49:42.011925Z",
"shell.execute_reply": "2023-07-05T15:49:42.010760Z"
},
"papermill": {
"duration": 0.024132,
"end_time": "2023-07-05T15:49:42.014108",
"exception": false,
"start_time": "2023-07-05T15:49:41.989976",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"submission.to_csv('submission.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "8f6186f4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-05T15:49:42.037573Z",
"iopub.status.busy": "2023-07-05T15:49:42.036964Z",
"iopub.status.idle": "2023-07-05T15:49:42.047251Z",
"shell.execute_reply": "2023-07-05T15:49:42.046333Z"
},
"papermill": {
"duration": 0.024469,
"end_time": "2023-07-05T15:49:42.049648",
"exception": false,
"start_time": "2023-07-05T15:49:42.025179",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Id \n",
" class_0 \n",
" class_1 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 00eed32682bb \n",
" 0.000429 \n",
" 0.999571 \n",
" \n",
" \n",
" 1 \n",
" 010ebe33f668 \n",
" 0.000429 \n",
" 0.999571 \n",
" \n",
" \n",
" 2 \n",
" 02fa521e1838 \n",
" 0.000429 \n",
" 0.999571 \n",
" \n",
" \n",
" 3 \n",
" 040e15f562a2 \n",
" 0.000429 \n",
" 0.999571 \n",
" \n",
" \n",
" 4 \n",
" 046e85c7cc7f \n",
" 0.000429 \n",
" 0.999571 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id class_0 class_1\n",
"0 00eed32682bb 0.000429 0.999571\n",
"1 010ebe33f668 0.000429 0.999571\n",
"2 02fa521e1838 0.000429 0.999571\n",
"3 040e15f562a2 0.000429 0.999571\n",
"4 046e85c7cc7f 0.000429 0.999571"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"submission.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"papermill": {
"default_parameters": {},
"duration": 193.401066,
"end_time": "2023-07-05T15:49:42.982911",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2023-07-05T15:46:29.581845",
"version": "2.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}