{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "35b7f880",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Age | \n",
" Gender | \n",
" Region | \n",
" Marital_status | \n",
" Number Of Dependants | \n",
" BMI_Category | \n",
" Smoking_Status | \n",
" Employment_Status | \n",
" Income_Level | \n",
" Income_Lakhs | \n",
" Medical History | \n",
" Insurance_Plan | \n",
" Annual_Premium_Amount | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 26 | \n",
" Male | \n",
" Northwest | \n",
" Unmarried | \n",
" 0 | \n",
" Normal | \n",
" No Smoking | \n",
" Salaried | \n",
" <10L | \n",
" 6 | \n",
" Diabetes | \n",
" Bronze | \n",
" 9053 | \n",
"
\n",
" \n",
" | 1 | \n",
" 29 | \n",
" Female | \n",
" Southeast | \n",
" Married | \n",
" 2 | \n",
" Obesity | \n",
" Regular | \n",
" Salaried | \n",
" <10L | \n",
" 6 | \n",
" Diabetes | \n",
" Bronze | \n",
" 16339 | \n",
"
\n",
" \n",
" | 2 | \n",
" 49 | \n",
" Female | \n",
" Northeast | \n",
" Married | \n",
" 2 | \n",
" Normal | \n",
" No Smoking | \n",
" Self-Employed | \n",
" 10L - 25L | \n",
" 20 | \n",
" High blood pressure | \n",
" Silver | \n",
" 18164 | \n",
"
\n",
" \n",
" | 3 | \n",
" 30 | \n",
" Female | \n",
" Southeast | \n",
" Married | \n",
" 3 | \n",
" Normal | \n",
" No Smoking | \n",
" Salaried | \n",
" > 40L | \n",
" 77 | \n",
" No Disease | \n",
" Gold | \n",
" 20303 | \n",
"
\n",
" \n",
" | 4 | \n",
" 18 | \n",
" Male | \n",
" Northeast | \n",
" Unmarried | \n",
" 0 | \n",
" Overweight | \n",
" Regular | \n",
" Self-Employed | \n",
" > 40L | \n",
" 99 | \n",
" High blood pressure | \n",
" Silver | \n",
" 13365 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Age Gender Region Marital_status Number Of Dependants BMI_Category \\\n",
"0 26 Male Northwest Unmarried 0 Normal \n",
"1 29 Female Southeast Married 2 Obesity \n",
"2 49 Female Northeast Married 2 Normal \n",
"3 30 Female Southeast Married 3 Normal \n",
"4 18 Male Northeast Unmarried 0 Overweight \n",
"\n",
" Smoking_Status Employment_Status Income_Level Income_Lakhs \\\n",
"0 No Smoking Salaried <10L 6 \n",
"1 Regular Salaried <10L 6 \n",
"2 No Smoking Self-Employed 10L - 25L 20 \n",
"3 No Smoking Salaried > 40L 77 \n",
"4 Regular Self-Employed > 40L 99 \n",
"\n",
" Medical History Insurance_Plan Annual_Premium_Amount \n",
"0 Diabetes Bronze 9053 \n",
"1 Diabetes Bronze 16339 \n",
"2 High blood pressure Silver 18164 \n",
"3 No Disease Gold 20303 \n",
"4 High blood pressure Silver 13365 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_excel(\"premiums.xlsx\")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2b820232",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(50000, 13)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "206a3cdb",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 50000.000000\n",
"mean 34.593480\n",
"std 15.000437\n",
"min 18.000000\n",
"25% 22.000000\n",
"50% 31.000000\n",
"75% 45.000000\n",
"max 356.000000\n",
"Name: Age, dtype: float64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.Age.describe()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "8b905a77",
"metadata": {},
"outputs": [],
"source": [
"df_young = df[df.Age<=25]\n",
"df_rest = df[df.Age>25]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f1d671ec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((20096, 13), (29904, 13))"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_young.shape, df_rest.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f566ae1c",
"metadata": {},
"outputs": [],
"source": [
"df_young.to_excel(\"premiums_young.xlsx\", index=False)\n",
"df_rest.to_excel(\"premiums_rest.xlsx\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ebcc0f68",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"id": "469c45f4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Age | \n",
" Gender | \n",
" Region | \n",
" Marital_status | \n",
" Number Of Dependants | \n",
" BMI_Category | \n",
" Smoking_Status | \n",
" Employment_Status | \n",
" Income_Level | \n",
" Income_Lakhs | \n",
" Medical History | \n",
" Insurance_Plan | \n",
" Annual_Premium_Amount | \n",
" Genetical_Risk | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 26 | \n",
" Male | \n",
" Northwest | \n",
" Unmarried | \n",
" 0 | \n",
" Normal | \n",
" No Smoking | \n",
" Salaried | \n",
" <10L | \n",
" 6 | \n",
" Diabetes | \n",
" Bronze | \n",
" 9053 | \n",
" 5 | \n",
"
\n",
" \n",
" | 1 | \n",
" 29 | \n",
" Female | \n",
" Southeast | \n",
" Married | \n",
" 2 | \n",
" Obesity | \n",
" Regular | \n",
" Salaried | \n",
" <10L | \n",
" 6 | \n",
" Diabetes | \n",
" Bronze | \n",
" 16339 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Age Gender Region Marital_status Number Of Dependants BMI_Category \\\n",
"0 26 Male Northwest Unmarried 0 Normal \n",
"1 29 Female Southeast Married 2 Obesity \n",
"\n",
" Smoking_Status Employment_Status Income_Level Income_Lakhs Medical History \\\n",
"0 No Smoking Salaried <10L 6 Diabetes \n",
"1 Regular Salaried <10L 6 Diabetes \n",
"\n",
" Insurance_Plan Annual_Premium_Amount Genetical_Risk \n",
"0 Bronze 9053 5 \n",
"1 Bronze 16339 0 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_excel(\"premiums_with_gr.xlsx\")\n",
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "48104400",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((20096, 14), (29904, 14))"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_young = df[df.Age<=25]\n",
"df_rest = df[df.Age>25]\n",
"\n",
"df_young.shape, df_rest.shape"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "18a2d9ce",
"metadata": {},
"outputs": [],
"source": [
"df_young.to_excel(\"premiums_young_with_gr.xlsx\", index=False)\n",
"df_rest.to_excel(\"premiums_rest_with_gr.xlsx\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}