{
"metadata": {
"kernelspec": {
"language": "python",
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"version": "3.6.4",
"file_extension": ".py",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"name": "python",
"mimetype": "text/x-python"
},
"kaggle": {
"accelerator": "none",
"dataSources": [
{
"sourceId": 12374924,
"sourceType": "datasetVersion",
"datasetId": 7802809
}
],
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook",
"isGpuEnabled": false
},
"colab": {
"provenance": []
}
},
"nbformat_minor": 0,
"nbformat": 4,
"cells": [
{
"source": [
"# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,\n",
"# THEN FEEL FREE TO DELETE THIS CELL.\n",
"# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON\n",
"# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR\n",
"# NOTEBOOK.\n",
"import kagglehub\n",
"gmudit_employer_data_path = kagglehub.dataset_download('gmudit/employer-data')\n",
"\n",
"print('Data source import complete.')\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wlTJKTg-OKTm",
"outputId": "4cc02ed2-f1ec-40cf-a9fe-e53ed3b9fb1d"
},
"cell_type": "code",
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Downloading from https://www.kaggle.com/api/v1/datasets/download/gmudit/employer-data?dataset_version_number=1...\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"100%|██████████| 163k/163k [00:00<00:00, 496kB/s]"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Extracting files...\n",
"Data source import complete.\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"\n"
]
}
],
"execution_count": null
},
{
"cell_type": "code",
"source": [
"gmudit_employer_data_path"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "BoE83HlnOQW_",
"outputId": "ef513373-19a6-4ad4-d77a-380c9135b3f8"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'/root/.cache/kagglehub/datasets/gmudit/employer-data/versions/1'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
"# For example, here's several helpful packages to load\n",
"\n",
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"\n",
"# Input data files are available in the read-only \"../input/\" directory\n",
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
"\n",
"import os\n",
"for dirname, _, filenames in os.walk('/root/.cache/kagglehub/datasets/gmudit/employer-data/versions/1'):\n",
" for filename in filenames:\n",
" print(os.path.join(dirname, filename))\n",
"\n",
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\"\n",
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
],
"metadata": {
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"trusted": true,
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "X1ZiOTGIOKTo",
"outputId": "539ddc40-9e2e-4fb4-8a72-d1da9c0f4e65"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/root/.cache/kagglehub/datasets/gmudit/employer-data/versions/1/Employers_data.csv\n"
]
}
],
"execution_count": null
},
{
"cell_type": "code",
"source": [
"emp = pd.read_csv('/root/.cache/kagglehub/datasets/gmudit/employer-data/versions/1/Employers_data.csv')\n",
"emp.head(5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "-0PQPjiLOTAk",
"outputId": "e637a311-45c8-4cc5-d62a-78091b5ef329"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Employee_ID Name Age Gender Department Job_Title \\\n",
"0 1 Merle Ingram 24 Female Engineering Engineer \n",
"1 2 John Mayes 56 Male Sales Executive \n",
"2 3 Carlos Wille 21 Male Engineering Intern \n",
"3 4 Michael Bryant 30 Male Finance Analyst \n",
"4 5 Paula Douglas 25 Female HR Analyst \n",
"\n",
" Experience_Years Education_Level Location Salary \n",
"0 1 Master Austin 90000 \n",
"1 33 Master Seattle 195000 \n",
"2 1 Bachelor New York 35000 \n",
"3 9 Bachelor New York 75000 \n",
"4 2 Master Seattle 70000 "
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Employee_ID \n",
" Name \n",
" Age \n",
" Gender \n",
" Department \n",
" Job_Title \n",
" Experience_Years \n",
" Education_Level \n",
" Location \n",
" Salary \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" Merle Ingram \n",
" 24 \n",
" Female \n",
" Engineering \n",
" Engineer \n",
" 1 \n",
" Master \n",
" Austin \n",
" 90000 \n",
" \n",
" \n",
" 1 \n",
" 2 \n",
" John Mayes \n",
" 56 \n",
" Male \n",
" Sales \n",
" Executive \n",
" 33 \n",
" Master \n",
" Seattle \n",
" 195000 \n",
" \n",
" \n",
" 2 \n",
" 3 \n",
" Carlos Wille \n",
" 21 \n",
" Male \n",
" Engineering \n",
" Intern \n",
" 1 \n",
" Bachelor \n",
" New York \n",
" 35000 \n",
" \n",
" \n",
" 3 \n",
" 4 \n",
" Michael Bryant \n",
" 30 \n",
" Male \n",
" Finance \n",
" Analyst \n",
" 9 \n",
" Bachelor \n",
" New York \n",
" 75000 \n",
" \n",
" \n",
" 4 \n",
" 5 \n",
" Paula Douglas \n",
" 25 \n",
" Female \n",
" HR \n",
" Analyst \n",
" 2 \n",
" Master \n",
" Seattle \n",
" 70000 \n",
" \n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "emp",
"summary": "{\n \"name\": \"emp\",\n \"rows\": 10000,\n \"fields\": [\n {\n \"column\": \"Employee_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2886,\n \"min\": 1,\n \"max\": 10000,\n \"num_unique_values\": 10000,\n \"samples\": [\n 6253,\n 4685,\n 1732\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9868,\n \"samples\": [\n \"Cristal Rodriguez\",\n \"Stephanie Ellis\",\n \"Sean Green\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10,\n \"min\": 21,\n \"max\": 60,\n \"num_unique_values\": 40,\n \"samples\": [\n 22,\n 28,\n 31\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Gender\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Male\",\n \"Female\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Department\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Engineering\",\n \"Sales\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Job_Title\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Executive\",\n \"Manager\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Experience_Years\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9,\n \"min\": 0,\n \"max\": 37,\n \"num_unique_values\": 38,\n \"samples\": [\n 27,\n 34\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Education_Level\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Master\",\n \"Bachelor\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Seattle\",\n \"Chicago\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Salary\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 46066,\n \"min\": 25000,\n \"max\": 215000,\n \"num_unique_values\": 39,\n \"samples\": [\n 100000,\n 50000\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"emp.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nlba6h9XOYqU",
"outputId": "9e811967-31ff-4440-f16d-d2a8b1db819d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 10000 entries, 0 to 9999\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Employee_ID 10000 non-null int64 \n",
" 1 Name 10000 non-null object\n",
" 2 Age 10000 non-null int64 \n",
" 3 Gender 10000 non-null object\n",
" 4 Department 10000 non-null object\n",
" 5 Job_Title 10000 non-null object\n",
" 6 Experience_Years 10000 non-null int64 \n",
" 7 Education_Level 10000 non-null object\n",
" 8 Location 10000 non-null object\n",
" 9 Salary 10000 non-null int64 \n",
"dtypes: int64(4), object(6)\n",
"memory usage: 781.4+ KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"emp.drop(columns=['Employee_ID'], inplace=True)"
],
"metadata": {
"id": "pJHzWx3hOZzR"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"emp.duplicated().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8oLiKUWzOdB-",
"outputId": "bda192bd-e3c2-494f-aae3-eabd57d35c97"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"np.int64(0)"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"emp.isna().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 366
},
"id": "WuNxStUKOfNs",
"outputId": "e78a25f6-d38f-41b1-bbea-bf763084f4e7"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Name 0\n",
"Age 0\n",
"Gender 0\n",
"Department 0\n",
"Job_Title 0\n",
"Experience_Years 0\n",
"Education_Level 0\n",
"Location 0\n",
"Salary 0\n",
"dtype: int64"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 0 \n",
" \n",
" \n",
" \n",
" \n",
" Name \n",
" 0 \n",
" \n",
" \n",
" Age \n",
" 0 \n",
" \n",
" \n",
" Gender \n",
" 0 \n",
" \n",
" \n",
" Department \n",
" 0 \n",
" \n",
" \n",
" Job_Title \n",
" 0 \n",
" \n",
" \n",
" Experience_Years \n",
" 0 \n",
" \n",
" \n",
" Education_Level \n",
" 0 \n",
" \n",
" \n",
" Location \n",
" 0 \n",
" \n",
" \n",
" Salary \n",
" 0 \n",
" \n",
" \n",
"
\n",
"
dtype: int64 "
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"emp['Salary'].value_counts(normalize=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "dwH_xoFaOgc0",
"outputId": "30275cf0-700b-4c47-bb88-ae5b4926f927"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Salary\n",
"70000 0.0848\n",
"65000 0.0734\n",
"75000 0.0595\n",
"130000 0.0447\n",
"135000 0.0440\n",
"125000 0.0439\n",
"140000 0.0429\n",
"145000 0.0424\n",
"150000 0.0404\n",
"120000 0.0385\n",
"80000 0.0304\n",
"60000 0.0298\n",
"115000 0.0262\n",
"35000 0.0262\n",
"100000 0.0233\n",
"95000 0.0224\n",
"180000 0.0219\n",
"190000 0.0215\n",
"110000 0.0214\n",
"200000 0.0213\n",
"90000 0.0211\n",
"105000 0.0205\n",
"175000 0.0202\n",
"155000 0.0201\n",
"165000 0.0201\n",
"195000 0.0199\n",
"185000 0.0198\n",
"170000 0.0185\n",
"40000 0.0175\n",
"85000 0.0150\n",
"205000 0.0136\n",
"160000 0.0120\n",
"30000 0.0110\n",
"55000 0.0045\n",
"210000 0.0034\n",
"45000 0.0021\n",
"25000 0.0010\n",
"50000 0.0005\n",
"215000 0.0003\n",
"Name: proportion, dtype: float64"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" proportion \n",
" \n",
" \n",
" Salary \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 70000 \n",
" 0.0848 \n",
" \n",
" \n",
" 65000 \n",
" 0.0734 \n",
" \n",
" \n",
" 75000 \n",
" 0.0595 \n",
" \n",
" \n",
" 130000 \n",
" 0.0447 \n",
" \n",
" \n",
" 135000 \n",
" 0.0440 \n",
" \n",
" \n",
" 125000 \n",
" 0.0439 \n",
" \n",
" \n",
" 140000 \n",
" 0.0429 \n",
" \n",
" \n",
" 145000 \n",
" 0.0424 \n",
" \n",
" \n",
" 150000 \n",
" 0.0404 \n",
" \n",
" \n",
" 120000 \n",
" 0.0385 \n",
" \n",
" \n",
" 80000 \n",
" 0.0304 \n",
" \n",
" \n",
" 60000 \n",
" 0.0298 \n",
" \n",
" \n",
" 115000 \n",
" 0.0262 \n",
" \n",
" \n",
" 35000 \n",
" 0.0262 \n",
" \n",
" \n",
" 100000 \n",
" 0.0233 \n",
" \n",
" \n",
" 95000 \n",
" 0.0224 \n",
" \n",
" \n",
" 180000 \n",
" 0.0219 \n",
" \n",
" \n",
" 190000 \n",
" 0.0215 \n",
" \n",
" \n",
" 110000 \n",
" 0.0214 \n",
" \n",
" \n",
" 200000 \n",
" 0.0213 \n",
" \n",
" \n",
" 90000 \n",
" 0.0211 \n",
" \n",
" \n",
" 105000 \n",
" 0.0205 \n",
" \n",
" \n",
" 175000 \n",
" 0.0202 \n",
" \n",
" \n",
" 155000 \n",
" 0.0201 \n",
" \n",
" \n",
" 165000 \n",
" 0.0201 \n",
" \n",
" \n",
" 195000 \n",
" 0.0199 \n",
" \n",
" \n",
" 185000 \n",
" 0.0198 \n",
" \n",
" \n",
" 170000 \n",
" 0.0185 \n",
" \n",
" \n",
" 40000 \n",
" 0.0175 \n",
" \n",
" \n",
" 85000 \n",
" 0.0150 \n",
" \n",
" \n",
" 205000 \n",
" 0.0136 \n",
" \n",
" \n",
" 160000 \n",
" 0.0120 \n",
" \n",
" \n",
" 30000 \n",
" 0.0110 \n",
" \n",
" \n",
" 55000 \n",
" 0.0045 \n",
" \n",
" \n",
" 210000 \n",
" 0.0034 \n",
" \n",
" \n",
" 45000 \n",
" 0.0021 \n",
" \n",
" \n",
" 25000 \n",
" 0.0010 \n",
" \n",
" \n",
" 50000 \n",
" 0.0005 \n",
" \n",
" \n",
" 215000 \n",
" 0.0003 \n",
" \n",
" \n",
"
\n",
"
dtype: float64 "
]
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"source": [
"def remove_outliers(df, col):\n",
" q1 = df[col].quantile(0.25)\n",
" q3 = df[col].quantile(0.75)\n",
" iqr = q3 - q1\n",
" lower_bound = q1 - 1.5 * iqr\n",
" upper_bound = q3 + 1.5 * iqr\n",
" df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]\n",
" return df"
],
"metadata": {
"id": "WuEAGhA8Ok9M"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"for col in emp.select_dtypes(include='number').columns:\n",
" emp = remove_outliers(emp, col)"
],
"metadata": {
"id": "0x13-JbHOuVt"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"emp.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "h5YMeQQHOx7c",
"outputId": "446e21d6-3c99-43bd-ed98-d441422c0bc8"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 10000 entries, 0 to 9999\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Name 10000 non-null object\n",
" 1 Age 10000 non-null int64 \n",
" 2 Gender 10000 non-null object\n",
" 3 Department 10000 non-null object\n",
" 4 Job_Title 10000 non-null object\n",
" 5 Experience_Years 10000 non-null int64 \n",
" 6 Education_Level 10000 non-null object\n",
" 7 Location 10000 non-null object\n",
" 8 Salary 10000 non-null int64 \n",
"dtypes: int64(3), object(6)\n",
"memory usage: 703.3+ KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"x = emp.drop('Salary',axis=1)\n",
"y = emp['Salary']\n"
],
"metadata": {
"id": "xTcMhQ0POy6s"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn.preprocessing import OneHotEncoder , StandardScaler\n",
"from sklearn.impute import SimpleImputer , KNNImputer\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LinearRegression , LogisticRegression , Ridge , Lasso\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.svm import SVR\n",
"from sklearn.neighbors import KNeighborsRegressor\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"from sklearn.ensemble import AdaBoostRegressor\n",
"from sklearn.ensemble import ExtraTreesRegressor\n",
"from sklearn.ensemble import BaggingRegressor\n",
"from sklearn.metrics import mean_absolute_error , mean_squared_error , r2_score"
],
"metadata": {
"id": "nQ3UAVv4O8JE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"num_cols = x.select_dtypes(include='number').columns\n",
"cat_cols = x.select_dtypes(exclude='number').columns"
],
"metadata": {
"id": "31fPLr4pO_sl"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"num_pipe = Pipeline([\n",
" ('imputer', SimpleImputer(strategy='mean')),\n",
" ('scaler', StandardScaler())\n",
"])\n",
"\n",
"cat_pipe = Pipeline([\n",
" ('imputer', SimpleImputer(strategy='most_frequent')),\n",
" ('encoder', OneHotEncoder(handle_unknown='ignore'))\n",
"])"
],
"metadata": {
"id": "r6LDyCuXPBue"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"preprocessor = ColumnTransformer([\n",
" ('num', num_pipe, num_cols),\n",
" ('cat', cat_pipe, cat_cols)\n",
"])"
],
"metadata": {
"id": "_JKOzdm5PD-F"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"len(emp['Salary'].unique())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WHlIlJ6iPFSN",
"outputId": "0332b149-4804-4501-c796-a97656a47630"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"39"
]
},
"metadata": {},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"source": [
"from imblearn.under_sampling import RandomUnderSampler"
],
"metadata": {
"id": "1NddaS9XPJH8"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"x_train , x_test , y_train , y_test = train_test_split(x, y, test_size=0.2, random_state=42)"
],
"metadata": {
"id": "caqkkkrHPKzo"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"rus = RandomUnderSampler(random_state=42)\n",
"x_res, y_res = rus.fit_resample(x_train, y_train)"
],
"metadata": {
"id": "pKbHzncNPL5k"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"x_tr , x_val , y_tr , y_val = train_test_split(x_res, y_res, test_size=0.2, random_state=42)"
],
"metadata": {
"id": "747i6RGtPM96"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"x_tr = preprocessor.fit_transform(x_tr)\n",
"x_val = preprocessor.transform(x_val)"
],
"metadata": {
"id": "2gAm84ZCPOLn"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"lin = LinearRegression()\n",
"lin.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "QeO54dsjPPe9",
"outputId": "c30165c2-fc63-4175-fd5e-a3c690192839"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LinearRegression()"
],
"text/html": [
"LinearRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"source": [
"lin.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hYx0xiQFPQsF",
"outputId": "37eef3d7-918b-45ee-eb1e-8cd58a6711da"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9687384646766936"
]
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, lin.predict(x_val)))\n",
"print(mean_squared_error(y_val, lin.predict(x_val)))\n",
"print(r2_score(y_val, lin.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ogy0XxjSPRtl",
"outputId": "0b069cb5-b17c-495d-ed89-8f45bffce32e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"9526.033881598178\n",
"110511359.09847379\n",
"0.9687384646766936\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"log = LogisticRegression()\n",
"log.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "av-6WoiuPTE2",
"outputId": "0e866e16-ddf4-4d70-a221-3fa651fde57e"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LogisticRegression()"
],
"text/html": [
"LogisticRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"source": [
"log.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Xwz6iq0_PUbd",
"outputId": "17494660-c9c2-4d21-be02-d876df033555"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.0"
]
},
"metadata": {},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, log.predict(x_val)))\n",
"print(mean_squared_error(y_val, log.predict(x_val)))\n",
"print(r2_score(y_val, log.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HZyy59jFPVit",
"outputId": "879c37d5-aeb5-4b35-a51b-48d0b15bf767"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"10312.5\n",
"139062500.0\n",
"0.9606618967374789\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"rid = Ridge()\n",
"rid.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "DTYj5AkgPWsG",
"outputId": "0eaa31a9-be47-4672-c615-90c156df8447"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Ridge()"
],
"text/html": [
"Ridge() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 30
}
]
},
{
"cell_type": "code",
"source": [
"rid.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jlJKbJKFPYFd",
"outputId": "30c075e0-2758-4c64-95bf-65df4fd5fe45"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9630515659674406"
]
},
"metadata": {},
"execution_count": 31
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, rid.predict(x_val)))\n",
"print(mean_squared_error(y_val, rid.predict(x_val)))\n",
"print(r2_score(y_val, rid.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Zc68MC-qPY4k",
"outputId": "1ab021da-4d09-42ad-b8f7-b8573f4c24c9"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"10036.11337423515\n",
"130614879.25240436\n",
"0.9630515659674406\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"la = Lasso()\n",
"la.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 135
},
"id": "uWpUU8lgPZ1_",
"outputId": "8c7a05f9-cde1-4424-d0b3-c893ff7c2d91"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.12/dist-packages/sklearn/linear_model/_coordinate_descent.py:656: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Duality gap: 33822632.26302413, tolerance: 19010685.48387097\n",
" model = cd_fast.sparse_enet_coordinate_descent(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Lasso()"
],
"text/html": [
"Lasso() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 33
}
]
},
{
"cell_type": "code",
"source": [
"la.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2XAtByXwPbAU",
"outputId": "30672bdb-c6dc-4277-ee9a-0d535b269735"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9523458161782297"
]
},
"metadata": {},
"execution_count": 34
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, la.predict(x_val)))\n",
"print(mean_squared_error(y_val, la.predict(x_val)))\n",
"print(r2_score(y_val, la.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "P3fMltECPcmE",
"outputId": "b674d50d-a815-4c90-b658-9a08d25812d3"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"10259.659971565385\n",
"168460332.04729128\n",
"0.9523458161782297\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"kn = KNeighborsRegressor(n_neighbors=10)\n",
"kn.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "ln6Y5na7Pdit",
"outputId": "52f59070-ba21-4974-d434-b1623076db6d"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"KNeighborsRegressor(n_neighbors=10)"
],
"text/html": [
"KNeighborsRegressor(n_neighbors=10) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 36
}
]
},
{
"cell_type": "code",
"source": [
"kn.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-2f5lWAoPeqQ",
"outputId": "66287c65-a03a-4a4a-c071-142ab33cfb8b"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9354059504406199"
]
},
"metadata": {},
"execution_count": 37
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, kn.predict(x_val)))\n",
"print(mean_squared_error(y_val, kn.predict(x_val)))\n",
"print(r2_score(y_val, kn.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "41FIdGo7Pfpt",
"outputId": "256ad7e8-72bf-49f3-db7b-5b08cb889655"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"11437.5\n",
"228343750.0\n",
"0.9354059504406199\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"dtc = DecisionTreeRegressor()\n",
"dtc.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "EQiuZ2aKPguE",
"outputId": "24a10f63-2a0c-443b-cadc-d4a550f50ab0"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DecisionTreeRegressor()"
],
"text/html": [
"DecisionTreeRegressor() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 39
}
]
},
{
"cell_type": "code",
"source": [
"dtc.score(x_val,y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "qfxTPjXhPkz9",
"outputId": "454686a3-2b9f-431c-db91-1dcb428191bc"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.8850796983342082"
]
},
"metadata": {},
"execution_count": 41
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, dtc.predict(x_val)))\n",
"print(mean_squared_error(y_val, dtc.predict(x_val)))\n",
"print(r2_score(y_val, dtc.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sH2iqVH2PiFG",
"outputId": "62eadce3-a829-4582-c421-4daedf237d6f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"13125.0\n",
"406250000.0\n",
"0.8850796983342082\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"rfr = RandomForestRegressor()\n",
"rfr.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "kBmUO8kPPi_F",
"outputId": "608b0567-1c63-4487-a7f5-c3ecf8770901"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"RandomForestRegressor()"
],
"text/html": [
"RandomForestRegressor() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 42
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, rfr.predict(x_val)))\n",
"print(mean_squared_error(y_val, rfr.predict(x_val)))\n",
"print(r2_score(y_val, rfr.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5naRwgftPpkr",
"outputId": "b6c9ad56-605e-43be-9a36-413082747cc1"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"8003.125\n",
"143430781.25\n",
"0.9594261940937595\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"rfr.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uutqLRiKPqgl",
"outputId": "7946b631-32b5-4919-a2b7-665ccb83b758"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9594261940937595"
]
},
"metadata": {},
"execution_count": 44
}
]
},
{
"cell_type": "code",
"source": [
"lin_svr = SVR(kernel='linear')\n",
"lin_svr.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "TtO-S2rdX4p-",
"outputId": "a345e4a5-16c5-44d2-b80d-3eed39b9e411"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"SVR(kernel='linear')"
],
"text/html": [
"SVR(kernel='linear') In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 45
}
]
},
{
"cell_type": "code",
"source": [
"lin_svr.score(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JE1fCx0eYEkH",
"outputId": "d476e05d-f7fd-46b8-a93b-f74d80bfed50"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.0037633278401071335"
]
},
"metadata": {},
"execution_count": 46
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, lin_svr.predict(x_val)))\n",
"print(mean_squared_error(y_val, lin_svr.predict(x_val)))\n",
"print(r2_score(y_val, lin_svr.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PpWQPRz-YFZf",
"outputId": "92abfd6e-f1af-4e3b-9469-78868a57b7ec"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"52391.57187596244\n",
"3563485085.649429\n",
"-0.008041307136941622\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"poly_svr = SVR(kernel='poly')\n",
"poly_svr.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "X_wOzaj4YHIv",
"outputId": "1a33bf6f-f9d1-43e6-dfe8-186c5cfcfac1"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"SVR(kernel='poly')"
],
"text/html": [
"SVR(kernel='poly') In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 48
}
]
},
{
"cell_type": "code",
"source": [
"poly_svr.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SbyXKB9nYJKH",
"outputId": "ea779b23-ae5c-47f1-b0e5-10197ad6427b"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"-0.011895723165550098"
]
},
"metadata": {},
"execution_count": 49
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, poly_svr.predict(x_val)))\n",
"print(mean_squared_error(y_val, poly_svr.predict(x_val)))\n",
"print(r2_score(y_val, poly_svr.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "bwsWQqrtYLaf",
"outputId": "86f09146-e84f-42b0-ff1b-fb425cf18782"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"52493.49175641051\n",
"3577110672.155249\n",
"-0.011895723165550098\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"rbf_svr = SVR(kernel='rbf')\n",
"rbf_svr.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "lgPG5jXZYMrn",
"outputId": "6f0dc663-53fb-4cca-8f92-03af6aeb8ff5"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"SVR()"
],
"text/html": [
"SVR() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 51
}
]
},
{
"cell_type": "code",
"source": [
"rbf_svr.score(x_val, y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cbN8vROYYQtf",
"outputId": "a95b969f-2aba-4594-924c-9e5c1b268ad3"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"-0.01197346602359639"
]
},
"metadata": {},
"execution_count": 52
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, rbf_svr.predict(x_val)))\n",
"print(mean_squared_error(y_val, rbf_svr.predict(x_val)))\n",
"print(r2_score(y_val, rbf_svr.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3Qn7IRxqYSNv",
"outputId": "3e637cff-ed8a-40aa-8f8a-1e571ecee44c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"52494.214614453245\n",
"3577385497.713688\n",
"-0.01197346602359639\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from xgboost import XGBRegressor"
],
"metadata": {
"id": "KXBAus0dYWBc"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"xgr = XGBRegressor()\n",
"xgr.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 254
},
"id": "3Dn0EAE-YYfv",
"outputId": "b6c1fef6-d369-48a2-e7e3-389d813ab956"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" feature_weights=None, gamma=None, grow_policy=None,\n",
" importance_type=None, interaction_constraints=None,\n",
" learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
" max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
" max_leaves=None, min_child_weight=None, missing=nan,\n",
" monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
" n_jobs=None, num_parallel_tree=None, ...)"
],
"text/html": [
"XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" feature_weights=None, gamma=None, grow_policy=None,\n",
" importance_type=None, interaction_constraints=None,\n",
" learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
" max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
" max_leaves=None, min_child_weight=None, missing=nan,\n",
" monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
" n_jobs=None, num_parallel_tree=None, ...) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
" colsample_bylevel=None, colsample_bynode=None,\n",
" colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
" feature_weights=None, gamma=None, grow_policy=None,\n",
" importance_type=None, interaction_constraints=None,\n",
" learning_rate=None, max_bin=None, max_cat_threshold=None,\n",
" max_cat_to_onehot=None, max_delta_step=None, max_depth=None,\n",
" max_leaves=None, min_child_weight=None, missing=nan,\n",
" monotone_constraints=None, multi_strategy=None, n_estimators=None,\n",
" n_jobs=None, num_parallel_tree=None, ...) "
]
},
"metadata": {},
"execution_count": 55
}
]
},
{
"cell_type": "code",
"source": [
"xgr.score(x_val , y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CoPg3XQgYZrv",
"outputId": "121ec9f9-9b01-47d3-83a9-ddddf13448cf"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9270912408828735"
]
},
"metadata": {},
"execution_count": 56
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import GridSearchCV"
],
"metadata": {
"id": "eKEHa0x0QLDI"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"rid_searchcv = GridSearchCV(rid, param_grid={'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}, cv=5)\n",
"rid_searchcv.fit(x_tr, y_tr)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 166
},
"id": "Yes0iROsP_c7",
"outputId": "bbe89cbc-c43c-40ed-f371-bd3e093369c3"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"GridSearchCV(cv=5, estimator=Ridge(),\n",
" param_grid={'alpha': [0.001, 0.01, 0.1, 1, 10, 100]})"
],
"text/html": [
"GridSearchCV(cv=5, estimator=Ridge(),\n",
" param_grid={'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 59
}
]
},
{
"cell_type": "code",
"source": [
"rid_searchcv.best_estimator_"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"id": "anUT_ZC2QKF9",
"outputId": "22ee339d-6452-43de-e30d-ac5a4c89e8d7"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Ridge(alpha=0.001)"
],
"text/html": [
"Ridge(alpha=0.001) In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
]
},
"metadata": {},
"execution_count": 60
}
]
},
{
"cell_type": "code",
"source": [
"rid_searchcv.best_params_"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "M9VEbs2IQPAE",
"outputId": "aa7ca11f-b728-4bd5-f8de-9d6cfa7597f4"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'alpha': 0.001}"
]
},
"metadata": {},
"execution_count": 61
}
]
},
{
"cell_type": "code",
"source": [
"rid_searchcv.score(x_val,y_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i9sl7XNbQQ4M",
"outputId": "81978c1b-3d79-432f-9970-53888b05ff1e"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9687184268641414"
]
},
"metadata": {},
"execution_count": 62
}
]
},
{
"cell_type": "code",
"source": [
"print(mean_absolute_error(y_val, rid_searchcv.predict(x_val)))\n",
"print(mean_squared_error(y_val, rid_searchcv.predict(x_val)))\n",
"print(r2_score(y_val, rid_searchcv.predict(x_val)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ptZztDxOQVk0",
"outputId": "ffb19e3e-5983-4590-81c7-22e3d483f8de"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"9528.283094826926\n",
"110582193.93993585\n",
"0.9687184268641414\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import pickle as pkl"
],
"metadata": {
"id": "OmkiiLe7ZZt3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"pkl.dump(rid_searchcv, open('rid.pkl', 'wb'))\n",
"pkl.dump(preprocessor, open('prep.pkl', 'wb'))"
],
"metadata": {
"id": "YT_t965gr5AM"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"emp.to_csv('emp.csv')"
],
"metadata": {
"id": "TUE56B_hQfAH"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "9g6TY7S-Q0Ll"
},
"execution_count": null,
"outputs": []
}
]
}