{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 157,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gMz11RCXQmcK",
"outputId": "e28e8f62-4518-46fc-ff82-065239ddba85"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: opendatasets in /usr/local/lib/python3.12/dist-packages (0.1.22)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from opendatasets) (4.67.1)\n",
"Requirement already satisfied: kaggle in /usr/local/lib/python3.12/dist-packages (from opendatasets) (1.7.4.5)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.12/dist-packages (from opendatasets) (8.2.1)\n",
"Requirement already satisfied: bleach in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (6.2.0)\n",
"Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2025.8.3)\n",
"Requirement already satisfied: charset-normalizer in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (3.4.3)\n",
"Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (3.10)\n",
"Requirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (5.29.5)\n",
"Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2.9.0.post0)\n",
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (8.0.4)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2.32.4)\n",
"Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (75.2.0)\n",
"Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (1.17.0)\n",
"Requirement already satisfied: text-unidecode in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (1.3)\n",
"Requirement already satisfied: urllib3>=1.15.1 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2.5.0)\n",
"Requirement already satisfied: webencodings in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (0.5.1)\n"
]
}
],
"source": [
"# %pip installs into the running kernel's environment; version pinned for reproducibility\n",
"%pip install -q opendatasets==0.1.22"
]
},
{
"cell_type": "code",
"source": [
"#importing required libraries\n",
"import os\n",
"import opendatasets as od\n",
"import pandas as pd\n",
"import numpy as np"
],
"metadata": {
"id": "zl1WW0LDQ1H2"
},
"execution_count": 158,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Fetch the Kaggle dataset; the files land in ./google-daily-stock-prices-2004-today\n",
"dataset_url = \"https://www.kaggle.com/datasets/emrekaany/google-daily-stock-prices-2004-today\"\n",
"od.download(dataset_url)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jsB9_VmxQ3Ck",
"outputId": "7bd340ce-882b-46ee-b9ee-a4df32b40985"
},
"execution_count": 159,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Skipping, found downloaded files in \"./google-daily-stock-prices-2004-today\" (use force=True to force download)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"os.listdir(\"google-daily-stock-prices-2004-today\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SeT8ay40Q7s0",
"outputId": "afef4417-1c73-42f0-d15b-ad40fa1f2fc5"
},
"execution_count": 160,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['googl_daily_prices.csv']"
]
},
"metadata": {},
"execution_count": 160
}
]
},
{
"cell_type": "code",
"source": [
"# Build the path relative to the working directory (where od.download placed the\n",
"# files) instead of a Colab-only absolute /content path.\n",
"csv_path = os.path.join(\"google-daily-stock-prices-2004-today\", \"googl_daily_prices.csv\")\n",
"raw_df = pd.read_csv(csv_path)"
],
"metadata": {
"id": "4-Yq5PCVRIXI"
},
"execution_count": 161,
"outputs": []
},
{
"cell_type": "code",
"source": [
"raw_df.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "CRH6s-X0RPNO",
"outputId": "4896b3f7-af6a-4081-a6c9-ad6cd8f5b841"
},
"execution_count": 162,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" date 1. open 2. high 3. low 4. close 5. volume\n",
"0 2025-09-30 242.810 243.2900 239.245 243.10 34724346.0\n",
"1 2025-09-29 247.850 251.1486 242.770 244.05 32505777.0\n",
"2 2025-09-26 247.065 249.4200 245.970 246.54 18503194.0\n",
"3 2025-09-25 244.400 246.4900 240.740 245.79 31020383.0\n",
"4 2025-09-24 251.660 252.3501 246.440 247.14 28201003.0"
],
"text/html": [
"\n",
"
\n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" 1. open | \n",
" 2. high | \n",
" 3. low | \n",
" 4. close | \n",
" 5. volume | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-30 | \n",
" 242.810 | \n",
" 243.2900 | \n",
" 239.245 | \n",
" 243.10 | \n",
" 34724346.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-29 | \n",
" 247.850 | \n",
" 251.1486 | \n",
" 242.770 | \n",
" 244.05 | \n",
" 32505777.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-26 | \n",
" 247.065 | \n",
" 249.4200 | \n",
" 245.970 | \n",
" 246.54 | \n",
" 18503194.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-25 | \n",
" 244.400 | \n",
" 246.4900 | \n",
" 240.740 | \n",
" 245.79 | \n",
" 31020383.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-24 | \n",
" 251.660 | \n",
" 252.3501 | \n",
" 246.440 | \n",
" 247.14 | \n",
" 28201003.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "raw_df",
"summary": "{\n \"name\": \"raw_df\",\n \"rows\": 5313,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5313,\n \"samples\": [\n \"2021-08-30\",\n \"2010-05-10\",\n \"2015-06-08\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"1. open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 612.1734512006673,\n \"min\": 85.4,\n \"max\": 3025.0,\n \"num_unique_values\": 5098,\n \"samples\": [\n 2902.94,\n 1440.0,\n 2857.38\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"2. high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 618.2806692169981,\n \"min\": 86.52,\n \"max\": 3030.9315,\n \"num_unique_values\": 5098,\n \"samples\": [\n 2925.075,\n 1442.32,\n 2743.29\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"3. low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 605.6491240554433,\n \"min\": 83.34,\n \"max\": 2977.98,\n \"num_unique_values\": 5151,\n \"samples\": [\n 1105.15,\n 1347.32,\n 537.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"4. close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 612.0915219846729,\n \"min\": 83.43,\n \"max\": 2996.77,\n \"num_unique_values\": 5169,\n \"samples\": [\n 1106.5,\n 950.44,\n 1422.86\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"5. volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12616124.474814428,\n \"min\": 465638.0,\n \"max\": 127747554.0,\n \"num_unique_values\": 5285,\n \"samples\": [\n 2964489.0,\n 4101200.0,\n 29130102.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 162
}
]
},
{
"cell_type": "code",
"source": [
"raw_df.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hbTXK0NeRRFt",
"outputId": "dcff175a-074b-47f3-d63d-878ad94f88c2"
},
"execution_count": 163,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 5313 entries, 0 to 5312\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 date 5313 non-null object \n",
" 1 1. open 5313 non-null float64\n",
" 2 2. high 5313 non-null float64\n",
" 3 3. low 5313 non-null float64\n",
" 4 4. close 5313 non-null float64\n",
" 5 5. volume 5313 non-null float64\n",
"dtypes: float64(5), object(1)\n",
"memory usage: 249.2+ KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Strip the numeric prefixes from the headers (e.g. '1. open' -> 'open')\n",
"stripped_names = raw_df.columns.str.replace(r'\\d+\\.\\s+', '', regex=True)\n",
"raw_df.columns = stripped_names\n",
"display(raw_df.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "3Nm9EbGORWs_",
"outputId": "cf0788d5-35ab-4d23-962e-727bea74b6ee"
},
"execution_count": 164,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" date open high low close volume\n",
"0 2025-09-30 242.810 243.2900 239.245 243.10 34724346.0\n",
"1 2025-09-29 247.850 251.1486 242.770 244.05 32505777.0\n",
"2 2025-09-26 247.065 249.4200 245.970 246.54 18503194.0\n",
"3 2025-09-25 244.400 246.4900 240.740 245.79 31020383.0\n",
"4 2025-09-24 251.660 252.3501 246.440 247.14 28201003.0"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" open | \n",
" high | \n",
" low | \n",
" close | \n",
" volume | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-30 | \n",
" 242.810 | \n",
" 243.2900 | \n",
" 239.245 | \n",
" 243.10 | \n",
" 34724346.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-29 | \n",
" 247.850 | \n",
" 251.1486 | \n",
" 242.770 | \n",
" 244.05 | \n",
" 32505777.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-26 | \n",
" 247.065 | \n",
" 249.4200 | \n",
" 245.970 | \n",
" 246.54 | \n",
" 18503194.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-25 | \n",
" 244.400 | \n",
" 246.4900 | \n",
" 240.740 | \n",
" 245.79 | \n",
" 31020383.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-24 | \n",
" 251.660 | \n",
" 252.3501 | \n",
" 246.440 | \n",
" 247.14 | \n",
" 28201003.0 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"display(raw_df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"2025-09-29\",\n \"2025-09-24\",\n \"2025-09-26\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.408195270227335,\n \"min\": 242.81,\n \"max\": 251.66,\n \"num_unique_values\": 5,\n \"samples\": [\n 247.85,\n 251.66,\n 247.065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.669504586180536,\n \"min\": 243.29,\n \"max\": 252.3501,\n \"num_unique_values\": 5,\n \"samples\": [\n 251.1486,\n 252.3501,\n 249.42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.15870147370719,\n \"min\": 239.245,\n \"max\": 246.44,\n \"num_unique_values\": 5,\n \"samples\": [\n 242.77,\n 246.44,\n 245.97\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.7002146923256427,\n \"min\": 243.1,\n \"max\": 247.14,\n \"num_unique_values\": 5,\n \"samples\": [\n 244.05,\n 247.14,\n 246.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6323793.936636005,\n \"min\": 18503194.0,\n \"max\": 34724346.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 32505777.0,\n 28201003.0,\n 18503194.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"raw_df.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eIQqWuNDRktE",
"outputId": "97c4c0a8-da84-4c0f-d829-2a91c9a5f20e"
},
"execution_count": 165,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"RangeIndex: 5313 entries, 0 to 5312\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 date 5313 non-null object \n",
" 1 open 5313 non-null float64\n",
" 2 high 5313 non-null float64\n",
" 3 low 5313 non-null float64\n",
" 4 close 5313 non-null float64\n",
" 5 volume 5313 non-null float64\n",
"dtypes: float64(5), object(1)\n",
"memory usage: 249.2+ KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Rescale the price/volume feature columns with MinMaxScaler\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"\n",
"# 'close' is not listed, so it keeps its original values\n",
"scale_cols = ['open', 'high', 'low', 'volume']\n",
"\n",
"# NOTE(review): the scaler is fitted on every row, before the train/test split\n",
"# made further down, so test rows contribute to the min/max - confirm this is intended.\n",
"scaler = MinMaxScaler()\n",
"scaler.fit(raw_df[scale_cols])\n",
"\n",
"# Overwrite the selected columns of raw_df with their scaled values\n",
"raw_df[scale_cols] = scaler.transform(raw_df[scale_cols])\n",
"\n",
"display(raw_df.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "NlyQzKFsR7Qi",
"outputId": "fdfb1f2e-b523-426a-beab-eb30e6c06645"
},
"execution_count": 166,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
" date open high low close volume\n",
"0 2025-09-30 0.053548 0.053243 0.053860 243.10 0.269156\n",
"1 2025-09-29 0.055263 0.055912 0.055078 244.05 0.251726\n",
"2 2025-09-26 0.054996 0.055325 0.056183 246.54 0.141713\n",
"3 2025-09-25 0.054089 0.054330 0.054376 245.79 0.240056\n",
"4 2025-09-24 0.056559 0.056320 0.056346 247.14 0.217905"
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" open | \n",
" high | \n",
" low | \n",
" close | \n",
" volume | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2025-09-30 | \n",
" 0.053548 | \n",
" 0.053243 | \n",
" 0.053860 | \n",
" 243.10 | \n",
" 0.269156 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2025-09-29 | \n",
" 0.055263 | \n",
" 0.055912 | \n",
" 0.055078 | \n",
" 244.05 | \n",
" 0.251726 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2025-09-26 | \n",
" 0.054996 | \n",
" 0.055325 | \n",
" 0.056183 | \n",
" 246.54 | \n",
" 0.141713 | \n",
"
\n",
" \n",
" | 3 | \n",
" 2025-09-25 | \n",
" 0.054089 | \n",
" 0.054330 | \n",
" 0.054376 | \n",
" 245.79 | \n",
" 0.240056 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2025-09-24 | \n",
" 0.056559 | \n",
" 0.056320 | \n",
" 0.056346 | \n",
" 247.14 | \n",
" 0.217905 | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"display(raw_df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"2025-09-29\",\n \"2025-09-24\",\n \"2025-09-26\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.001159407834476571,\n \"min\": 0.05354810178255545,\n \"max\": 0.056558715471492715,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.05526262076472989,\n 0.056558715471492715,\n 0.054995577629609466\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0012462607846017927,\n \"min\": 0.05324323723093731,\n \"max\": 0.056320286753397064,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.05591222558395795,\n 0.056320286753397064,\n 0.055325147317214315\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0010912242882386725,\n \"min\": 0.05385989276732167,\n \"max\": 0.05634552137744245,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.05507766077992428,\n 0.05634552137744245,\n 0.05618315230909543\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.7002146923256427,\n \"min\": 243.1,\n \"max\": 247.14,\n \"num_unique_values\": 5,\n \"samples\": [\n 244.05,\n 247.14,\n 246.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0496833653622562,\n \"min\": 0.14171342298147055,\n \"max\": 0.2691561305535344,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.25172577540394664,\n 0.21790499288209964,\n 0.14171342298147055\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split the rows into train and test frames (80% / 20%)\n",
"# NOTE(review): train_test_split shuffles rows by default, so test days end up\n",
"# interleaved with training days in this daily price series - confirm that a\n",
"# chronological split is not what is wanted here.\n",
"X_train, X_test = train_test_split(\n",
"    raw_df,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")"
],
"metadata": {
"id": "qE759hHiTbgY"
},
"execution_count": 167,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Pull the 'close' column out of each split to use as the regression target\n",
"train_target_col, test_target_col = X_train['close'], X_test['close']"
],
"metadata": {
"id": "kHIYVx0zRuqw"
},
"execution_count": 168,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Keep only the model inputs: every column except the date label and the target.\n",
"# Selecting by name (rather than dropping) gives the same frames on the first run\n",
"# and does not raise KeyError if this cell is executed again.\n",
"feature_cols = [col for col in X_train.columns if col not in ('date', 'close')]\n",
"X_train = X_train[feature_cols]\n",
"X_test = X_test[feature_cols]"
],
"metadata": {
"id": "5QLRQu9zUkdE"
},
"execution_count": 169,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from xgboost import XGBRegressor"
],
"metadata": {
"id": "Xr4-_Ma_UmMl"
},
"execution_count": 170,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Helper that reports regression error metrics for a fitted model\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n",
"\n",
"def xgb_model_evaluation(df, target_col, model=None):\n",
"    \"\"\"Print the RMSE and MAPE of a fitted model's predictions on ``df``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : feature matrix handed to ``model.predict`` (any split, train or test).\n",
"    target_col : true target values, aligned row-for-row with ``df``.\n",
"    model : fitted estimator exposing ``predict``. Defaults to the notebook-level\n",
"        ``xgb_model`` so existing two-argument calls keep working.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. Both metrics are written to stdout.\n",
"    \"\"\"\n",
"    if model is None:\n",
"        # Resolved at call time, so the training cell must have been run first.\n",
"        model = xgb_model\n",
"\n",
"    predictions = model.predict(df)\n",
"\n",
"    # Root mean squared error, in the same units as the target\n",
"    rmse = np.sqrt(mean_squared_error(target_col, predictions))\n",
"    print(f\"RMSE: {rmse}\")\n",
"\n",
"    # scikit-learn returns MAPE as a fraction (0.01 means 1%), not a percentage\n",
"    mape = mean_absolute_percentage_error(target_col, predictions)\n",
"    print(f\"MAPE: {mape}\")"
],
"metadata": {
"id": "lQWEfUtfYu8V"
},
"execution_count": 171,
"outputs": []
},
{
"cell_type": "code",
"source": [
"%%time\n",
"# n_jobs=-1 lets XGBoost use however many cores the runtime offers instead of a\n",
"# hard-coded 25 threads, which oversubscribes small machines.\n",
"xgb_model = XGBRegressor(\n",
"    n_estimators=1500,\n",
"    learning_rate=0.01,\n",
"    max_depth=16,\n",
"    n_jobs=-1,\n",
"    random_state=47,\n",
").fit(X_train, train_target_col)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7rj5tIMZVaUT",
"outputId": "144efc80-4833-4752-d9a5-13566a4f1d3f"
},
"execution_count": 172,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"CPU times: user 48.1 s, sys: 476 ms, total: 48.6 s\n",
"Wall time: 36.1 s\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"xgb_model_evaluation(X_train, train_target_col)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NlhOaQxSZVH0",
"outputId": "69cf251d-1dd3-44ce-e467-bec7a4378a83"
},
"execution_count": 173,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"RMSE: 0.8930120580153202\n",
"MAPE: 0.0009947761695732408\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"xgb_model_evaluation(X_test, test_target_col)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wnmCmaVpZXJ8",
"outputId": "8f89c10f-2789-41ff-94d8-5bb6ba27a666"
},
"execution_count": 174,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"RMSE: 11.429648677781845\n",
"MAPE: 0.007749489179507017\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import joblib\n",
"\n",
"# Filenames for the persisted artifacts\n",
"model_filename = 'xgb_regressor_model.joblib'\n",
"scaler_filename = 'minmax_scaler.joblib'\n",
"\n",
"# Save the trained regressor\n",
"joblib.dump(xgb_model, model_filename)\n",
"print(f\"Model saved to {model_filename}\")\n",
"\n",
"# The model was trained on MinMax-scaled open/high/low/volume, so the fitted\n",
"# scaler is saved too; without it new inputs cannot be prepared the same way.\n",
"joblib.dump(scaler, scaler_filename)\n",
"print(f\"Scaler saved to {scaler_filename}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "heGYy9dbqrKV",
"outputId": "09c92760-379a-4b2c-ea6b-ff2fccf020c5"
},
"execution_count": 175,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Model saved to xgb_regressor_model.joblib\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "2cdNDOOxpx6j"
},
"execution_count": 175,
"outputs": []
}
]
}