Himantaaa commited on
Commit
1bc5de5
·
verified ·
1 Parent(s): 247ec24

Upload DataSynthis_ML_JobTask.ipynb

Browse files
Files changed (1) hide show
  1. DataSynthis_ML_JobTask.ipynb +1377 -0
DataSynthis_ML_JobTask.ipynb ADDED
@@ -0,0 +1,1377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 157,
20
+ "metadata": {
21
+ "colab": {
22
+ "base_uri": "https://localhost:8080/"
23
+ },
24
+ "id": "gMz11RCXQmcK",
25
+ "outputId": "e28e8f62-4518-46fc-ff82-065239ddba85"
26
+ },
27
+ "outputs": [
28
+ {
29
+ "output_type": "stream",
30
+ "name": "stdout",
31
+ "text": [
32
+ "Requirement already satisfied: opendatasets in /usr/local/lib/python3.12/dist-packages (0.1.22)\n",
33
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from opendatasets) (4.67.1)\n",
34
+ "Requirement already satisfied: kaggle in /usr/local/lib/python3.12/dist-packages (from opendatasets) (1.7.4.5)\n",
35
+ "Requirement already satisfied: click in /usr/local/lib/python3.12/dist-packages (from opendatasets) (8.2.1)\n",
36
+ "Requirement already satisfied: bleach in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (6.2.0)\n",
37
+ "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2025.8.3)\n",
38
+ "Requirement already satisfied: charset-normalizer in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (3.4.3)\n",
39
+ "Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (3.10)\n",
40
+ "Requirement already satisfied: protobuf in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (5.29.5)\n",
41
+ "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2.9.0.post0)\n",
42
+ "Requirement already satisfied: python-slugify in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (8.0.4)\n",
43
+ "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2.32.4)\n",
44
+ "Requirement already satisfied: setuptools>=21.0.0 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (75.2.0)\n",
45
+ "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (1.17.0)\n",
46
+ "Requirement already satisfied: text-unidecode in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (1.3)\n",
47
+ "Requirement already satisfied: urllib3>=1.15.1 in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (2.5.0)\n",
48
+ "Requirement already satisfied: webencodings in /usr/local/lib/python3.12/dist-packages (from kaggle->opendatasets) (0.5.1)\n"
49
+ ]
50
+ }
51
+ ],
52
+ "source": [
53
+ "!pip install opendatasets"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "source": [
59
+ "#importing required libraries\n",
60
+ "import os\n",
61
+ "import opendatasets as od\n",
62
+ "import pandas as pd\n",
63
+ "import numpy as np"
64
+ ],
65
+ "metadata": {
66
+ "id": "zl1WW0LDQ1H2"
67
+ },
68
+ "execution_count": 158,
69
+ "outputs": []
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "source": [
74
+ "#download dataset\n",
75
+ "od.download(\"https://www.kaggle.com/datasets/emrekaany/google-daily-stock-prices-2004-today\")"
76
+ ],
77
+ "metadata": {
78
+ "colab": {
79
+ "base_uri": "https://localhost:8080/"
80
+ },
81
+ "id": "jsB9_VmxQ3Ck",
82
+ "outputId": "7bd340ce-882b-46ee-b9ee-a4df32b40985"
83
+ },
84
+ "execution_count": 159,
85
+ "outputs": [
86
+ {
87
+ "output_type": "stream",
88
+ "name": "stdout",
89
+ "text": [
90
+ "Skipping, found downloaded files in \"./google-daily-stock-prices-2004-today\" (use force=True to force download)\n"
91
+ ]
92
+ }
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "source": [
98
+ "os.listdir(\"google-daily-stock-prices-2004-today\")"
99
+ ],
100
+ "metadata": {
101
+ "colab": {
102
+ "base_uri": "https://localhost:8080/"
103
+ },
104
+ "id": "SeT8ay40Q7s0",
105
+ "outputId": "afef4417-1c73-42f0-d15b-ad40fa1f2fc5"
106
+ },
107
+ "execution_count": 160,
108
+ "outputs": [
109
+ {
110
+ "output_type": "execute_result",
111
+ "data": {
112
+ "text/plain": [
113
+ "['googl_daily_prices.csv']"
114
+ ]
115
+ },
116
+ "metadata": {},
117
+ "execution_count": 160
118
+ }
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "source": [
124
+ "raw_df = pd.read_csv(\"/content/google-daily-stock-prices-2004-today/googl_daily_prices.csv\")"
125
+ ],
126
+ "metadata": {
127
+ "id": "4-Yq5PCVRIXI"
128
+ },
129
+ "execution_count": 161,
130
+ "outputs": []
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "source": [
135
+ "raw_df.head()"
136
+ ],
137
+ "metadata": {
138
+ "colab": {
139
+ "base_uri": "https://localhost:8080/",
140
+ "height": 206
141
+ },
142
+ "id": "CRH6s-X0RPNO",
143
+ "outputId": "4896b3f7-af6a-4081-a6c9-ad6cd8f5b841"
144
+ },
145
+ "execution_count": 162,
146
+ "outputs": [
147
+ {
148
+ "output_type": "execute_result",
149
+ "data": {
150
+ "text/plain": [
151
+ " date 1. open 2. high 3. low 4. close 5. volume\n",
152
+ "0 2025-09-30 242.810 243.2900 239.245 243.10 34724346.0\n",
153
+ "1 2025-09-29 247.850 251.1486 242.770 244.05 32505777.0\n",
154
+ "2 2025-09-26 247.065 249.4200 245.970 246.54 18503194.0\n",
155
+ "3 2025-09-25 244.400 246.4900 240.740 245.79 31020383.0\n",
156
+ "4 2025-09-24 251.660 252.3501 246.440 247.14 28201003.0"
157
+ ],
158
+ "text/html": [
159
+ "\n",
160
+ " <div id=\"df-c093cc38-ba3f-4482-9d4e-27782311c74a\" class=\"colab-df-container\">\n",
161
+ " <div>\n",
162
+ "<style scoped>\n",
163
+ " .dataframe tbody tr th:only-of-type {\n",
164
+ " vertical-align: middle;\n",
165
+ " }\n",
166
+ "\n",
167
+ " .dataframe tbody tr th {\n",
168
+ " vertical-align: top;\n",
169
+ " }\n",
170
+ "\n",
171
+ " .dataframe thead th {\n",
172
+ " text-align: right;\n",
173
+ " }\n",
174
+ "</style>\n",
175
+ "<table border=\"1\" class=\"dataframe\">\n",
176
+ " <thead>\n",
177
+ " <tr style=\"text-align: right;\">\n",
178
+ " <th></th>\n",
179
+ " <th>date</th>\n",
180
+ " <th>1. open</th>\n",
181
+ " <th>2. high</th>\n",
182
+ " <th>3. low</th>\n",
183
+ " <th>4. close</th>\n",
184
+ " <th>5. volume</th>\n",
185
+ " </tr>\n",
186
+ " </thead>\n",
187
+ " <tbody>\n",
188
+ " <tr>\n",
189
+ " <th>0</th>\n",
190
+ " <td>2025-09-30</td>\n",
191
+ " <td>242.810</td>\n",
192
+ " <td>243.2900</td>\n",
193
+ " <td>239.245</td>\n",
194
+ " <td>243.10</td>\n",
195
+ " <td>34724346.0</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>1</th>\n",
199
+ " <td>2025-09-29</td>\n",
200
+ " <td>247.850</td>\n",
201
+ " <td>251.1486</td>\n",
202
+ " <td>242.770</td>\n",
203
+ " <td>244.05</td>\n",
204
+ " <td>32505777.0</td>\n",
205
+ " </tr>\n",
206
+ " <tr>\n",
207
+ " <th>2</th>\n",
208
+ " <td>2025-09-26</td>\n",
209
+ " <td>247.065</td>\n",
210
+ " <td>249.4200</td>\n",
211
+ " <td>245.970</td>\n",
212
+ " <td>246.54</td>\n",
213
+ " <td>18503194.0</td>\n",
214
+ " </tr>\n",
215
+ " <tr>\n",
216
+ " <th>3</th>\n",
217
+ " <td>2025-09-25</td>\n",
218
+ " <td>244.400</td>\n",
219
+ " <td>246.4900</td>\n",
220
+ " <td>240.740</td>\n",
221
+ " <td>245.79</td>\n",
222
+ " <td>31020383.0</td>\n",
223
+ " </tr>\n",
224
+ " <tr>\n",
225
+ " <th>4</th>\n",
226
+ " <td>2025-09-24</td>\n",
227
+ " <td>251.660</td>\n",
228
+ " <td>252.3501</td>\n",
229
+ " <td>246.440</td>\n",
230
+ " <td>247.14</td>\n",
231
+ " <td>28201003.0</td>\n",
232
+ " </tr>\n",
233
+ " </tbody>\n",
234
+ "</table>\n",
235
+ "</div>\n",
236
+ " <div class=\"colab-df-buttons\">\n",
237
+ "\n",
238
+ " <div class=\"colab-df-container\">\n",
239
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c093cc38-ba3f-4482-9d4e-27782311c74a')\"\n",
240
+ " title=\"Convert this dataframe to an interactive table.\"\n",
241
+ " style=\"display:none;\">\n",
242
+ "\n",
243
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
244
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
245
+ " </svg>\n",
246
+ " </button>\n",
247
+ "\n",
248
+ " <style>\n",
249
+ " .colab-df-container {\n",
250
+ " display:flex;\n",
251
+ " gap: 12px;\n",
252
+ " }\n",
253
+ "\n",
254
+ " .colab-df-convert {\n",
255
+ " background-color: #E8F0FE;\n",
256
+ " border: none;\n",
257
+ " border-radius: 50%;\n",
258
+ " cursor: pointer;\n",
259
+ " display: none;\n",
260
+ " fill: #1967D2;\n",
261
+ " height: 32px;\n",
262
+ " padding: 0 0 0 0;\n",
263
+ " width: 32px;\n",
264
+ " }\n",
265
+ "\n",
266
+ " .colab-df-convert:hover {\n",
267
+ " background-color: #E2EBFA;\n",
268
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
269
+ " fill: #174EA6;\n",
270
+ " }\n",
271
+ "\n",
272
+ " .colab-df-buttons div {\n",
273
+ " margin-bottom: 4px;\n",
274
+ " }\n",
275
+ "\n",
276
+ " [theme=dark] .colab-df-convert {\n",
277
+ " background-color: #3B4455;\n",
278
+ " fill: #D2E3FC;\n",
279
+ " }\n",
280
+ "\n",
281
+ " [theme=dark] .colab-df-convert:hover {\n",
282
+ " background-color: #434B5C;\n",
283
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
284
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
285
+ " fill: #FFFFFF;\n",
286
+ " }\n",
287
+ " </style>\n",
288
+ "\n",
289
+ " <script>\n",
290
+ " const buttonEl =\n",
291
+ " document.querySelector('#df-c093cc38-ba3f-4482-9d4e-27782311c74a button.colab-df-convert');\n",
292
+ " buttonEl.style.display =\n",
293
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
294
+ "\n",
295
+ " async function convertToInteractive(key) {\n",
296
+ " const element = document.querySelector('#df-c093cc38-ba3f-4482-9d4e-27782311c74a');\n",
297
+ " const dataTable =\n",
298
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
299
+ " [key], {});\n",
300
+ " if (!dataTable) return;\n",
301
+ "\n",
302
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
303
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
304
+ " + ' to learn more about interactive tables.';\n",
305
+ " element.innerHTML = '';\n",
306
+ " dataTable['output_type'] = 'display_data';\n",
307
+ " await google.colab.output.renderOutput(dataTable, element);\n",
308
+ " const docLink = document.createElement('div');\n",
309
+ " docLink.innerHTML = docLinkHtml;\n",
310
+ " element.appendChild(docLink);\n",
311
+ " }\n",
312
+ " </script>\n",
313
+ " </div>\n",
314
+ "\n",
315
+ "\n",
316
+ " <div id=\"df-2582d95f-0015-4b3a-acf0-6b7d28ed75fd\">\n",
317
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2582d95f-0015-4b3a-acf0-6b7d28ed75fd')\"\n",
318
+ " title=\"Suggest charts\"\n",
319
+ " style=\"display:none;\">\n",
320
+ "\n",
321
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
322
+ " width=\"24px\">\n",
323
+ " <g>\n",
324
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
325
+ " </g>\n",
326
+ "</svg>\n",
327
+ " </button>\n",
328
+ "\n",
329
+ "<style>\n",
330
+ " .colab-df-quickchart {\n",
331
+ " --bg-color: #E8F0FE;\n",
332
+ " --fill-color: #1967D2;\n",
333
+ " --hover-bg-color: #E2EBFA;\n",
334
+ " --hover-fill-color: #174EA6;\n",
335
+ " --disabled-fill-color: #AAA;\n",
336
+ " --disabled-bg-color: #DDD;\n",
337
+ " }\n",
338
+ "\n",
339
+ " [theme=dark] .colab-df-quickchart {\n",
340
+ " --bg-color: #3B4455;\n",
341
+ " --fill-color: #D2E3FC;\n",
342
+ " --hover-bg-color: #434B5C;\n",
343
+ " --hover-fill-color: #FFFFFF;\n",
344
+ " --disabled-bg-color: #3B4455;\n",
345
+ " --disabled-fill-color: #666;\n",
346
+ " }\n",
347
+ "\n",
348
+ " .colab-df-quickchart {\n",
349
+ " background-color: var(--bg-color);\n",
350
+ " border: none;\n",
351
+ " border-radius: 50%;\n",
352
+ " cursor: pointer;\n",
353
+ " display: none;\n",
354
+ " fill: var(--fill-color);\n",
355
+ " height: 32px;\n",
356
+ " padding: 0;\n",
357
+ " width: 32px;\n",
358
+ " }\n",
359
+ "\n",
360
+ " .colab-df-quickchart:hover {\n",
361
+ " background-color: var(--hover-bg-color);\n",
362
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
363
+ " fill: var(--button-hover-fill-color);\n",
364
+ " }\n",
365
+ "\n",
366
+ " .colab-df-quickchart-complete:disabled,\n",
367
+ " .colab-df-quickchart-complete:disabled:hover {\n",
368
+ " background-color: var(--disabled-bg-color);\n",
369
+ " fill: var(--disabled-fill-color);\n",
370
+ " box-shadow: none;\n",
371
+ " }\n",
372
+ "\n",
373
+ " .colab-df-spinner {\n",
374
+ " border: 2px solid var(--fill-color);\n",
375
+ " border-color: transparent;\n",
376
+ " border-bottom-color: var(--fill-color);\n",
377
+ " animation:\n",
378
+ " spin 1s steps(1) infinite;\n",
379
+ " }\n",
380
+ "\n",
381
+ " @keyframes spin {\n",
382
+ " 0% {\n",
383
+ " border-color: transparent;\n",
384
+ " border-bottom-color: var(--fill-color);\n",
385
+ " border-left-color: var(--fill-color);\n",
386
+ " }\n",
387
+ " 20% {\n",
388
+ " border-color: transparent;\n",
389
+ " border-left-color: var(--fill-color);\n",
390
+ " border-top-color: var(--fill-color);\n",
391
+ " }\n",
392
+ " 30% {\n",
393
+ " border-color: transparent;\n",
394
+ " border-left-color: var(--fill-color);\n",
395
+ " border-top-color: var(--fill-color);\n",
396
+ " border-right-color: var(--fill-color);\n",
397
+ " }\n",
398
+ " 40% {\n",
399
+ " border-color: transparent;\n",
400
+ " border-right-color: var(--fill-color);\n",
401
+ " border-top-color: var(--fill-color);\n",
402
+ " }\n",
403
+ " 60% {\n",
404
+ " border-color: transparent;\n",
405
+ " border-right-color: var(--fill-color);\n",
406
+ " }\n",
407
+ " 80% {\n",
408
+ " border-color: transparent;\n",
409
+ " border-right-color: var(--fill-color);\n",
410
+ " border-bottom-color: var(--fill-color);\n",
411
+ " }\n",
412
+ " 90% {\n",
413
+ " border-color: transparent;\n",
414
+ " border-bottom-color: var(--fill-color);\n",
415
+ " }\n",
416
+ " }\n",
417
+ "</style>\n",
418
+ "\n",
419
+ " <script>\n",
420
+ " async function quickchart(key) {\n",
421
+ " const quickchartButtonEl =\n",
422
+ " document.querySelector('#' + key + ' button');\n",
423
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
424
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
425
+ " try {\n",
426
+ " const charts = await google.colab.kernel.invokeFunction(\n",
427
+ " 'suggestCharts', [key], {});\n",
428
+ " } catch (error) {\n",
429
+ " console.error('Error during call to suggestCharts:', error);\n",
430
+ " }\n",
431
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
432
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
433
+ " }\n",
434
+ " (() => {\n",
435
+ " let quickchartButtonEl =\n",
436
+ " document.querySelector('#df-2582d95f-0015-4b3a-acf0-6b7d28ed75fd button');\n",
437
+ " quickchartButtonEl.style.display =\n",
438
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
439
+ " })();\n",
440
+ " </script>\n",
441
+ " </div>\n",
442
+ "\n",
443
+ " </div>\n",
444
+ " </div>\n"
445
+ ],
446
+ "application/vnd.google.colaboratory.intrinsic+json": {
447
+ "type": "dataframe",
448
+ "variable_name": "raw_df",
449
+ "summary": "{\n \"name\": \"raw_df\",\n \"rows\": 5313,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5313,\n \"samples\": [\n \"2021-08-30\",\n \"2010-05-10\",\n \"2015-06-08\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"1. open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 612.1734512006673,\n \"min\": 85.4,\n \"max\": 3025.0,\n \"num_unique_values\": 5098,\n \"samples\": [\n 2902.94,\n 1440.0,\n 2857.38\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"2. high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 618.2806692169981,\n \"min\": 86.52,\n \"max\": 3030.9315,\n \"num_unique_values\": 5098,\n \"samples\": [\n 2925.075,\n 1442.32,\n 2743.29\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"3. low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 605.6491240554433,\n \"min\": 83.34,\n \"max\": 2977.98,\n \"num_unique_values\": 5151,\n \"samples\": [\n 1105.15,\n 1347.32,\n 537.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"4. close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 612.0915219846729,\n \"min\": 83.43,\n \"max\": 2996.77,\n \"num_unique_values\": 5169,\n \"samples\": [\n 1106.5,\n 950.44,\n 1422.86\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"5. volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12616124.474814428,\n \"min\": 465638.0,\n \"max\": 127747554.0,\n \"num_unique_values\": 5285,\n \"samples\": [\n 2964489.0,\n 4101200.0,\n 29130102.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
450
+ }
451
+ },
452
+ "metadata": {},
453
+ "execution_count": 162
454
+ }
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "source": [
460
+ "raw_df.info()"
461
+ ],
462
+ "metadata": {
463
+ "colab": {
464
+ "base_uri": "https://localhost:8080/"
465
+ },
466
+ "id": "hbTXK0NeRRFt",
467
+ "outputId": "dcff175a-074b-47f3-d63d-878ad94f88c2"
468
+ },
469
+ "execution_count": 163,
470
+ "outputs": [
471
+ {
472
+ "output_type": "stream",
473
+ "name": "stdout",
474
+ "text": [
475
+ "<class 'pandas.core.frame.DataFrame'>\n",
476
+ "RangeIndex: 5313 entries, 0 to 5312\n",
477
+ "Data columns (total 6 columns):\n",
478
+ " # Column Non-Null Count Dtype \n",
479
+ "--- ------ -------------- ----- \n",
480
+ " 0 date 5313 non-null object \n",
481
+ " 1 1. open 5313 non-null float64\n",
482
+ " 2 2. high 5313 non-null float64\n",
483
+ " 3 3. low 5313 non-null float64\n",
484
+ " 4 4. close 5313 non-null float64\n",
485
+ " 5 5. volume 5313 non-null float64\n",
486
+ "dtypes: float64(5), object(1)\n",
487
+ "memory usage: 249.2+ KB\n"
488
+ ]
489
+ }
490
+ ]
491
+ },
492
+ {
493
+ "cell_type": "code",
494
+ "source": [
495
+ "#rename column names\n",
496
+ "raw_df.columns = raw_df.columns.str.replace(r'\\d+\\.\\s+', '', regex=True)\n",
497
+ "display(raw_df.head())"
498
+ ],
499
+ "metadata": {
500
+ "colab": {
501
+ "base_uri": "https://localhost:8080/",
502
+ "height": 206
503
+ },
504
+ "id": "3Nm9EbGORWs_",
505
+ "outputId": "cf0788d5-35ab-4d23-962e-727bea74b6ee"
506
+ },
507
+ "execution_count": 164,
508
+ "outputs": [
509
+ {
510
+ "output_type": "display_data",
511
+ "data": {
512
+ "text/plain": [
513
+ " date open high low close volume\n",
514
+ "0 2025-09-30 242.810 243.2900 239.245 243.10 34724346.0\n",
515
+ "1 2025-09-29 247.850 251.1486 242.770 244.05 32505777.0\n",
516
+ "2 2025-09-26 247.065 249.4200 245.970 246.54 18503194.0\n",
517
+ "3 2025-09-25 244.400 246.4900 240.740 245.79 31020383.0\n",
518
+ "4 2025-09-24 251.660 252.3501 246.440 247.14 28201003.0"
519
+ ],
520
+ "text/html": [
521
+ "\n",
522
+ " <div id=\"df-90441c02-7742-4daa-86eb-4157e38af203\" class=\"colab-df-container\">\n",
523
+ " <div>\n",
524
+ "<style scoped>\n",
525
+ " .dataframe tbody tr th:only-of-type {\n",
526
+ " vertical-align: middle;\n",
527
+ " }\n",
528
+ "\n",
529
+ " .dataframe tbody tr th {\n",
530
+ " vertical-align: top;\n",
531
+ " }\n",
532
+ "\n",
533
+ " .dataframe thead th {\n",
534
+ " text-align: right;\n",
535
+ " }\n",
536
+ "</style>\n",
537
+ "<table border=\"1\" class=\"dataframe\">\n",
538
+ " <thead>\n",
539
+ " <tr style=\"text-align: right;\">\n",
540
+ " <th></th>\n",
541
+ " <th>date</th>\n",
542
+ " <th>open</th>\n",
543
+ " <th>high</th>\n",
544
+ " <th>low</th>\n",
545
+ " <th>close</th>\n",
546
+ " <th>volume</th>\n",
547
+ " </tr>\n",
548
+ " </thead>\n",
549
+ " <tbody>\n",
550
+ " <tr>\n",
551
+ " <th>0</th>\n",
552
+ " <td>2025-09-30</td>\n",
553
+ " <td>242.810</td>\n",
554
+ " <td>243.2900</td>\n",
555
+ " <td>239.245</td>\n",
556
+ " <td>243.10</td>\n",
557
+ " <td>34724346.0</td>\n",
558
+ " </tr>\n",
559
+ " <tr>\n",
560
+ " <th>1</th>\n",
561
+ " <td>2025-09-29</td>\n",
562
+ " <td>247.850</td>\n",
563
+ " <td>251.1486</td>\n",
564
+ " <td>242.770</td>\n",
565
+ " <td>244.05</td>\n",
566
+ " <td>32505777.0</td>\n",
567
+ " </tr>\n",
568
+ " <tr>\n",
569
+ " <th>2</th>\n",
570
+ " <td>2025-09-26</td>\n",
571
+ " <td>247.065</td>\n",
572
+ " <td>249.4200</td>\n",
573
+ " <td>245.970</td>\n",
574
+ " <td>246.54</td>\n",
575
+ " <td>18503194.0</td>\n",
576
+ " </tr>\n",
577
+ " <tr>\n",
578
+ " <th>3</th>\n",
579
+ " <td>2025-09-25</td>\n",
580
+ " <td>244.400</td>\n",
581
+ " <td>246.4900</td>\n",
582
+ " <td>240.740</td>\n",
583
+ " <td>245.79</td>\n",
584
+ " <td>31020383.0</td>\n",
585
+ " </tr>\n",
586
+ " <tr>\n",
587
+ " <th>4</th>\n",
588
+ " <td>2025-09-24</td>\n",
589
+ " <td>251.660</td>\n",
590
+ " <td>252.3501</td>\n",
591
+ " <td>246.440</td>\n",
592
+ " <td>247.14</td>\n",
593
+ " <td>28201003.0</td>\n",
594
+ " </tr>\n",
595
+ " </tbody>\n",
596
+ "</table>\n",
597
+ "</div>\n",
598
+ " <div class=\"colab-df-buttons\">\n",
599
+ "\n",
600
+ " <div class=\"colab-df-container\">\n",
601
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-90441c02-7742-4daa-86eb-4157e38af203')\"\n",
602
+ " title=\"Convert this dataframe to an interactive table.\"\n",
603
+ " style=\"display:none;\">\n",
604
+ "\n",
605
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
606
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
607
+ " </svg>\n",
608
+ " </button>\n",
609
+ "\n",
610
+ " <style>\n",
611
+ " .colab-df-container {\n",
612
+ " display:flex;\n",
613
+ " gap: 12px;\n",
614
+ " }\n",
615
+ "\n",
616
+ " .colab-df-convert {\n",
617
+ " background-color: #E8F0FE;\n",
618
+ " border: none;\n",
619
+ " border-radius: 50%;\n",
620
+ " cursor: pointer;\n",
621
+ " display: none;\n",
622
+ " fill: #1967D2;\n",
623
+ " height: 32px;\n",
624
+ " padding: 0 0 0 0;\n",
625
+ " width: 32px;\n",
626
+ " }\n",
627
+ "\n",
628
+ " .colab-df-convert:hover {\n",
629
+ " background-color: #E2EBFA;\n",
630
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
631
+ " fill: #174EA6;\n",
632
+ " }\n",
633
+ "\n",
634
+ " .colab-df-buttons div {\n",
635
+ " margin-bottom: 4px;\n",
636
+ " }\n",
637
+ "\n",
638
+ " [theme=dark] .colab-df-convert {\n",
639
+ " background-color: #3B4455;\n",
640
+ " fill: #D2E3FC;\n",
641
+ " }\n",
642
+ "\n",
643
+ " [theme=dark] .colab-df-convert:hover {\n",
644
+ " background-color: #434B5C;\n",
645
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
646
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
647
+ " fill: #FFFFFF;\n",
648
+ " }\n",
649
+ " </style>\n",
650
+ "\n",
651
+ " <script>\n",
652
+ " const buttonEl =\n",
653
+ " document.querySelector('#df-90441c02-7742-4daa-86eb-4157e38af203 button.colab-df-convert');\n",
654
+ " buttonEl.style.display =\n",
655
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
656
+ "\n",
657
+ " async function convertToInteractive(key) {\n",
658
+ " const element = document.querySelector('#df-90441c02-7742-4daa-86eb-4157e38af203');\n",
659
+ " const dataTable =\n",
660
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
661
+ " [key], {});\n",
662
+ " if (!dataTable) return;\n",
663
+ "\n",
664
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
665
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
666
+ " + ' to learn more about interactive tables.';\n",
667
+ " element.innerHTML = '';\n",
668
+ " dataTable['output_type'] = 'display_data';\n",
669
+ " await google.colab.output.renderOutput(dataTable, element);\n",
670
+ " const docLink = document.createElement('div');\n",
671
+ " docLink.innerHTML = docLinkHtml;\n",
672
+ " element.appendChild(docLink);\n",
673
+ " }\n",
674
+ " </script>\n",
675
+ " </div>\n",
676
+ "\n",
677
+ "\n",
678
+ " <div id=\"df-d42d1ad5-b465-4a31-b5a5-e8e81ed94a00\">\n",
679
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d42d1ad5-b465-4a31-b5a5-e8e81ed94a00')\"\n",
680
+ " title=\"Suggest charts\"\n",
681
+ " style=\"display:none;\">\n",
682
+ "\n",
683
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
684
+ " width=\"24px\">\n",
685
+ " <g>\n",
686
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
687
+ " </g>\n",
688
+ "</svg>\n",
689
+ " </button>\n",
690
+ "\n",
691
+ "<style>\n",
692
+ " .colab-df-quickchart {\n",
693
+ " --bg-color: #E8F0FE;\n",
694
+ " --fill-color: #1967D2;\n",
695
+ " --hover-bg-color: #E2EBFA;\n",
696
+ " --hover-fill-color: #174EA6;\n",
697
+ " --disabled-fill-color: #AAA;\n",
698
+ " --disabled-bg-color: #DDD;\n",
699
+ " }\n",
700
+ "\n",
701
+ " [theme=dark] .colab-df-quickchart {\n",
702
+ " --bg-color: #3B4455;\n",
703
+ " --fill-color: #D2E3FC;\n",
704
+ " --hover-bg-color: #434B5C;\n",
705
+ " --hover-fill-color: #FFFFFF;\n",
706
+ " --disabled-bg-color: #3B4455;\n",
707
+ " --disabled-fill-color: #666;\n",
708
+ " }\n",
709
+ "\n",
710
+ " .colab-df-quickchart {\n",
711
+ " background-color: var(--bg-color);\n",
712
+ " border: none;\n",
713
+ " border-radius: 50%;\n",
714
+ " cursor: pointer;\n",
715
+ " display: none;\n",
716
+ " fill: var(--fill-color);\n",
717
+ " height: 32px;\n",
718
+ " padding: 0;\n",
719
+ " width: 32px;\n",
720
+ " }\n",
721
+ "\n",
722
+ " .colab-df-quickchart:hover {\n",
723
+ " background-color: var(--hover-bg-color);\n",
724
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
725
+ " fill: var(--button-hover-fill-color);\n",
726
+ " }\n",
727
+ "\n",
728
+ " .colab-df-quickchart-complete:disabled,\n",
729
+ " .colab-df-quickchart-complete:disabled:hover {\n",
730
+ " background-color: var(--disabled-bg-color);\n",
731
+ " fill: var(--disabled-fill-color);\n",
732
+ " box-shadow: none;\n",
733
+ " }\n",
734
+ "\n",
735
+ " .colab-df-spinner {\n",
736
+ " border: 2px solid var(--fill-color);\n",
737
+ " border-color: transparent;\n",
738
+ " border-bottom-color: var(--fill-color);\n",
739
+ " animation:\n",
740
+ " spin 1s steps(1) infinite;\n",
741
+ " }\n",
742
+ "\n",
743
+ " @keyframes spin {\n",
744
+ " 0% {\n",
745
+ " border-color: transparent;\n",
746
+ " border-bottom-color: var(--fill-color);\n",
747
+ " border-left-color: var(--fill-color);\n",
748
+ " }\n",
749
+ " 20% {\n",
750
+ " border-color: transparent;\n",
751
+ " border-left-color: var(--fill-color);\n",
752
+ " border-top-color: var(--fill-color);\n",
753
+ " }\n",
754
+ " 30% {\n",
755
+ " border-color: transparent;\n",
756
+ " border-left-color: var(--fill-color);\n",
757
+ " border-top-color: var(--fill-color);\n",
758
+ " border-right-color: var(--fill-color);\n",
759
+ " }\n",
760
+ " 40% {\n",
761
+ " border-color: transparent;\n",
762
+ " border-right-color: var(--fill-color);\n",
763
+ " border-top-color: var(--fill-color);\n",
764
+ " }\n",
765
+ " 60% {\n",
766
+ " border-color: transparent;\n",
767
+ " border-right-color: var(--fill-color);\n",
768
+ " }\n",
769
+ " 80% {\n",
770
+ " border-color: transparent;\n",
771
+ " border-right-color: var(--fill-color);\n",
772
+ " border-bottom-color: var(--fill-color);\n",
773
+ " }\n",
774
+ " 90% {\n",
775
+ " border-color: transparent;\n",
776
+ " border-bottom-color: var(--fill-color);\n",
777
+ " }\n",
778
+ " }\n",
779
+ "</style>\n",
780
+ "\n",
781
+ " <script>\n",
782
+ " async function quickchart(key) {\n",
783
+ " const quickchartButtonEl =\n",
784
+ " document.querySelector('#' + key + ' button');\n",
785
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
786
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
787
+ " try {\n",
788
+ " const charts = await google.colab.kernel.invokeFunction(\n",
789
+ " 'suggestCharts', [key], {});\n",
790
+ " } catch (error) {\n",
791
+ " console.error('Error during call to suggestCharts:', error);\n",
792
+ " }\n",
793
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
794
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
795
+ " }\n",
796
+ " (() => {\n",
797
+ " let quickchartButtonEl =\n",
798
+ " document.querySelector('#df-d42d1ad5-b465-4a31-b5a5-e8e81ed94a00 button');\n",
799
+ " quickchartButtonEl.style.display =\n",
800
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
801
+ " })();\n",
802
+ " </script>\n",
803
+ " </div>\n",
804
+ "\n",
805
+ " </div>\n",
806
+ " </div>\n"
807
+ ],
808
+ "application/vnd.google.colaboratory.intrinsic+json": {
809
+ "type": "dataframe",
810
+ "summary": "{\n \"name\": \"display(raw_df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"2025-09-29\",\n \"2025-09-24\",\n \"2025-09-26\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.408195270227335,\n \"min\": 242.81,\n \"max\": 251.66,\n \"num_unique_values\": 5,\n \"samples\": [\n 247.85,\n 251.66,\n 247.065\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.669504586180536,\n \"min\": 243.29,\n \"max\": 252.3501,\n \"num_unique_values\": 5,\n \"samples\": [\n 251.1486,\n 252.3501,\n 249.42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.15870147370719,\n \"min\": 239.245,\n \"max\": 246.44,\n \"num_unique_values\": 5,\n \"samples\": [\n 242.77,\n 246.44,\n 245.97\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.7002146923256427,\n \"min\": 243.1,\n \"max\": 247.14,\n \"num_unique_values\": 5,\n \"samples\": [\n 244.05,\n 247.14,\n 246.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6323793.936636005,\n \"min\": 18503194.0,\n \"max\": 34724346.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 32505777.0,\n 28201003.0,\n 18503194.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
811
+ }
812
+ },
813
+ "metadata": {}
814
+ }
815
+ ]
816
+ },
817
+ {
818
+ "cell_type": "code",
819
+ "source": [
820
+ "raw_df.info()"
821
+ ],
822
+ "metadata": {
823
+ "colab": {
824
+ "base_uri": "https://localhost:8080/"
825
+ },
826
+ "id": "eIQqWuNDRktE",
827
+ "outputId": "97c4c0a8-da84-4c0f-d829-2a91c9a5f20e"
828
+ },
829
+ "execution_count": 165,
830
+ "outputs": [
831
+ {
832
+ "output_type": "stream",
833
+ "name": "stdout",
834
+ "text": [
835
+ "<class 'pandas.core.frame.DataFrame'>\n",
836
+ "RangeIndex: 5313 entries, 0 to 5312\n",
837
+ "Data columns (total 6 columns):\n",
838
+ " # Column Non-Null Count Dtype \n",
839
+ "--- ------ -------------- ----- \n",
840
+ " 0 date 5313 non-null object \n",
841
+ " 1 open 5313 non-null float64\n",
842
+ " 2 high 5313 non-null float64\n",
843
+ " 3 low 5313 non-null float64\n",
844
+ " 4 close 5313 non-null float64\n",
845
+ " 5 volume 5313 non-null float64\n",
846
+ "dtypes: float64(5), object(1)\n",
847
+ "memory usage: 249.2+ KB\n"
848
+ ]
849
+ }
850
+ ]
851
+ },
852
+ {
853
+ "cell_type": "code",
854
+ "source": [
855
+ "#using MinMaxScaler to scale columns\n",
856
+ "from sklearn.preprocessing import MinMaxScaler\n",
857
+ "\n",
858
+ "scaler = MinMaxScaler()\n",
859
+ "\n",
860
+ "#columns to scale\n",
861
+ "scale_cols = ['open', 'high', 'low', 'volume']\n",
862
+ "\n",
863
+ "#apply the scaler to the selected columns\n",
864
+ "raw_df[scale_cols] = scaler.fit_transform(raw_df[scale_cols])\n",
865
+ "\n",
866
+ "display(raw_df.head())"
867
+ ],
868
+ "metadata": {
869
+ "colab": {
870
+ "base_uri": "https://localhost:8080/",
871
+ "height": 206
872
+ },
873
+ "id": "NlyQzKFsR7Qi",
874
+ "outputId": "fdfb1f2e-b523-426a-beab-eb30e6c06645"
875
+ },
876
+ "execution_count": 166,
877
+ "outputs": [
878
+ {
879
+ "output_type": "display_data",
880
+ "data": {
881
+ "text/plain": [
882
+ " date open high low close volume\n",
883
+ "0 2025-09-30 0.053548 0.053243 0.053860 243.10 0.269156\n",
884
+ "1 2025-09-29 0.055263 0.055912 0.055078 244.05 0.251726\n",
885
+ "2 2025-09-26 0.054996 0.055325 0.056183 246.54 0.141713\n",
886
+ "3 2025-09-25 0.054089 0.054330 0.054376 245.79 0.240056\n",
887
+ "4 2025-09-24 0.056559 0.056320 0.056346 247.14 0.217905"
888
+ ],
889
+ "text/html": [
890
+ "\n",
891
+ " <div id=\"df-27513fe5-65bd-4d9a-b915-d09ab71583e5\" class=\"colab-df-container\">\n",
892
+ " <div>\n",
893
+ "<style scoped>\n",
894
+ " .dataframe tbody tr th:only-of-type {\n",
895
+ " vertical-align: middle;\n",
896
+ " }\n",
897
+ "\n",
898
+ " .dataframe tbody tr th {\n",
899
+ " vertical-align: top;\n",
900
+ " }\n",
901
+ "\n",
902
+ " .dataframe thead th {\n",
903
+ " text-align: right;\n",
904
+ " }\n",
905
+ "</style>\n",
906
+ "<table border=\"1\" class=\"dataframe\">\n",
907
+ " <thead>\n",
908
+ " <tr style=\"text-align: right;\">\n",
909
+ " <th></th>\n",
910
+ " <th>date</th>\n",
911
+ " <th>open</th>\n",
912
+ " <th>high</th>\n",
913
+ " <th>low</th>\n",
914
+ " <th>close</th>\n",
915
+ " <th>volume</th>\n",
916
+ " </tr>\n",
917
+ " </thead>\n",
918
+ " <tbody>\n",
919
+ " <tr>\n",
920
+ " <th>0</th>\n",
921
+ " <td>2025-09-30</td>\n",
922
+ " <td>0.053548</td>\n",
923
+ " <td>0.053243</td>\n",
924
+ " <td>0.053860</td>\n",
925
+ " <td>243.10</td>\n",
926
+ " <td>0.269156</td>\n",
927
+ " </tr>\n",
928
+ " <tr>\n",
929
+ " <th>1</th>\n",
930
+ " <td>2025-09-29</td>\n",
931
+ " <td>0.055263</td>\n",
932
+ " <td>0.055912</td>\n",
933
+ " <td>0.055078</td>\n",
934
+ " <td>244.05</td>\n",
935
+ " <td>0.251726</td>\n",
936
+ " </tr>\n",
937
+ " <tr>\n",
938
+ " <th>2</th>\n",
939
+ " <td>2025-09-26</td>\n",
940
+ " <td>0.054996</td>\n",
941
+ " <td>0.055325</td>\n",
942
+ " <td>0.056183</td>\n",
943
+ " <td>246.54</td>\n",
944
+ " <td>0.141713</td>\n",
945
+ " </tr>\n",
946
+ " <tr>\n",
947
+ " <th>3</th>\n",
948
+ " <td>2025-09-25</td>\n",
949
+ " <td>0.054089</td>\n",
950
+ " <td>0.054330</td>\n",
951
+ " <td>0.054376</td>\n",
952
+ " <td>245.79</td>\n",
953
+ " <td>0.240056</td>\n",
954
+ " </tr>\n",
955
+ " <tr>\n",
956
+ " <th>4</th>\n",
957
+ " <td>2025-09-24</td>\n",
958
+ " <td>0.056559</td>\n",
959
+ " <td>0.056320</td>\n",
960
+ " <td>0.056346</td>\n",
961
+ " <td>247.14</td>\n",
962
+ " <td>0.217905</td>\n",
963
+ " </tr>\n",
964
+ " </tbody>\n",
965
+ "</table>\n",
966
+ "</div>\n",
967
+ " <div class=\"colab-df-buttons\">\n",
968
+ "\n",
969
+ " <div class=\"colab-df-container\">\n",
970
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-27513fe5-65bd-4d9a-b915-d09ab71583e5')\"\n",
971
+ " title=\"Convert this dataframe to an interactive table.\"\n",
972
+ " style=\"display:none;\">\n",
973
+ "\n",
974
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
975
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
976
+ " </svg>\n",
977
+ " </button>\n",
978
+ "\n",
979
+ " <style>\n",
980
+ " .colab-df-container {\n",
981
+ " display:flex;\n",
982
+ " gap: 12px;\n",
983
+ " }\n",
984
+ "\n",
985
+ " .colab-df-convert {\n",
986
+ " background-color: #E8F0FE;\n",
987
+ " border: none;\n",
988
+ " border-radius: 50%;\n",
989
+ " cursor: pointer;\n",
990
+ " display: none;\n",
991
+ " fill: #1967D2;\n",
992
+ " height: 32px;\n",
993
+ " padding: 0 0 0 0;\n",
994
+ " width: 32px;\n",
995
+ " }\n",
996
+ "\n",
997
+ " .colab-df-convert:hover {\n",
998
+ " background-color: #E2EBFA;\n",
999
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1000
+ " fill: #174EA6;\n",
1001
+ " }\n",
1002
+ "\n",
1003
+ " .colab-df-buttons div {\n",
1004
+ " margin-bottom: 4px;\n",
1005
+ " }\n",
1006
+ "\n",
1007
+ " [theme=dark] .colab-df-convert {\n",
1008
+ " background-color: #3B4455;\n",
1009
+ " fill: #D2E3FC;\n",
1010
+ " }\n",
1011
+ "\n",
1012
+ " [theme=dark] .colab-df-convert:hover {\n",
1013
+ " background-color: #434B5C;\n",
1014
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1015
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1016
+ " fill: #FFFFFF;\n",
1017
+ " }\n",
1018
+ " </style>\n",
1019
+ "\n",
1020
+ " <script>\n",
1021
+ " const buttonEl =\n",
1022
+ " document.querySelector('#df-27513fe5-65bd-4d9a-b915-d09ab71583e5 button.colab-df-convert');\n",
1023
+ " buttonEl.style.display =\n",
1024
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1025
+ "\n",
1026
+ " async function convertToInteractive(key) {\n",
1027
+ " const element = document.querySelector('#df-27513fe5-65bd-4d9a-b915-d09ab71583e5');\n",
1028
+ " const dataTable =\n",
1029
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1030
+ " [key], {});\n",
1031
+ " if (!dataTable) return;\n",
1032
+ "\n",
1033
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1034
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1035
+ " + ' to learn more about interactive tables.';\n",
1036
+ " element.innerHTML = '';\n",
1037
+ " dataTable['output_type'] = 'display_data';\n",
1038
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1039
+ " const docLink = document.createElement('div');\n",
1040
+ " docLink.innerHTML = docLinkHtml;\n",
1041
+ " element.appendChild(docLink);\n",
1042
+ " }\n",
1043
+ " </script>\n",
1044
+ " </div>\n",
1045
+ "\n",
1046
+ "\n",
1047
+ " <div id=\"df-8c9e1918-5f7b-4310-a63f-40ab3ca3b792\">\n",
1048
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-8c9e1918-5f7b-4310-a63f-40ab3ca3b792')\"\n",
1049
+ " title=\"Suggest charts\"\n",
1050
+ " style=\"display:none;\">\n",
1051
+ "\n",
1052
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
1053
+ " width=\"24px\">\n",
1054
+ " <g>\n",
1055
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
1056
+ " </g>\n",
1057
+ "</svg>\n",
1058
+ " </button>\n",
1059
+ "\n",
1060
+ "<style>\n",
1061
+ " .colab-df-quickchart {\n",
1062
+ " --bg-color: #E8F0FE;\n",
1063
+ " --fill-color: #1967D2;\n",
1064
+ " --hover-bg-color: #E2EBFA;\n",
1065
+ " --hover-fill-color: #174EA6;\n",
1066
+ " --disabled-fill-color: #AAA;\n",
1067
+ " --disabled-bg-color: #DDD;\n",
1068
+ " }\n",
1069
+ "\n",
1070
+ " [theme=dark] .colab-df-quickchart {\n",
1071
+ " --bg-color: #3B4455;\n",
1072
+ " --fill-color: #D2E3FC;\n",
1073
+ " --hover-bg-color: #434B5C;\n",
1074
+ " --hover-fill-color: #FFFFFF;\n",
1075
+ " --disabled-bg-color: #3B4455;\n",
1076
+ " --disabled-fill-color: #666;\n",
1077
+ " }\n",
1078
+ "\n",
1079
+ " .colab-df-quickchart {\n",
1080
+ " background-color: var(--bg-color);\n",
1081
+ " border: none;\n",
1082
+ " border-radius: 50%;\n",
1083
+ " cursor: pointer;\n",
1084
+ " display: none;\n",
1085
+ " fill: var(--fill-color);\n",
1086
+ " height: 32px;\n",
1087
+ " padding: 0;\n",
1088
+ " width: 32px;\n",
1089
+ " }\n",
1090
+ "\n",
1091
+ " .colab-df-quickchart:hover {\n",
1092
+ " background-color: var(--hover-bg-color);\n",
1093
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1094
+ " fill: var(--button-hover-fill-color);\n",
1095
+ " }\n",
1096
+ "\n",
1097
+ " .colab-df-quickchart-complete:disabled,\n",
1098
+ " .colab-df-quickchart-complete:disabled:hover {\n",
1099
+ " background-color: var(--disabled-bg-color);\n",
1100
+ " fill: var(--disabled-fill-color);\n",
1101
+ " box-shadow: none;\n",
1102
+ " }\n",
1103
+ "\n",
1104
+ " .colab-df-spinner {\n",
1105
+ " border: 2px solid var(--fill-color);\n",
1106
+ " border-color: transparent;\n",
1107
+ " border-bottom-color: var(--fill-color);\n",
1108
+ " animation:\n",
1109
+ " spin 1s steps(1) infinite;\n",
1110
+ " }\n",
1111
+ "\n",
1112
+ " @keyframes spin {\n",
1113
+ " 0% {\n",
1114
+ " border-color: transparent;\n",
1115
+ " border-bottom-color: var(--fill-color);\n",
1116
+ " border-left-color: var(--fill-color);\n",
1117
+ " }\n",
1118
+ " 20% {\n",
1119
+ " border-color: transparent;\n",
1120
+ " border-left-color: var(--fill-color);\n",
1121
+ " border-top-color: var(--fill-color);\n",
1122
+ " }\n",
1123
+ " 30% {\n",
1124
+ " border-color: transparent;\n",
1125
+ " border-left-color: var(--fill-color);\n",
1126
+ " border-top-color: var(--fill-color);\n",
1127
+ " border-right-color: var(--fill-color);\n",
1128
+ " }\n",
1129
+ " 40% {\n",
1130
+ " border-color: transparent;\n",
1131
+ " border-right-color: var(--fill-color);\n",
1132
+ " border-top-color: var(--fill-color);\n",
1133
+ " }\n",
1134
+ " 60% {\n",
1135
+ " border-color: transparent;\n",
1136
+ " border-right-color: var(--fill-color);\n",
1137
+ " }\n",
1138
+ " 80% {\n",
1139
+ " border-color: transparent;\n",
1140
+ " border-right-color: var(--fill-color);\n",
1141
+ " border-bottom-color: var(--fill-color);\n",
1142
+ " }\n",
1143
+ " 90% {\n",
1144
+ " border-color: transparent;\n",
1145
+ " border-bottom-color: var(--fill-color);\n",
1146
+ " }\n",
1147
+ " }\n",
1148
+ "</style>\n",
1149
+ "\n",
1150
+ " <script>\n",
1151
+ " async function quickchart(key) {\n",
1152
+ " const quickchartButtonEl =\n",
1153
+ " document.querySelector('#' + key + ' button');\n",
1154
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
1155
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
1156
+ " try {\n",
1157
+ " const charts = await google.colab.kernel.invokeFunction(\n",
1158
+ " 'suggestCharts', [key], {});\n",
1159
+ " } catch (error) {\n",
1160
+ " console.error('Error during call to suggestCharts:', error);\n",
1161
+ " }\n",
1162
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
1163
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
1164
+ " }\n",
1165
+ " (() => {\n",
1166
+ " let quickchartButtonEl =\n",
1167
+ " document.querySelector('#df-8c9e1918-5f7b-4310-a63f-40ab3ca3b792 button');\n",
1168
+ " quickchartButtonEl.style.display =\n",
1169
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1170
+ " })();\n",
1171
+ " </script>\n",
1172
+ " </div>\n",
1173
+ "\n",
1174
+ " </div>\n",
1175
+ " </div>\n"
1176
+ ],
1177
+ "application/vnd.google.colaboratory.intrinsic+json": {
1178
+ "type": "dataframe",
1179
+ "summary": "{\n \"name\": \"display(raw_df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"date\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"2025-09-29\",\n \"2025-09-24\",\n \"2025-09-26\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.001159407834476571,\n \"min\": 0.05354810178255545,\n \"max\": 0.056558715471492715,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.05526262076472989,\n 0.056558715471492715,\n 0.054995577629609466\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0012462607846017927,\n \"min\": 0.05324323723093731,\n \"max\": 0.056320286753397064,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.05591222558395795,\n 0.056320286753397064,\n 0.055325147317214315\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0010912242882386725,\n \"min\": 0.05385989276732167,\n \"max\": 0.05634552137744245,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.05507766077992428,\n 0.05634552137744245,\n 0.05618315230909543\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.7002146923256427,\n \"min\": 243.1,\n \"max\": 247.14,\n \"num_unique_values\": 5,\n \"samples\": [\n 244.05,\n 247.14,\n 246.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0496833653622562,\n \"min\": 0.14171342298147055,\n \"max\": 0.2691561305535344,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.25172577540394664,\n 0.21790499288209964,\n 0.14171342298147055\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
1180
+ }
1181
+ },
1182
+ "metadata": {}
1183
+ }
1184
+ ]
1185
+ },
1186
+ {
1187
+ "cell_type": "code",
1188
+ "source": [
1189
+ "from sklearn.model_selection import train_test_split\n",
1190
+ "\n",
1191
+ "#splitting train & test df\n",
1192
+ "X_train, X_test = train_test_split(raw_df, test_size=0.2, random_state=42)\n"
1193
+ ],
1194
+ "metadata": {
1195
+ "id": "qE759hHiTbgY"
1196
+ },
1197
+ "execution_count": 167,
1198
+ "outputs": []
1199
+ },
1200
+ {
1201
+ "cell_type": "code",
1202
+ "source": [
1203
+ "#initializing target column\n",
1204
+ "train_target_col = X_train['close']\n",
1205
+ "test_target_col = X_test['close']"
1206
+ ],
1207
+ "metadata": {
1208
+ "id": "kHIYVx0zRuqw"
1209
+ },
1210
+ "execution_count": 168,
1211
+ "outputs": []
1212
+ },
1213
+ {
1214
+ "cell_type": "code",
1215
+ "source": [
1216
+ "X_train = X_train.drop('date', axis=1)\n",
1217
+ "X_train = X_train.drop('close', axis=1)\n",
1218
+ "X_test = X_test.drop('date', axis=1)\n",
1219
+ "X_test = X_test.drop('close', axis=1)"
1220
+ ],
1221
+ "metadata": {
1222
+ "id": "5QLRQu9zUkdE"
1223
+ },
1224
+ "execution_count": 169,
1225
+ "outputs": []
1226
+ },
1227
+ {
1228
+ "cell_type": "code",
1229
+ "source": [
1230
+ "from xgboost import XGBRegressor"
1231
+ ],
1232
+ "metadata": {
1233
+ "id": "Xr4-_Ma_UmMl"
1234
+ },
1235
+ "execution_count": 170,
1236
+ "outputs": []
1237
+ },
1238
+ {
1239
+ "cell_type": "code",
1240
+ "source": [
1241
+ "#creating a function that evaluates the model\n",
1242
+ "from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n",
1243
+ "\n",
1244
+ "def xgb_model_evaluation(df, target_col):\n",
1245
+ "\n",
1246
+ " # Make predictions on the given feature set\n",
1247
+ " predictions = xgb_model.predict(df)\n",
1248
+ "\n",
1249
+ " # Calculate RMSE\n",
1250
+ " rmse = np.sqrt(mean_squared_error(target_col, predictions))\n",
1251
+ " print(f\"RMSE: {rmse}\")\n",
1252
+ "\n",
1253
+ " # Calculate MAPE\n",
1254
+ " mape = mean_absolute_percentage_error(target_col, predictions)\n",
1255
+ " print(f\"MAPE: {mape}\")"
1256
+ ],
1257
+ "metadata": {
1258
+ "id": "lQWEfUtfYu8V"
1259
+ },
1260
+ "execution_count": 171,
1261
+ "outputs": []
1262
+ },
1263
+ {
1264
+ "cell_type": "code",
1265
+ "source": [
1266
+ "%%time\n",
1267
+ "xgb_model = XGBRegressor(n_estimators=1500, learning_rate=0.01, n_jobs=25, random_state=47, max_depth=16).fit(X_train, train_target_col)"
1268
+ ],
1269
+ "metadata": {
1270
+ "colab": {
1271
+ "base_uri": "https://localhost:8080/"
1272
+ },
1273
+ "id": "7rj5tIMZVaUT",
1274
+ "outputId": "144efc80-4833-4752-d9a5-13566a4f1d3f"
1275
+ },
1276
+ "execution_count": 172,
1277
+ "outputs": [
1278
+ {
1279
+ "output_type": "stream",
1280
+ "name": "stdout",
1281
+ "text": [
1282
+ "CPU times: user 48.1 s, sys: 476 ms, total: 48.6 s\n",
1283
+ "Wall time: 36.1 s\n"
1284
+ ]
1285
+ }
1286
+ ]
1287
+ },
1288
+ {
1289
+ "cell_type": "code",
1290
+ "source": [
1291
+ "xgb_model_evaluation(X_train, train_target_col)"
1292
+ ],
1293
+ "metadata": {
1294
+ "colab": {
1295
+ "base_uri": "https://localhost:8080/"
1296
+ },
1297
+ "id": "NlhOaQxSZVH0",
1298
+ "outputId": "69cf251d-1dd3-44ce-e467-bec7a4378a83"
1299
+ },
1300
+ "execution_count": 173,
1301
+ "outputs": [
1302
+ {
1303
+ "output_type": "stream",
1304
+ "name": "stdout",
1305
+ "text": [
1306
+ "RMSE: 0.8930120580153202\n",
1307
+ "MAPE: 0.0009947761695732408\n"
1308
+ ]
1309
+ }
1310
+ ]
1311
+ },
1312
+ {
1313
+ "cell_type": "code",
1314
+ "source": [
1315
+ "xgb_model_evaluation(X_test, test_target_col)"
1316
+ ],
1317
+ "metadata": {
1318
+ "colab": {
1319
+ "base_uri": "https://localhost:8080/"
1320
+ },
1321
+ "id": "wnmCmaVpZXJ8",
1322
+ "outputId": "8f89c10f-2789-41ff-94d8-5bb6ba27a666"
1323
+ },
1324
+ "execution_count": 174,
1325
+ "outputs": [
1326
+ {
1327
+ "output_type": "stream",
1328
+ "name": "stdout",
1329
+ "text": [
1330
+ "RMSE: 11.429648677781845\n",
1331
+ "MAPE: 0.007749489179507017\n"
1332
+ ]
1333
+ }
1334
+ ]
1335
+ },
1336
+ {
1337
+ "cell_type": "code",
1338
+ "source": [
1339
+ "import joblib\n",
1340
+ "\n",
1341
+ "# Define the filename for the model\n",
1342
+ "model_filename = 'xgb_regressor_model.joblib'\n",
1343
+ "\n",
1344
+ "# Save the model to the file\n",
1345
+ "joblib.dump(xgb_model, model_filename)\n",
1346
+ "\n",
1347
+ "print(f\"Model saved to {model_filename}\")"
1348
+ ],
1349
+ "metadata": {
1350
+ "colab": {
1351
+ "base_uri": "https://localhost:8080/"
1352
+ },
1353
+ "id": "heGYy9dbqrKV",
1354
+ "outputId": "09c92760-379a-4b2c-ea6b-ff2fccf020c5"
1355
+ },
1356
+ "execution_count": 175,
1357
+ "outputs": [
1358
+ {
1359
+ "output_type": "stream",
1360
+ "name": "stdout",
1361
+ "text": [
1362
+ "Model saved to xgb_regressor_model.joblib\n"
1363
+ ]
1364
+ }
1365
+ ]
1366
+ },
1367
+ {
1368
+ "cell_type": "code",
1369
+ "source": [],
1370
+ "metadata": {
1371
+ "id": "2cdNDOOxpx6j"
1372
+ },
1373
+ "execution_count": 175,
1374
+ "outputs": []
1375
+ }
1376
+ ]
1377
+ }