Parishri07 committed on
Commit
39692e8
·
verified ·
1 Parent(s): 6e6ed80

Upload 2 files

Browse files
src/notebooks/Creating_Dataset.ipynb ADDED
@@ -0,0 +1,492 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "metadata": {
21
+ "colab": {
22
+ "base_uri": "https://localhost:8080/",
23
+ "height": 244
24
+ },
25
+ "id": "5KvqZbgdpv6x",
26
+ "outputId": "6aab18aa-aa15-4adf-c147-c4c3b32e674f"
27
+ },
28
+ "outputs": [
29
+ {
30
+ "output_type": "stream",
31
+ "name": "stdout",
32
+ "text": [
33
+ "Dataset generated and saved as synthetic_carbon_footprint.csv\n"
34
+ ]
35
+ },
36
+ {
37
+ "output_type": "execute_result",
38
+ "data": {
39
+ "text/plain": [
40
+ " car_km_per_year public_transport_km_per_year flights_per_year \\\n",
41
+ "0 15795 9917 6 \n",
42
+ "1 860 7574 8 \n",
43
+ "2 5390 1689 5 \n",
44
+ "3 11964 3267 9 \n",
45
+ "4 11284 4406 0 \n",
46
+ "\n",
47
+ " electricity_kwh_per_year natural_gas_m3_per_year \\\n",
48
+ "0 1067 1526 \n",
49
+ "1 4836 1877 \n",
50
+ "2 4993 1699 \n",
51
+ "3 3506 1029 \n",
52
+ "4 2537 499 \n",
53
+ "\n",
54
+ " renewable_energy_percentage diet_type meat_kg_per_year \\\n",
55
+ "0 18 vegetarian 49 \n",
56
+ "1 76 non_vegetarian 39 \n",
57
+ "2 28 non_vegetarian 94 \n",
58
+ "3 60 non_vegetarian 2 \n",
59
+ "4 69 vegan 16 \n",
60
+ "\n",
61
+ " waste_kg_per_year recycling_rate house_size_m2 num_people_household \\\n",
62
+ "0 475 75 181 4 \n",
63
+ "1 154 46 162 2 \n",
64
+ "2 677 7 116 5 \n",
65
+ "3 838 53 72 3 \n",
66
+ "4 125 8 164 1 \n",
67
+ "\n",
68
+ " carbon_footprint_kgCO2_per_year \n",
69
+ "0 9519.570 \n",
70
+ "1 8087.708 \n",
71
+ "2 11279.228 \n",
72
+ "3 8328.298 \n",
73
+ "4 4161.735 "
74
+ ],
75
+ "text/html": [
76
+ "\n",
77
+ " <div id=\"df-edf8ae39-8746-4d33-bd1a-1a3df1ed68bd\" class=\"colab-df-container\">\n",
78
+ " <div>\n",
79
+ "<style scoped>\n",
80
+ " .dataframe tbody tr th:only-of-type {\n",
81
+ " vertical-align: middle;\n",
82
+ " }\n",
83
+ "\n",
84
+ " .dataframe tbody tr th {\n",
85
+ " vertical-align: top;\n",
86
+ " }\n",
87
+ "\n",
88
+ " .dataframe thead th {\n",
89
+ " text-align: right;\n",
90
+ " }\n",
91
+ "</style>\n",
92
+ "<table border=\"1\" class=\"dataframe\">\n",
93
+ " <thead>\n",
94
+ " <tr style=\"text-align: right;\">\n",
95
+ " <th></th>\n",
96
+ " <th>car_km_per_year</th>\n",
97
+ " <th>public_transport_km_per_year</th>\n",
98
+ " <th>flights_per_year</th>\n",
99
+ " <th>electricity_kwh_per_year</th>\n",
100
+ " <th>natural_gas_m3_per_year</th>\n",
101
+ " <th>renewable_energy_percentage</th>\n",
102
+ " <th>diet_type</th>\n",
103
+ " <th>meat_kg_per_year</th>\n",
104
+ " <th>waste_kg_per_year</th>\n",
105
+ " <th>recycling_rate</th>\n",
106
+ " <th>house_size_m2</th>\n",
107
+ " <th>num_people_household</th>\n",
108
+ " <th>carbon_footprint_kgCO2_per_year</th>\n",
109
+ " </tr>\n",
110
+ " </thead>\n",
111
+ " <tbody>\n",
112
+ " <tr>\n",
113
+ " <th>0</th>\n",
114
+ " <td>15795</td>\n",
115
+ " <td>9917</td>\n",
116
+ " <td>6</td>\n",
117
+ " <td>1067</td>\n",
118
+ " <td>1526</td>\n",
119
+ " <td>18</td>\n",
120
+ " <td>vegetarian</td>\n",
121
+ " <td>49</td>\n",
122
+ " <td>475</td>\n",
123
+ " <td>75</td>\n",
124
+ " <td>181</td>\n",
125
+ " <td>4</td>\n",
126
+ " <td>9519.570</td>\n",
127
+ " </tr>\n",
128
+ " <tr>\n",
129
+ " <th>1</th>\n",
130
+ " <td>860</td>\n",
131
+ " <td>7574</td>\n",
132
+ " <td>8</td>\n",
133
+ " <td>4836</td>\n",
134
+ " <td>1877</td>\n",
135
+ " <td>76</td>\n",
136
+ " <td>non_vegetarian</td>\n",
137
+ " <td>39</td>\n",
138
+ " <td>154</td>\n",
139
+ " <td>46</td>\n",
140
+ " <td>162</td>\n",
141
+ " <td>2</td>\n",
142
+ " <td>8087.708</td>\n",
143
+ " </tr>\n",
144
+ " <tr>\n",
145
+ " <th>2</th>\n",
146
+ " <td>5390</td>\n",
147
+ " <td>1689</td>\n",
148
+ " <td>5</td>\n",
149
+ " <td>4993</td>\n",
150
+ " <td>1699</td>\n",
151
+ " <td>28</td>\n",
152
+ " <td>non_vegetarian</td>\n",
153
+ " <td>94</td>\n",
154
+ " <td>677</td>\n",
155
+ " <td>7</td>\n",
156
+ " <td>116</td>\n",
157
+ " <td>5</td>\n",
158
+ " <td>11279.228</td>\n",
159
+ " </tr>\n",
160
+ " <tr>\n",
161
+ " <th>3</th>\n",
162
+ " <td>11964</td>\n",
163
+ " <td>3267</td>\n",
164
+ " <td>9</td>\n",
165
+ " <td>3506</td>\n",
166
+ " <td>1029</td>\n",
167
+ " <td>60</td>\n",
168
+ " <td>non_vegetarian</td>\n",
169
+ " <td>2</td>\n",
170
+ " <td>838</td>\n",
171
+ " <td>53</td>\n",
172
+ " <td>72</td>\n",
173
+ " <td>3</td>\n",
174
+ " <td>8328.298</td>\n",
175
+ " </tr>\n",
176
+ " <tr>\n",
177
+ " <th>4</th>\n",
178
+ " <td>11284</td>\n",
179
+ " <td>4406</td>\n",
180
+ " <td>0</td>\n",
181
+ " <td>2537</td>\n",
182
+ " <td>499</td>\n",
183
+ " <td>69</td>\n",
184
+ " <td>vegan</td>\n",
185
+ " <td>16</td>\n",
186
+ " <td>125</td>\n",
187
+ " <td>8</td>\n",
188
+ " <td>164</td>\n",
189
+ " <td>1</td>\n",
190
+ " <td>4161.735</td>\n",
191
+ " </tr>\n",
192
+ " </tbody>\n",
193
+ "</table>\n",
194
+ "</div>\n",
195
+ " <div class=\"colab-df-buttons\">\n",
196
+ "\n",
197
+ " <div class=\"colab-df-container\">\n",
198
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-edf8ae39-8746-4d33-bd1a-1a3df1ed68bd')\"\n",
199
+ " title=\"Convert this dataframe to an interactive table.\"\n",
200
+ " style=\"display:none;\">\n",
201
+ "\n",
202
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
203
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
204
+ " </svg>\n",
205
+ " </button>\n",
206
+ "\n",
207
+ " <style>\n",
208
+ " .colab-df-container {\n",
209
+ " display:flex;\n",
210
+ " gap: 12px;\n",
211
+ " }\n",
212
+ "\n",
213
+ " .colab-df-convert {\n",
214
+ " background-color: #E8F0FE;\n",
215
+ " border: none;\n",
216
+ " border-radius: 50%;\n",
217
+ " cursor: pointer;\n",
218
+ " display: none;\n",
219
+ " fill: #1967D2;\n",
220
+ " height: 32px;\n",
221
+ " padding: 0 0 0 0;\n",
222
+ " width: 32px;\n",
223
+ " }\n",
224
+ "\n",
225
+ " .colab-df-convert:hover {\n",
226
+ " background-color: #E2EBFA;\n",
227
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
228
+ " fill: #174EA6;\n",
229
+ " }\n",
230
+ "\n",
231
+ " .colab-df-buttons div {\n",
232
+ " margin-bottom: 4px;\n",
233
+ " }\n",
234
+ "\n",
235
+ " [theme=dark] .colab-df-convert {\n",
236
+ " background-color: #3B4455;\n",
237
+ " fill: #D2E3FC;\n",
238
+ " }\n",
239
+ "\n",
240
+ " [theme=dark] .colab-df-convert:hover {\n",
241
+ " background-color: #434B5C;\n",
242
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
243
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
244
+ " fill: #FFFFFF;\n",
245
+ " }\n",
246
+ " </style>\n",
247
+ "\n",
248
+ " <script>\n",
249
+ " const buttonEl =\n",
250
+ " document.querySelector('#df-edf8ae39-8746-4d33-bd1a-1a3df1ed68bd button.colab-df-convert');\n",
251
+ " buttonEl.style.display =\n",
252
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
253
+ "\n",
254
+ " async function convertToInteractive(key) {\n",
255
+ " const element = document.querySelector('#df-edf8ae39-8746-4d33-bd1a-1a3df1ed68bd');\n",
256
+ " const dataTable =\n",
257
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
258
+ " [key], {});\n",
259
+ " if (!dataTable) return;\n",
260
+ "\n",
261
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
262
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
263
+ " + ' to learn more about interactive tables.';\n",
264
+ " element.innerHTML = '';\n",
265
+ " dataTable['output_type'] = 'display_data';\n",
266
+ " await google.colab.output.renderOutput(dataTable, element);\n",
267
+ " const docLink = document.createElement('div');\n",
268
+ " docLink.innerHTML = docLinkHtml;\n",
269
+ " element.appendChild(docLink);\n",
270
+ " }\n",
271
+ " </script>\n",
272
+ " </div>\n",
273
+ "\n",
274
+ "\n",
275
+ " <div id=\"df-d96eea76-1da5-4c18-8114-eb8c9c0821be\">\n",
276
+ " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d96eea76-1da5-4c18-8114-eb8c9c0821be')\"\n",
277
+ " title=\"Suggest charts\"\n",
278
+ " style=\"display:none;\">\n",
279
+ "\n",
280
+ "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
281
+ " width=\"24px\">\n",
282
+ " <g>\n",
283
+ " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
284
+ " </g>\n",
285
+ "</svg>\n",
286
+ " </button>\n",
287
+ "\n",
288
+ "<style>\n",
289
+ " .colab-df-quickchart {\n",
290
+ " --bg-color: #E8F0FE;\n",
291
+ " --fill-color: #1967D2;\n",
292
+ " --hover-bg-color: #E2EBFA;\n",
293
+ " --hover-fill-color: #174EA6;\n",
294
+ " --disabled-fill-color: #AAA;\n",
295
+ " --disabled-bg-color: #DDD;\n",
296
+ " }\n",
297
+ "\n",
298
+ " [theme=dark] .colab-df-quickchart {\n",
299
+ " --bg-color: #3B4455;\n",
300
+ " --fill-color: #D2E3FC;\n",
301
+ " --hover-bg-color: #434B5C;\n",
302
+ " --hover-fill-color: #FFFFFF;\n",
303
+ " --disabled-bg-color: #3B4455;\n",
304
+ " --disabled-fill-color: #666;\n",
305
+ " }\n",
306
+ "\n",
307
+ " .colab-df-quickchart {\n",
308
+ " background-color: var(--bg-color);\n",
309
+ " border: none;\n",
310
+ " border-radius: 50%;\n",
311
+ " cursor: pointer;\n",
312
+ " display: none;\n",
313
+ " fill: var(--fill-color);\n",
314
+ " height: 32px;\n",
315
+ " padding: 0;\n",
316
+ " width: 32px;\n",
317
+ " }\n",
318
+ "\n",
319
+ " .colab-df-quickchart:hover {\n",
320
+ " background-color: var(--hover-bg-color);\n",
321
+ " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
322
+ " fill: var(--button-hover-fill-color);\n",
323
+ " }\n",
324
+ "\n",
325
+ " .colab-df-quickchart-complete:disabled,\n",
326
+ " .colab-df-quickchart-complete:disabled:hover {\n",
327
+ " background-color: var(--disabled-bg-color);\n",
328
+ " fill: var(--disabled-fill-color);\n",
329
+ " box-shadow: none;\n",
330
+ " }\n",
331
+ "\n",
332
+ " .colab-df-spinner {\n",
333
+ " border: 2px solid var(--fill-color);\n",
334
+ " border-color: transparent;\n",
335
+ " border-bottom-color: var(--fill-color);\n",
336
+ " animation:\n",
337
+ " spin 1s steps(1) infinite;\n",
338
+ " }\n",
339
+ "\n",
340
+ " @keyframes spin {\n",
341
+ " 0% {\n",
342
+ " border-color: transparent;\n",
343
+ " border-bottom-color: var(--fill-color);\n",
344
+ " border-left-color: var(--fill-color);\n",
345
+ " }\n",
346
+ " 20% {\n",
347
+ " border-color: transparent;\n",
348
+ " border-left-color: var(--fill-color);\n",
349
+ " border-top-color: var(--fill-color);\n",
350
+ " }\n",
351
+ " 30% {\n",
352
+ " border-color: transparent;\n",
353
+ " border-left-color: var(--fill-color);\n",
354
+ " border-top-color: var(--fill-color);\n",
355
+ " border-right-color: var(--fill-color);\n",
356
+ " }\n",
357
+ " 40% {\n",
358
+ " border-color: transparent;\n",
359
+ " border-right-color: var(--fill-color);\n",
360
+ " border-top-color: var(--fill-color);\n",
361
+ " }\n",
362
+ " 60% {\n",
363
+ " border-color: transparent;\n",
364
+ " border-right-color: var(--fill-color);\n",
365
+ " }\n",
366
+ " 80% {\n",
367
+ " border-color: transparent;\n",
368
+ " border-right-color: var(--fill-color);\n",
369
+ " border-bottom-color: var(--fill-color);\n",
370
+ " }\n",
371
+ " 90% {\n",
372
+ " border-color: transparent;\n",
373
+ " border-bottom-color: var(--fill-color);\n",
374
+ " }\n",
375
+ " }\n",
376
+ "</style>\n",
377
+ "\n",
378
+ " <script>\n",
379
+ " async function quickchart(key) {\n",
380
+ " const quickchartButtonEl =\n",
381
+ " document.querySelector('#' + key + ' button');\n",
382
+ " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
383
+ " quickchartButtonEl.classList.add('colab-df-spinner');\n",
384
+ " try {\n",
385
+ " const charts = await google.colab.kernel.invokeFunction(\n",
386
+ " 'suggestCharts', [key], {});\n",
387
+ " } catch (error) {\n",
388
+ " console.error('Error during call to suggestCharts:', error);\n",
389
+ " }\n",
390
+ " quickchartButtonEl.classList.remove('colab-df-spinner');\n",
391
+ " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
392
+ " }\n",
393
+ " (() => {\n",
394
+ " let quickchartButtonEl =\n",
395
+ " document.querySelector('#df-d96eea76-1da5-4c18-8114-eb8c9c0821be button');\n",
396
+ " quickchartButtonEl.style.display =\n",
397
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
398
+ " })();\n",
399
+ " </script>\n",
400
+ " </div>\n",
401
+ "\n",
402
+ " </div>\n",
403
+ " </div>\n"
404
+ ],
405
+ "application/vnd.google.colaboratory.intrinsic+json": {
406
+ "type": "dataframe",
407
+ "variable_name": "df",
408
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 2000,\n \"fields\": [\n {\n \"column\": \"car_km_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5778,\n \"min\": 9,\n \"max\": 19994,\n \"num_unique_values\": 1890,\n \"samples\": [\n 7832,\n 15149,\n 4431\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"public_transport_km_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2848,\n \"min\": 2,\n \"max\": 9984,\n \"num_unique_values\": 1807,\n \"samples\": [\n 5445,\n 6525,\n 9302\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flights_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 0,\n \"max\": 9,\n \"num_unique_values\": 10,\n \"samples\": [\n 7,\n 8,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"electricity_kwh_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1443,\n \"min\": 1004,\n \"max\": 5995,\n \"num_unique_values\": 1660,\n \"samples\": [\n 3916,\n 3084,\n 2103\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"natural_gas_m3_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 860,\n \"min\": 0,\n \"max\": 2997,\n \"num_unique_values\": 1448,\n \"samples\": [\n 1283,\n 2806,\n 206\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"renewable_energy_percentage\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 0,\n \"max\": 99,\n \"num_unique_values\": 100,\n \"samples\": [\n 15,\n 85,\n 71\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diet_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"vegetarian\",\n \"non_vegetarian\",\n \"vegan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"meat_kg_per_year\",\n \"properties\": {\n 
\"dtype\": \"number\",\n \"std\": 29,\n \"min\": 0,\n \"max\": 99,\n \"num_unique_values\": 100,\n \"samples\": [\n 83,\n 77,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_kg_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 265,\n \"min\": 101,\n \"max\": 998,\n \"num_unique_values\": 805,\n \"samples\": [\n 345,\n 221,\n 585\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"recycling_rate\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28,\n \"min\": 0,\n \"max\": 99,\n \"num_unique_values\": 100,\n \"samples\": [\n 32,\n 38,\n 63\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"house_size_m2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 20,\n \"max\": 199,\n \"num_unique_values\": 180,\n \"samples\": [\n 85,\n 153,\n 146\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_people_household\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 1,\n \"max\": 5,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 1,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"carbon_footprint_kgCO2_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2425.0349473460146,\n \"min\": 2267.62,\n \"max\": 17301.329999999998,\n \"num_unique_values\": 2000,\n \"samples\": [\n 10281.315,\n 13234.246000000001,\n 11225.571\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
409
+ }
410
+ },
411
+ "metadata": {},
412
+ "execution_count": 1
413
+ }
414
+ ],
415
+ "source": [
416
+ "import pandas as pd\n",
417
+ "import numpy as np\n",
418
+ "\n",
419
+ "# Number of samples\n",
420
+ "n_samples = 2000\n",
421
+ "np.random.seed(42)\n",
422
+ "\n",
423
+ "# Generate realistic lifestyle data\n",
424
+ "data = {\n",
425
+ " \"car_km_per_year\": np.random.randint(0, 20000, n_samples),\n",
426
+ " \"public_transport_km_per_year\": np.random.randint(0, 10000, n_samples),\n",
427
+ " \"flights_per_year\": np.random.randint(0, 10, n_samples),\n",
428
+ " \"electricity_kwh_per_year\": np.random.randint(1000, 6000, n_samples),\n",
429
+ " \"natural_gas_m3_per_year\": np.random.randint(0, 3000, n_samples),\n",
430
+ " \"renewable_energy_percentage\": np.random.randint(0, 100, n_samples),\n",
431
+ " \"diet_type\": np.random.choice([\"vegetarian\", \"vegan\", \"non_vegetarian\"], n_samples, p=[0.3, 0.2, 0.5]),\n",
432
+ " \"meat_kg_per_year\": np.random.randint(0, 100, n_samples),\n",
433
+ " \"waste_kg_per_year\": np.random.randint(100, 1000, n_samples),\n",
434
+ " \"recycling_rate\": np.random.randint(0, 100, n_samples),\n",
435
+ " \"house_size_m2\": np.random.randint(20, 200, n_samples),\n",
436
+ " \"num_people_household\": np.random.randint(1, 6, n_samples)\n",
437
+ "}\n",
438
+ "\n",
439
+ "df = pd.DataFrame(data)\n",
440
+ "\n",
441
+ "# Emission factors\n",
442
+ "EF_CAR = 0.2 # kg CO2 per km\n",
443
+ "EF_PUBLIC = 0.05 # kg CO2 per km\n",
444
+ "EF_FLIGHT = 250 # kg CO2 per flight\n",
445
+ "EF_ELECTRICITY = 0.5 # kg CO2 per kWh\n",
446
+ "EF_NATURAL_GAS = 2 # kg CO2 per m3\n",
447
+ "EF_MEAT = 27 # kg CO2 per kg\n",
448
+ "EF_WASTE = 1.8 # kg CO2 per kg\n",
449
+ "\n",
450
+ "# Calculate emissions\n",
451
+ "car_emission = df[\"car_km_per_year\"] * EF_CAR\n",
452
+ "public_emission = df[\"public_transport_km_per_year\"] * EF_PUBLIC\n",
453
+ "flight_emission = df[\"flights_per_year\"] * EF_FLIGHT\n",
454
+ "electricity_emission = df[\"electricity_kwh_per_year\"] * EF_ELECTRICITY * (1 - df[\"renewable_energy_percentage\"]/100)\n",
455
+ "gas_emission = df[\"natural_gas_m3_per_year\"] * EF_NATURAL_GAS\n",
456
+ "\n",
457
+ "# Food emission (vegetarian and vegan lower)\n",
458
+ "meat_factor = df[\"diet_type\"].map({\n",
459
+ " \"non_vegetarian\": 1.0,\n",
460
+ " \"vegetarian\": 0.5,\n",
461
+ " \"vegan\": 0.2\n",
462
+ "})\n",
463
+ "food_emission = df[\"meat_kg_per_year\"] * EF_MEAT * meat_factor\n",
464
+ "\n",
465
+ "# Waste emission (recycling reduces emissions)\n",
466
+ "waste_emission = df[\"waste_kg_per_year\"] * EF_WASTE * (1 - df[\"recycling_rate\"]/100)\n",
467
+ "\n",
468
+ "# Total carbon footprint\n",
469
+ "df[\"carbon_footprint_kgCO2_per_year\"] = (\n",
470
+ " car_emission + public_emission + flight_emission +\n",
471
+ " electricity_emission + gas_emission + food_emission +\n",
472
+ " waste_emission\n",
473
+ ")\n",
474
+ "\n",
475
+ "# Save dataset\n",
476
+ "df.to_csv(\"synthetic_carbon_footprint.csv\", index=False)\n",
477
+ "\n",
478
+ "print(\"Dataset generated and saved as synthetic_carbon_footprint.csv\")\n",
479
+ "df.head()\n"
480
+ ]
481
+ },
482
+ {
483
+ "cell_type": "code",
484
+ "source": [],
485
+ "metadata": {
486
+ "id": "ARRjWA_4p5VJ"
487
+ },
488
+ "execution_count": null,
489
+ "outputs": []
490
+ }
491
+ ]
492
+ }
src/notebooks/Running_Model.ipynb ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "source": [
20
+ "import pandas as pd\n",
21
+ "import numpy as np\n",
22
+ "from sklearn.model_selection import train_test_split\n",
23
+ "from sklearn.ensemble import RandomForestRegressor\n",
24
+ "from sklearn.metrics import mean_absolute_error, r2_score\n",
25
+ "import joblib"
26
+ ],
27
+ "metadata": {
28
+ "id": "XmGmiHQPr-WV"
29
+ },
30
+ "execution_count": 4,
31
+ "outputs": []
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "source": [
36
+ "# 1. Load synthetic dataset\n",
37
+ "df = pd.read_csv(\"synthetic_carbon_footprint.csv\")"
38
+ ],
39
+ "metadata": {
40
+ "id": "f1oCurY6sA9N"
41
+ },
42
+ "execution_count": 5,
43
+ "outputs": []
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "source": [
48
+ "# 2. Encode categorical column (diet_type)\n",
49
+ "df_encoded = pd.get_dummies(df, columns=['diet_type'], drop_first=True)"
50
+ ],
51
+ "metadata": {
52
+ "id": "e1AJOXchsjmN"
53
+ },
54
+ "execution_count": 6,
55
+ "outputs": []
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "source": [
60
+ "# 3. Separate features and target\n",
61
+ "X = df_encoded.drop(columns=['carbon_footprint_kgCO2_per_year'])\n",
62
+ "y = df_encoded['carbon_footprint_kgCO2_per_year']"
63
+ ],
64
+ "metadata": {
65
+ "id": "2Vhu1YrMsldt"
66
+ },
67
+ "execution_count": 7,
68
+ "outputs": []
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "source": [
73
+ "# 4. Train/test split\n",
74
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
75
+ ],
76
+ "metadata": {
77
+ "id": "jw3t3wTUspd7"
78
+ },
79
+ "execution_count": 8,
80
+ "outputs": []
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "source": [
85
+ "# 5. Train Random Forest model\n",
86
+ "model = RandomForestRegressor(n_estimators=200, random_state=42)\n",
87
+ "model.fit(X_train, y_train)"
88
+ ],
89
+ "metadata": {
90
+ "colab": {
91
+ "base_uri": "https://localhost:8080/",
92
+ "height": 80
93
+ },
94
+ "id": "B3Kh4z6osrNs",
95
+ "outputId": "10fad54e-7a5b-40ff-cd8f-3de1547b7034"
96
+ },
97
+ "execution_count": 9,
98
+ "outputs": [
99
+ {
100
+ "output_type": "execute_result",
101
+ "data": {
102
+ "text/plain": [
103
+ "RandomForestRegressor(n_estimators=200, random_state=42)"
104
+ ],
105
+ "text/html": [
106
+ "<style>#sk-container-id-1 {\n",
107
+ " /* Definition of color scheme common for light and dark mode */\n",
108
+ " --sklearn-color-text: #000;\n",
109
+ " --sklearn-color-text-muted: #666;\n",
110
+ " --sklearn-color-line: gray;\n",
111
+ " /* Definition of color scheme for unfitted estimators */\n",
112
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
113
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
114
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
115
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
116
+ " /* Definition of color scheme for fitted estimators */\n",
117
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
118
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
119
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
120
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
121
+ "\n",
122
+ " /* Specific color for light theme */\n",
123
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
124
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
125
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
126
+ " --sklearn-color-icon: #696969;\n",
127
+ "\n",
128
+ " @media (prefers-color-scheme: dark) {\n",
129
+ " /* Redefinition of color scheme for dark theme */\n",
130
+ " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
131
+ " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
132
+ " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
133
+ " --sklearn-color-icon: #878787;\n",
134
+ " }\n",
135
+ "}\n",
136
+ "\n",
137
+ "#sk-container-id-1 {\n",
138
+ " color: var(--sklearn-color-text);\n",
139
+ "}\n",
140
+ "\n",
141
+ "#sk-container-id-1 pre {\n",
142
+ " padding: 0;\n",
143
+ "}\n",
144
+ "\n",
145
+ "#sk-container-id-1 input.sk-hidden--visually {\n",
146
+ " border: 0;\n",
147
+ " clip: rect(1px 1px 1px 1px);\n",
148
+ " clip: rect(1px, 1px, 1px, 1px);\n",
149
+ " height: 1px;\n",
150
+ " margin: -1px;\n",
151
+ " overflow: hidden;\n",
152
+ " padding: 0;\n",
153
+ " position: absolute;\n",
154
+ " width: 1px;\n",
155
+ "}\n",
156
+ "\n",
157
+ "#sk-container-id-1 div.sk-dashed-wrapped {\n",
158
+ " border: 1px dashed var(--sklearn-color-line);\n",
159
+ " margin: 0 0.4em 0.5em 0.4em;\n",
160
+ " box-sizing: border-box;\n",
161
+ " padding-bottom: 0.4em;\n",
162
+ " background-color: var(--sklearn-color-background);\n",
163
+ "}\n",
164
+ "\n",
165
+ "#sk-container-id-1 div.sk-container {\n",
166
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
167
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
168
+ " so we also need the `!important` here to be able to override the\n",
169
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
170
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
171
+ " display: inline-block !important;\n",
172
+ " position: relative;\n",
173
+ "}\n",
174
+ "\n",
175
+ "#sk-container-id-1 div.sk-text-repr-fallback {\n",
176
+ " display: none;\n",
177
+ "}\n",
178
+ "\n",
179
+ "div.sk-parallel-item,\n",
180
+ "div.sk-serial,\n",
181
+ "div.sk-item {\n",
182
+ " /* draw centered vertical line to link estimators */\n",
183
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
184
+ " background-size: 2px 100%;\n",
185
+ " background-repeat: no-repeat;\n",
186
+ " background-position: center center;\n",
187
+ "}\n",
188
+ "\n",
189
+ "/* Parallel-specific style estimator block */\n",
190
+ "\n",
191
+ "#sk-container-id-1 div.sk-parallel-item::after {\n",
192
+ " content: \"\";\n",
193
+ " width: 100%;\n",
194
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
195
+ " flex-grow: 1;\n",
196
+ "}\n",
197
+ "\n",
198
+ "#sk-container-id-1 div.sk-parallel {\n",
199
+ " display: flex;\n",
200
+ " align-items: stretch;\n",
201
+ " justify-content: center;\n",
202
+ " background-color: var(--sklearn-color-background);\n",
203
+ " position: relative;\n",
204
+ "}\n",
205
+ "\n",
206
+ "#sk-container-id-1 div.sk-parallel-item {\n",
207
+ " display: flex;\n",
208
+ " flex-direction: column;\n",
209
+ "}\n",
210
+ "\n",
211
+ "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
212
+ " align-self: flex-end;\n",
213
+ " width: 50%;\n",
214
+ "}\n",
215
+ "\n",
216
+ "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
217
+ " align-self: flex-start;\n",
218
+ " width: 50%;\n",
219
+ "}\n",
220
+ "\n",
221
+ "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
222
+ " width: 0;\n",
223
+ "}\n",
224
+ "\n",
225
+ "/* Serial-specific style estimator block */\n",
226
+ "\n",
227
+ "#sk-container-id-1 div.sk-serial {\n",
228
+ " display: flex;\n",
229
+ " flex-direction: column;\n",
230
+ " align-items: center;\n",
231
+ " background-color: var(--sklearn-color-background);\n",
232
+ " padding-right: 1em;\n",
233
+ " padding-left: 1em;\n",
234
+ "}\n",
235
+ "\n",
236
+ "\n",
237
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
238
+ "clickable and can be expanded/collapsed.\n",
239
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
240
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
241
+ "*/\n",
242
+ "\n",
243
+ "/* Pipeline and ColumnTransformer style (default) */\n",
244
+ "\n",
245
+ "#sk-container-id-1 div.sk-toggleable {\n",
246
+ " /* Default theme specific background. It is overwritten whether we have a\n",
247
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
248
+ " background-color: var(--sklearn-color-background);\n",
249
+ "}\n",
250
+ "\n",
251
+ "/* Toggleable label */\n",
252
+ "#sk-container-id-1 label.sk-toggleable__label {\n",
253
+ " cursor: pointer;\n",
254
+ " display: flex;\n",
255
+ " width: 100%;\n",
256
+ " margin-bottom: 0;\n",
257
+ " padding: 0.5em;\n",
258
+ " box-sizing: border-box;\n",
259
+ " text-align: center;\n",
260
+ " align-items: start;\n",
261
+ " justify-content: space-between;\n",
262
+ " gap: 0.5em;\n",
263
+ "}\n",
264
+ "\n",
265
+ "#sk-container-id-1 label.sk-toggleable__label .caption {\n",
266
+ " font-size: 0.6rem;\n",
267
+ " font-weight: lighter;\n",
268
+ " color: var(--sklearn-color-text-muted);\n",
269
+ "}\n",
270
+ "\n",
271
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
272
+ " /* Arrow on the left of the label */\n",
273
+ " content: \"▸\";\n",
274
+ " float: left;\n",
275
+ " margin-right: 0.25em;\n",
276
+ " color: var(--sklearn-color-icon);\n",
277
+ "}\n",
278
+ "\n",
279
+ "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
280
+ " color: var(--sklearn-color-text);\n",
281
+ "}\n",
282
+ "\n",
283
+ "/* Toggleable content - dropdown */\n",
284
+ "\n",
285
+ "#sk-container-id-1 div.sk-toggleable__content {\n",
286
+ " max-height: 0;\n",
287
+ " max-width: 0;\n",
288
+ " overflow: hidden;\n",
289
+ " text-align: left;\n",
290
+ " /* unfitted */\n",
291
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
292
+ "}\n",
293
+ "\n",
294
+ "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
295
+ " /* fitted */\n",
296
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
297
+ "}\n",
298
+ "\n",
299
+ "#sk-container-id-1 div.sk-toggleable__content pre {\n",
300
+ " margin: 0.2em;\n",
301
+ " border-radius: 0.25em;\n",
302
+ " color: var(--sklearn-color-text);\n",
303
+ " /* unfitted */\n",
304
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
305
+ "}\n",
306
+ "\n",
307
+ "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
308
+ " /* unfitted */\n",
309
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
310
+ "}\n",
311
+ "\n",
312
+ "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
313
+ " /* Expand drop-down */\n",
314
+ " max-height: 200px;\n",
315
+ " max-width: 100%;\n",
316
+ " overflow: auto;\n",
317
+ "}\n",
318
+ "\n",
319
+ "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
320
+ " content: \"▾\";\n",
321
+ "}\n",
322
+ "\n",
323
+ "/* Pipeline/ColumnTransformer-specific style */\n",
324
+ "\n",
325
+ "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
326
+ " color: var(--sklearn-color-text);\n",
327
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
328
+ "}\n",
329
+ "\n",
330
+ "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
331
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
332
+ "}\n",
333
+ "\n",
334
+ "/* Estimator-specific style */\n",
335
+ "\n",
336
+ "/* Colorize estimator box */\n",
337
+ "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
338
+ " /* unfitted */\n",
339
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
340
+ "}\n",
341
+ "\n",
342
+ "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
343
+ " /* fitted */\n",
344
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
345
+ "}\n",
346
+ "\n",
347
+ "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
348
+ "#sk-container-id-1 div.sk-label label {\n",
349
+ " /* The background is the default theme color */\n",
350
+ " color: var(--sklearn-color-text-on-default-background);\n",
351
+ "}\n",
352
+ "\n",
353
+ "/* On hover, darken the color of the background */\n",
354
+ "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
355
+ " color: var(--sklearn-color-text);\n",
356
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
357
+ "}\n",
358
+ "\n",
359
+ "/* Label box, darken color on hover, fitted */\n",
360
+ "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
361
+ " color: var(--sklearn-color-text);\n",
362
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
363
+ "}\n",
364
+ "\n",
365
+ "/* Estimator label */\n",
366
+ "\n",
367
+ "#sk-container-id-1 div.sk-label label {\n",
368
+ " font-family: monospace;\n",
369
+ " font-weight: bold;\n",
370
+ " display: inline-block;\n",
371
+ " line-height: 1.2em;\n",
372
+ "}\n",
373
+ "\n",
374
+ "#sk-container-id-1 div.sk-label-container {\n",
375
+ " text-align: center;\n",
376
+ "}\n",
377
+ "\n",
378
+ "/* Estimator-specific */\n",
379
+ "#sk-container-id-1 div.sk-estimator {\n",
380
+ " font-family: monospace;\n",
381
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
382
+ " border-radius: 0.25em;\n",
383
+ " box-sizing: border-box;\n",
384
+ " margin-bottom: 0.5em;\n",
385
+ " /* unfitted */\n",
386
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
387
+ "}\n",
388
+ "\n",
389
+ "#sk-container-id-1 div.sk-estimator.fitted {\n",
390
+ " /* fitted */\n",
391
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
392
+ "}\n",
393
+ "\n",
394
+ "/* on hover */\n",
395
+ "#sk-container-id-1 div.sk-estimator:hover {\n",
396
+ " /* unfitted */\n",
397
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
398
+ "}\n",
399
+ "\n",
400
+ "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
401
+ " /* fitted */\n",
402
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
403
+ "}\n",
404
+ "\n",
405
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
406
+ "\n",
407
+ "/* Common style for \"i\" and \"?\" */\n",
408
+ "\n",
409
+ ".sk-estimator-doc-link,\n",
410
+ "a:link.sk-estimator-doc-link,\n",
411
+ "a:visited.sk-estimator-doc-link {\n",
412
+ " float: right;\n",
413
+ " font-size: smaller;\n",
414
+ " line-height: 1em;\n",
415
+ " font-family: monospace;\n",
416
+ " background-color: var(--sklearn-color-background);\n",
417
+ " border-radius: 1em;\n",
418
+ " height: 1em;\n",
419
+ " width: 1em;\n",
420
+ " text-decoration: none !important;\n",
421
+ " margin-left: 0.5em;\n",
422
+ " text-align: center;\n",
423
+ " /* unfitted */\n",
424
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
425
+ " color: var(--sklearn-color-unfitted-level-1);\n",
426
+ "}\n",
427
+ "\n",
428
+ ".sk-estimator-doc-link.fitted,\n",
429
+ "a:link.sk-estimator-doc-link.fitted,\n",
430
+ "a:visited.sk-estimator-doc-link.fitted {\n",
431
+ " /* fitted */\n",
432
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
433
+ " color: var(--sklearn-color-fitted-level-1);\n",
434
+ "}\n",
435
+ "\n",
436
+ "/* On hover */\n",
437
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
438
+ ".sk-estimator-doc-link:hover,\n",
439
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
440
+ ".sk-estimator-doc-link:hover {\n",
441
+ " /* unfitted */\n",
442
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
443
+ " color: var(--sklearn-color-background);\n",
444
+ " text-decoration: none;\n",
445
+ "}\n",
446
+ "\n",
447
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
448
+ ".sk-estimator-doc-link.fitted:hover,\n",
449
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
450
+ ".sk-estimator-doc-link.fitted:hover {\n",
451
+ " /* fitted */\n",
452
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
453
+ " color: var(--sklearn-color-background);\n",
454
+ " text-decoration: none;\n",
455
+ "}\n",
456
+ "\n",
457
+ "/* Span, style for the box shown on hovering the info icon */\n",
458
+ ".sk-estimator-doc-link span {\n",
459
+ " display: none;\n",
460
+ " z-index: 9999;\n",
461
+ " position: relative;\n",
462
+ " font-weight: normal;\n",
463
+ " right: .2ex;\n",
464
+ " padding: .5ex;\n",
465
+ " margin: .5ex;\n",
466
+ " width: min-content;\n",
467
+ " min-width: 20ex;\n",
468
+ " max-width: 50ex;\n",
469
+ " color: var(--sklearn-color-text);\n",
470
+ " box-shadow: 2pt 2pt 4pt #999;\n",
471
+ " /* unfitted */\n",
472
+ " background: var(--sklearn-color-unfitted-level-0);\n",
473
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
474
+ "}\n",
475
+ "\n",
476
+ ".sk-estimator-doc-link.fitted span {\n",
477
+ " /* fitted */\n",
478
+ " background: var(--sklearn-color-fitted-level-0);\n",
479
+ " border: var(--sklearn-color-fitted-level-3);\n",
480
+ "}\n",
481
+ "\n",
482
+ ".sk-estimator-doc-link:hover span {\n",
483
+ " display: block;\n",
484
+ "}\n",
485
+ "\n",
486
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
487
+ "\n",
488
+ "#sk-container-id-1 a.estimator_doc_link {\n",
489
+ " float: right;\n",
490
+ " font-size: 1rem;\n",
491
+ " line-height: 1em;\n",
492
+ " font-family: monospace;\n",
493
+ " background-color: var(--sklearn-color-background);\n",
494
+ " border-radius: 1rem;\n",
495
+ " height: 1rem;\n",
496
+ " width: 1rem;\n",
497
+ " text-decoration: none;\n",
498
+ " /* unfitted */\n",
499
+ " color: var(--sklearn-color-unfitted-level-1);\n",
500
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
501
+ "}\n",
502
+ "\n",
503
+ "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
504
+ " /* fitted */\n",
505
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
506
+ " color: var(--sklearn-color-fitted-level-1);\n",
507
+ "}\n",
508
+ "\n",
509
+ "/* On hover */\n",
510
+ "#sk-container-id-1 a.estimator_doc_link:hover {\n",
511
+ " /* unfitted */\n",
512
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
513
+ " color: var(--sklearn-color-background);\n",
514
+ " text-decoration: none;\n",
515
+ "}\n",
516
+ "\n",
517
+ "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
518
+ " /* fitted */\n",
519
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
520
+ "}\n",
521
+ "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestRegressor(n_estimators=200, random_state=42)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>RandomForestRegressor</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.6/modules/generated/sklearn.ensemble.RandomForestRegressor.html\">?<span>Documentation for RandomForestRegressor</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\"><pre>RandomForestRegressor(n_estimators=200, random_state=42)</pre></div> </div></div></div></div>"
522
+ ]
523
+ },
524
+ "metadata": {},
525
+ "execution_count": 9
526
+ }
527
+ ]
528
+ },
529
+ {
530
+ "cell_type": "code",
531
+ "source": [
532
+ "# 6. Evaluate\n",
533
+ "y_pred = model.predict(X_test)\n",
534
+ "print(\"MAE:\", mean_absolute_error(y_test, y_pred))\n",
535
+ "print(\"R2 Score:\", r2_score(y_test, y_pred))"
536
+ ],
537
+ "metadata": {
538
+ "colab": {
539
+ "base_uri": "https://localhost:8080/"
540
+ },
541
+ "id": "W4te3CYess68",
542
+ "outputId": "5f64c684-7c0e-4919-bdc6-1b2cb9449db1"
543
+ },
544
+ "execution_count": 10,
545
+ "outputs": [
546
+ {
547
+ "output_type": "stream",
548
+ "name": "stdout",
549
+ "text": [
550
+ "MAE: 649.0838915624994\n",
551
+ "R2 Score: 0.8898286296616447\n"
552
+ ]
553
+ }
554
+ ]
555
+ },
556
+ {
557
+ "cell_type": "code",
558
+ "source": [
559
+ "# 7. Save model to .pkl file\n",
560
+ "joblib.dump(model, \"carbon_model.pkl\")\n",
561
+ "print(\"Model saved as carbon_model.pkl\")"
562
+ ],
563
+ "metadata": {
564
+ "colab": {
565
+ "base_uri": "https://localhost:8080/"
566
+ },
567
+ "id": "wWhBeYS0sv7F",
568
+ "outputId": "ce8ee282-f8c7-4fb0-cc64-7345a4a142b1"
569
+ },
570
+ "execution_count": 11,
571
+ "outputs": [
572
+ {
573
+ "output_type": "stream",
574
+ "name": "stdout",
575
+ "text": [
576
+ "Model saved as carbon_model.pkl\n"
577
+ ]
578
+ }
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "source": [
584
+ "# 8. (Optional) Save column names for later use in Streamlit app\n",
585
+ "joblib.dump(X_train.columns.tolist(), \"model_columns.pkl\")\n"
586
+ ],
587
+ "metadata": {
588
+ "colab": {
589
+ "base_uri": "https://localhost:8080/"
590
+ },
591
+ "id": "fQDQLIeZqiFm",
592
+ "outputId": "a82094cf-1f09-4c04-b5df-c13a0644b5ac"
593
+ },
594
+ "execution_count": 12,
595
+ "outputs": [
596
+ {
597
+ "output_type": "execute_result",
598
+ "data": {
599
+ "text/plain": [
600
+ "['model_columns.pkl']"
601
+ ]
602
+ },
603
+ "metadata": {},
604
+ "execution_count": 12
605
+ }
606
+ ]
607
+ },
608
+ {
609
+ "cell_type": "code",
610
+ "source": [],
611
+ "metadata": {
612
+ "id": "Weh3-Ujbr9T2"
613
+ },
614
+ "execution_count": null,
615
+ "outputs": []
616
+ }
617
+ ]
618
+ }