{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.ensemble import RandomForestRegressor\n",
        "from sklearn.metrics import mean_absolute_error, r2_score\n",
        "import joblib"
      ],
      "metadata": {
        "id": "XmGmiHQPr-WV"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 1. Read the synthetic carbon-footprint CSV (path is relative to the working directory)\n",
        "df = pd.read_csv(\n",
        "    \"synthetic_carbon_footprint.csv\",\n",
        ")"
      ],
      "metadata": {
        "id": "f1oCurY6sA9N"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 2. One-hot encode diet_type; drop_first=True drops the first level's\n",
        "#    indicator column, so that level acts as the implicit baseline\n",
        "df_encoded = pd.get_dummies(\n",
        "    data=df,\n",
        "    columns=['diet_type'],\n",
        "    drop_first=True,\n",
        ")"
      ],
      "metadata": {
        "id": "e1AJOXchsjmN"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 3. Pull out the regression target; every remaining column is a feature\n",
        "target_col = 'carbon_footprint_kgCO2_per_year'\n",
        "y = df_encoded[target_col]\n",
        "X = df_encoded.drop(columns=[target_col])"
      ],
      "metadata": {
        "id": "2Vhu1YrMsldt"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 4. Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable\n",
        "X_train, X_test, y_train, y_test = train_test_split(\n",
        "    X,\n",
        "    y,\n",
        "    test_size=0.2,\n",
        "    random_state=42,\n",
        ")"
      ],
      "metadata": {
        "id": "jw3t3wTUspd7"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 5. Fit a 200-tree random forest regressor on the training split (seeded)\n",
        "model = RandomForestRegressor(\n",
        "    n_estimators=200,\n",
        "    random_state=42,\n",
        ")\n",
        "# Left as the cell's last expression so the fitted estimator is displayed\n",
        "model.fit(X_train, y_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 80
        },
        "id": "B3Kh4z6osrNs",
        "outputId": "10fad54e-7a5b-40ff-cd8f-3de1547b7034"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "RandomForestRegressor(n_estimators=200, random_state=42)"
            ],
            "text/html": [ "
RandomForestRegressor(n_estimators=200, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor(n_estimators=200, random_state=42)