{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Carbon footprint regression model\n",
        "\n",
        "Trains a `RandomForestRegressor` on `synthetic_carbon_footprint.csv` to predict `carbon_footprint_kgCO2_per_year`, evaluates it on a held-out 20% test split, and saves two artifacts with `joblib`:\n",
        "\n",
        "- `carbon_model.pkl`: the fitted model\n",
        "- `model_columns.pkl`: the ordered list of feature column names the model was fitted on\n",
        "\n",
        "Last recorded run: MAE of about 649.08 and R2 of about 0.890 on the test split."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.ensemble import RandomForestRegressor\n",
        "from sklearn.metrics import mean_absolute_error, r2_score\n",
        "import joblib"
      ],
      "metadata": {
        "id": "XmGmiHQPr-WV"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 1. Load synthetic dataset\n",
        "# The path is relative, so the CSV must be in the current working directory.\n",
        "df = pd.read_csv(\"synthetic_carbon_footprint.csv\")"
      ],
      "metadata": {
        "id": "f1oCurY6sA9N"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 2. Encode categorical column (diet_type)\n",
        "# drop_first=True omits the first diet_type level; that level is represented\n",
        "# by all remaining diet_type dummy columns being 0.\n",
        "df_encoded = pd.get_dummies(df, columns=['diet_type'], drop_first=True)"
      ],
      "metadata": {
        "id": "e1AJOXchsjmN"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 3. Separate features and target\n",
        "# Name the target once so the feature drop and the target selection cannot drift apart.\n",
        "target_col = 'carbon_footprint_kgCO2_per_year'\n",
        "X = df_encoded.drop(columns=[target_col])\n",
        "y = df_encoded[target_col]"
      ],
      "metadata": {
        "id": "2Vhu1YrMsldt"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 4. Train/test split\n",
        "# 20% of the rows are held out for evaluation; random_state is fixed so the split is repeatable.\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
      ],
      "metadata": {
        "id": "jw3t3wTUspd7"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# 5. Train Random Forest model\n",
        "model = RandomForestRegressor(n_estimators=200, random_state=42)\n",
        "model.fit(X_train, y_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 80
        },
        "id": "B3Kh4z6osrNs",
        "outputId": "10fad54e-7a5b-40ff-cd8f-3de1547b7034"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "RandomForestRegressor(n_estimators=200, random_state=42)"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 6. Evaluate\n",
        "# Metrics are computed on the held-out test split only.\n",
        "y_pred = model.predict(X_test)\n",
        "print(\"MAE:\", mean_absolute_error(y_test, y_pred))\n",
        "print(\"R2 Score:\", r2_score(y_test, y_pred))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "W4te3CYess68",
        "outputId": "5f64c684-7c0e-4919-bdc6-1b2cb9449db1"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "MAE: 649.0838915624994\n",
            "R2 Score: 0.8898286296616447\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 7. Save model to .pkl file\n",
        "joblib.dump(model, \"carbon_model.pkl\")\n",
        "print(\"Model saved as carbon_model.pkl\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "wWhBeYS0sv7F",
        "outputId": "ce8ee282-f8c7-4fb0-cc64-7345a4a142b1"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model saved as carbon_model.pkl\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 8. (Optional) Save column names for later use in Streamlit app\n",
        "# The saved list keeps the exact training column order (including the diet_type dummies).\n",
        "joblib.dump(X_train.columns.tolist(), \"model_columns.pkl\")\n",
        "print(\"Column names saved as model_columns.pkl\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fQDQLIeZqiFm",
        "outputId": "a82094cf-1f09-4c04-b5df-c13a0644b5ac"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Column names saved as model_columns.pkl\n"
          ]
        }
      ]
    }
  ]
}