{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 244 }, "id": "5KvqZbgdpv6x", "outputId": "6aab18aa-aa15-4adf-c147-c4c3b32e674f" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Dataset generated and saved as synthetic_carbon_footprint.csv\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " car_km_per_year public_transport_km_per_year flights_per_year \\\n", "0 15795 9917 6 \n", "1 860 7574 8 \n", "2 5390 1689 5 \n", "3 11964 3267 9 \n", "4 11284 4406 0 \n", "\n", " electricity_kwh_per_year natural_gas_m3_per_year \\\n", "0 1067 1526 \n", "1 4836 1877 \n", "2 4993 1699 \n", "3 3506 1029 \n", "4 2537 499 \n", "\n", " renewable_energy_percentage diet_type meat_kg_per_year \\\n", "0 18 vegetarian 49 \n", "1 76 non_vegetarian 39 \n", "2 28 non_vegetarian 94 \n", "3 60 non_vegetarian 2 \n", "4 69 vegan 16 \n", "\n", " waste_kg_per_year recycling_rate house_size_m2 num_people_household \\\n", "0 475 75 181 4 \n", "1 154 46 162 2 \n", "2 677 7 116 5 \n", "3 838 53 72 3 \n", "4 125 8 164 1 \n", "\n", " carbon_footprint_kgCO2_per_year \n", "0 9519.570 \n", "1 8087.708 \n", "2 11279.228 \n", "3 8328.298 \n", "4 4161.735 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_km_per_yearpublic_transport_km_per_yearflights_per_yearelectricity_kwh_per_yearnatural_gas_m3_per_yearrenewable_energy_percentagediet_typemeat_kg_per_yearwaste_kg_per_yearrecycling_ratehouse_size_m2num_people_householdcarbon_footprint_kgCO2_per_year
015795991761067152618vegetarian494757518149519.570
1860757484836187776non_vegetarian391544616228087.708
25390168954993169928non_vegetarian946777116511279.228
311964326793506102960non_vegetarian2838537238328.298
41128444060253749969vegan16125816414161.735
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "df", "summary": "{\n \"name\": \"df\",\n \"rows\": 2000,\n \"fields\": [\n {\n \"column\": \"car_km_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5778,\n \"min\": 9,\n \"max\": 19994,\n \"num_unique_values\": 1890,\n \"samples\": [\n 7832,\n 15149,\n 4431\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"public_transport_km_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2848,\n \"min\": 2,\n \"max\": 9984,\n \"num_unique_values\": 1807,\n \"samples\": [\n 5445,\n 6525,\n 9302\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flights_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 0,\n \"max\": 9,\n \"num_unique_values\": 10,\n \"samples\": [\n 7,\n 8,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"electricity_kwh_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1443,\n \"min\": 1004,\n \"max\": 5995,\n \"num_unique_values\": 1660,\n \"samples\": [\n 3916,\n 3084,\n 2103\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"natural_gas_m3_per_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 860,\n \"min\": 0,\n \"max\": 2997,\n \"num_unique_values\": 1448,\n \"samples\": [\n 1283,\n 2806,\n 206\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"renewable_energy_percentage\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29,\n \"min\": 0,\n \"max\": 99,\n \"num_unique_values\": 100,\n \"samples\": [\n 15,\n 85,\n 71\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"diet_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"vegetarian\",\n \"non_vegetarian\",\n \"vegan\"\n ],\n 
import pandas as pd
import numpy as np

# ---------------------------------------------------------------------------
# Configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Number of samples
n_samples = 2000
SEED = 42
OUTPUT_CSV = "synthetic_carbon_footprint.csv"

# Emission factors
EF_CAR = 0.2          # kg CO2 per km
EF_PUBLIC = 0.05      # kg CO2 per km
EF_FLIGHT = 250       # kg CO2 per flight
EF_ELECTRICITY = 0.5  # kg CO2 per kWh
EF_NATURAL_GAS = 2    # kg CO2 per m3
EF_MEAT = 27          # kg CO2 per kg
EF_WASTE = 1.8        # kg CO2 per kg

# Multiplier applied to the food emission depending on diet
# (vegetarian and vegan are lower than non_vegetarian).
DIET_MEAT_FACTOR = {
    "non_vegetarian": 1.0,
    "vegetarian": 0.5,
    "vegan": 0.2,
}


def generate_lifestyle_data(n_samples, seed=42):
    """Draw a synthetic lifestyle table with ``n_samples`` rows.

    Every column is sampled independently: the numeric columns uniformly
    from a half-open integer range (the upper bound is exclusive), and
    ``diet_type`` with probabilities 0.3 / 0.2 / 0.5 for
    vegetarian / vegan / non_vegetarian.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    seed : int, default 42
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        *global* legacy generator, exactly as the original script did.

    Returns
    -------
    pandas.DataFrame
        One row per sample, twelve feature columns (no footprint column yet).
    """
    np.random.seed(seed)

    # The draw order below determines the generated values for a given seed;
    # do not reorder the entries if the dataset must stay reproducible.
    data = {
        "car_km_per_year": np.random.randint(0, 20000, n_samples),
        "public_transport_km_per_year": np.random.randint(0, 10000, n_samples),
        "flights_per_year": np.random.randint(0, 10, n_samples),
        "electricity_kwh_per_year": np.random.randint(1000, 6000, n_samples),
        "natural_gas_m3_per_year": np.random.randint(0, 3000, n_samples),
        "renewable_energy_percentage": np.random.randint(0, 100, n_samples),
        "diet_type": np.random.choice(
            ["vegetarian", "vegan", "non_vegetarian"], n_samples, p=[0.3, 0.2, 0.5]
        ),
        "meat_kg_per_year": np.random.randint(0, 100, n_samples),
        "waste_kg_per_year": np.random.randint(100, 1000, n_samples),
        "recycling_rate": np.random.randint(0, 100, n_samples),
        "house_size_m2": np.random.randint(20, 200, n_samples),
        "num_people_household": np.random.randint(1, 6, n_samples),
    }
    return pd.DataFrame(data)


def compute_carbon_footprint(df):
    """Return the yearly carbon footprint (kg CO2) for each row of ``df``.

    The footprint is the sum of seven components: car, public transport,
    flights, electricity (scaled down by the renewable percentage), natural
    gas, food (meat scaled by the diet factor) and waste (scaled down by the
    recycling rate). ``house_size_m2`` and ``num_people_household`` are not
    used by the formula.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the feature columns produced by
        ``generate_lifestyle_data``.

    Returns
    -------
    pandas.Series
        Footprint per row, aligned with ``df``'s index. ``df`` is not modified.

    Raises
    ------
    ValueError
        If ``diet_type`` holds a value missing from ``DIET_MEAT_FACTOR``.
        Without this check ``Series.map`` would silently produce NaN and the
        whole footprint for that row would become NaN.
    """
    meat_factor = df["diet_type"].map(DIET_MEAT_FACTOR)
    unknown_diets = df.loc[meat_factor.isna(), "diet_type"].unique()
    if len(unknown_diets) > 0:
        raise ValueError(
            f"Unknown diet_type value(s): {list(unknown_diets)}; "
            f"expected one of {list(DIET_MEAT_FACTOR)}"
        )

    car_emission = df["car_km_per_year"] * EF_CAR
    public_emission = df["public_transport_km_per_year"] * EF_PUBLIC
    flight_emission = df["flights_per_year"] * EF_FLIGHT
    # Only the non-renewable share of the electricity counts.
    electricity_emission = (
        df["electricity_kwh_per_year"]
        * EF_ELECTRICITY
        * (1 - df["renewable_energy_percentage"] / 100)
    )
    gas_emission = df["natural_gas_m3_per_year"] * EF_NATURAL_GAS
    # Food emission (vegetarian and vegan lower)
    food_emission = df["meat_kg_per_year"] * EF_MEAT * meat_factor
    # Waste emission (recycling reduces emissions)
    waste_emission = (
        df["waste_kg_per_year"] * EF_WASTE * (1 - df["recycling_rate"] / 100)
    )

    # Summed left-to-right in this fixed order so floating-point results are
    # identical from run to run.
    return (
        car_emission + public_emission + flight_emission +
        electricity_emission + gas_emission + food_emission +
        waste_emission
    )


# Generate realistic lifestyle data
df = generate_lifestyle_data(n_samples, seed=SEED)

# Total carbon footprint
df["carbon_footprint_kgCO2_per_year"] = compute_carbon_footprint(df)

# Save dataset
df.to_csv(OUTPUT_CSV, index=False)

print(f"Dataset generated and saved as {OUTPUT_CSV}")
df.head()