{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division  # Ensures Python3 printing & division standard\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "\n",
    "SavePlots = False\n",
    "\n",
    "# Configuration: every value a reader might want to tune lives here.\n",
    "DATA_PATH = 'bedmap_train.parquet'  # Parquet file read by the loading cell\n",
    "TARGET = 'THICK'                    # column the model is trained to predict\n",
    "SAMPLE_FRAC = 0.1                   # fraction of rows drawn for the experiment\n",
    "TEST_SIZE = 0.2                     # fraction of the sample held out for evaluation\n",
    "SEED = 42                           # shared random_state for sampling, splitting and the tree"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bedmap_train data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " LON LAT THICK \\\n",
      "0 76.889142 -69.876749 1046.6 \n",
      "1 76.893603 -69.876762 1058.5 \n",
      "2 76.899026 -69.876753 1061.1 \n",
      "3 76.904100 -69.876424 1063.7 \n",
      "4 76.909194 -69.876374 1069.8 \n",
      "\n",
      " geometry EAST \\\n",
      "0 b'\\x01\\x01\\x00\\x00\\x00\\xf4\\r\\xbfW\\xa2h@A\\xe45\\... 2.150725e+06 \n",
      "1 b'\\x01\\x01\\x00\\x00\\x00$\\xf8B\"\\xb5h@A\\x11-)\\xd5... 2.150762e+06 \n",
      "2 b'\\x01\\x01\\x00\\x00\\x00\\xb8\\xdb\\x03T\\xcdh@A\\xb2... 2.150811e+06 \n",
      "3 b'\\x01\\x01\\x00\\x00\\x00\\xe3\\xdcsj\\xf5h@A,8\\xae\\... 2.150891e+06 \n",
      "4 b'\\x01\\x01\\x00\\x00\\x00\\xd5<+a\\x0ei@A\\xf3\\xef\\x... 2.150941e+06 \n",
      "\n",
      " NORTH v ith_bm smb z s \n",
      "0 500918.979443 20.060943 1007.947592 211.410147 1155.742697 0.019393 \n",
      "1 500751.208165 19.999543 1006.453881 211.418109 1158.179379 0.018178 \n",
      "2 500547.878047 19.941658 1004.313773 211.428499 1161.195846 0.016543 \n",
      "3 500365.723321 19.862532 1008.705660 211.463026 1163.372114 0.015692 \n",
      "4 500175.761230 19.754973 1015.940170 211.472767 1165.676239 0.014873 \n"
     ]
    }
   ],
   "source": [
    "# Read the Parquet file into a DataFrame\n",
    "bedmap_train = pd.read_parquet(DATA_PATH)\n",
    "\n",
    "# Show the first rows\n",
    "print(bedmap_train.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sample, select features and split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw a random fraction of the data (seeded so the sample is reproducible)\n",
    "sample_df = bedmap_train.sample(frac=SAMPLE_FRAC, random_state=SEED)\n",
    "\n",
    "# Keep numeric columns only; this drops non-numeric columns such as 'geometry'\n",
    "numeric_df = sample_df.select_dtypes(include='number')\n",
    "\n",
    "# Define target and features: every numeric column except the target is a feature\n",
    "X = numeric_df.drop(columns=[TARGET])\n",
    "y = numeric_df[TARGET]\n",
    "\n",
    "# Split into train/test sets.\n",
    "# NOTE(review): the rows shown in the preview above are spatially adjacent, so a purely\n",
    "# random split may place near-neighbours in both sets and make the test score optimistic.\n",
    "# Confirm whether a spatially blocked split is needed.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and evaluate a decision-tree regressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the decision-tree model (seeded so tie-breaking between splits is reproducible)\n",
    "model = DecisionTreeRegressor(random_state=SEED)\n",
    "model.fit(X_train, y_train)\n",
    "\n",
    "# Predict on the held-out test data\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Evaluate\n",
    "mse = mean_squared_error(y_test, y_pred)\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "\n",
    "print(f\"Mean Squared Error: {mse:.2f}\")\n",
    "print(f\"R^2 Score: {r2:.2f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "appml",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}