{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division   # Ensures Python3 printing & division standard\n",
    "import pandas as pd \n",
    "from pandas import Series, DataFrame \n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "SavePlots = False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bedmap_train data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         LON        LAT   THICK  \\\n",
      "0  76.889142 -69.876749  1046.6   \n",
      "1  76.893603 -69.876762  1058.5   \n",
      "2  76.899026 -69.876753  1061.1   \n",
      "3  76.904100 -69.876424  1063.7   \n",
      "4  76.909194 -69.876374  1069.8   \n",
      "\n",
      "                                            geometry          EAST  \\\n",
      "0  b'\\x01\\x01\\x00\\x00\\x00\\xf4\\r\\xbfW\\xa2h@A\\xe45\\...  2.150725e+06   \n",
      "1  b'\\x01\\x01\\x00\\x00\\x00$\\xf8B\"\\xb5h@A\\x11-)\\xd5...  2.150762e+06   \n",
      "2  b'\\x01\\x01\\x00\\x00\\x00\\xb8\\xdb\\x03T\\xcdh@A\\xb2...  2.150811e+06   \n",
      "3  b'\\x01\\x01\\x00\\x00\\x00\\xe3\\xdcsj\\xf5h@A,8\\xae\\...  2.150891e+06   \n",
      "4  b'\\x01\\x01\\x00\\x00\\x00\\xd5<+a\\x0ei@A\\xf3\\xef\\x...  2.150941e+06   \n",
      "\n",
      "           NORTH          v       ith_bm         smb            z         s  \n",
      "0  500918.979443  20.060943  1007.947592  211.410147  1155.742697  0.019393  \n",
      "1  500751.208165  19.999543  1006.453881  211.418109  1158.179379  0.018178  \n",
      "2  500547.878047  19.941658  1004.313773  211.428499  1161.195846  0.016543  \n",
      "3  500365.723321  19.862532  1008.705660  211.463026  1163.372114  0.015692  \n",
      "4  500175.761230  19.754973  1015.940170  211.472767  1165.676239  0.014873  \n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Læs en Parquet-fil ind i en DataFrame\n",
    "bedmap_train = pd.read_parquet(\"bedmap_train.parquet\")\n",
    "\n",
    "# Vis de første rækker\n",
    "print(bedmap_train.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "# Tag 10% af dataen tilfældigt\n",
    "sample_df = bedmap_train.sample(frac=0.1, random_state=42)\n",
    "\n",
    "# Fjern ikke-numeriske kolonner som 'geometry'\n",
    "numeric_df = sample_df.select_dtypes(include='number')\n",
    "\n",
    "# Definér target og features\n",
    "X = numeric_df.drop(columns=[\"THICK\"])\n",
    "y = numeric_df[\"THICK\"]\n",
    "\n",
    "# Split i train/test\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Træn beslutningstræ-model\n",
    "model = DecisionTreeRegressor(random_state=42)\n",
    "model.fit(X_train, y_train)\n",
    "\n",
    "# Forudsig på testdata\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Evaluer\n",
    "mse = mean_squared_error(y_test, y_pred)\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "\n",
    "print(f\"Mean Squared Error: {mse:.2f}\")\n",
    "print(f\"R^2 Score: {r2:.2f}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "appml",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}