File size: 4,127 Bytes
b2a7669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division   # Ensures Python3 printing & division standard\n",
    "import pandas as pd \n",
    "from pandas import Series, DataFrame \n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "SavePlots = False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bedmap_train data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         LON        LAT   THICK  \\\n",
      "0  76.889142 -69.876749  1046.6   \n",
      "1  76.893603 -69.876762  1058.5   \n",
      "2  76.899026 -69.876753  1061.1   \n",
      "3  76.904100 -69.876424  1063.7   \n",
      "4  76.909194 -69.876374  1069.8   \n",
      "\n",
      "                                            geometry          EAST  \\\n",
      "0  b'\\x01\\x01\\x00\\x00\\x00\\xf4\\r\\xbfW\\xa2h@A\\xe45\\...  2.150725e+06   \n",
      "1  b'\\x01\\x01\\x00\\x00\\x00$\\xf8B\"\\xb5h@A\\x11-)\\xd5...  2.150762e+06   \n",
      "2  b'\\x01\\x01\\x00\\x00\\x00\\xb8\\xdb\\x03T\\xcdh@A\\xb2...  2.150811e+06   \n",
      "3  b'\\x01\\x01\\x00\\x00\\x00\\xe3\\xdcsj\\xf5h@A,8\\xae\\...  2.150891e+06   \n",
      "4  b'\\x01\\x01\\x00\\x00\\x00\\xd5<+a\\x0ei@A\\xf3\\xef\\x...  2.150941e+06   \n",
      "\n",
      "           NORTH          v       ith_bm         smb            z         s  \n",
      "0  500918.979443  20.060943  1007.947592  211.410147  1155.742697  0.019393  \n",
      "1  500751.208165  19.999543  1006.453881  211.418109  1158.179379  0.018178  \n",
      "2  500547.878047  19.941658  1004.313773  211.428499  1161.195846  0.016543  \n",
      "3  500365.723321  19.862532  1008.705660  211.463026  1163.372114  0.015692  \n",
      "4  500175.761230  19.754973  1015.940170  211.472767  1165.676239  0.014873  \n"
     ]
    }
   ],
   "source": [
    "# Redundant with the pandas import in the notebook's first cell; repeating it is harmless.\n",
    "import pandas as pd\n",
    "\n",
    "# Read the Parquet file into a DataFrame.\n",
    "bedmap_train = pd.read_parquet(path=\"bedmap_train.parquet\")\n",
    "\n",
    "# Show the first rows (n=5 is the pandas default, spelled out here).\n",
    "print(bedmap_train.head(n=5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "\n",
    "# Tunable settings for this experiment, named once instead of repeated inline.\n",
    "SEED = 42                 # seed used for both the subsample and the train/test split\n",
    "SAMPLE_FRACTION = 0.1     # share of bedmap_train rows drawn at random\n",
    "TEST_FRACTION = 0.2       # share of the subsample held out as the test set\n",
    "TARGET_COLUMN = \"THICK\"   # column used as the regression target\n",
    "\n",
    "# Draw a reproducible random subsample of the full table.\n",
    "sample_df = bedmap_train.sample(frac=SAMPLE_FRACTION, random_state=SEED)\n",
    "\n",
    "# Keep numeric columns only; this removes non-numeric ones such as 'geometry'.\n",
    "numeric_df = sample_df.select_dtypes(include='number')\n",
    "\n",
    "# Separate the target from the features (all remaining numeric columns).\n",
    "X = numeric_df.drop(columns=[TARGET_COLUMN])\n",
    "y = numeric_df[TARGET_COLUMN]\n",
    "\n",
    "# Split into train/test sets.\n",
    "# NOTE(review): rows are assigned at random. The first rows printed by the load cell\n",
    "# differ only slightly in EAST/NORTH, so near-identical neighbours may end up on both\n",
    "# sides of the split and inflate the test score. Consider a grouped or spatial split.\n",
    "# TODO confirm against the full dataset.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_FRACTION, random_state=SEED\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Train the decision-tree model; fit() returns the fitted estimator, so it can be chained.\n",
    "model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)\n",
    "\n",
    "# Predict on the held-out test rows.\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Evaluate the predictions against the true test targets.\n",
    "mse = mean_squared_error(y_true=y_test, y_pred=y_pred)\n",
    "r2 = r2_score(y_true=y_test, y_pred=y_pred)\n",
    "\n",
    "# One print call, one metric per line (same stdout as two separate prints).\n",
    "print(\n",
    "    f\"Mean Squared Error: {mse:.2f}\",\n",
    "    f\"R^2 Score: {r2:.2f}\",\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "appml",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}