{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "191a5d22-bc45-4f45-a6cd-6d305e29c06e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import pyarrow.parquet as pq\n",
    "\n",
    "# --- Configuration -------------------------------------------------------\n",
    "# Directory holding the parquet inputs. Set the GRID_DATA_DIR environment\n",
    "# variable to run the notebook on another machine; the default is the\n",
    "# original author's local folder.\n",
    "DATA_DIR = Path(os.environ.get('GRID_DATA_DIR', 'C:/Users/Jens Gorm Rytter/Final project/filer'))\n",
    "GRID_PARQUET = DATA_DIR / 'grid_300km_30M.parquet'\n",
    "\n",
    "# Columns loaded into memory (a subset of the columns stored in the file).\n",
    "GRID_COLUMNS = ['EAST', 'NORTH', 'THICK', 'v', 'ith_bm', 'smb', 'z', 's', 'temp', 'gridCellId']\n",
    "\n",
    "SAMPLE_FRACTION = 0.1  # share of rows kept when subsampling df\n",
    "SEED = 42  # fixed seed so the subsample is reproducible\n",
    "\n",
    "# --- Load ----------------------------------------------------------------\n",
    "# Alternative input kept for reference (disabled):\n",
    "# df = pd.read_parquet(DATA_DIR / 'bedmap_train.parquet', columns=['ith_bm'])\n",
    "\n",
    "df = pd.read_parquet(GRID_PARQUET, columns=GRID_COLUMNS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7e5d8ea7-bbf6-41df-81c7-8f59d87539c6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " EAST NORTH THICK v ith_bm smb \\\n",
      "0 1.082323e+06 1.179464e+05 2193.7041 1.721265 2225.768378 28.600311 \n",
      "1 6.780533e+05 -1.755376e+06 2899.8000 0.870523 2487.617401 83.017294 \n",
      "2 5.012288e+05 -8.708590e+05 1819.1100 85.419531 1827.670487 21.117418 \n",
      "3 -8.666767e+05 3.561459e+05 1124.9400 3.918445 1130.303771 136.581627 \n",
      "4 -6.848977e+05 6.913987e+04 2122.4100 7.894785 2105.888247 108.577022 \n",
      "\n",
      " z s temp gridCellId \n",
      "0 3890.095924 0.004726 221.131235 137 \n",
      "1 2348.626277 0.000567 235.855494 28 \n",
      "2 1771.083276 0.004876 239.003074 82 \n",
      "3 216.134166 0.006954 246.742698 149 \n",
      "4 1400.136807 0.002945 243.399524 132 \n"
     ]
    }
   ],
   "source": [
    "# Plain-text preview of the first rows of the loaded frame.\n",
    "print(df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a8f0080c-9ac8-4a55-a2a7-711d91863091",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Antal rækker: 30000000\n",
      "Antal søjler: 13\n",
      "Kolonnenavne: ['THICK', 'geometry', 'EAST', 'NORTH', 'vx', 'vy', 'v', 'ith_bm', 'smb', 'z', 's', 'temp', 'gridCellId']\n"
     ]
    }
   ],
   "source": [
    "# Row count, column count and column names taken from the parquet file's\n",
    "# own metadata/schema (all stored columns, not just GRID_COLUMNS).\n",
    "# The printed labels are Danish: rows / columns / column names.\n",
    "parquet_file = pq.ParquetFile(GRID_PARQUET)\n",
    "print(f\"Antal rækker: {parquet_file.metadata.num_rows}\")\n",
    "print(f\"Antal søjler: {parquet_file.metadata.num_columns}\")\n",
    "schema = parquet_file.schema\n",
    "column_names = schema.names\n",
    "print(\"Kolonnenavne:\", column_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c44f5b51-0b91-4cf5-aeb3-1ca14455e675",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random subsample of df; the fixed SEED makes the draw repeatable.\n",
    "# (Disabled alternative: systematic thinning with df.iloc[::100].)\n",
    "df2 = df.sample(frac=SAMPLE_FRACTION, random_state=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "8b330fe0-da84-4b30-aed0-22c1080e6f5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Disabled step: average all columns over rows sharing the same (EAST, NORTH).\n",
    "#df2 = df2.groupby(['EAST', 'NORTH'], as_index=False).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a9341f03-75b4-487e-8bf1-05f7d05f8d22",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
| \n", " | EAST | \n", "NORTH | \n", "THICK | \n", "v | \n", "ith_bm | \n", "smb | \n", "z | \n", "s | \n", "temp | \n", "gridCellId | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 7416703 | \n", "2.010190e+06 | \n", "4.234983e+05 | \n", "1912.400000 | \n", "8.522309 | \n", "1872.272055 | \n", "105.662956 | \n", "2190.711274 | \n", "0.003119 | \n", "244.664942 | \n", "159 | \n", "
| 23890874 | \n", "-1.416709e+06 | \n", "-4.656678e+05 | \n", "2335.975800 | \n", "416.462733 | \n", "2318.404431 | \n", "555.090772 | \n", "975.383088 | \n", "0.004745 | \n", "251.728175 | \n", "93 | \n", "
| 27490557 | \n", "2.677484e+05 | \n", "7.489870e+05 | \n", "3415.317057 | \n", "12.113687 | \n", "3398.085543 | \n", "35.786982 | \n", "2545.875074 | \n", "0.000519 | \n", "229.067995 | \n", "171 | \n", "
| 2612045 | \n", "2.563805e+05 | \n", "7.454121e+05 | \n", "3439.440151 | \n", "12.405320 | \n", "3414.118535 | \n", "34.281371 | \n", "2536.255275 | \n", "0.000779 | \n", "228.940039 | \n", "171 | \n", "
| 23317003 | \n", "-2.166897e+05 | \n", "1.619933e+06 | \n", "2876.000000 | \n", "1.269482 | \n", "2704.831963 | \n", "72.223145 | \n", "2588.549228 | \n", "0.001344 | \n", "238.712014 | \n", "223 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 19709337 | \n", "-9.330642e+05 | \n", "-1.090434e+06 | \n", "1011.600000 | \n", "4.732082 | \n", "751.811674 | \n", "165.418346 | \n", "1781.513145 | \n", "0.011932 | \n", "249.056547 | \n", "59 | \n", "
| 15490952 | \n", "-5.805525e+05 | \n", "1.041029e+06 | \n", "1628.155022 | \n", "169.035594 | \n", "1628.372881 | \n", "92.561372 | \n", "242.592427 | \n", "0.003159 | \n", "250.550865 | \n", "186 | \n", "
| 184493 | \n", "6.352326e+05 | \n", "-8.346829e+05 | \n", "2652.000000 | \n", "28.844480 | \n", "2633.841978 | \n", "28.581350 | \n", "2251.831105 | \n", "0.005529 | \n", "231.941480 | \n", "82 | \n", "
| 15674412 | \n", "1.289639e+06 | \n", "-1.662617e+06 | \n", "2462.700000 | \n", "10.403379 | \n", "2416.859428 | \n", "121.953121 | \n", "2191.457794 | \n", "0.003778 | \n", "235.054246 | \n", "30 | \n", "
| 10380913 | \n", "-1.563655e+06 | \n", "1.074168e+05 | \n", "1960.300000 | \n", "5.604786 | \n", "1980.466861 | \n", "482.535002 | \n", "1214.166058 | \n", "0.003893 | \n", "248.701443 | \n", "129 | \n", "
3000000 rows × 10 columns
\n", "| \n", " | EAST | \n", "NORTH | \n", "THICK | \n", "v | \n", "ith_bm | \n", "smb | \n", "z | \n", "s | \n", "temp | \n", "gridCellId | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 7416703 | \n", "2.010190e+06 | \n", "4.234983e+05 | \n", "1912.40000 | \n", "8.522309 | \n", "1872.272055 | \n", "105.662956 | \n", "2190.711274 | \n", "0.003119 | \n", "244.664942 | \n", "159 | \n", "
| 23890874 | \n", "-1.416709e+06 | \n", "-4.656678e+05 | \n", "2335.97580 | \n", "416.462733 | \n", "2318.404431 | \n", "555.090772 | \n", "975.383088 | \n", "0.004745 | \n", "251.728175 | \n", "93 | \n", "
| 23317003 | \n", "-2.166897e+05 | \n", "1.619933e+06 | \n", "2876.00000 | \n", "1.269482 | \n", "2704.831963 | \n", "72.223145 | \n", "2588.549228 | \n", "0.001344 | \n", "238.712014 | \n", "223 | \n", "
| 23197163 | \n", "-1.316697e+06 | \n", "-4.440985e+05 | \n", "2397.62550 | \n", "136.943610 | \n", "2359.059125 | \n", "515.937702 | \n", "1224.222085 | \n", "0.009070 | \n", "248.811480 | \n", "93 | \n", "
| 26562643 | \n", "-1.260567e+06 | \n", "3.463845e+05 | \n", "800.00000 | \n", "8.999879 | \n", "801.448965 | \n", "342.135255 | \n", "325.388920 | \n", "0.014823 | \n", "248.506820 | \n", "148 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 7440855 | \n", "-1.741590e+05 | \n", "1.632613e+06 | \n", "2205.30000 | \n", "3.927963 | \n", "1961.284527 | \n", "58.279049 | \n", "2661.827216 | \n", "0.003859 | \n", "237.875307 | \n", "223 | \n", "
| 22726437 | \n", "-2.534046e+05 | \n", "-1.040281e+06 | \n", "469.59474 | \n", "485.319648 | \n", "468.976002 | \n", "95.346090 | \n", "50.071174 | \n", "0.000195 | \n", "244.226429 | \n", "61 | \n", "
| 18818953 | \n", "-1.889686e+06 | \n", "-2.466929e+05 | \n", "208.97000 | \n", "43.362236 | \n", "158.828422 | \n", "561.629861 | \n", "17.046926 | \n", "0.000733 | \n", "261.030976 | \n", "110 | \n", "
| 184493 | \n", "6.352326e+05 | \n", "-8.346829e+05 | \n", "2652.00000 | \n", "28.844480 | \n", "2633.841978 | \n", "28.581350 | \n", "2251.831105 | \n", "0.005529 | \n", "231.941480 | \n", "82 | \n", "
| 10380913 | \n", "-1.563655e+06 | \n", "1.074168e+05 | \n", "1960.30000 | \n", "5.604786 | \n", "1980.466861 | \n", "482.535002 | \n", "1214.166058 | \n", "0.003893 | \n", "248.701443 | \n", "129 | \n", "
1541833 rows × 10 columns
\n", "| \n", " | EAST | \n", "NORTH | \n", "THICK | \n", "v | \n", "ith_bm | \n", "smb | \n", "z | \n", "s | \n", "temp | \n", "gridCellId | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 26562643 | \n", "-1.260567e+06 | \n", "3.463845e+05 | \n", "800.000000 | \n", "8.999879 | \n", "801.448965 | \n", "342.135255 | \n", "325.388920 | \n", "0.014823 | \n", "248.506820 | \n", "148 | \n", "
| 13260984 | \n", "-1.734314e+06 | \n", "4.071370e+05 | \n", "896.300000 | \n", "339.976923 | \n", "470.160135 | \n", "1052.603483 | \n", "50.597224 | \n", "0.002657 | \n", "258.562867 | \n", "146 | \n", "
| 14345082 | \n", "5.541113e+05 | \n", "-1.633705e+06 | \n", "954.600000 | \n", "4.207083 | \n", "1072.203454 | \n", "100.815306 | \n", "1677.870478 | \n", "0.006661 | \n", "237.044136 | \n", "28 | \n", "
| 2037232 | \n", "-1.191443e+06 | \n", "4.444783e+05 | \n", "764.410000 | \n", "421.903970 | \n", "714.258456 | \n", "200.968876 | \n", "76.678445 | \n", "0.000373 | \n", "246.908783 | \n", "148 | \n", "
| 7535672 | \n", "1.169423e+06 | \n", "-1.637842e+06 | \n", "2636.000000 | \n", "7.413730 | \n", "2794.300026 | \n", "89.607926 | \n", "2174.103477 | \n", "0.004095 | \n", "234.325237 | \n", "30 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 10766745 | \n", "2.543245e+06 | \n", "-1.627941e+05 | \n", "859.992000 | \n", "241.843534 | \n", "953.968505 | \n", "680.301080 | \n", "561.252752 | \n", "0.019034 | \n", "257.947905 | \n", "124 | \n", "
| 18818953 | \n", "-1.889686e+06 | \n", "-2.466929e+05 | \n", "208.970000 | \n", "43.362236 | \n", "158.828422 | \n", "561.629861 | \n", "17.046926 | \n", "0.000733 | \n", "261.030976 | \n", "110 | \n", "
| 15490952 | \n", "-5.805525e+05 | \n", "1.041029e+06 | \n", "1628.155022 | \n", "169.035594 | \n", "1628.372881 | \n", "92.561372 | \n", "242.592427 | \n", "0.003159 | \n", "250.550865 | \n", "186 | \n", "
| 184493 | \n", "6.352326e+05 | \n", "-8.346829e+05 | \n", "2652.000000 | \n", "28.844480 | \n", "2633.841978 | \n", "28.581350 | \n", "2251.831105 | \n", "0.005529 | \n", "231.941480 | \n", "82 | \n", "
| 15674412 | \n", "1.289639e+06 | \n", "-1.662617e+06 | \n", "2462.700000 | \n", "10.403379 | \n", "2416.859428 | \n", "121.953121 | \n", "2191.457794 | \n", "0.003778 | \n", "235.054246 | \n", "30 | \n", "
1421911 rows × 10 columns
\n", "| \n", " | THICK | \n", "
|---|---|
| 26562643 | \n", "800.000000 | \n", "
| 13260984 | \n", "896.300000 | \n", "
| 14345082 | \n", "954.600000 | \n", "
| 2037232 | \n", "764.410000 | \n", "
| 7535672 | \n", "2636.000000 | \n", "
| ... | \n", "... | \n", "
| 10766745 | \n", "859.992000 | \n", "
| 18818953 | \n", "208.970000 | \n", "
| 15490952 | \n", "1628.155022 | \n", "
| 184493 | \n", "2652.000000 | \n", "
| 15674412 | \n", "2462.700000 | \n", "
1421911 rows × 1 columns
\n", "| \n", " | ith_bm | \n", "
|---|---|
| 7416703 | \n", "1872.272055 | \n", "
| 23890874 | \n", "2318.404431 | \n", "
| 23317003 | \n", "2704.831963 | \n", "
| 23197163 | \n", "2359.059125 | \n", "
| 26562643 | \n", "801.448965 | \n", "
| ... | \n", "... | \n", "
| 7440855 | \n", "1961.284527 | \n", "
| 22726437 | \n", "468.976002 | \n", "
| 18818953 | \n", "158.828422 | \n", "
| 184493 | \n", "2633.841978 | \n", "
| 10380913 | \n", "1980.466861 | \n", "
1541833 rows × 1 columns
\n", "