{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "0dd2c5d4", "metadata": {}, "outputs": [], "source": [ "import xarray as xr\n", "import geopandas as gpd\n", "from shapely.geometry import box\n", "import rioxarray as rxr # Make sure you have rioxarray installed (pip install rioxarray)\n", "import numpy as np\n", "import ibis\n", "ibis.options.interactive = True" ] }, { "cell_type": "code", "execution_count": 2, "id": "d615f835", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "con = ibis.duckdb.connect()\n", "con.raw_sql('INSTALL spatial;')\n", "con.raw_sql('LOAD spatial;')" ] }, { "cell_type": "markdown", "id": "700cf1f9", "metadata": {}, "source": [ "- The .rio accessor: https://corteva.github.io/rioxarray/html/rioxarray.html#rioxarray-rio-accessors\n", "\n", "- Affine( pixel_width, 0, top_left_x_coord,\n", " 0, -pixel_height, top_left_y_coord)\n", "\n", "- Rasterio Affine Docs (https://affine.readthedocs.io/en/latest/)" ] }, { "cell_type": "code", "execution_count": 3, "id": "cf514138", "metadata": {}, "outputs": [], "source": [ "filename = 'BedMachineAntarctica-v3.nc'\n", "sat_im = rxr.open_rasterio(filename)\n", "transform = sat_im.rio.transform()" ] }, { "cell_type": "code", "execution_count": 4, "id": "7cc14869", "metadata": {}, "outputs": [], "source": [ "tab = con.read_parquet('bedmap_train2_30m.parquet')" ] }, { "cell_type": "code", "execution_count": 5, "id": "0fec2bb7", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0dae1027ada7475b8c194923c425073c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Let's create a dummy GeoPandas DataFrame for demonstration\n", "num_points = 100_000\n", "frac_points = num_points/30_000_000\n", "# Generate random points within a reasonable Antarctica extent (approx for EPSG:3031)\n", "# min_x, max_x = -2000000, 2000000\n", "# min_y, max_y = -2000000, 2000000\n", "# random_x = np.random.uniform(min_x, max_x, num_points)\n", "# random_y = np.random.uniform(min_y, max_y, num_points)\n", "# ice_thickness_data = np.random.uniform(100, 5000, num_points) # Example ice thickness\n", "# v_data = np.random.uniform(0, 1, num_points) # Example velocity\n", "# temp_data = np.random.uniform(0, 1000, num_points) # Example temperature\n", "\n", "# gdf = gpd.GeoDataFrame(\n", "# {'ice_thickness': ice_thickness_data,\n", "# 'v': v_data,\n", "# 'temp': temp_data\n", "# },\n", "# geometry=gpd.points_from_xy(random_x, random_y),\n", "# crs=\"EPSG:3031\"\n", "# )\n", "data = tab.drop(['LON','LAT'])\n", "random_data = data.sample(frac_points)\n", "\n", "# 3.1. Create a spatial index for your GeoDataFrame\n", "gdf = random_data.to_pandas()\n", "gdf.crs = \"EPSG:3031\"\n", "gdf_sindex = gdf.sindex" ] }, { "cell_type": "code", "execution_count": 6, "id": "ae2f315d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
THICKgeometryEASTNORTHvxvyvith_bmsmbzstemp
01064.24POINT (-413937.178 826713.054)-4.139372e+058.267131e+05-2.6192733.0842454.046376954.49087362.6446071162.2798390.007977244.021857
11193.70POINT (1671948.79 -1896344.349)1.671949e+06-1.896344e+0631.064121-41.05727351.4847481211.514173451.2637551203.7774960.015953252.699878
21552.96POINT (-1258929.617 -809646.847)-1.258930e+06-8.096468e+05-2.9243428.9538499.4192981663.638829490.4541612100.5457930.003695246.286919
31264.70POINT (-844998.401 333006.309)-8.449984e+053.330063e+05-2.674579-1.2639692.9582071259.694288145.535283345.6540520.004805246.782685
42000.78POINT (-735958.864 48154.371)-7.359589e+054.815437e+04-3.4509945.5021956.4948842022.433247132.7013611458.3152690.002991243.095592
.......................................
997612024.41POINT (-1070869.73 36219.135)-1.070870e+063.621913e+04-6.5109584.8010568.0896671676.777041202.6113961739.4567180.009819243.995007
997622787.40POINT (919130.388 -1668408.781)9.191304e+05-1.668409e+060.308528-1.3885301.4223942820.33116185.1789352181.5612620.002284234.099728
99763785.40POINT (2292737.808 -1064316.615)2.292738e+06-1.064317e+06-167.424091-27.102104169.603509796.5791131294.093330212.0719620.048592258.157311
997642452.89POINT (-1387664.803 -630194.489)-1.387665e+06-6.301945e+05-43.76326647.03347164.2446172506.279739738.1074991219.7768640.004550249.767054
997651680.48POINT (539623.208 -1053727.718)5.396232e+05-1.053728e+06-4.670764-1.0804574.7941031509.64915631.2536872153.8994260.008152234.451772
\n", "

99766 rows × 12 columns

\n", "
" ], "text/plain": [ " THICK geometry EAST NORTH \\\n", "0 1064.24 POINT (-413937.178 826713.054) -4.139372e+05 8.267131e+05 \n", "1 1193.70 POINT (1671948.79 -1896344.349) 1.671949e+06 -1.896344e+06 \n", "2 1552.96 POINT (-1258929.617 -809646.847) -1.258930e+06 -8.096468e+05 \n", "3 1264.70 POINT (-844998.401 333006.309) -8.449984e+05 3.330063e+05 \n", "4 2000.78 POINT (-735958.864 48154.371) -7.359589e+05 4.815437e+04 \n", "... ... ... ... ... \n", "99761 2024.41 POINT (-1070869.73 36219.135) -1.070870e+06 3.621913e+04 \n", "99762 2787.40 POINT (919130.388 -1668408.781) 9.191304e+05 -1.668409e+06 \n", "99763 785.40 POINT (2292737.808 -1064316.615) 2.292738e+06 -1.064317e+06 \n", "99764 2452.89 POINT (-1387664.803 -630194.489) -1.387665e+06 -6.301945e+05 \n", "99765 1680.48 POINT (539623.208 -1053727.718) 5.396232e+05 -1.053728e+06 \n", "\n", " vx vy v ith_bm smb \\\n", "0 -2.619273 3.084245 4.046376 954.490873 62.644607 \n", "1 31.064121 -41.057273 51.484748 1211.514173 451.263755 \n", "2 -2.924342 8.953849 9.419298 1663.638829 490.454161 \n", "3 -2.674579 -1.263969 2.958207 1259.694288 145.535283 \n", "4 -3.450994 5.502195 6.494884 2022.433247 132.701361 \n", "... ... ... ... ... ... \n", "99761 -6.510958 4.801056 8.089667 1676.777041 202.611396 \n", "99762 0.308528 -1.388530 1.422394 2820.331161 85.178935 \n", "99763 -167.424091 -27.102104 169.603509 796.579113 1294.093330 \n", "99764 -43.763266 47.033471 64.244617 2506.279739 738.107499 \n", "99765 -4.670764 -1.080457 4.794103 1509.649156 31.253687 \n", "\n", " z s temp \n", "0 1162.279839 0.007977 244.021857 \n", "1 1203.777496 0.015953 252.699878 \n", "2 2100.545793 0.003695 246.286919 \n", "3 345.654052 0.004805 246.782685 \n", "4 1458.315269 0.002991 243.095592 \n", "... ... ... ... \n", "99761 1739.456718 0.009819 243.995007 \n", "99762 2181.561262 0.002284 234.099728 \n", "99763 212.071962 0.048592 258.157311 \n", "99764 1219.776864 0.004550 249.767054 \n", "99765 2153.899426 0.008152 234.451772 \n", "\n", "[99766 rows x 12 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdf" ] }, { "cell_type": "code", "execution_count": 8, "id": "8cd3bb9e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "99766" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(gdf)" ] }, { "cell_type": "code", "execution_count": 9, "id": "9f8614bd", "metadata": {}, "outputs": [], "source": [ "size = 27 #pixels\n", "half = size // 2\n", "\n", "images = []\n", "im_data = {}\n", "\n", "scalar_feats = ['THICK', 'vx', 'vy', 'v', 'smb', 'z', 's', 'temp']\n", "\n", "for sclr in scalar_feats:\n", " im_data[sclr] = []\n", "\n", "\n", "for idx, data_row in gdf.iterrows():\n", " geom = data_row.geometry\n", "\n", " col, row = ~transform * (geom.x, geom.y) #Pixel coordinates\n", " col, row = int(np.floor(col)), int(np.floor(row))\n", "\n", " patch = sat_im.surface.isel(\n", " y=slice(row-half, row+half+1),\n", " x=slice(col-half, col+half+1)\n", " )\n", "\n", " image = xr.DataArray(\n", " patch[0].values,\n", " coords = {\n", " \"x\": np.arange(size),\n", " \"y\": np.arange(size)\n", " },\n", " dims = (\"x\", \"y\"),\n", " name = \"z_image\"\n", " )\n", "\n", " images.append(image)\n", " for sclr in scalar_feats:\n", " im_data[sclr].append(data_row[sclr])\n", "\n", "sc = np.arange(len(gdf))\n", "\n", "# Create a DataArray for all images\n", "all_images_da = xr.concat(images, dim = 'sample')\n", "all_images_da['sample'] = sc # Add sample coordinates\n", "\n", "scalars_ds = xr.Dataset(\n", " {\n", " 'vx': xr.DataArray(np.array(im_data['vx']), coords = {'sample': sc}, dims = ('sample',), name = 'vx'),\n", " 'vy': xr.DataArray(np.array(im_data['vy']), coords = {'sample': sc}, dims = ('sample',), name = 'vy'),\n", " 'v': xr.DataArray(np.array(im_data['v']), coords = {'sample': sc}, dims = ('sample',), name = 'v'),\n", " 'smb': xr.DataArray(np.array(im_data['smb']), coords = {'sample': sc}, dims = ('sample',), name = 'smb'),\n", " 'z': xr.DataArray(np.array(im_data['z']), coords = {'sample': sc}, dims = ('sample',), name = 'z'),\n", " 's': xr.DataArray(np.array(im_data['s']), coords = {'sample': sc}, dims = ('sample',), name = 's'),\n", " 'temp': xr.DataArray(np.array(im_data['temp']), coords = {'sample': sc}, dims = ('sample',), name = 'temp')\n", " },\n", " coords = {'sample': sc}\n", ")\n", "\n", "# Create DataArrays for labels and scalar features, aligning with the 'sample' dimension\n", "labels_da = xr.DataArray(\n", " np.array(im_data[\"THICK\"]),\n", " coords={\"sample\": sc},\n", " dims=(\"sample\",),\n", " name=\"label\"\n", ")\n", "\n", "# Combine everything into a single Dataset\n", "training_data_ds = xr.Dataset(\n", " {\n", " \"images\": all_images_da,\n", " \"labels\": labels_da\n", " },\n", " coords={\"sample\": sc}, # Add sample coordinates from scalar_features_ds\n", " # name = \"Elevation images, with labels and scarlars\",\n", " attrs={'description': 'CNN data with elevation images. Scalar features are everything from Niccolo 30M parquet.\\\n", " Images are 27x27 pixels.'}\n", ")\n", "\n", "training_data_ds = training_data_ds.merge(scalars_ds)" ] }, { "cell_type": "code", "execution_count": 16, "id": "6cc1c242", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset> Size: 298MB\n",
       "Dimensions:  (x: 27, y: 27, sample: 99766)\n",
       "Coordinates:\n",
       "  * x        (x) int64 216B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n",
       "  * y        (y) int64 216B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n",
       "  * sample   (sample) int64 798kB 0 1 2 3 4 5 ... 99761 99762 99763 99764 99765\n",
       "Data variables:\n",
       "    images   (sample, x, y) float32 291MB 1.093e+03 1.093e+03 ... 2.133e+03\n",
       "    labels   (sample) float64 798kB 1.064e+03 1.194e+03 ... 2.453e+03 1.68e+03\n",
       "    vx       (sample) float64 798kB -2.619 31.06 -2.924 ... -167.4 -43.76 -4.671\n",
       "    vy       (sample) float64 798kB 3.084 -41.06 8.954 ... -27.1 47.03 -1.08\n",
       "    v        (sample) float64 798kB 4.046 51.48 9.419 ... 169.6 64.24 4.794\n",
       "    smb      (sample) float64 798kB 62.64 451.3 490.5 ... 1.294e+03 738.1 31.25\n",
       "    z        (sample) float64 798kB 1.162e+03 1.204e+03 ... 1.22e+03 2.154e+03\n",
       "    s        (sample) float64 798kB 0.007977 0.01595 ... 0.00455 0.008152\n",
       "    temp     (sample) float64 798kB 244.0 252.7 246.3 ... 258.2 249.8 234.5\n",
       "Attributes:\n",
       "    description:  CNN data with elevation images. Scalar features are everyth...
" ], "text/plain": [ " Size: 298MB\n", "Dimensions: (x: 27, y: 27, sample: 99766)\n", "Coordinates:\n", " * x (x) int64 216B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n", " * y (y) int64 216B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n", " * sample (sample) int64 798kB 0 1 2 3 4 5 ... 99761 99762 99763 99764 99765\n", "Data variables:\n", " images (sample, x, y) float32 291MB 1.093e+03 1.093e+03 ... 2.133e+03\n", " labels (sample) float64 798kB 1.064e+03 1.194e+03 ... 2.453e+03 1.68e+03\n", " vx (sample) float64 798kB -2.619 31.06 -2.924 ... -167.4 -43.76 -4.671\n", " vy (sample) float64 798kB 3.084 -41.06 8.954 ... -27.1 47.03 -1.08\n", " v (sample) float64 798kB 4.046 51.48 9.419 ... 169.6 64.24 4.794\n", " smb (sample) float64 798kB 62.64 451.3 490.5 ... 1.294e+03 738.1 31.25\n", " z (sample) float64 798kB 1.162e+03 1.204e+03 ... 1.22e+03 2.154e+03\n", " s (sample) float64 798kB 0.007977 0.01595 ... 0.00455 0.008152\n", " temp (sample) float64 798kB 244.0 252.7 246.3 ... 258.2 249.8 234.5\n", "Attributes:\n", " description: CNN data with elevation images. Scalar features are everyth..." ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "training_data_ds" ] }, { "cell_type": "code", "execution_count": null, "id": "c0812faa", "metadata": {}, "outputs": [], "source": [ "# training_data_ds.to_netcdf('conv_train_1.nc')" ] }, { "cell_type": "code", "execution_count": 18, "id": "bdb4d03a", "metadata": {}, "outputs": [], "source": [ "test_import = xr.open_dataset('conv_train_1.nc') " ] }, { "cell_type": "code", "execution_count": 19, "id": "6bf789a6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset> Size: 298MB\n",
       "Dimensions:  (sample: 99766, x: 27, y: 27)\n",
       "Coordinates:\n",
       "  * sample   (sample) int32 399kB 0 1 2 3 4 5 ... 99761 99762 99763 99764 99765\n",
       "  * x        (x) int32 108B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n",
       "  * y        (y) int32 108B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n",
       "Data variables:\n",
       "    images   (sample, x, y) float32 291MB ...\n",
       "    labels   (sample) float64 798kB ...\n",
       "    vx       (sample) float64 798kB ...\n",
       "    vy       (sample) float64 798kB ...\n",
       "    v        (sample) float64 798kB ...\n",
       "    smb      (sample) float64 798kB ...\n",
       "    z        (sample) float64 798kB ...\n",
       "    s        (sample) float64 798kB ...\n",
       "    temp     (sample) float64 798kB ...\n",
       "Attributes:\n",
       "    description:  CNN data with elevation images. Scalar features are everyth...
" ], "text/plain": [ " Size: 298MB\n", "Dimensions: (sample: 99766, x: 27, y: 27)\n", "Coordinates:\n", " * sample (sample) int32 399kB 0 1 2 3 4 5 ... 99761 99762 99763 99764 99765\n", " * x (x) int32 108B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n", " * y (y) int32 108B 0 1 2 3 4 5 6 7 8 9 ... 18 19 20 21 22 23 24 25 26\n", "Data variables:\n", " images (sample, x, y) float32 291MB ...\n", " labels (sample) float64 798kB ...\n", " vx (sample) float64 798kB ...\n", " vy (sample) float64 798kB ...\n", " v (sample) float64 798kB ...\n", " smb (sample) float64 798kB ...\n", " z (sample) float64 798kB ...\n", " s (sample) float64 798kB ...\n", " temp (sample) float64 798kB ...\n", "Attributes:\n", " description: CNN data with elevation images. Scalar features are everyth..." ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_import" ] }, { "cell_type": "code", "execution_count": null, "id": "4a1e044e", "metadata": {}, "outputs": [], "source": [ "p = gdf.iloc[0].geometry\n", "\n", "transform = sat_im.rio.transform()\n", "\n", "col, row = ~transform * (p.x, p.y)\n", "\n", "col, row = int(np.floor(col)), int(np.floor(row))\n", "\n", "print(p.x, p.y)\n", "\n", "print(sat_im.surface[0][row][col].x, sat_im.surface[0][row][col].y)" ] }, { "cell_type": "code", "execution_count": 25, "id": "9ac2f290", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Source file path for Affine class: c:\\Users\\Cap\\Documents\\Python_Scripts\\AppML\\.geoMLvenv\\Lib\\site-packages\\affine\\__init__.py\n" ] } ], "source": [ "import inspect\n", "\n", "source_file_path = inspect.getsourcefile(type(sat_im.surface.rio.transform()))\n", "print(f\"\\nSource file path for Affine class: {source_file_path}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".geoMLvenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }