simen
committed on
Commit
·
b4c50fc
1
Parent(s):
fbbb329
add subsampling
Browse files — preprocess_forecast.py +32 -2
preprocess_forecast.py
CHANGED
|
@@ -3,6 +3,7 @@ from siphon.catalog import TDSCatalog
|
|
| 3 |
import numpy as np
|
| 4 |
import datetime
|
| 5 |
import re
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
# %%
|
|
@@ -162,12 +163,41 @@ def load_meps_for_location(file_path=None, altitude_min=0, altitude_max=3000):
|
|
| 162 |
return subset
|
| 163 |
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
if __name__ == "__main__":
|
| 166 |
dataset_file_path = find_latest_meps_file()
|
| 167 |
|
| 168 |
subset = load_meps_for_location(dataset_file_path)
|
| 169 |
|
|
|
|
|
|
|
| 170 |
os.makedirs("forecasts", exist_ok=True)
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import datetime
|
| 5 |
import re
|
| 6 |
+
import os
|
| 7 |
|
| 8 |
|
| 9 |
# %%
|
|
|
|
| 163 |
return subset
|
| 164 |
|
| 165 |
|
| 166 |
+
def subsample_lat_lon(dataset, lat_stride: int = 2, lon_stride: int = 2):
    """
    Subsample the dataset along its 'y' and 'x' dimensions.

    Every ``lat_stride``-th point is kept along 'y' and every
    ``lon_stride``-th point along 'x', starting from the first point of
    each dimension. A stride of 1 keeps that dimension unchanged.

    Parameters:
    - dataset: xarray.Dataset, the dataset to subsample. It must expose
      'y' and 'x' in ``dataset.dims``.
    - lat_stride: int, stride value for subsampling along 'y' (>= 1).
    - lon_stride: int, stride value for subsampling along 'x' (>= 1).

    Returns:
    - xarray.Dataset, the subsampled dataset.

    Raises:
    - ValueError: if 'y' or 'x' is missing from the dataset dimensions,
      or if either stride is smaller than 1.
    """
    # Both horizontal dimensions must exist before we can index along them.
    if "y" not in dataset.dims or "x" not in dataset.dims:
        raise ValueError(
            "Dataset does not contain 'y' and 'x' dimensions for latitude and longitude."
        )

    # Reject strides that cannot express a subsampling: 0 would only fail
    # later with an unrelated "slice step cannot be zero" error, and a
    # negative step would silently reverse the axis instead of thinning it.
    if lat_stride < 1 or lon_stride < 1:
        raise ValueError(
            "lat_stride and lon_stride must be integers >= 1, "
            f"got lat_stride={lat_stride!r}, lon_stride={lon_stride!r}."
        )

    # Positional, strided selection along both horizontal dimensions.
    subsampled_dataset = dataset.isel(
        y=slice(None, None, lat_stride), x=slice(None, None, lon_stride)
    )

    return subsampled_dataset
|
| 190 |
+
|
| 191 |
+
|
| 192 |
if __name__ == "__main__":
    # Locate the most recent MEPS file and load the location subset from it.
    dataset_file_path = find_latest_meps_file()
    subset = load_meps_for_location(dataset_file_path)

    # Keep every second point along both horizontal dimensions.
    subsampled_subset = subsample_lat_lon(subset, lat_stride=2, lon_stride=2)

    # Make sure the output directory exists before writing into it.
    os.makedirs("forecasts", exist_ok=True)

    # The output name is the timestamp taken from the last path component.
    file_name = dataset_file_path.rsplit("/", 1)[-1]
    timestamp = extract_timestamp(file_name)

    subsampled_subset.to_netcdf(f"forecasts/{timestamp}.nc")
    print(f"Subsampled dataset saved to forecasts/{timestamp}.nc")
|