remisek commited on
Commit
4fcc331
·
1 Parent(s): 32c0e58
README.md CHANGED
@@ -12,5 +12,24 @@ short_description: Application for Automatic Crop Type Mapping
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
 
15
- # License
16
- This project is licensed under the terms of the MIT License. See the LICENSE file for details.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
 
15
+ ## Technologies Used
16
+ This project builds upon several open-source research initiatives:
17
+
18
+ * **[ESA WorldCereal](https://esa-worldcereal.org/):** Global crop mapping system funded by the European Space Agency.
19
+ * **[NASA Harvest Presto](https://github.com/nasaharvest/presto):** A lightweight, pre-trained transformer model for remote sensing timeseries.
20
+ * **[openEO GFMap](https://github.com/Open-EO/openeo-gfmap):** A generic framework for Earth Observation mapping applications.
21
+ * **Streamlit:** For the interactive web interface.
22
+
23
+ ## Citations & References
24
+ If you use this tool or the underlying models, please consider citing the original authors:
25
+
26
+ **Presto Model:**
27
+ > Tseng, G., Zvonkov, I., Purohit, M., Rolnick, D., & Kerner, H. (2023). Lightweight, Pre-trained Transformers for Remote Sensing Timeseries. *arXiv preprint arXiv:2304.14065*.
28
+
29
+ **WorldCereal System:**
30
+ > Van Tricht, K., Gobin, A., Gilliams, S., & Piccard, I. (2023). WorldCereal: a dynamic open-source system for global-scale crop mapping. *Earth System Science Data, 15*(12), 5491-5515.
31
+
32
+ ## License
33
+ This project is licensed under the **MIT License**.
34
+ * **WorldCereal** code components are Apache-2.0 / MIT compatible.
35
+ * **Presto** model weights and code are released under MIT License.
openeo_gfmap/fetching/__init__.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Extraction sub-module.
2
+
3
+ Logic behind the extraction of training or inference data. Different backends
4
+ are supported in order to obtain a very similar result at the end of this
5
+ component.
6
+ """
7
+
8
+ import logging
9
+
10
+ _log = logging.getLogger(__name__)
11
+ _log.setLevel(logging.INFO)
12
+
13
+ ch = logging.StreamHandler()
14
+ ch.setLevel(logging.INFO)
15
+
16
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
17
+ ch.setFormatter(formatter)
18
+
19
+ _log.addHandler(ch)
20
+
21
+ __all__ = [
22
+ "build_sentinel2_l2a_extractor",
23
+ "CollectionFetcher",
24
+ "FetchType",
25
+ "build_sentinel1_grd_extractor",
26
+ "build_generic_extractor",
27
+ "build_generic_extractor_stac",
28
+ ]
29
+
30
+ from .fetching import CollectionFetcher, FetchType # noqa: E402
31
+ from .generic import build_generic_extractor, build_generic_extractor_stac # noqa: E402
32
+ from .s1 import build_sentinel1_grd_extractor # noqa: E402
33
+ from .s2 import build_sentinel2_l2a_extractor # noqa: E402
openeo_gfmap/fetching/commons.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Common internal operations within collection extraction logic, such as reprojection.
3
+ """
4
+
5
+ from functools import partial
6
+ from typing import Dict, Optional, Sequence, Union
7
+
8
+ import openeo
9
+ from geojson import GeoJSON
10
+ from openeo.api.process import Parameter
11
+ from openeo.rest.connection import InputDate
12
+ from pyproj.crs import CRS
13
+ from pyproj.exceptions import CRSError
14
+
15
+ from openeo_gfmap.spatial import BoundingBoxExtent, SpatialContext
16
+ from openeo_gfmap.temporal import TemporalContext
17
+
18
+ from .fetching import FetchType
19
+
20
+
21
def convert_band_names(desired_bands: list, band_dict: dict) -> list:
    """Translate harmonized band names into the backend collection band names.

    Parameters
    ----------
    desired_bands: list
        Bands requested by the user, expressed in the OpenEO-GFMAP
        harmonized naming convention.
    band_dict: dict
        Mapping from the backend collection band names to the OpenEO-GFMAP
        harmonized names. It is inverted internally before the lookup.

    Returns
    -------
    list
        The corresponding band names as used within the backend collection.
    """
    # Invert the mapping: harmonized name -> backend name.
    harmonized_to_backend = {
        harmonized: backend for backend, harmonized in band_dict.items()
    }
    return [harmonized_to_backend[name] for name in desired_bands]
42
+
43
+
44
def resample_reproject(
    datacube: openeo.DataCube,
    resolution: float,
    epsg_code: Optional[Union[str, int]] = None,
    method: str = "near",
) -> openeo.DataCube:
    """Resample the datacube to the given resolution and, when an EPSG code is
    provided, reproject it to that CRS. The EPSG code is validated first and a
    ValueError is raised when it is invalid.
    """
    if epsg_code is None:
        # No reprojection requested: resample in the native projection.
        return datacube.resample_spatial(resolution=resolution, method=method)

    # Validate the provided code before building the process graph.
    try:
        CRS.from_epsg(int(epsg_code))
    except (CRSError, ValueError) as exc:
        raise ValueError(
            f"Specified target_crs: {epsg_code} is not a valid EPSG code."
        ) from exc

    return datacube.resample_spatial(
        resolution=resolution, projection=epsg_code, method=method
    )
66
+
67
+
68
def rename_bands(datacube: openeo.DataCube, mapping: dict) -> openeo.DataCube:
    """Rename the datacube bands following the given mapping.

    Mapping entries whose source name is not present among the datacube bands
    are dropped before the renaming is applied.
    """
    available = datacube.metadata.band_names
    applicable = {src: dst for src, dst in mapping.items() if src in available}

    return datacube.rename_labels(
        dimension="bands",
        target=list(applicable.values()),
        source=list(applicable.keys()),
    )
80
+
81
+
82
+ def _load_collection_hybrid(
83
+ connection: openeo.Connection,
84
+ is_stac: bool,
85
+ collection_id_or_url: str,
86
+ bands: list,
87
+ spatial_extent: Union[Dict[str, float], Parameter, None] = None,
88
+ temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
89
+ properties: Optional[dict] = None,
90
+ ):
91
+ """Wrapper around the load_collection, or load_stac method of the openeo connection."""
92
+ if not is_stac:
93
+ return connection.load_collection(
94
+ collection_id=collection_id_or_url,
95
+ spatial_extent=spatial_extent,
96
+ temporal_extent=temporal_extent,
97
+ bands=bands,
98
+ properties=properties,
99
+ )
100
+ cube = connection.load_stac(
101
+ url=collection_id_or_url,
102
+ spatial_extent=spatial_extent,
103
+ temporal_extent=temporal_extent,
104
+ bands=bands,
105
+ properties=properties,
106
+ )
107
+ cube = cube.rename_labels(dimension="bands", target=bands)
108
+ return cube
109
+
110
+
111
def _load_collection(
    connection: openeo.Connection,
    bands: list,
    collection_name: str,
    spatial_extent: SpatialContext,
    temporal_extent: Optional[TemporalContext],
    fetch_type: FetchType,
    is_stac: bool = False,
    **params,
):
    """Loads a collection from the openeo backend, acting differently depending
    on the fetch type.

    Parameters
    ----------
    connection: openeo.Connection
        Connection to the backend to load the collection from.
    bands: list
        Band names (backend naming) to load.
    collection_name: str
        Collection id, or STAC url when `is_stac` is True.
    spatial_extent: SpatialContext
        Bounding box for tile-based fetching; for polygon-based fetching it
        may be a geometry or a url (Parquet/GeoJSON) filtered after loading.
    temporal_extent: Optional[TemporalContext]
        Time range; may be None for intemporal collections such as DEM.
    fetch_type: FetchType
        The fetching strategy: TILE, POINT or POLYGON.
    is_stac: bool
        Whether the collection is loaded through `load_stac`.
    params:
        Additional options: "load_collection" (properties dict),
        "update_arguments" (experimental process-graph tweaks), "pre_mask"
        and "pre_merge" (openeo datacubes used for masking/merging).

    Raises
    ------
    ValueError
        If the spatial extent is invalid for tile-based fetching, or the
        fetch type is unsupported.
    TypeError
        If "pre_mask" or "pre_merge" is not an openeo DataCube.
    """
    load_collection_parameters = params.get("load_collection", {})
    load_collection_method = partial(
        _load_collection_hybrid, is_stac=is_stac, collection_id_or_url=collection_name
    )

    if (
        temporal_extent is not None
    ):  # Can be ignored for intemporal collections such as DEM
        temporal_extent = [temporal_extent.start_date, temporal_extent.end_date]

    if fetch_type == FetchType.TILE:
        if isinstance(spatial_extent, BoundingBoxExtent):
            spatial_extent = dict(spatial_extent)
        elif spatial_extent is not None:
            raise ValueError(
                "`spatial_extent` should be either None or an instance of BoundingBoxExtent for tile-based fetching."
            )
        cube = load_collection_method(
            connection=connection,
            bands=bands,
            spatial_extent=spatial_extent,
            temporal_extent=temporal_extent,
            properties=load_collection_parameters,
        )
    elif fetch_type in (FetchType.POINT, FetchType.POLYGON):
        # Sparse fetching: spatial filtering/aggregation happens afterwards.
        cube = load_collection_method(
            connection=connection,
            bands=bands,
            temporal_extent=temporal_extent,
            properties=load_collection_parameters,
        )
    else:
        # Guard against future FetchType values: previously `cube` would have
        # been unbound here, raising an opaque UnboundLocalError below.
        raise ValueError(f"Unsupported fetch type: {fetch_type}")

    # Adding the process graph updates for experimental features
    if params.get("update_arguments") is not None:
        cube.result_node().update_arguments(**params["update_arguments"])

    # Performing pre-mask optimization
    pre_mask = params.get("pre_mask", None)
    if pre_mask is not None:
        # Explicit raise instead of `assert`: asserts are stripped under -O.
        if not isinstance(pre_mask, openeo.DataCube):
            raise TypeError(
                f"The 'pre_mask' parameter must be an openeo datacube, got {pre_mask}."
            )
        cube = cube.mask(pre_mask)

    # Merges additional bands continuing the operations.
    pre_merge_cube = params.get("pre_merge", None)
    if pre_merge_cube is not None:
        if not isinstance(pre_merge_cube, openeo.DataCube):
            raise TypeError(
                f"The 'pre_merge' parameter value must be an openeo datacube, "
                f"got {pre_merge_cube}."
            )
        # The merged cube is masked with the same pre-mask, if any.
        if pre_mask is not None:
            pre_merge_cube = pre_merge_cube.mask(pre_mask)
        cube = cube.merge_cubes(pre_merge_cube)

    if fetch_type == FetchType.POLYGON and spatial_extent is not None:
        if isinstance(spatial_extent, str):
            # A url: load the geometry remotely (Parquet or GeoJSON).
            geometry = connection.load_url(
                spatial_extent,
                format="Parquet" if ".parquet" in spatial_extent else "GeoJSON",
            )
            cube = cube.filter_spatial(geometry)
        else:
            cube = cube.filter_spatial(spatial_extent)

    return cube
openeo_gfmap/fetching/fetching.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Main file for extractions and pre-processing of data through OpenEO
2
+ """
3
+
4
+ from enum import Enum
5
+ from typing import Callable
6
+
7
+ import openeo
8
+
9
+ from openeo_gfmap import BackendContext
10
+ from openeo_gfmap.spatial import SpatialContext
11
+ from openeo_gfmap.temporal import TemporalContext
12
+
13
+
14
class FetchType(Enum):
    """Enumerates the different types of extraction. There are three types of
    enumerations.

    * TILE: Tile based extractions, getting the data for a dense part. The
    output of such fetching process in a dense DataCube.
    * POINT: Point based extractions. From a datasets of polygons, gets sparse
    extractions and performs spatial aggregation on the selected polygons. The
    output of such fetching process is a VectorCube, that can be used to get
    a pandas.DataFrame
    * POLYGON: Patch based extractions, returning a VectorCube of sparsed
    patches. This can be retrieved as multiple NetCDF files from one job.
    """

    # String values are the wire/config representation of each strategy.
    TILE = "tile"
    POINT = "point"
    POLYGON = "polygon"
31
+
32
+
33
class CollectionFetcher:
    """Base class to fetch a particular collection.

    Parameters
    ----------
    backend_context: BackendContext
        Information about the backend in use, useful in certain cases.
    bands: list
        List of band names to load from that collection.
    collection_fetch: Callable
        Function defining how to fetch a collection for a specific backend,
        the function accepts the following parameters: connection,
        spatial extent, temporal extent, bands and additional parameters.
    collection_preprocessing: Callable
        Function defining how to harmonize the data of a collection in a
        backend. For example, this function could rename the bands as they
        can be different for every backend/collection (SENTINEL2_L2A or
        SENTINEL2_L2A_SENTINELHUB). Accepts the following parameters:
        datacube (of pre-fetched collection) and additional parameters.
    collection_params: dict
        Additional parameters encoded within a dictionary that will be
        passed in the fetch and preprocessing function.
    """

    def __init__(
        self,
        backend_context: BackendContext,
        bands: list,
        collection_fetch: Callable,
        collection_preprocessing: Callable,
        **collection_params,
    ):
        # Correctly-spelled attribute; the historical misspelling
        # `backend_contect` is kept as an alias for backward compatibility
        # with any external code that accessed it.
        self.backend_context = backend_context
        self.backend_contect = backend_context
        self.bands = bands
        self.fetcher = collection_fetch
        self.processing = collection_preprocessing
        self.params = collection_params

    def get_cube(
        self,
        connection: openeo.Connection,
        spatial_context: SpatialContext,
        temporal_context: TemporalContext,
    ) -> openeo.DataCube:
        """Retrieve a data cube from the given spatial and temporal context.

        Parameters
        ----------
        connection: openeo.Connection
            A connection to an OpenEO backend. The backend provided must be the
            same as the one this extractor class is configured for.
        spatial_context: SpatialContext
            Either a GeoJSON collection on which spatial filtering will be
            applied or a bounding box with an EPSG code. If a bounding box is
            provided, no filtering is applied and the entirety of the data is
            fetched for that region.
        temporal_context: TemporalContext
            The begin and end date of the extraction.
        """
        # Fetch the raw collection data...
        collection_data = self.fetcher(
            connection, spatial_context, temporal_context, self.bands, **self.params
        )

        # ...then harmonize it (band renaming, resampling, ...).
        preprocessed_data = self.processing(collection_data, **self.params)

        return preprocessed_data
openeo_gfmap/fetching/generic.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Generic extraction of features, supporting VITO backend.
2
+ """
3
+
4
+ from typing import Callable, Optional
5
+
6
+ import openeo
7
+ from openeo.rest import OpenEoApiError
8
+
9
+ from openeo_gfmap.backend import Backend, BackendContext
10
+ from openeo_gfmap.fetching import CollectionFetcher, FetchType, _log
11
+ from openeo_gfmap.fetching.commons import (
12
+ _load_collection,
13
+ convert_band_names,
14
+ rename_bands,
15
+ resample_reproject,
16
+ )
17
+ from openeo_gfmap.spatial import SpatialContext
18
+ from openeo_gfmap.temporal import TemporalContext
19
+
20
# Copernicus DEM band name -> OpenEO-GFMAP harmonized name.
BASE_DEM_MAPPING = {"DEM": "COP-DEM"}
# AGERA5 catalogue-collection band names -> OpenEO-GFMAP harmonized names.
BASE_WEATHER_MAPPING = {
    "dewpoint-temperature": "AGERA5-DEWTEMP",
    "precipitation-flux": "AGERA5-PRECIP",
    "solar-radiation-flux": "AGERA5-SOLRAD",
    "temperature-max": "AGERA5-TMAX",
    "temperature-mean": "AGERA5-TMEAN",
    "temperature-min": "AGERA5-TMIN",
    "vapour-pressure": "AGERA5-VAPOUR",
    "wind-speed": "AGERA5-WIND",
}
# AGERA5 STAC-collection band names -> OpenEO-GFMAP harmonized names.
AGERA5_STAC_MAPPING = {
    "dewpoint_temperature_mean": "AGERA5-DEWTEMP",
    "total_precipitation": "AGERA5-PRECIP",
    "solar_radiation_flux": "AGERA5-SOLRAD",
    "2m_temperature_max": "AGERA5-TMAX",
    "2m_temperature_mean": "AGERA5-TMEAN",
    "2m_temperature_min": "AGERA5-TMIN",
    "vapour_pressure": "AGERA5-VAPOUR",
    "wind_speed": "AGERA5-WIND",
}
# Collections known to have no temporal dimension; a user-provided temporal
# extent is ignored (with a warning) for these.
KNOWN_UNTEMPORAL_COLLECTIONS = ["COPERNICUS_30"]

# STAC url of the daily AGERA5 collection hosted by VITO/Terrascope.
AGERA5_TERRASCOPE_STAC = "https://stac.openeo.vito.be/collections/agera5_daily"
44
+
45
+
46
def _get_generic_fetcher(
    collection_name: str, fetch_type: FetchType, backend: Backend, is_stac: bool
) -> Callable:
    """Build a fetcher closure for a generic collection (DEM, weather, ...).

    A band mapping is selected for the known collections so that user-provided
    harmonized band names can be translated to the backend names.
    """
    if collection_name == "COPERNICUS_30":
        band_mapping: Optional[dict] = BASE_DEM_MAPPING
    elif collection_name == "AGERA5":
        band_mapping = BASE_WEATHER_MAPPING
    elif is_stac and (AGERA5_TERRASCOPE_STAC in collection_name):
        band_mapping = AGERA5_STAC_MAPPING
    else:
        band_mapping = None

    def generic_default_fetcher(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Fetch the collection, translating band names for known collections
        and dropping the temporal extent for untemporal ones."""
        requested_bands = bands
        if band_mapping is not None:
            requested_bands = convert_band_names(bands, band_mapping)

        effective_temporal = temporal_extent
        if (
            effective_temporal is not None
            and collection_name in KNOWN_UNTEMPORAL_COLLECTIONS
        ):
            _log.warning(
                "Ignoring the temporal extent provided by the user as the collection %s is known to be untemporal.",
                collection_name,
            )
            effective_temporal = None

        try:
            return _load_collection(
                connection,
                requested_bands,
                collection_name,
                spatial_extent,
                effective_temporal,
                fetch_type,
                is_stac=is_stac,
                **params,
            )
        except OpenEoApiError as e:
            # Translate the backend error into a clearer message.
            if "CollectionNotFound" in str(e):
                raise ValueError(
                    f"Collection {collection_name} not found in the selected backend {backend.value}."
                ) from e
            raise

    return generic_default_fetcher
102
+
103
+
104
def _get_generic_processor(
    collection_name: str, fetch_type: FetchType, is_stac: bool
) -> Callable:
    """Build the preprocessing closure for a generic collection as it is
    stored in the target backend.
    """
    if collection_name == "COPERNICUS_30":
        band_mapping: Optional[dict] = BASE_DEM_MAPPING
    elif collection_name == "AGERA5":
        band_mapping = BASE_WEATHER_MAPPING
    elif is_stac and (AGERA5_TERRASCOPE_STAC in collection_name):
        band_mapping = AGERA5_STAC_MAPPING
    else:
        band_mapping = None

    def generic_default_processor(cube: openeo.DataCube, **params):
        """Default preprocessing for generic datasets: optional resampling and
        reprojection, harmonized band renaming, and removal of the time
        dimension for the (static) DEM collection.
        """
        target_resolution = params.get("target_resolution", None)
        if target_resolution is not None:
            cube = resample_reproject(
                cube,
                target_resolution,
                params.get("target_crs", None),
                method=params.get("resampling_method", "near"),
            )

        if collection_name == "COPERNICUS_30":
            # The DEM is static: collapse the temporal dimension.
            cube = cube.min_time()

        if band_mapping is not None:
            cube = rename_bands(cube, band_mapping)

        return cube

    return generic_default_processor
140
+
141
+
142
def build_generic_extractor(
    backend_context: BackendContext,
    bands: list,
    fetch_type: FetchType,
    collection_name: str,
    **params,
) -> CollectionFetcher:
    """Creates a generic extractor adapted to the given backend. Provides band
    mappings for known collections, such as AGERA5 available on Terrascope/FED
    and Copernicus 30m DEM in all backends.
    """
    return CollectionFetcher(
        backend_context,
        bands,
        _get_generic_fetcher(
            collection_name, fetch_type, backend_context.backend, False
        ),
        _get_generic_processor(collection_name, fetch_type, False),
        **params,
    )
158
+
159
+
160
def build_generic_extractor_stac(
    backend_context: BackendContext,
    bands: list,
    fetch_type: FetchType,
    collection_url: str,
    **params,
) -> CollectionFetcher:
    """Creates a generic STAC-based extractor adapted to the given backend.
    Currently only tested with the VITO backend.
    """
    return CollectionFetcher(
        backend_context,
        bands,
        _get_generic_fetcher(collection_url, fetch_type, backend_context.backend, True),
        _get_generic_processor(collection_url, fetch_type, True),
        **params,
    )
openeo_gfmap/fetching/s1.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Collection fetching of S1 features, supporting different backends.
2
+ """
3
+
4
+ from functools import partial
5
+ from typing import Callable
6
+
7
+ import openeo
8
+ from geojson import GeoJSON
9
+
10
+ from openeo_gfmap.backend import Backend, BackendContext
11
+ from openeo_gfmap.spatial import SpatialContext
12
+ from openeo_gfmap.temporal import TemporalContext
13
+
14
+ from .commons import (
15
+ _load_collection,
16
+ convert_band_names,
17
+ rename_bands,
18
+ resample_reproject,
19
+ )
20
+ from .fetching import CollectionFetcher, FetchType
21
+
22
# Sentinel-1 GRD polarization band names -> OpenEO-GFMAP harmonized names.
BASE_SENTINEL1_GRD_MAPPING = {
    "VH": "S1-SIGMA0-VH",
    "HH": "S1-SIGMA0-HH",
    "HV": "S1-SIGMA0-HV",
    "VV": "S1-SIGMA0-VV",
}
28
+
29
+
30
def _get_s1_grd_default_fetcher(collection_name: str, fetch_type: FetchType) -> Callable:
    """Return a default fetcher for Sentinel-1 GRD data.

    Parameters
    ----------
    collection_name : str
        The name of the Sentinel-1 collection to fetch, as named in the backend.
    fetch_type : FetchType
        The type of fetching: TILE, POINT or POLYGON.
    """

    def s1_grd_fetch_default(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Default collection fetcher for Sentinel-1 GRD collections. The
        collection values are expected to be expressed in power values.

        Parameters
        ----------
        connection: openeo.Connection
            Connection to a general backend.
        spatial_extent: SpatialContext
            Either a GeoJSON collection or a bounding box of locations.
            Spatial filtering is applied when the context is a GeoJSON
            collection, as it implies sparse data.
        temporal_extent: TemporalContext
            A time range, defined by a start and end date.
        bands: list
            The names of the bands to load from that collection.

        Returns
        -------
        openeo.DataCube: a datacube containing the collection raw products.
        """
        backend_bands = convert_band_names(bands, BASE_SENTINEL1_GRD_MAPPING)

        cube = _load_collection(
            connection,
            backend_bands,
            collection_name,
            spatial_extent,
            temporal_extent,
            fetch_type,
            **params,
        )

        # GeoJSON extents are filtered after loading, except for point-based
        # fetching which aggregates afterwards.
        if fetch_type is not FetchType.POINT and isinstance(spatial_extent, GeoJSON):
            cube = cube.filter_spatial(spatial_extent)

        return cube

    return s1_grd_fetch_default
86
+
87
+
88
def _get_s1_grd_default_processor(
    collection_name: str, fetch_type: FetchType, backend: Backend
) -> Callable:
    """Builds the preprocessing function from the collection name as it is
    stored in the target backend.
    """

    def s1_grd_default_processor(cube: openeo.DataCube, **params):
        """Default Sentinel-1 GRD preprocessing. Performs, in order:

        * Backscatter computation: "sigma0-ellipsoid" with the "COPERNICUS_30"
          DEM by default, overridable through the "coefficient" and
          "elevation_model" params.
        * Resampling/reprojection when "target_resolution" is set (10m by
          default; disabled with target_resolution=None).
        * Harmonization of the band names to the GFMAP convention.
        """
        cube = cube.sar_backscatter(
            elevation_model=params.get("elevation_model", "COPERNICUS_30"),
            coefficient=params.get("coefficient", "sigma0-ellipsoid"),
            local_incidence_angle=False,
        )

        # Reproject collection data to target CRS and resolution, if specified.
        # Can be disabled by setting target_resolution=None in the parameters.
        if params.get("target_resolution", True) is not None:
            cube = resample_reproject(
                cube,
                params.get("target_resolution", 10.0),
                params.get("target_crs", None),
                method=params.get("resampling_method", "near"),
            )
        elif params.get("target_crs") is not None:
            raise ValueError(
                "In fetching parameters: `target_crs` specified but not `target_resolution`, which is required to perform reprojection."
            )

        # Harmonize the collection band names to the default GFMAP band names.
        return rename_bands(cube, BASE_SENTINEL1_GRD_MAPPING)

    return s1_grd_default_processor
136
+
137
+
138
# All supported backends expose Sentinel-1 GRD under the same collection name,
# so every entry shares the same fetcher; only the processor's `backend`
# argument differs.
SENTINEL1_GRD_BACKEND_MAP = {
    backend: {
        "default": partial(_get_s1_grd_default_fetcher, collection_name="SENTINEL1_GRD"),
        "preprocessor": partial(
            _get_s1_grd_default_processor,
            collection_name="SENTINEL1_GRD",
            backend=backend,
        ),
    }
    for backend in (
        Backend.TERRASCOPE,
        Backend.CDSE,
        Backend.CDSE_STAGING,
        Backend.FED,
    )
}
180
+
181
+
182
def build_sentinel1_grd_extractor(
    backend_context: BackendContext, bands: list, fetch_type: FetchType, **params
) -> CollectionFetcher:
    """Creates a S1 GRD collection extractor for the given backend.

    Raises
    ------
    ValueError
        If the backend is not supported for Sentinel-1 GRD extraction.
    """
    backend_functions = SENTINEL1_GRD_BACKEND_MAP.get(backend_context.backend)
    if backend_functions is None:
        # Previously an unsupported backend failed later with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError(
            f"Backend {backend_context.backend} is not supported for "
            f"Sentinel-1 GRD extraction."
        )

    fetcher = backend_functions["default"](fetch_type=fetch_type)
    preprocessor = backend_functions["preprocessor"](fetch_type=fetch_type)

    return CollectionFetcher(backend_context, bands, fetcher, preprocessor, **params)
openeo_gfmap/fetching/s2.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Extraction of S2 features, supporting different backends.
2
+ """
3
+
4
+ from functools import partial
5
+ from typing import Callable
6
+
7
+ import openeo
8
+ from geojson import GeoJSON
9
+
10
+ from openeo_gfmap.backend import Backend, BackendContext
11
+ from openeo_gfmap.metadata import FakeMetadata
12
+ from openeo_gfmap.spatial import BoundingBoxExtent, SpatialContext
13
+ from openeo_gfmap.temporal import TemporalContext
14
+
15
+ from .commons import (
16
+ _load_collection,
17
+ convert_band_names,
18
+ rename_bands,
19
+ resample_reproject,
20
+ )
21
+ from .fetching import CollectionFetcher, FetchType
22
+
23
# Sentinel-2 L2A band names as exposed by the default backend collections,
# mapped to the OpenEO-GFMAP harmonized names.
BASE_SENTINEL2_L2A_MAPPING = {
    "B01": "S2-L2A-B01",
    "B02": "S2-L2A-B02",
    "B03": "S2-L2A-B03",
    "B04": "S2-L2A-B04",
    "B05": "S2-L2A-B05",
    "B06": "S2-L2A-B06",
    "B07": "S2-L2A-B07",
    "B08": "S2-L2A-B08",
    "B8A": "S2-L2A-B8A",
    "B09": "S2-L2A-B09",
    "B11": "S2-L2A-B11",
    "B12": "S2-L2A-B12",
    "AOT": "S2-L2A-AOT",
    "SCL": "S2-L2A-SCL",
    "SNW": "S2-L2A-SNW",
}

# Band names as exposed by the Element84 "sentinel-2-l2a" STAC collection,
# mapped to the OpenEO-GFMAP harmonized names.
ELEMENT84_SENTINEL2_L2A_MAPPING = {
    "coastal": "S2-L2A-B01",
    "blue": "S2-L2A-B02",
    "green": "S2-L2A-B03",
    "red": "S2-L2A-B04",
    "rededge1": "S2-L2A-B05",
    "rededge2": "S2-L2A-B06",
    "rededge3": "S2-L2A-B07",
    "nir": "S2-L2A-B08",
    "nir08": "S2-L2A-B8A",
    "nir09": "S2-L2A-B09",
    "cirrus": "S2-L2A-B10",
    "swir16": "S2-L2A-B11",
    "swir22": "S2-L2A-B12",
    "scl": "S2-L2A-SCL",
    "aot": "S2-L2A-AOT",
}
58
+
59
+
60
def _get_s2_l2a_default_fetcher(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Builds the fetch function from the collection name as it stored in the
    target backend.

    Parameters
    ----------
    collection_name: str
        The name of the sentinel2 collection as named in the backend
    fetch_type: FetchType
        The type of fetching: TILE, POINT and POLYGON.
    """

    def s2_l2a_fetch_default(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Default collection fetcher for Sentinel_L2A collections.
        Parameters
        ----------
        connection: openeo.Connection
            Connection to a general backend.
        spatial_extent: SpatialContext
            Either a GeoJSON collection or a bounding box of locations.
            Performs spatial filtering if the spatial context is a GeoJSON
            collection, as it implies sparse data.
        temporal_extent: TemporalContext
            A time range, defined by a start and end date.
        bands: list
            The name of the bands to load from that collection
        Returns
        -------
        openeo.DataCube: a datacube containing the collection raw products.
        """
        # Rename the bands to the backend collection names
        bands = convert_band_names(bands, BASE_SENTINEL2_L2A_MAPPING)

        cube = _load_collection(
            connection,
            bands,
            collection_name,
            spatial_extent,
            temporal_extent,
            fetch_type,
            **params,
        )

        return cube

    return s2_l2a_fetch_default
114
+
115
+
116
# TODO deprecated?
def _get_s2_l2a_element84_fetcher(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Fetches the collections from the Sentinel-2 Cloud-Optimized GeoTIFFs
    bucket provided by Amazon and managed by Element84.
    """

    def s2_l2a_element84_fetcher(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Collection fetcher on the element84 collection."""
        bands = convert_band_names(bands, ELEMENT84_SENTINEL2_L2A_MAPPING)

        # Keep a reference to the original GeoJSON object: converting it to a
        # plain dict below would otherwise make the later GeoJSON check fail,
        # silently skipping the spatial filtering (bug in the previous
        # implementation, where isinstance(spatial_extent, GeoJSON) was
        # evaluated after the dict() conversion).
        geojson_extent = spatial_extent if isinstance(spatial_extent, GeoJSON) else None

        if isinstance(spatial_extent, BoundingBoxExtent):
            spatial_extent = dict(spatial_extent)
        elif geojson_extent is not None:
            # Explicit raise instead of `assert`: asserts are stripped under -O.
            if spatial_extent.get("crs", None) is None:
                raise ValueError("CRS not defined within GeoJSON collection.")
            spatial_extent = dict(spatial_extent)

        cube = connection.load_stac(
            "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a",
            spatial_extent,
            temporal_extent,
            bands,
        )

        cube.metadata = FakeMetadata(band_names=bands)

        # Apply spatial filtering if the extent was a GeoJSON feature collection.
        if geojson_extent is not None:
            cube = cube.filter_spatial(geojson_extent)

        return cube

    return s2_l2a_element84_fetcher
158
+
159
+
160
def _get_s2_l2a_default_processor(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Builds the preprocessing function from the collection name as it stored
    in the target backend.
    """

    def s2_l2a_default_processor(cube: openeo.DataCube, **params):
        """Default Sentinel-2 L2A preprocessing: optional resampling and
        reprojection, harmonization of the band names, and a linear scale
        keeping the values within the uint16 range.
        """
        # Reproject collection data to target CRS and resolution, if specified.
        # Can be disabled by setting target_resolution=None in the parameters.
        if params.get("target_resolution", True) is not None:
            cube = resample_reproject(
                cube,
                params.get("target_resolution", 10.0),
                params.get("target_crs", None),
                method=params.get("resampling_method", "near"),
            )
        elif params.get("target_crs") is not None:
            raise ValueError(
                "In fetching parameters: `target_crs` specified but not `target_resolution`, which is required to perform reprojection."
            )

        # Harmonize the collection band names to the default GFMAP band names.
        cube = rename_bands(cube, BASE_SENTINEL2_L2A_MAPPING)

        # Keep values in [0, 65534] for uint16 storage optimization.
        return cube.linear_scale_range(0, 65534, 0, 65534)

    return s2_l2a_default_processor
196
+
197
+
198
# Every supported backend exposes the collection as "SENTINEL2_L2A" and uses
# the same default fetcher/processor pair.
SENTINEL2_L2A_BACKEND_MAP = {
    backend: {
        "fetch": partial(_get_s2_l2a_default_fetcher, collection_name="SENTINEL2_L2A"),
        "preprocessor": partial(
            _get_s2_l2a_default_processor, collection_name="SENTINEL2_L2A"
        ),
    }
    for backend in (
        Backend.TERRASCOPE,
        Backend.CDSE,
        Backend.CDSE_STAGING,
        Backend.FED,
    )
}
224
+
225
+
226
def build_sentinel2_l2a_extractor(
    backend_context: BackendContext, bands: list, fetch_type: FetchType, **params
) -> CollectionFetcher:
    """Creates a S2 L2A extractor adapted to the given backend.

    Raises
    ------
    ValueError
        If the backend is not supported for Sentinel-2 L2A extraction.
    """
    backend_functions = SENTINEL2_L2A_BACKEND_MAP.get(backend_context.backend)
    if backend_functions is None:
        # Previously an unsupported backend failed later with an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise ValueError(
            f"Backend {backend_context.backend} is not supported for "
            f"Sentinel-2 L2A extraction."
        )

    fetcher = backend_functions["fetch"](fetch_type=fetch_type)
    preprocessor = backend_functions["preprocessor"](fetch_type=fetch_type)

    return CollectionFetcher(backend_context, bands, fetcher, preprocessor, **params)
openeo_gfmap/preprocessing/compositing.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Temporal compositing, or temporal aggregation, is a method to increase the
2
+ quality of data within timesteps by reducing the temporal resolution of a time
3
+ series of satellite images.
4
+ """
5
+
6
+ from typing import Union
7
+
8
+ import openeo
9
+
10
+
11
def median_compositing(
    cube: openeo.DataCube, period: Union[str, list]
) -> openeo.DataCube:
    """Perform median compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to composite along its temporal dimension.
    period : Union[str, list]
        Either a period name understood by `aggregate_temporal_period`
        (e.g. "month", "dekad") or an explicit list of intervals passed to
        `aggregate_temporal`.

    Returns
    -------
    openeo.DataCube
        The temporally aggregated datacube.

    Raises
    ------
    TypeError
        If `period` is neither a string nor a list. (The original silently
        returned None in that case.)
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="median", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(
            intervals=period, reducer="median", dimension="t"
        )
    raise TypeError(
        f"Unsupported period type: {type(period)!r}. Expected str or list."
    )
23
+
24
+
25
def mean_compositing(
    cube: openeo.DataCube, period: Union[str, list]
) -> openeo.DataCube:
    """Perform mean compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to composite along its temporal dimension.
    period : Union[str, list]
        Either a period name understood by `aggregate_temporal_period`
        (e.g. "month", "dekad") or an explicit list of intervals passed to
        `aggregate_temporal`.

    Returns
    -------
    openeo.DataCube
        The temporally aggregated datacube.

    Raises
    ------
    TypeError
        If `period` is neither a string nor a list. (The original silently
        returned None in that case.)
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="mean", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(intervals=period, reducer="mean", dimension="t")
    raise TypeError(
        f"Unsupported period type: {type(period)!r}. Expected str or list."
    )
35
+
36
+
37
def sum_compositing(cube: openeo.DataCube, period: Union[str, list]) -> openeo.DataCube:
    """Perform sum compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to composite along its temporal dimension.
    period : Union[str, list]
        Either a period name understood by `aggregate_temporal_period`
        (e.g. "month", "dekad") or an explicit list of intervals passed to
        `aggregate_temporal`.

    Returns
    -------
    openeo.DataCube
        The temporally aggregated datacube.

    Raises
    ------
    TypeError
        If `period` is neither a string nor a list. (The original silently
        returned None in that case.)
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="sum", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(intervals=period, reducer="sum", dimension="t")
    raise TypeError(
        f"Unsupported period type: {type(period)!r}. Expected str or list."
    )
45
+
46
+
47
def max_ndvi_compositing(cube: openeo.DataCube, period: str) -> openeo.DataCube:
    """Perform compositing by selecting the observation with the highest NDVI value over the
    given compositing window.

    Parameters
    ----------
    cube : openeo.DataCube
        Datacube carrying at least the GFMAP-harmonized "S2-L2A-B08" (NIR) and
        "S2-L2A-B04" (red) bands, from which NDVI is derived.
    period : str
        Compositing period name passed to `aggregate_temporal_period`
        (e.g. "month", "dekad"). Interval lists are not supported.

    Returns
    -------
    openeo.DataCube
        The composited datacube.

    Raises
    ------
    ValueError
        If `period` is not a string.
    """

    def max_ndvi_selection(ndvi: openeo.DataCube):
        # Mask that is True for every observation whose NDVI is NOT the window
        # maximum; those observations are masked out below, so only the
        # max-NDVI observation of each window survives.
        max_ndvi = ndvi.max()
        return ndvi.array_apply(lambda x: x != max_ndvi)

    if isinstance(period, str):
        ndvi = cube.ndvi(nir="S2-L2A-B08", red="S2-L2A-B04")

        # 1x1 pixel neighborhood: the reduction is purely temporal, computed
        # independently per pixel over one compositing period at a time.
        rank_mask = ndvi.apply_neighborhood(
            max_ndvi_selection,
            size=[
                {"dimension": "x", "unit": "px", "value": 1},
                {"dimension": "y", "unit": "px", "value": 1},
                {"dimension": "t", "value": period},
            ],
            overlap=[],
        )

        # After masking, each window holds a single unmasked observation;
        # "first" collapses the window to that observation.
        cube = cube.mask(mask=rank_mask).aggregate_temporal_period(period, "first")

    else:
        raise ValueError(
            "Custom temporal intervals are not yet supported for max NDVI compositing."
        )
    return cube
openeo_gfmap/preprocessing/sar.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Routines to pre-process sar signals."""
2
+
3
+ import openeo
4
+ from openeo.processes import array_create, power
5
+
6
+ from openeo_gfmap import BackendContext
7
+
8
+
9
def compress_backscatter_uint16(
    backend_context: BackendContext, cube: openeo.DataCube
) -> openeo.DataCube:
    """Compress float32 backscatter power values into the uint16 range.

    Each of the two polarisation bands is transformed with
    ``10 ** ((10 * log10(x) + 83) / 20)`` — i.e. converted to decibels,
    shifted, and exponentially re-expanded — and then clamped into the
    1-65534 range. The resulting datacube has a uint16 memory representation,
    which is an optimization before passing through any UDFs.

    Parameters
    ----------
    backend_context : BackendContext
        The backend context to fetch the backend name. (Not used by the
        computation itself; kept for interface symmetry.)
    cube : openeo.DataCube
        The datacube to compress; expected to carry two polarisation bands.

    Returns
    -------
    openeo.DataCube
        The datacube with the backscatter values compressed to uint16.
    """

    def _compress_band(band):
        # Power -> dB (10 * log10), shifted by +83 and re-scaled exponentially.
        return power(base=10, p=(10.0 * band.log(base=10) + 83.0) / 20.0)

    # Apply the same rescaling to both polarisation bands.
    cube = cube.apply_dimension(
        dimension="bands",
        process=lambda x: array_create([_compress_band(x[0]), _compress_band(x[1])]),
    )

    # Clamp into the uint16 value range for memory optimization.
    return cube.linear_scale_range(1, 65534, 1, 65534)
44
+
45
+
46
def decompress_backscatter_uint16(
    backend_context: BackendContext, cube: openeo.DataCube
) -> openeo.DataCube:
    """Decompress uint16-compressed backscatter bands back to float32 power.

    This is the inverse of :func:`compress_backscatter_uint16`: each band is
    transformed with ``10 ** ((20 * log10(x) - 83) / 10)``.

    Parameters
    ----------
    backend_context : BackendContext
        The backend context to fetch the backend name. (Not used by the
        computation itself; kept for interface symmetry.)
    cube : openeo.DataCube
        The datacube with uint16-compressed backscatter values.

    Returns
    -------
    openeo.DataCube
        The datacube with the backscatter values in their original float32 values.
    """

    def _decompress_band(band):
        # Inverse of the compression transform applied in
        # compress_backscatter_uint16.
        return power(base=10, p=(20.0 * band.log(base=10) - 83.0) / 10.0)

    cube = cube.apply_dimension(
        dimension="bands",
        process=lambda x: array_create(
            [_decompress_band(x[0]), _decompress_band(x[1])]
        ),
    )

    return cube
openeo_gfmap/preprocessing/scaling.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Scaling and compressing methods for datacubes."""
2
+
3
+ import openeo
4
+
5
+
6
+ def _compress(
7
+ cube: openeo.DataCube,
8
+ min_val: int,
9
+ max_val: int,
10
+ alpha: float,
11
+ beta: float,
12
+ ):
13
+ if (
14
+ alpha != 1.0 or beta != 0.0
15
+ ): # Avoid adding a node in the computing graph if scaling is not necessary
16
+ cube = (cube * alpha) + beta
17
+
18
+ return cube.linear_scale_range(min_val, max_val, min_val, max_val)
19
+
20
+
21
def compress_uint16(
    cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
) -> openeo.DataCube:
    """Scale the data linearly (`output = input * alpha + beta`) and compress
    the values from float32 into the uint16 range [0, 65534] for memory
    optimization.

    Parameters
    ----------
    cube : openeo.DataCube
        The input datacube to compress, only meteo data should be present.
    alpha : float, optional (default=1.0)
        The scaling factor. Values in the input datacube are multiplied by
        this coefficient.
    beta : float, optional (default=0.0)
        The offset. This value is added to the values in the input datacube.

    Returns
    -------
    openeo.DataCube
        The datacube linearly scaled and clamped into the uint16 range.
    """
    return _compress(cube, 0, 65534, alpha, beta)
42
+
43
+
44
def compress_uint8(
    cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
) -> openeo.DataCube:
    """Scale the data linearly (`output = input * alpha + beta`) and compress
    the values from float32 into the uint8 range [0, 253] for memory
    optimization.

    Parameters
    ----------
    cube : openeo.DataCube
        The input datacube to compress, only meteo data should be present.
    alpha : float, optional (default=1.0)
        The scaling factor. Values in the input datacube are multiplied by
        this coefficient.
    beta : float, optional (default=0.0)
        The offset. This value is added to the values in the input datacube.

    Returns
    -------
    openeo.DataCube
        The datacube linearly scaled and clamped into the uint8 range.
    """
    return _compress(cube, 0, 253, alpha, beta)
openeo_gfmap/utils/__init__.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This sub-module contains utilitary function and tools for OpenEO-GFMap"""
2
+
3
+ import logging
4
+
5
+ from openeo_gfmap.utils.build_df import load_json
6
+ from openeo_gfmap.utils.intervals import quintad_intervals
7
+ from openeo_gfmap.utils.netcdf import update_nc_attributes
8
+ from openeo_gfmap.utils.split_stac import split_collection_by_epsg
9
+ from openeo_gfmap.utils.tile_processing import (
10
+ array_bounds,
11
+ arrays_cosine_similarity,
12
+ normalize_array,
13
+ select_optical_bands,
14
+ select_sar_bands,
15
+ )
16
+
17
# Package-level logger shared by the utils sub-modules (e.g. catalogue.py
# imports `_log` from here).
_log = logging.getLogger(__name__)
_log.setLevel(logging.INFO)

# NOTE(review): attaching a StreamHandler at import time is unusual for a
# library package — handler configuration normally belongs to the application,
# and this can produce duplicated log lines when the root logger is also
# configured. Confirm this is intentional.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)

_log.addHandler(ch)


# Public names re-exported by `openeo_gfmap.utils`.
__all__ = [
    "load_json",
    "normalize_array",
    "select_optical_bands",
    "array_bounds",
    "select_sar_bands",
    "arrays_cosine_similarity",
    "quintad_intervals",
    "split_collection_by_epsg",
    "update_nc_attributes",
]
openeo_gfmap/utils/build_df.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilities to build a `pandas.DataFrame` from the output of a VectorCube
2
+ based job. Usefull to collect the output of point based extraction.
3
+ """
4
+
5
+ from pathlib import Path
6
+
7
+ import pandas as pd
8
+
9
# Raw timestamp format emitted by vector-cube outputs.
VECTORCUBE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S%z"
# Date-only format used in the generated column names.
TIMESTAMP_FORMAT = "%Y-%m-%d"


def load_json(input_file: Path, bands: list) -> pd.DataFrame:
    """Reads a json file and outputs it as a proper pandas dataframe.

    Parameters
    ----------
    input_file: PathLike
        The path of the JSON file to read.
    bands: list
        The name of the bands that will be used in the column names. The band
        names must be the same as in the vector cube that resulted into the
        parsed JSON file.

    Returns
    -------
    df: pd.DataFrame
        A `pandas.DataFrame` with a combination of the band names and the
        timestamps as column names.
        For example, the Sentinel-2 green band on the 1st October 2020 will
        have the column name `S2-L2A-B02:2020-10-01`.
    """
    df = pd.read_json(input_file)

    # Robustly coerce the column labels to datetimes: depending on the pandas
    # version, read_json may or may not have converted the axis already.
    # (The previous `df.columns.to_pydatetime()` failed whenever the columns
    # were left as plain strings.)
    target_timestamps = [
        timestamp.strftime(TIMESTAMP_FORMAT) for timestamp in pd.to_datetime(df.columns)
    ]
    df = df.rename(dict(zip(df.columns, target_timestamps)), axis=1)

    # Each cell holds the list of band values for one timestamp; expand every
    # timestamp column into one column per band, then concatenate once.
    # (Concatenating inside the loop was quadratic in the number of columns.)
    expanded_columns = [
        pd.DataFrame(
            df[col].to_list(), columns=[f"{feature}:{col}" for feature in bands]
        )
        for col in df.columns
    ]
    if not expanded_columns:
        # No timestamp columns at all: return an empty frame, as before.
        return pd.DataFrame()
    return pd.concat(expanded_columns, axis=1)
openeo_gfmap/utils/catalogue.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Functionalities to interract with product catalogues."""
2
+
3
+ from typing import Optional
4
+
5
+ import geojson
6
+ import pandas as pd
7
+ import requests
8
+ from pyproj.crs import CRS
9
+ from rasterio.warp import transform_bounds
10
+ from requests import adapters
11
+ from shapely.geometry import Point, box, shape
12
+ from shapely.ops import unary_union
13
+
14
+ from openeo_gfmap import (
15
+ Backend,
16
+ BackendContext,
17
+ BoundingBoxExtent,
18
+ SpatialContext,
19
+ TemporalContext,
20
+ )
21
+ from openeo_gfmap.utils import _log
22
+
23
# Lazily-created singleton HTTP session shared by all catalogue queries.
request_sessions: Optional[requests.Session] = None


def _request_session() -> requests.Session:
    """Return the shared `requests.Session`, creating it on first call.

    The session retries transient server errors (500/502/503/504) up to five
    times with exponential backoff on every https:// request.
    """
    global request_sessions

    if request_sessions is not None:
        return request_sessions

    session = requests.Session()
    retry_policy = adapters.Retry(
        total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504]
    )
    session.mount("https://", adapters.HTTPAdapter(max_retries=retry_policy))
    request_sessions = session
    return request_sessions
36
+
37
+
38
class UncoveredS1Exception(Exception):
    """Raised when no Sentinel-1 product fully covers, spatially, a given
    spatio-temporal context."""
43
+
44
+
45
def _parse_cdse_products(response: dict):
    """Parses the geometry and timestamps of products from the CDSE catalogue.

    Parameters
    ----------
    response : dict
        JSON body returned by the CDSE Resto API; products are read from the
        "features" list.

    Returns
    -------
    tuple[list, list]
        Shapely geometries and their matching start-date timestamps, in
        catalogue order. Products without a geometry or start date are skipped
        with a warning.
    """
    geometries = []
    timestamps = []
    products = response["features"]

    for product in products:
        if "geometry" in product and "startDate" in product["properties"]:
            geometries.append(shape(product["geometry"]))
            timestamps.append(pd.to_datetime(product["properties"]["startDate"]))
        else:
            # Fixed the garbled log message; also guard the "id" lookup so
            # logging a malformed product cannot itself raise a KeyError.
            _log.warning(
                "Cannot parse product %s: missing geometry or timestamp.",
                product["properties"].get("id", "<unknown>"),
            )
    return geometries, timestamps
61
+
62
+
63
def _query_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> dict:
    """
    Queries the CDSE catalogue for a given collection, spatio-temporal context and additional
    parameters.

    Parameters
    ----------
    collection : str
        CDSE collection name (e.g. "Sentinel1", "Sentinel2").
    bounds : list
        Bounding box as [minx, miny, maxx, maxy]; expected in EPSG:4326
        since it is passed verbatim to the catalogue's `box` parameter.
    temporal_extent : TemporalContext
        Query window; start_date/end_date must be YYYY-MM-DD strings.
    additional_parameters : dict
        Extra query parameters, appended verbatim as "&key=value".

    Returns
    -------
    dict
        The parsed JSON response from the catalogue.

    Raises
    ------
    Exception
        If the HTTP request does not return status code 200.
    """
    minx, miny, maxx, maxy = bounds

    # The date format should be YYYY-MM-DD
    start_date = f"{temporal_extent.start_date}T00:00:00Z"
    end_date = f"{temporal_extent.end_date}T00:00:00Z"

    # NOTE(review): maxRecords is capped at 1000 with no pagination — results
    # beyond 1000 products are silently dropped; confirm this is acceptable.
    url = (
        f"https://catalogue.dataspace.copernicus.eu/resto/api/collections/"
        f"{collection}/search.json?box={minx},{miny},{maxx},{maxy}"
        f"&sortParam=startDate&maxRecords=1000&dataset=ESA-DATASET"
        f"&startDate={start_date}&completionDate={end_date}"
    )
    for key, value in additional_parameters.items():
        url += f"&{key}={value}"

    # Shared session with retry on transient 5xx errors.
    session = _request_session()
    response = session.get(url, timeout=60)

    if response.status_code != 200:
        raise Exception(
            f"Cannot check S1 catalogue on CDSE: Request to {url} failed with "
            f"status code {response.status_code}"
        )

    return response.json()
102
+
103
+
104
def _check_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> bool:
    """Checks if there is at least one GRD product available in the
    given spatio-temporal context for a collection in the CDSE catalogue,
    as there might be issues in the API that sometimes returns empty results
    for a valid query.

    Parameters
    ----------
    collection : str
        The collection name to be checked. (For example: Sentinel1 or Sentinel2)
    bounds : list
        The bounding box [minx, miny, maxx, maxy] to be checked (EPSG:4326).
    temporal_extent : TemporalContext
        The temporal period to be checked.
    additional_parameters : Optional[dict], optional
        Additional parameters to be passed to the catalogue, by default empty.
        Parameters (key, value) will be passed as "&key=value" in the query.

    Returns
    -------
    bool
        True if there is at least one GRD product, False otherwise.
    """
    body = _query_cdse_catalogue(
        collection, bounds, temporal_extent, **additional_parameters
    )

    # BUG FIX: `str` has no `.contains` method (that is a pandas idiom);
    # the original filter raised AttributeError on the first feature.
    grd_tiles = [
        feature
        for feature in body["features"]
        if "GRD" in feature["properties"]["productType"]
    ]

    return len(grd_tiles) > 0
144
+
145
+
146
+ def _compute_max_gap_days(
147
+ temporal_extent: TemporalContext, timestamps: list[pd.DatetimeIndex]
148
+ ) -> int:
149
+ """Computes the maximum temporal gap in days from the timestamps parsed from the catalogue.
150
+ Requires the start and end date to be included in the timestamps to compute the gap before
151
+ and after the first and last observation.
152
+
153
+ Parameters
154
+ ----------
155
+ temporal_extent : TemporalContext
156
+ The temporal extent to be checked. Same as used to query the catalogue.
157
+ timestamps : list[pd.DatetimeIndex]
158
+ The list of timestamps parsed from the catalogue and to compute the gap from.
159
+
160
+ Returns
161
+ -------
162
+ days : int
163
+ The maximum temporal gap in days.
164
+ """
165
+ # Computes max temporal gap. Include requested start and end date so we dont miss
166
+ # any start or end gap before first/last observation
167
+ timestamps = pd.DatetimeIndex(
168
+ sorted(
169
+ [pd.to_datetime(temporal_extent.start_date, utc=True)]
170
+ + timestamps
171
+ + [pd.to_datetime(temporal_extent.end_date, utc=True)]
172
+ )
173
+ )
174
+ return timestamps.to_series().diff().max().days
175
+
176
+
177
def s1_area_per_orbitstate_vvvh(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
) -> dict:
    """
    Evaluates for both the ascending and descending state orbits the area of intersection and
    maximum temporal gap for the available products with a VV&VH polarisation.

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues. Only the CDSE,
        CDSE_STAGING and FED backends are supported.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.

    Returns
    ------
    dict
        Keys containing the orbit state and values containing whether the
        products fully cover the bounds, the maximum temporal gap in days,
        and the total area of intersection. NOTE(review): the intersection
        areas are computed on WGS84 lon/lat geometries, so the unit is square
        degrees, not km^2 — confirm downstream consumers only compare the two
        values relative to each other.
    """
    if isinstance(spatial_extent, geojson.FeatureCollection):
        # Transform geojson into shapely geometry and compute bounds
        shapely_geometries = [
            shape(feature["geometry"]) for feature in spatial_extent["features"]
        ]
        if len(shapely_geometries) == 1 and isinstance(shapely_geometries[0], Point):
            # A single point has a degenerate bounding box; buffer it slightly
            # so the catalogue query box is non-empty.
            point = shapely_geometries[0]
            buffer_size = 0.0001
            buffered_geometry = point.buffer(buffer_size)
            bounds = buffered_geometry.bounds
        else:
            geometry = unary_union(shapely_geometries)
            bounds = geometry.bounds
        # GeoJSON geometries are WGS84 by specification.
        epsg = 4326
    elif isinstance(spatial_extent, BoundingBoxExtent):
        bounds = [
            spatial_extent.west,
            spatial_extent.south,
            spatial_extent.east,
            spatial_extent.north,
        ]
        epsg = spatial_extent.epsg
    else:
        raise ValueError(
            "Provided spatial extent is not a valid GeoJSON or SpatialContext object."
        )
    # Warp the bounds if the epsg is different from 4326
    if epsg != 4326:
        bounds = transform_bounds(CRS.from_epsg(epsg), CRS.from_epsg(4326), *bounds)

    # Queries the products in the catalogues, once per orbit direction.
    # "VV%26VH" is the URL-encoded form of "VV&VH".
    if backend.backend in [Backend.CDSE, Backend.CDSE_STAGING, Backend.FED]:
        ascending_products, ascending_timestamps = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1",
                bounds,
                temporal_extent,
                orbitDirection="ASCENDING",
                polarisation="VV%26VH",
                productType="IW_GRDH_1S-COG",
            )
        )
        descending_products, descending_timestamps = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1",
                bounds,
                temporal_extent,
                orbitDirection="DESCENDING",
                polarisation="VV%26VH",
                productType="IW_GRDH_1S-COG",
            )
        )
    else:
        raise NotImplementedError(
            f"This feature is not supported for backend: {backend.backend}."
        )

    # Builds the shape of the spatial extent and computes the area
    # (rebinds `spatial_extent` to the WGS84 bounding box polygon).
    spatial_extent = box(*bounds)

    # Computes if there is the full overlap for each of those states
    union_ascending = unary_union(ascending_products)
    union_descending = unary_union(descending_products)

    ascending_covers = union_ascending.contains(spatial_extent)
    descending_covers = union_descending.contains(spatial_extent)

    # Computes the area of intersection (summed per product, so overlapping
    # products count their shared area multiple times).
    return {
        "ASCENDING": {
            "full_overlap": ascending_covers,
            "max_temporal_gap": _compute_max_gap_days(
                temporal_extent, ascending_timestamps
            ),
            "area": sum(
                product.intersection(spatial_extent).area
                for product in ascending_products
            ),
        },
        "DESCENDING": {
            "full_overlap": descending_covers,
            "max_temporal_gap": _compute_max_gap_days(
                temporal_extent, descending_timestamps
            ),
            "area": sum(
                product.intersection(spatial_extent).area
                for product in descending_products
            ),
        },
    }
292
+
293
+
294
def select_s1_orbitstate_vvvh(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
    max_temporal_gap: int = 60,
) -> str:
    """Selects the orbit state based on some predefined rules that
    are checked in sequential order:
    1. prefer an orbit with full coverage over the requested bounds
    2. prefer an orbit with a maximum temporal gap under a
       predefined threshold
    3. prefer the orbit that covers the most area of intersection
       for the available products

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues. Only the CDSE,
        CDSE_STAGING and FED backends are supported.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.
    max_temporal_gap: int, optional, default: 60
        The maximum temporal gap in days to be considered for the orbit state.

    Returns
    ------
    str
        The orbit state ("ASCENDING" or "DESCENDING") that best matches the
        rules for the given spatio-temporal context.

    Raises
    ------
    UncoveredS1Exception
        If neither orbit state fully covers the requested area.
    """

    # Queries the products in the catalogues
    areas = s1_area_per_orbitstate_vvvh(backend, spatial_extent, temporal_extent)

    ascending_overlap = areas["ASCENDING"]["full_overlap"]
    descending_overlap = areas["DESCENDING"]["full_overlap"]
    ascending_gap_too_large = areas["ASCENDING"]["max_temporal_gap"] > max_temporal_gap
    descending_gap_too_large = (
        areas["DESCENDING"]["max_temporal_gap"] > max_temporal_gap
    )

    orbit_choice = None

    if not ascending_overlap and not descending_overlap:
        raise UncoveredS1Exception(
            "No product available to fully cover the requested area in both orbit states."
        )

    # Rule 1: Prefer an orbit with full coverage over the requested bounds
    if ascending_overlap and not descending_overlap:
        orbit_choice = "ASCENDING"
        reason = "Only orbit fully covering the requested area."
    elif descending_overlap and not ascending_overlap:
        orbit_choice = "DESCENDING"
        reason = "Only orbit fully covering the requested area."

    # Rule 2: Prefer an orbit with a maximum temporal gap under a predefined
    # threshold. Reached only when BOTH orbits fully cover the bounds; if
    # both gaps exceed the threshold, rule 3 decides instead.
    elif ascending_gap_too_large and not descending_gap_too_large:
        orbit_choice = "DESCENDING"
        reason = (
            "Only orbit with temporal gap under the threshold. "
            f"{areas['DESCENDING']['max_temporal_gap']} days < {max_temporal_gap} days"
        )
    elif descending_gap_too_large and not ascending_gap_too_large:
        orbit_choice = "ASCENDING"
        reason = (
            "Only orbit with temporal gap under the threshold. "
            f"{areas['ASCENDING']['max_temporal_gap']} days < {max_temporal_gap} days"
        )
    # Rule 3: Prefer the orbit that covers the most area of intersection
    # for the available products
    elif ascending_overlap and descending_overlap:
        ascending_cover_area = areas["ASCENDING"]["area"]
        descending_cover_area = areas["DESCENDING"]["area"]

        # Selects the orbit state that covers the most area
        if ascending_cover_area > descending_cover_area:
            orbit_choice = "ASCENDING"
            reason = (
                "Orbit has more cumulative intersected area. "
                f"{ascending_cover_area} > {descending_cover_area}"
            )
        else:
            reason = (
                "Orbit has more cumulative intersected area. "
                f"{descending_cover_area} > {ascending_cover_area}"
            )
            orbit_choice = "DESCENDING"

    if orbit_choice is not None:
        _log.info(f"Selected orbit state: {orbit_choice}. Reason: {reason}")
        return orbit_choice
    raise UncoveredS1Exception("Failed to select suitable Sentinel-1 orbit.")
openeo_gfmap/utils/intervals.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilitary function for intervals, useful for temporal aggregation
2
+ methods.
3
+ """
4
+
5
+ from datetime import timedelta
6
+
7
+ from openeo_gfmap import TemporalContext
8
+
9
+
10
def quintad_intervals(temporal_extent: TemporalContext) -> list:
    """Return the quintad intervals covering the given temporal extent.

    Quintads are windows of generally 5 days that never span two months:
    every month is split into 6 quintads, the 6th holding 6 days in 31-day
    months, 3 days in February (4 in leap years). Each interval is returned
    as a ("YYYY-mm-dd", "YYYY-mm-dd") tuple; the first interval starts at
    the requested start date even when it falls mid-quintad, and the last
    one is truncated to the requested end date.
    """
    start_date, end_date = temporal_extent.to_datetime()

    # Snap the cursor back to the quintad boundary; the shift is restored on
    # the first interval after the loop.
    shift = 0 if start_date.day == 1 else (start_date - timedelta(days=1)).day % 5
    cursor = start_date - timedelta(days=shift)

    windows = []
    while cursor <= end_date:
        # Last calendar day of the cursor's month (the day-28 + 4 trick).
        month_end = cursor.replace(day=28) + timedelta(days=4)
        month_end -= timedelta(days=month_end.day)

        window_end = cursor + timedelta(days=4)

        # The 6th quintad absorbs the 31st in 31-day months.
        if window_end.day == 30 and month_end.day == 31:
            window_end += timedelta(days=1)

        # Never cross the month boundary; otherwise never cross the end date.
        if window_end > month_end:
            window_end = month_end
        elif window_end > end_date:
            window_end = end_date

        windows.append((cursor, window_end))
        cursor = window_end + timedelta(days=1)

    # Restore the true start date in the first interval.
    windows[0] = (windows[0][0] + timedelta(days=shift), windows[0][1])

    # Render as YYYY-mm-dd strings.
    return [
        (first.strftime("%Y-%m-%d"), last.strftime("%Y-%m-%d"))
        for first, last in windows
    ]
openeo_gfmap/utils/split_stac.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility function to split a STAC collection into multiple STAC collections based on CRS.
2
+ Requires the "proj:epsg" property to be present in all the STAC items.
3
+ """
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Iterator, Union
8
+
9
+ import pystac
10
+
11
+
12
+ def _extract_epsg_from_stac_item(stac_item: pystac.Item) -> int:
13
+ """
14
+ Extract the EPSG code from a STAC item.
15
+
16
+ Parameters:
17
+ stac_item (pystac.Item): The STAC item.
18
+
19
+ Returns:
20
+ int: The EPSG code.
21
+
22
+ Raises:
23
+ KeyError: If the "proj:epsg" property is missing from the STAC item.
24
+ """
25
+
26
+ try:
27
+ epsg_code = stac_item.properties["proj:epsg"]
28
+ return epsg_code
29
+ except KeyError:
30
+ raise KeyError("The 'proj:epsg' property is missing from the STAC item.")
31
+
32
+
33
def _get_items_by_epsg(
    collection: pystac.Collection,
) -> Iterator[tuple[int, pystac.Item]]:
    """
    Generator yielding each item of the collection together with its EPSG code.

    Parameters:
        collection (pystac.Collection): The STAC collection.

    Yields:
        tuple[int, pystac.Item]: EPSG code and corresponding STAC item.
    """
    for stac_item in collection.get_all_items():
        yield _extract_epsg_from_stac_item(stac_item), stac_item
48
+
49
+
50
def _create_collection_skeleton(
    collection: pystac.Collection, epsg: int
) -> pystac.Collection:
    """
    Create a skeleton for a new STAC collection with a given EPSG code.

    Parameters:
        collection (pystac.Collection): The original STAC collection.
        epsg (int): The EPSG code.

    Returns:
        pystac.Collection: The skeleton of the new STAC collection (no items
        added yet).
    """
    # The extent is cloned as a starting point; the caller refines it with
    # update_extent_from_items() once the per-EPSG items have been added.
    # NOTE(review): `summaries` is shared by reference with the source
    # collection — confirm that downstream code never mutates it.
    new_collection = pystac.Collection(
        id=f"{collection.id}_{epsg}",
        description=f"{collection.description} Containing only items with EPSG code {epsg}",
        extent=collection.extent.clone(),
        summaries=collection.summaries,
        license=collection.license,
        stac_extensions=collection.stac_extensions,
    )
    # Carry over the item-assets extension metadata, if the source collection
    # declares it.
    if "item_assets" in collection.extra_fields:
        item_assets_extension = pystac.extensions.item_assets.ItemAssetsExtension.ext(
            collection
        )

        new_item_assets_extension = (
            pystac.extensions.item_assets.ItemAssetsExtension.ext(
                new_collection, add_if_missing=True
            )
        )

        new_item_assets_extension.item_assets = item_assets_extension.item_assets
    return new_collection
84
+
85
+
86
def split_collection_by_epsg(
    collection: Union[str, Path, pystac.Collection], output_dir: Union[str, Path]
):
    """
    Split a STAC collection into multiple STAC collections based on EPSG code.

    Requires the "proj:epsg" property to be present in all the STAC items.
    One sub-collection per distinct EPSG code is written to
    `output_dir/collection-<epsg>`.

    Parameters
    ----------
    collection: Union[str, Path, pystac.Collection]
        A collection of STAC items or a path to a STAC collection.
    output_dir: Union[str, Path]
        The directory where the split STAC collections will be saved.
    """
    output_dir = Path(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # BUG FIX: the original called pystac.read_file unconditionally, which
    # fails when an in-memory pystac.Collection is passed. Only read from
    # disk when we were given a path.
    if not isinstance(collection, pystac.Collection):
        try:
            collection = pystac.read_file(Path(collection))
        except pystac.STACError:
            # Best-effort behavior kept from the original: report and bail out.
            print("Please provide a path to a valid STAC collection.")
            return

    collections_by_epsg = {}

    for epsg, item in _get_items_by_epsg(collection):
        if epsg not in collections_by_epsg:
            collections_by_epsg[epsg] = _create_collection_skeleton(collection, epsg)

        # Add item to the corresponding collection
        collections_by_epsg[epsg].add_item(item)

    # Write each collection to disk
    for epsg, new_collection in collections_by_epsg.items():
        new_collection.update_extent_from_items()  # Update extent based on added items
        collection_path = output_dir / f"collection-{epsg}"
        new_collection.normalize_hrefs(str(collection_path))
        new_collection.save()
openeo_gfmap/utils/tile_processing.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilities to process data tiles."""
2
+
3
+ import numpy as np
4
+ import xarray as xr
5
+
6
+
7
def normalize_array(inarr: xr.DataArray, percentile: float = 0.99) -> xr.DataArray:
    """Rescale the array into the [0.0, 1.0] range.

    The lower bound is the array minimum and the upper bound is the given
    percentile, both computed over the x, y and t dimensions. Values above
    the upper bound are clipped to 1.0.
    """
    reduce_dims = ["x", "y", "t"]
    upper = inarr.quantile(percentile, dim=reduce_dims)
    lower = inarr.min(dim=reduce_dims)

    scaled = (inarr - lower) / (upper - lower)

    # Anything above the chosen percentile scaled past 1.0 — clip it back.
    return scaled.where(scaled < 1.0, 1.0)
18
+
19
+
20
def select_optical_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Return the array restricted to its Sentinel-2 optical bands.

    Optical bands are identified by the "S2-L2A-B" name prefix in the
    "bands" coordinate.
    """
    available = inarr.coords["bands"].to_numpy()
    optical = [name for name in available if name.startswith("S2-L2A-B")]
    return inarr.sel(bands=optical)
29
+
30
+
31
def select_sar_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Return the array restricted to its Sentinel-1 SAR backscatter bands."""
    # Sigma0 polarization channels recognized as SAR bands.
    sar_names = ("S1-SIGMA0-VV", "S1-SIGMA0-VH", "S1-SIGMA0-HH", "S1-SIGMA0-HV")
    present = [name for name in inarr.coords["bands"].to_numpy() if name in sar_names]
    return inarr.sel(bands=present)
40
+
41
+
42
def array_bounds(inarr: xr.DataArray) -> tuple:
    """Return the (xmin, ymin, xmax, ymax) bounds of the tile's x/y coordinates."""
    x_coords = inarr.coords["x"]
    y_coords = inarr.coords["y"]
    return (
        x_coords.min().item(),
        y_coords.min().item(),
        x_coords.max().item(),
        y_coords.max().item(),
    )
50
+
51
+
52
def arrays_cosine_similarity(
    first_array: xr.DataArray, second_array: xr.DataArray
) -> float:
    """Returns a similarity score based on normalized cosine distance. The
    input arrays must have similar ranges to obtain a valid score.
    1.0 represents the best score (same tiles), while 0.0 is the worst score.
    """
    numerator = np.sum(first_array * second_array)
    denominator = np.linalg.norm(first_array) * np.linalg.norm(second_array)
    return (numerator / denominator).item()
pyproject.toml CHANGED
@@ -8,9 +8,17 @@ dependencies = [
8
  "geojson>=3.2.0",
9
  "geopandas>=1.1.2",
10
  "joblib>=1.5.3",
11
- "matplotlib>=3.10.8",
 
12
  "openeo>=0.47.0",
13
- "rasterio>=1.5.0",
 
 
 
14
  "streamlit>=1.53.1",
15
  "streamlit-folium>=0.26.1",
 
16
  ]
 
 
 
 
8
  "geojson>=3.2.0",
9
  "geopandas>=1.1.2",
10
  "joblib>=1.5.3",
11
+ "matplotlib==3.7.5",
12
+ "numpy<2.0.0",
13
  "openeo>=0.47.0",
14
+ "prometheo",
15
+ "pydantic>=2.12.5",
16
+ "rasterio<1.4.0",
17
+ "scipy>=1.17.0",
18
  "streamlit>=1.53.1",
19
  "streamlit-folium>=0.26.1",
20
+ "torch==2.3.1",
21
  ]
22
+
23
+ [tool.uv.sources]
24
+ prometheo = { git = "https://github.com/WorldCereal/prometheo.git" }
uv.lock CHANGED
The diff for this file is too large to render. See raw diff