Fix
Browse files- README.md +21 -2
- openeo_gfmap/fetching/__init__.py +33 -0
- openeo_gfmap/fetching/commons.py +189 -0
- openeo_gfmap/fetching/fetching.py +98 -0
- openeo_gfmap/fetching/generic.py +173 -0
- openeo_gfmap/fetching/s1.py +193 -0
- openeo_gfmap/fetching/s2.py +237 -0
- openeo_gfmap/preprocessing/compositing.py +74 -0
- openeo_gfmap/preprocessing/sar.py +74 -0
- openeo_gfmap/preprocessing/scaling.py +65 -0
- openeo_gfmap/utils/__init__.py +39 -0
- openeo_gfmap/utils/build_df.py +48 -0
- openeo_gfmap/utils/catalogue.py +387 -0
- openeo_gfmap/utils/intervals.py +64 -0
- openeo_gfmap/utils/split_stac.py +125 -0
- openeo_gfmap/utils/tile_processing.py +64 -0
- pyproject.toml +10 -2
- uv.lock +0 -0
README.md
CHANGED
|
@@ -12,5 +12,24 @@ short_description: Application for Automatic Crop Type Mapping
|
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
|
| 15 |
-
|
| 16 |
-
This project
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
|
| 15 |
+
## Technologies Used
|
| 16 |
+
This project builds upon several open-source research initiatives:
|
| 17 |
+
|
| 18 |
+
* **[ESA WorldCereal](https://esa-worldcereal.org/):** Global crop mapping system funded by the European Space Agency.
|
| 19 |
+
* **[NASA Harvest Presto](https://github.com/nasaharvest/presto):** A lightweight, pre-trained transformer model for remote sensing timeseries.
|
| 20 |
+
* **[openEO GFMap](https://github.com/Open-EO/openeo-gfmap):** A generic framework for Earth Observation mapping applications.
|
| 21 |
+
* **Streamlit:** For the interactive web interface.
|
| 22 |
+
|
| 23 |
+
## Citations & References
|
| 24 |
+
If you use this tool or the underlying models, please consider citing the original authors:
|
| 25 |
+
|
| 26 |
+
**Presto Model:**
|
| 27 |
+
> Tseng, G., Zvonkov, I., Purohit, M., Rolnick, D., & Kerner, H. (2023). Lightweight, Pre-trained Transformers for Remote Sensing Timeseries. *arXiv preprint arXiv:2304.14065*.
|
| 28 |
+
|
| 29 |
+
**WorldCereal System:**
|
| 30 |
+
> Van Tricht, K., Gobin, A., Gilliams, S., & Piccard, I. (2023). WorldCereal: a dynamic open-source system for global-scale crop mapping. *Earth System Science Data, 15*(12), 5491-5515.
|
| 31 |
+
|
| 32 |
+
## License
|
| 33 |
+
This project is licensed under the **MIT License**.
|
| 34 |
+
* **WorldCereal** code components are Apache-2.0 / MIT compatible.
|
| 35 |
+
* **Presto** model weights and code are released under MIT License.
|
openeo_gfmap/fetching/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Extraction sub-module.
|
| 2 |
+
|
| 3 |
+
Logic behind the extraction of training or inference data. Different backends
|
| 4 |
+
are supported in order to obtain a very similar result at the end of this
|
| 5 |
+
component.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import logging
|
| 9 |
+
|
| 10 |
+
_log = logging.getLogger(__name__)
|
| 11 |
+
_log.setLevel(logging.INFO)
|
| 12 |
+
|
| 13 |
+
ch = logging.StreamHandler()
|
| 14 |
+
ch.setLevel(logging.INFO)
|
| 15 |
+
|
| 16 |
+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| 17 |
+
ch.setFormatter(formatter)
|
| 18 |
+
|
| 19 |
+
_log.addHandler(ch)
|
| 20 |
+
|
| 21 |
+
__all__ = [
|
| 22 |
+
"build_sentinel2_l2a_extractor",
|
| 23 |
+
"CollectionFetcher",
|
| 24 |
+
"FetchType",
|
| 25 |
+
"build_sentinel1_grd_extractor",
|
| 26 |
+
"build_generic_extractor",
|
| 27 |
+
"build_generic_extractor_stac",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
from .fetching import CollectionFetcher, FetchType # noqa: E402
|
| 31 |
+
from .generic import build_generic_extractor, build_generic_extractor_stac # noqa: E402
|
| 32 |
+
from .s1 import build_sentinel1_grd_extractor # noqa: E402
|
| 33 |
+
from .s2 import build_sentinel2_l2a_extractor # noqa: E402
|
openeo_gfmap/fetching/commons.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Common internal operations within collection extraction logic, such as reprojection.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from functools import partial
|
| 6 |
+
from typing import Dict, Optional, Sequence, Union
|
| 7 |
+
|
| 8 |
+
import openeo
|
| 9 |
+
from geojson import GeoJSON
|
| 10 |
+
from openeo.api.process import Parameter
|
| 11 |
+
from openeo.rest.connection import InputDate
|
| 12 |
+
from pyproj.crs import CRS
|
| 13 |
+
from pyproj.exceptions import CRSError
|
| 14 |
+
|
| 15 |
+
from openeo_gfmap.spatial import BoundingBoxExtent, SpatialContext
|
| 16 |
+
from openeo_gfmap.temporal import TemporalContext
|
| 17 |
+
|
| 18 |
+
from .fetching import FetchType
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def convert_band_names(desired_bands: list, band_dict: dict) -> list:
    """Renames the desired bands to the band names of the collection specified
    in the backend.

    Parameters
    ----------
    desired_bands: list
        List of bands that are desired by the user, written in the OpenEO-GFMAP
        harmonized names convention.
    band_dict: dict
        Dictionary mapping, for a backend, the collection band names to the
        OpenEO-GFMAP harmonized names. This dictionary will be reversed to be
        used within this function.

    Returns
    -------
    backend_band_list: list
        List of band names within the backend collection names.

    Raises
    ------
    ValueError
        If one of the desired bands has no entry in the given mapping
        (previously this surfaced as an uninformative KeyError).
    """
    # Reverse the dictionary: harmonized name -> backend collection name.
    reversed_mapping = {v: k for k, v in band_dict.items()}
    try:
        return [reversed_mapping[band] for band in desired_bands]
    except KeyError as exc:
        raise ValueError(
            f"Band {exc.args[0]!r} is not supported by this collection. "
            f"Supported harmonized bands: {sorted(reversed_mapping)}"
        ) from exc
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def resample_reproject(
    datacube: openeo.DataCube,
    resolution: float,
    epsg_code: Optional[Union[str, int]] = None,
    method: str = "near",
) -> openeo.DataCube:
    """Resample the datacube to the given resolution and, when an EPSG code is
    provided, reproject it to that CRS. The EPSG code is validated locally
    before being sent to the backend; an invalid code raises a ValueError.
    """
    if epsg_code is None:
        # No target CRS requested: resample only.
        return datacube.resample_spatial(resolution=resolution, method=method)

    # Validate the EPSG code up-front so the user gets a clear error instead
    # of a backend-side failure.
    try:
        CRS.from_epsg(int(epsg_code))
    except (CRSError, ValueError) as exc:
        raise ValueError(
            f"Specified target_crs: {epsg_code} is not a valid EPSG code."
        ) from exc

    return datacube.resample_spatial(
        resolution=resolution, projection=epsg_code, method=method
    )
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def rename_bands(datacube: openeo.DataCube, mapping: dict) -> openeo.DataCube:
    """Rename the bands from the given mapping scheme"""
    # Drop mapping entries whose source band is absent from the datacube,
    # otherwise rename_labels would fail on unknown labels.
    available = datacube.metadata.band_names
    applicable = {src: dst for src, dst in mapping.items() if src in available}

    return datacube.rename_labels(
        dimension="bands",
        target=list(applicable.values()),
        source=list(applicable.keys()),
    )
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _load_collection_hybrid(
|
| 83 |
+
connection: openeo.Connection,
|
| 84 |
+
is_stac: bool,
|
| 85 |
+
collection_id_or_url: str,
|
| 86 |
+
bands: list,
|
| 87 |
+
spatial_extent: Union[Dict[str, float], Parameter, None] = None,
|
| 88 |
+
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
|
| 89 |
+
properties: Optional[dict] = None,
|
| 90 |
+
):
|
| 91 |
+
"""Wrapper around the load_collection, or load_stac method of the openeo connection."""
|
| 92 |
+
if not is_stac:
|
| 93 |
+
return connection.load_collection(
|
| 94 |
+
collection_id=collection_id_or_url,
|
| 95 |
+
spatial_extent=spatial_extent,
|
| 96 |
+
temporal_extent=temporal_extent,
|
| 97 |
+
bands=bands,
|
| 98 |
+
properties=properties,
|
| 99 |
+
)
|
| 100 |
+
cube = connection.load_stac(
|
| 101 |
+
url=collection_id_or_url,
|
| 102 |
+
spatial_extent=spatial_extent,
|
| 103 |
+
temporal_extent=temporal_extent,
|
| 104 |
+
bands=bands,
|
| 105 |
+
properties=properties,
|
| 106 |
+
)
|
| 107 |
+
cube = cube.rename_labels(dimension="bands", target=bands)
|
| 108 |
+
return cube
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _load_collection(
    connection: openeo.Connection,
    bands: list,
    collection_name: str,
    spatial_extent: SpatialContext,
    temporal_extent: Optional[TemporalContext],
    fetch_type: FetchType,
    is_stac: bool = False,
    **params,
):
    """Loads a collection from the openeo backend, acting differently depending
    on the fetch type.

    Parameters
    ----------
    connection: openeo.Connection
        Connection to an openEO backend.
    bands: list
        Band names to load, already in the backend's naming convention.
    collection_name: str
        Collection id, or a STAC url when ``is_stac`` is True.
    spatial_extent: SpatialContext
        For TILE fetching: a BoundingBoxExtent (or None). For POLYGON
        fetching: a geometry, or a string url to a Parquet/GeoJSON file.
    temporal_extent: Optional[TemporalContext]
        Start/end date range; None for untemporal collections such as DEM.
    fetch_type: FetchType
        TILE, POINT or POLYGON — controls how the extent is applied.
    is_stac: bool
        Whether to load through `load_stac` instead of `load_collection`.
    **params
        Additional options: "load_collection" (property filters),
        "update_arguments" (raw process-graph argument updates),
        "pre_mask" and "pre_merge" (openeo.DataCube inputs).
    """
    load_collection_parameters = params.get("load_collection", {})
    # Bind the collection id and STAC flag; the remaining arguments differ
    # per fetch type below.
    load_collection_method = partial(
        _load_collection_hybrid, is_stac=is_stac, collection_id_or_url=collection_name
    )

    if (
        temporal_extent is not None
    ):  # Can be ignored for intemporal collections such as DEM
        temporal_extent = [temporal_extent.start_date, temporal_extent.end_date]

    if fetch_type == FetchType.TILE:
        # Tile fetching loads a dense cube bounded by the bounding box.
        if isinstance(spatial_extent, BoundingBoxExtent):
            spatial_extent = dict(spatial_extent)
        elif spatial_extent is not None:
            raise ValueError(
                "`spatial_extent` should be either None or an instance of BoundingBoxExtent for tile-based fetching."
            )
        cube = load_collection_method(
            connection=connection,
            bands=bands,
            spatial_extent=spatial_extent,
            temporal_extent=temporal_extent,
            properties=load_collection_parameters,
        )
    elif fetch_type == FetchType.POINT or fetch_type == FetchType.POLYGON:
        # Sparse fetching: no spatial extent at load time; POLYGON filtering
        # is applied after the optional mask/merge steps below.
        cube = load_collection_method(
            connection=connection,
            bands=bands,
            temporal_extent=temporal_extent,
            properties=load_collection_parameters,
        )

    # Adding the process graph updates for experimental features
    if params.get("update_arguments") is not None:
        cube.result_node().update_arguments(**params["update_arguments"])

    # Performing pre-mask optimization
    pre_mask = params.get("pre_mask", None)
    if pre_mask is not None:
        assert isinstance(pre_mask, openeo.DataCube), (
            f"The 'pre_mask' parameter must be an openeo datacube, got {pre_mask}."
        )
        cube = cube.mask(pre_mask)

    # Merges additional bands continuing the operations.
    pre_merge_cube = params.get("pre_merge", None)
    if pre_merge_cube is not None:
        assert isinstance(pre_merge_cube, openeo.DataCube), (
            f"The 'pre_merge' parameter value must be an openeo datacube, "
            f"got {pre_merge_cube}."
        )
        # Apply the same mask to the merged cube so both sides stay aligned.
        if pre_mask is not None:
            pre_merge_cube = pre_merge_cube.mask(pre_mask)
        cube = cube.merge_cubes(pre_merge_cube)

    # Polygon-based fetching filters spatially at the end; a string extent is
    # treated as a url to a vector file (Parquet or GeoJSON).
    if fetch_type == FetchType.POLYGON and spatial_extent is not None:
        if isinstance(spatial_extent, str):
            geometry = connection.load_url(
                spatial_extent,
                format="Parquet" if ".parquet" in spatial_extent else "GeoJSON",
            )
            cube = cube.filter_spatial(geometry)
        else:
            cube = cube.filter_spatial(spatial_extent)

    return cube
|
openeo_gfmap/fetching/fetching.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Main file for extractions and pre-processing of data through OpenEO
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
from enum import Enum
|
| 5 |
+
from typing import Callable
|
| 6 |
+
|
| 7 |
+
import openeo
|
| 8 |
+
|
| 9 |
+
from openeo_gfmap import BackendContext
|
| 10 |
+
from openeo_gfmap.spatial import SpatialContext
|
| 11 |
+
from openeo_gfmap.temporal import TemporalContext
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class FetchType(Enum):
    """Enumerates the different types of extraction. There are three types of
    enumerations.

    * TILE: Tile based extractions, getting the data for a dense part. The
    output of such fetching process is a dense DataCube.
    * POINT: Point based extractions. From a dataset of polygons, gets sparse
    extractions and performs spatial aggregation on the selected polygons. The
    output of such fetching process is a VectorCube, that can be used to get
    a pandas.DataFrame
    * POLYGON: Patch based extractions, returning a VectorCube of sparse
    patches. This can be retrieved as multiple NetCDF files from one job.
    """

    # Dense extraction over a bounding box.
    TILE = "tile"
    # Sparse, spatially-aggregated extraction over sample geometries.
    POINT = "point"
    # Sparse patch extraction, one patch per input polygon.
    POLYGON = "polygon"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class CollectionFetcher:
    """Base class to fetch a particular collection.

    Parameters
    ----------
    backend_context: BackendContext
        Information about the backend in use, useful in certain cases.
    bands: list
        List of band names to load from that collection.
    collection_fetch: Callable
        Function defining how to fetch a collection for a specific backend,
        the function accepts the following parameters: connection,
        spatial extent, temporal extent, bands and additional parameters.
    collection_preprocessing: Callable
        Function defining how to harmonize the data of a collection in a
        backend. For example, this function could rename the bands as they
        can be different for every backend/collection (SENTINEL2_L2A or
        SENTINEL2_L2A_SENTINELHUB). Accepts the following parameters:
        datacube (of pre-fetched collection) and additional parameters.
    collection_params: dict
        Additional parameters encoded within a dictionary that will be
        passed in the fetch and preprocessing function.
    """

    def __init__(
        self,
        backend_context: BackendContext,
        bands: list,
        collection_fetch: Callable,
        collection_preprocessing: Callable,
        **collection_params,
    ):
        # NOTE(review): "backend_contect" is a misspelling of "backend_context",
        # kept as-is because external code may already read this attribute name
        # — confirm before renaming.
        self.backend_contect = backend_context
        self.bands = bands
        self.fetcher = collection_fetch
        self.processing = collection_preprocessing
        self.params = collection_params

    def get_cube(
        self,
        connection: openeo.Connection,
        spatial_context: SpatialContext,
        temporal_context: TemporalContext,
    ) -> openeo.DataCube:
        """Retrieve a data cube from the given spatial and temporal context.

        Parameters
        ----------
        connection: openeo.Connection
            A connection to an OpenEO backend. The backend provided must be the
            same as the one this extractor class is configured for.
        spatial_context: SpatialContext
            Either a GeoJSON collection on which spatial filtering will be
            applied or a bounding box with an EPSG code. If a bounding box is
            provided, no filtering is applied and the entirety of the data is
            fetched for that region.
        temporal_context: TemporalContext
            The begin and end date of the extraction.
        """
        # Fetch the raw collection data, then run the backend-specific
        # harmonization step; both receive the same extra parameters.
        collection_data = self.fetcher(
            connection, spatial_context, temporal_context, self.bands, **self.params
        )

        preprocessed_data = self.processing(collection_data, **self.params)

        return preprocessed_data
|
openeo_gfmap/fetching/generic.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Generic extraction of features, supporting VITO backend.
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
from typing import Callable, Optional
|
| 5 |
+
|
| 6 |
+
import openeo
|
| 7 |
+
from openeo.rest import OpenEoApiError
|
| 8 |
+
|
| 9 |
+
from openeo_gfmap.backend import Backend, BackendContext
|
| 10 |
+
from openeo_gfmap.fetching import CollectionFetcher, FetchType, _log
|
| 11 |
+
from openeo_gfmap.fetching.commons import (
|
| 12 |
+
_load_collection,
|
| 13 |
+
convert_band_names,
|
| 14 |
+
rename_bands,
|
| 15 |
+
resample_reproject,
|
| 16 |
+
)
|
| 17 |
+
from openeo_gfmap.spatial import SpatialContext
|
| 18 |
+
from openeo_gfmap.temporal import TemporalContext
|
| 19 |
+
|
| 20 |
+
BASE_DEM_MAPPING = {"DEM": "COP-DEM"}
|
| 21 |
+
BASE_WEATHER_MAPPING = {
|
| 22 |
+
"dewpoint-temperature": "AGERA5-DEWTEMP",
|
| 23 |
+
"precipitation-flux": "AGERA5-PRECIP",
|
| 24 |
+
"solar-radiation-flux": "AGERA5-SOLRAD",
|
| 25 |
+
"temperature-max": "AGERA5-TMAX",
|
| 26 |
+
"temperature-mean": "AGERA5-TMEAN",
|
| 27 |
+
"temperature-min": "AGERA5-TMIN",
|
| 28 |
+
"vapour-pressure": "AGERA5-VAPOUR",
|
| 29 |
+
"wind-speed": "AGERA5-WIND",
|
| 30 |
+
}
|
| 31 |
+
AGERA5_STAC_MAPPING = {
|
| 32 |
+
"dewpoint_temperature_mean": "AGERA5-DEWTEMP",
|
| 33 |
+
"total_precipitation": "AGERA5-PRECIP",
|
| 34 |
+
"solar_radiation_flux": "AGERA5-SOLRAD",
|
| 35 |
+
"2m_temperature_max": "AGERA5-TMAX",
|
| 36 |
+
"2m_temperature_mean": "AGERA5-TMEAN",
|
| 37 |
+
"2m_temperature_min": "AGERA5-TMIN",
|
| 38 |
+
"vapour_pressure": "AGERA5-VAPOUR",
|
| 39 |
+
"wind_speed": "AGERA5-WIND",
|
| 40 |
+
}
|
| 41 |
+
KNOWN_UNTEMPORAL_COLLECTIONS = ["COPERNICUS_30"]
|
| 42 |
+
|
| 43 |
+
AGERA5_TERRASCOPE_STAC = "https://stac.openeo.vito.be/collections/agera5_daily"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _get_generic_fetcher(
    collection_name: str, fetch_type: FetchType, backend: Backend, is_stac: bool
) -> Callable:
    """Builds a fetcher function for a generic collection (DEM, AGERA5, or a
    STAC url), selecting the appropriate band-name mapping for known
    collections.

    Parameters
    ----------
    collection_name: str
        Collection id (or STAC url when ``is_stac`` is True) on the backend.
    fetch_type: FetchType
        The type of fetching: TILE, POINT or POLYGON.
    backend: Backend
        The backend in use; only used to build a clearer error message.
    is_stac: bool
        Whether the collection is loaded through `load_stac`.
    """
    # Known collections get a harmonized-name -> backend-name band mapping;
    # unknown collections are fetched with the band names as given.
    band_mapping: Optional[dict] = None

    if collection_name == "COPERNICUS_30":
        band_mapping = BASE_DEM_MAPPING
    elif collection_name == "AGERA5":
        band_mapping = BASE_WEATHER_MAPPING
    elif is_stac and (AGERA5_TERRASCOPE_STAC in collection_name):
        band_mapping = AGERA5_STAC_MAPPING

    def generic_default_fetcher(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Fetch the generic collection, translating band names and dropping
        the temporal extent for collections known to be untemporal.
        """
        if band_mapping is not None:
            bands = convert_band_names(bands, band_mapping)

        # Untemporal collections (e.g. the DEM) reject a temporal filter.
        if (collection_name in KNOWN_UNTEMPORAL_COLLECTIONS) and (
            temporal_extent is not None
        ):
            _log.warning(
                "Ignoring the temporal extent provided by the user as the collection %s is known to be untemporal.",
                collection_name,
            )
            temporal_extent = None

        try:
            cube = _load_collection(
                connection,
                bands,
                collection_name,
                spatial_extent,
                temporal_extent,
                fetch_type,
                is_stac=is_stac,
                **params,
            )
        except OpenEoApiError as e:
            # Translate the backend error into a clearer, actionable message.
            if "CollectionNotFound" in str(e):
                raise ValueError(
                    f"Collection {collection_name} not found in the selected backend {backend.value}."
                ) from e
            raise e

        # # Apply if the collection is a GeoJSON Feature collection
        # if isinstance(spatial_extent, GeoJSON):
        #     cube = cube.filter_spatial(spatial_extent)

        return cube

    return generic_default_fetcher
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _get_generic_processor(
    collection_name: str, fetch_type: FetchType, is_stac: bool
) -> Callable:
    """Builds the preprocessing function from the collection name as it is
    stored in the target backend.
    """
    # Select the band mapping for known collections; unknown collections keep
    # their original band names.
    if collection_name == "COPERNICUS_30":
        band_mapping: Optional[dict] = BASE_DEM_MAPPING
    elif collection_name == "AGERA5":
        band_mapping = BASE_WEATHER_MAPPING
    elif is_stac and (AGERA5_TERRASCOPE_STAC in collection_name):
        band_mapping = AGERA5_STAC_MAPPING
    else:
        band_mapping = None

    def generic_default_processor(cube: openeo.DataCube, **params):
        """Default collection preprocessing method for generic datasets:
        optional resampling/reprojection, time-dimension removal for the DEM
        collection, and band renaming to the harmonized GFMAP names.
        """
        target_resolution = params.get("target_resolution", None)
        if target_resolution is not None:
            cube = resample_reproject(
                cube,
                target_resolution,
                params.get("target_crs", None),
                method=params.get("resampling_method", "near"),
            )

        # The DEM cube carries a (single-valued) time dimension; collapse it.
        if collection_name == "COPERNICUS_30":
            cube = cube.min_time()

        if band_mapping is not None:
            cube = rename_bands(cube, band_mapping)

        return cube

    return generic_default_processor
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def build_generic_extractor(
    backend_context: BackendContext,
    bands: list,
    fetch_type: FetchType,
    collection_name: str,
    **params,
) -> CollectionFetcher:
    """Creates a generic extractor adapted to the given backend. Provides band mappings for known
    collections, such as AGERA5 available on Terrascope/FED and Copernicus 30m DEM in all backends.
    """
    return CollectionFetcher(
        backend_context,
        bands,
        _get_generic_fetcher(
            collection_name, fetch_type, backend_context.backend, False
        ),
        _get_generic_processor(collection_name, fetch_type, False),
        **params,
    )
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def build_generic_extractor_stac(
    backend_context: BackendContext,
    bands: list,
    fetch_type: FetchType,
    collection_url: str,
    **params,
) -> CollectionFetcher:
    """Creates a generic extractor adapted to the given backend. Currently only tested with VITO backend"""
    return CollectionFetcher(
        backend_context,
        bands,
        _get_generic_fetcher(collection_url, fetch_type, backend_context.backend, True),
        _get_generic_processor(collection_url, fetch_type, True),
        **params,
    )
|
openeo_gfmap/fetching/s1.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Collection fetching of S1 features, supporting different backends.
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
from functools import partial
|
| 5 |
+
from typing import Callable
|
| 6 |
+
|
| 7 |
+
import openeo
|
| 8 |
+
from geojson import GeoJSON
|
| 9 |
+
|
| 10 |
+
from openeo_gfmap.backend import Backend, BackendContext
|
| 11 |
+
from openeo_gfmap.spatial import SpatialContext
|
| 12 |
+
from openeo_gfmap.temporal import TemporalContext
|
| 13 |
+
|
| 14 |
+
from .commons import (
|
| 15 |
+
_load_collection,
|
| 16 |
+
convert_band_names,
|
| 17 |
+
rename_bands,
|
| 18 |
+
resample_reproject,
|
| 19 |
+
)
|
| 20 |
+
from .fetching import CollectionFetcher, FetchType
|
| 21 |
+
|
| 22 |
+
BASE_SENTINEL1_GRD_MAPPING = {
|
| 23 |
+
"VH": "S1-SIGMA0-VH",
|
| 24 |
+
"HH": "S1-SIGMA0-HH",
|
| 25 |
+
"HV": "S1-SIGMA0-HV",
|
| 26 |
+
"VV": "S1-SIGMA0-VV",
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _get_s1_grd_default_fetcher(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Return a default fetcher for Sentinel-1 GRD data.

    Parameters
    ----------
    collection_name : str
        The name of the sentinel1 collection to fetch as named in the backend.
    fetch_type : FetchType
        The type of fetching: TILE, POINT and POLYGON.
    """

    def s1_grd_fetch_default(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Default collection fetcher for Sentinel-1 GRD collections. The
        collection values are expected to be expressed in power values.

        Parameters
        ----------
        connection: openeo.Connection
            Connection to a general backend.
        spatial_extent: SpatialContext
            Either a GeoJSON collection or a bounding box of locations.
            Performs spatial filtering if the spatial context is a GeoJSON
            collection, as it implies sparse data.
        temporal_extent: TemporalContext
            A time range, defined by a start and end date.
        bands: list
            The name of the bands to load from that collection
        Returns
        ------
        openeo.DataCube: a datacube containing the collection raw products.
        """
        # Translate harmonized GFMAP band names into the backend's S1 names.
        bands = convert_band_names(bands, BASE_SENTINEL1_GRD_MAPPING)

        cube = _load_collection(
            connection,
            bands,
            collection_name,
            spatial_extent,
            temporal_extent,
            fetch_type,
            **params,
        )

        # For sparse (non-point) extents given as GeoJSON, filter spatially to
        # keep only the requested geometries.
        if fetch_type is not FetchType.POINT and isinstance(spatial_extent, GeoJSON):
            cube = cube.filter_spatial(spatial_extent)

        return cube

    return s1_grd_fetch_default
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _get_s1_grd_default_processor(
    collection_name: str, fetch_type: FetchType, backend: Backend
) -> Callable:
    """Builds the preprocessing function from the collection name as it is stored
    in the target backend.
    """

    def s1_grd_default_processor(cube: openeo.DataCube, **params):
        """Default collection preprocessing method.
        This method performs:

        * Compute the backscatter of all the S1 products. By default, the
        "sigma0-ellipsoid" method is used with "COPERNICUS_30" DEM, but those
        can be changed by specifying "coefficient" and "elevation_model" in
        params.
        * Resampling to 10m resolution.
        * Reprojection if a "target_crs" key is specified in `params`.

        NOTE(review): an earlier docstring also claimed value rescaling to
        uint16, which this function does not visibly perform — presumably done
        elsewhere; verify before relying on it.
        """
        elevation_model = params.get("elevation_model", "COPERNICUS_30")
        coefficient = params.get("coefficient", "sigma0-ellipsoid")

        cube = cube.sar_backscatter(
            elevation_model=elevation_model,
            coefficient=coefficient,
            local_incidence_angle=False,
        )

        # Reproject collection data to target CRS and resolution, if specified so.
        # Can be disabled by setting target_resolution=None in the parameters.
        # The `True` default is a sentinel: an absent key still triggers the
        # default 10m resampling; only an explicit None skips it.
        if params.get("target_resolution", True) is not None:
            cube = resample_reproject(
                cube,
                params.get("target_resolution", 10.0),
                params.get("target_crs", None),
                method=params.get("resampling_method", "near"),
            )
        elif params.get("target_crs") is not None:
            raise ValueError(
                "In fetching parameters: `target_crs` specified but not `target_resolution`, which is required to perform reprojection."
            )

        # Harmonizing the collection band names to the default GFMAP band names
        cube = rename_bands(cube, BASE_SENTINEL1_GRD_MAPPING)

        return cube

    return s1_grd_default_processor
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# Maps each supported backend to the factory functions producing its
# Sentinel-1 GRD fetcher ("default") and preprocessor ("preprocessor").
# The collection is named "SENTINEL1_GRD" on every listed backend.
SENTINEL1_GRD_BACKEND_MAP = {
    Backend.TERRASCOPE: {
        "default": partial(
            _get_s1_grd_default_fetcher, collection_name="SENTINEL1_GRD"
        ),
        "preprocessor": partial(
            _get_s1_grd_default_processor,
            collection_name="SENTINEL1_GRD",
            backend=Backend.TERRASCOPE,
        ),
    },
    Backend.CDSE: {
        "default": partial(
            _get_s1_grd_default_fetcher, collection_name="SENTINEL1_GRD"
        ),
        "preprocessor": partial(
            _get_s1_grd_default_processor,
            collection_name="SENTINEL1_GRD",
            backend=Backend.CDSE,
        ),
    },
    Backend.CDSE_STAGING: {
        "default": partial(
            _get_s1_grd_default_fetcher, collection_name="SENTINEL1_GRD"
        ),
        "preprocessor": partial(
            _get_s1_grd_default_processor,
            collection_name="SENTINEL1_GRD",
            backend=Backend.CDSE_STAGING,
        ),
    },
    Backend.FED: {
        "default": partial(
            _get_s1_grd_default_fetcher, collection_name="SENTINEL1_GRD"
        ),
        "preprocessor": partial(
            _get_s1_grd_default_processor,
            collection_name="SENTINEL1_GRD",
            backend=Backend.FED,
        ),
    },
}
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def build_sentinel1_grd_extractor(
    backend_context: BackendContext, bands: list, fetch_type: FetchType, **params
) -> CollectionFetcher:
    """Creates a S1 GRD collection extractor for the given backend.

    Parameters
    ----------
    backend_context: BackendContext
        Context identifying the target backend; must be one of the backends
        listed in `SENTINEL1_GRD_BACKEND_MAP`.
    bands: list
        The GFMAP band names to extract.
    fetch_type: FetchType
        The type of fetching: TILE, POINT and POLYGON.

    Returns
    -------
    CollectionFetcher
        A fetcher configured with the backend-specific fetch and
        preprocessing functions.

    Raises
    ------
    ValueError
        If the backend has no Sentinel-1 GRD configuration.
    """
    backend_functions = SENTINEL1_GRD_BACKEND_MAP.get(backend_context.backend)
    # Previously an unsupported backend crashed with an opaque
    # "'NoneType' object is not subscriptable"; fail with a clear message.
    if backend_functions is None:
        raise ValueError(
            f"No Sentinel-1 GRD fetcher defined for backend {backend_context.backend}."
        )

    fetcher = backend_functions["default"](fetch_type=fetch_type)
    preprocessor = backend_functions["preprocessor"](fetch_type=fetch_type)

    return CollectionFetcher(backend_context, bands, fetcher, preprocessor, **params)
|
openeo_gfmap/fetching/s2.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" Extraction of S2 features, supporting different backends.
|
| 2 |
+
"""
|
| 3 |
+
|
| 4 |
+
from functools import partial
|
| 5 |
+
from typing import Callable
|
| 6 |
+
|
| 7 |
+
import openeo
|
| 8 |
+
from geojson import GeoJSON
|
| 9 |
+
|
| 10 |
+
from openeo_gfmap.backend import Backend, BackendContext
|
| 11 |
+
from openeo_gfmap.metadata import FakeMetadata
|
| 12 |
+
from openeo_gfmap.spatial import BoundingBoxExtent, SpatialContext
|
| 13 |
+
from openeo_gfmap.temporal import TemporalContext
|
| 14 |
+
|
| 15 |
+
from .commons import (
|
| 16 |
+
_load_collection,
|
| 17 |
+
convert_band_names,
|
| 18 |
+
rename_bands,
|
| 19 |
+
resample_reproject,
|
| 20 |
+
)
|
| 21 |
+
from .fetching import CollectionFetcher, FetchType
|
| 22 |
+
|
| 23 |
+
# Mapping of native Sentinel-2 L2A band names (as exposed by the default
# backend collections) to the harmonized GFMAP band names.
BASE_SENTINEL2_L2A_MAPPING = {
    "B01": "S2-L2A-B01",
    "B02": "S2-L2A-B02",
    "B03": "S2-L2A-B03",
    "B04": "S2-L2A-B04",
    "B05": "S2-L2A-B05",
    "B06": "S2-L2A-B06",
    "B07": "S2-L2A-B07",
    "B08": "S2-L2A-B08",
    "B8A": "S2-L2A-B8A",
    "B09": "S2-L2A-B09",
    "B11": "S2-L2A-B11",
    "B12": "S2-L2A-B12",
    "AOT": "S2-L2A-AOT",
    "SCL": "S2-L2A-SCL",
    "SNW": "S2-L2A-SNW",
}
|
| 40 |
+
|
| 41 |
+
# Mapping of the Element84/AWS STAC asset names for Sentinel-2 L2A
# (earth-search.aws.element84.com) to the harmonized GFMAP band names.
ELEMENT84_SENTINEL2_L2A_MAPPING = {
    "coastal": "S2-L2A-B01",
    "blue": "S2-L2A-B02",
    "green": "S2-L2A-B03",
    "red": "S2-L2A-B04",
    "rededge1": "S2-L2A-B05",
    "rededge2": "S2-L2A-B06",
    "rededge3": "S2-L2A-B07",
    "nir": "S2-L2A-B08",
    "nir08": "S2-L2A-B8A",
    "nir09": "S2-L2A-B09",
    "cirrus": "S2-L2A-B10",
    "swir16": "S2-L2A-B11",
    "swir22": "S2-L2A-B12",
    "scl": "S2-L2A-SCL",
    "aot": "S2-L2A-AOT",
}
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _get_s2_l2a_default_fetcher(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Builds the fetch function from the collection name as it is stored in
    the target backend.

    Parameters
    ----------
    collection_name: str
        The name of the sentinel2 collection as named in the backend
    fetch_type: FetchType
        The type of fetching: TILE, POINT and POLYGON.
    """

    def s2_l2a_fetch_default(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Default collection fetcher for Sentinel_L2A collections.
        Parameters
        ----------
        connection: openeo.Connection
            Connection to a general backend.
        spatial_extent: SpatialContext
            Either a GeoJSON collection or a bounding box of locations.
            Performs spatial filtering if the spatial context is a GeoJSON
            collection, as it implies sparse data.
        temporal_extent: TemporalContext
            A time range, defined by a start and end date.
        bands: list
            The name of the bands to load from that collection
        Returns
        -------
        openeo.DataCube: a datacube containing the collection raw products.
        """
        # Rename the bands to the backend collection names
        bands = convert_band_names(bands, BASE_SENTINEL2_L2A_MAPPING)

        cube = _load_collection(
            connection,
            bands,
            collection_name,
            spatial_extent,
            temporal_extent,
            fetch_type,
            **params,
        )

        return cube

    return s2_l2a_fetch_default
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# TODO deprecated?
def _get_s2_l2a_element84_fetcher(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Fetches the collections from the Sentinel-2 Cloud-Optimized GeoTIFFs
    bucket provided by Amazon and managed by Element84.

    Parameters
    ----------
    collection_name: str
        Unused here; the STAC collection URL is hard-coded.
    fetch_type: FetchType
        The type of fetching: TILE, POINT and POLYGON (currently unused in
        the body).
    """

    def s2_l2a_element84_fetcher(
        connection: openeo.Connection,
        spatial_extent: SpatialContext,
        temporal_extent: TemporalContext,
        bands: list,
        **params,
    ) -> openeo.DataCube:
        """Collection fetcher on the element84 collection."""
        bands = convert_band_names(bands, ELEMENT84_SENTINEL2_L2A_MAPPING)

        # BUGFIX: remember the original GeoJSON before converting to a plain
        # dict. The previous implementation re-tested
        # `isinstance(spatial_extent, GeoJSON)` *after* the conversion, so
        # the spatial filtering branch below was never taken.
        geojson_extent = None

        if isinstance(spatial_extent, BoundingBoxExtent):
            spatial_extent = dict(spatial_extent)
        elif isinstance(spatial_extent, GeoJSON):
            assert (
                spatial_extent.get("crs", None) is not None
            ), "CRS not defined within GeoJSON collection."
            geojson_extent = spatial_extent
            spatial_extent = dict(spatial_extent)

        cube = connection.load_stac(
            "https://earth-search.aws.element84.com/v1/collections/sentinel-2-l2a",
            spatial_extent,
            temporal_extent,
            bands,
        )

        cube.metadata = FakeMetadata(band_names=bands)

        # Apply if the collection is a GeoJSON Feature collection
        if geojson_extent is not None:
            cube = cube.filter_spatial(geojson_extent)

        return cube

    return s2_l2a_element84_fetcher
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _get_s2_l2a_default_processor(
    collection_name: str, fetch_type: FetchType
) -> Callable:
    """Builds the preprocessing function from the collection name as it is
    stored in the target backend.

    Parameters
    ----------
    collection_name: str
        The name of the sentinel2 collection as named in the backend.
    fetch_type: FetchType
        The type of fetching: TILE, POINT and POLYGON.

    Returns
    -------
    Callable
        A preprocessing function taking an `openeo.DataCube` and keyword
        parameters, returning the preprocessed cube.
    """
    # NOTE(review): neither factory argument is used inside the processor
    # body; presumably kept for symmetry with the fetcher factory — confirm.

    def s2_l2a_default_processor(cube: openeo.DataCube, **params):
        """Default collection preprocessing method.
        This method performs reprojection if specified, upsampling of bands
        at 10m resolution as well as band reprojection. Finally, it converts
        the type of the cube values to uint16
        """
        # Reproject collection data to target CRS and resolution, if specified so.
        # Can be disabled by setting target_resolution=None in the parameters
        if params.get("target_resolution", True) is not None:
            cube = resample_reproject(
                cube,
                params.get("target_resolution", 10.0),
                params.get("target_crs", None),
                method=params.get("resampling_method", "near"),
            )
        elif params.get("target_crs") is not None:
            # target_crs without target_resolution is inconsistent: fail early.
            raise ValueError(
                "In fetching parameters: `target_crs` specified but not `target_resolution`, which is required to perform reprojection."
            )

        # Harmonizing the collection band names to the default GFMAP band names
        cube = rename_bands(cube, BASE_SENTINEL2_L2A_MAPPING)

        # Change the data type to uint16 for optimization purposes
        cube = cube.linear_scale_range(0, 65534, 0, 65534)

        return cube

    return s2_l2a_default_processor
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# Maps each supported backend to the factory functions producing its
# Sentinel-2 L2A fetcher ("fetch") and preprocessor ("preprocessor").
# The collection is named "SENTINEL2_L2A" on every listed backend.
SENTINEL2_L2A_BACKEND_MAP = {
    Backend.TERRASCOPE: {
        "fetch": partial(_get_s2_l2a_default_fetcher, collection_name="SENTINEL2_L2A"),
        "preprocessor": partial(
            _get_s2_l2a_default_processor, collection_name="SENTINEL2_L2A"
        ),
    },
    Backend.CDSE: {
        "fetch": partial(_get_s2_l2a_default_fetcher, collection_name="SENTINEL2_L2A"),
        "preprocessor": partial(
            _get_s2_l2a_default_processor, collection_name="SENTINEL2_L2A"
        ),
    },
    Backend.CDSE_STAGING: {
        "fetch": partial(_get_s2_l2a_default_fetcher, collection_name="SENTINEL2_L2A"),
        "preprocessor": partial(
            _get_s2_l2a_default_processor, collection_name="SENTINEL2_L2A"
        ),
    },
    Backend.FED: {
        "fetch": partial(_get_s2_l2a_default_fetcher, collection_name="SENTINEL2_L2A"),
        "preprocessor": partial(
            _get_s2_l2a_default_processor, collection_name="SENTINEL2_L2A"
        ),
    },
}
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def build_sentinel2_l2a_extractor(
    backend_context: BackendContext, bands: list, fetch_type: FetchType, **params
) -> CollectionFetcher:
    """Creates a S2 L2A extractor adapted to the given backend.

    Parameters
    ----------
    backend_context: BackendContext
        Context identifying the target backend; must be one of the backends
        listed in `SENTINEL2_L2A_BACKEND_MAP`.
    bands: list
        The GFMAP band names to extract.
    fetch_type: FetchType
        The type of fetching: TILE, POINT and POLYGON.

    Returns
    -------
    CollectionFetcher
        A fetcher configured with the backend-specific fetch and
        preprocessing functions.

    Raises
    ------
    ValueError
        If the backend has no Sentinel-2 L2A configuration.
    """
    backend_functions = SENTINEL2_L2A_BACKEND_MAP.get(backend_context.backend)
    # Previously an unsupported backend crashed with an opaque
    # "'NoneType' object is not subscriptable"; fail with a clear message.
    if backend_functions is None:
        raise ValueError(
            f"No Sentinel-2 L2A fetcher defined for backend {backend_context.backend}."
        )

    fetcher = backend_functions["fetch"](fetch_type=fetch_type)
    preprocessor = backend_functions["preprocessor"](fetch_type=fetch_type)

    return CollectionFetcher(backend_context, bands, fetcher, preprocessor, **params)
|
openeo_gfmap/preprocessing/compositing.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Temporal compositing, or temporal aggregation, is a method to increase the
|
| 2 |
+
quality of data within timesteps by reducing the temporal resolution of a time
|
| 3 |
+
series of satellite images.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Union
|
| 7 |
+
|
| 8 |
+
import openeo
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def median_compositing(
    cube: openeo.DataCube, period: Union[str, list]
) -> openeo.DataCube:
    """Perform median compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to aggregate temporally.
    period : str or list
        Either a period name forwarded to `aggregate_temporal_period`, or an
        explicit list of intervals forwarded to `aggregate_temporal`.

    Returns
    -------
    openeo.DataCube
        The median-composited datacube.

    Raises
    ------
    TypeError
        If `period` is neither a string nor a list. (Previously an
        unsupported type silently returned ``None``.)
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="median", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(
            intervals=period, reducer="median", dimension="t"
        )
    raise TypeError(f"Unsupported period type for median compositing: {type(period)!r}")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def mean_compositing(
    cube: openeo.DataCube, period: Union[str, list]
) -> openeo.DataCube:
    """Perform mean compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to aggregate temporally.
    period : str or list
        Either a period name forwarded to `aggregate_temporal_period`, or an
        explicit list of intervals forwarded to `aggregate_temporal`.

    Returns
    -------
    openeo.DataCube
        The mean-composited datacube.

    Raises
    ------
    TypeError
        If `period` is neither a string nor a list. (Previously an
        unsupported type silently returned ``None``.)
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="mean", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(intervals=period, reducer="mean", dimension="t")
    raise TypeError(f"Unsupported period type for mean compositing: {type(period)!r}")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def sum_compositing(cube: openeo.DataCube, period: Union[str, list]) -> openeo.DataCube:
    """Perform sum compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to aggregate temporally.
    period : str or list
        Either a period name forwarded to `aggregate_temporal_period`, or an
        explicit list of intervals forwarded to `aggregate_temporal`.

    Returns
    -------
    openeo.DataCube
        The sum-composited datacube.

    Raises
    ------
    TypeError
        If `period` is neither a string nor a list. (Previously an
        unsupported type silently returned ``None``.)
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="sum", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(intervals=period, reducer="sum", dimension="t")
    raise TypeError(f"Unsupported period type for sum compositing: {type(period)!r}")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def max_ndvi_compositing(cube: openeo.DataCube, period: str) -> openeo.DataCube:
    """Perform compositing by selecting the observation with the highest NDVI value over the
    given compositing window."""

    def max_ndvi_selection(ndvi: openeo.DataCube):
        # Builds a boolean mask over the temporal neighborhood: True (masked)
        # for every timestep whose NDVI differs from the neighborhood maximum,
        # so only the max-NDVI observation survives the `mask` call below.
        max_ndvi = ndvi.max()
        return ndvi.array_apply(lambda x: x != max_ndvi)

    if isinstance(period, str):
        # NDVI computed from the harmonized GFMAP band names (B08 = NIR, B04 = red).
        ndvi = cube.ndvi(nir="S2-L2A-B08", red="S2-L2A-B04")

        # 1x1 spatial window, full compositing period along t: the selection
        # is made independently per pixel within each period.
        rank_mask = ndvi.apply_neighborhood(
            max_ndvi_selection,
            size=[
                {"dimension": "x", "unit": "px", "value": 1},
                {"dimension": "y", "unit": "px", "value": 1},
                {"dimension": "t", "value": period},
            ],
            overlap=[],
        )

        # Mask out all non-maximum observations, then collapse each period to
        # its single remaining ("first") valid observation.
        cube = cube.mask(mask=rank_mask).aggregate_temporal_period(period, "first")

    else:
        raise ValueError(
            "Custom temporal intervals are not yet supported for max NDVI compositing."
        )
    return cube
|
openeo_gfmap/preprocessing/sar.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Routines to pre-process sar signals."""
|
| 2 |
+
|
| 3 |
+
import openeo
|
| 4 |
+
from openeo.processes import array_create, power
|
| 5 |
+
|
| 6 |
+
from openeo_gfmap import BackendContext
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def compress_backscatter_uint16(
    backend_context: BackendContext, cube: openeo.DataCube
) -> openeo.DataCube:
    """
    Scaling the bands from float32 power values to uint16 for memory optimization. The scaling
    casts the values from power to decibels and applies a linear scaling from 0 to 65534.

    The resulting datacube has a uint16 memory representation which makes an optimization
    before passing through any UDFs.

    Parameters
    ----------
    backend_context : BackendContext
        The backend context to fetch the backend name.
        NOTE(review): not referenced in the body; presumably kept for API
        symmetry with backend-dependent variants — confirm.
    cube : openeo.DataCube
        The datacube to compress the backscatter values.
    Returns
    -------
    openeo.DataCube
        The datacube with the backscatter values compressed to uint16.
    """

    # Apply rescaling of power values in a logarithmic way
    # Per band (x[0]=first, x[1]=second): power -> dB via 10*log10, shift by
    # +83 dB, divide by 20 and map back through 10^(.) — compresses the
    # dynamic range into a positive range. `decompress_backscatter_uint16`
    # applies the exact inverse.
    cube = cube.apply_dimension(
        dimension="bands",
        process=lambda x: array_create(
            [
                power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
                power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
            ]
        ),
    )

    # Change the data type to uint16 for optimization purposes
    return cube.linear_scale_range(1, 65534, 1, 65534)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def decompress_backscatter_uint16(
    backend_context: BackendContext, cube: openeo.DataCube
) -> openeo.DataCube:
    """
    Decompressing the bands from uint16 to their original float32 values.

    Parameters
    ----------
    backend_context : BackendContext
        The backend context to fetch the backend name.
        NOTE(review): not referenced in the body; presumably kept for API
        symmetry with `compress_backscatter_uint16` — confirm.
    cube : openeo.DataCube
        The datacube to decompress the backscatter values.
    Returns
    -------
    openeo.DataCube
        The datacube with the backscatter values in their original float32 values.
    """

    # Exact inverse of `compress_backscatter_uint16`: per band, go back
    # through 20*log10, remove the +83 dB shift and divide by 10 before
    # mapping through 10^(.) to recover power values.
    cube = cube.apply_dimension(
        dimension="bands",
        process=lambda x: array_create(
            [
                power(base=10, p=(20.0 * x[0].log(base=10) - 83.0) / 10.0),
                power(base=10, p=(20.0 * x[1].log(base=10) - 83.0) / 10.0),
            ]
        ),
    )

    return cube
|
openeo_gfmap/preprocessing/scaling.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Scaling and compressing methods for datacubes."""
|
| 2 |
+
|
| 3 |
+
import openeo
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _compress(
|
| 7 |
+
cube: openeo.DataCube,
|
| 8 |
+
min_val: int,
|
| 9 |
+
max_val: int,
|
| 10 |
+
alpha: float,
|
| 11 |
+
beta: float,
|
| 12 |
+
):
|
| 13 |
+
if (
|
| 14 |
+
alpha != 1.0 or beta != 0.0
|
| 15 |
+
): # Avoid adding a node in the computing graph if scaling is not necessary
|
| 16 |
+
cube = (cube * alpha) + beta
|
| 17 |
+
|
| 18 |
+
return cube.linear_scale_range(min_val, max_val, min_val, max_val)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def compress_uint16(
    cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
) -> openeo.DataCube:
    """Scales the data linearly using the formula `output = (input * a) + b` and compresses values
    from float32 to uint16 for memory optimization.

    Parameters
    ----------
    cube : openeo.DataCube
        The input datacube to compress, only meteo data should be present.
    alpha : float, optional (default=1.0)
        The scaling factor. Values in the input datacube are multiplied by this coefficient.
    beta : float, optional (default=0.0)
        The offset. Values in the input datacube are added by this value.

    Returns
    -------
    cube : openeo.DataCube
        The datacube with the data linearly scaled and compressed into the
        uint16 range [0, 65534].
    """
    return _compress(cube, 0, 65534, alpha, beta)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def compress_uint8(
    cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
) -> openeo.DataCube:
    """
    Scales the data linearly using the formula `output = (input * a) + b` and compresses values
    from float32 to uint8 for memory optimization.

    Parameters
    ----------
    cube : openeo.DataCube
        The input datacube to compress, only meteo data should be present.
    alpha : float, optional (default=1.0)
        The scaling factor. Values in the input datacube are multiplied by this coefficient.
    beta : float, optional (default=0.0)
        The offset. Values in the input datacube are added by this value.

    Returns
    -------
    cube : openeo.DataCube
        The datacube with the data linearly scaled and compressed into the
        uint8 range [0, 253].
    """
    return _compress(cube, 0, 253, alpha, beta)
|
openeo_gfmap/utils/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This sub-module contains utilitary function and tools for OpenEO-GFMap"""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
from openeo_gfmap.utils.build_df import load_json
|
| 6 |
+
from openeo_gfmap.utils.intervals import quintad_intervals
|
| 7 |
+
from openeo_gfmap.utils.netcdf import update_nc_attributes
|
| 8 |
+
from openeo_gfmap.utils.split_stac import split_collection_by_epsg
|
| 9 |
+
from openeo_gfmap.utils.tile_processing import (
|
| 10 |
+
array_bounds,
|
| 11 |
+
arrays_cosine_similarity,
|
| 12 |
+
normalize_array,
|
| 13 |
+
select_optical_bands,
|
| 14 |
+
select_sar_bands,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
# Package-level logger shared by the utils sub-modules (e.g. catalogue.py).
_log = logging.getLogger(__name__)
_log.setLevel(logging.INFO)

# NOTE(review): attaching a StreamHandler at import time makes this library
# emit directly to stderr; applications embedding it cannot easily silence
# it — confirm this is intended.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)

_log.addHandler(ch)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# Names re-exported as the public API of `openeo_gfmap.utils`.
__all__ = [
    "load_json",
    "normalize_array",
    "select_optical_bands",
    "array_bounds",
    "select_sar_bands",
    "arrays_cosine_similarity",
    "quintad_intervals",
    "split_collection_by_epsg",
    "update_nc_attributes",
]
|
openeo_gfmap/utils/build_df.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utilities to build a `pandas.DataFrame` from the output of a VectorCube
|
| 2 |
+
based job. Usefull to collect the output of point based extraction.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
# Timezone-aware timestamp format used by vector-cube outputs.
VECTORCUBE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S%z"
# Date-only format used to build the output column names in `load_json`.
TIMESTAMP_FORMAT = "%Y-%m-%d"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def load_json(input_file: Path, bands: list) -> pd.DataFrame:
|
| 14 |
+
"""Reads a json file and outputs it as a proper pandas dataframe.
|
| 15 |
+
|
| 16 |
+
Parameters
|
| 17 |
+
----------
|
| 18 |
+
input_file: PathLike
|
| 19 |
+
The path of the JSON file to read.
|
| 20 |
+
bands: list
|
| 21 |
+
The name of the bands that will be used in the columns names. The band
|
| 22 |
+
names must be the same as the vector cube that resulted into the parsed
|
| 23 |
+
JSON file.
|
| 24 |
+
Returns
|
| 25 |
+
-------
|
| 26 |
+
df: pd.DataFrame
|
| 27 |
+
A `pandas.DataFrame` containing a combination of the band names and the
|
| 28 |
+
timestamps as column names.
|
| 29 |
+
For example, the Sentinel-2 green band on the 1st October 2020 is will
|
| 30 |
+
have the column name `S2-L2A-B02:2020-10-01`
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
df = pd.read_json(input_file)
|
| 34 |
+
|
| 35 |
+
target_timestamps = list(
|
| 36 |
+
map(lambda date: date.strftime(TIMESTAMP_FORMAT), df.columns.to_pydatetime())
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
df = df.rename(dict(zip(df.columns, target_timestamps)), axis=1)
|
| 40 |
+
|
| 41 |
+
expanded_df = pd.DataFrame()
|
| 42 |
+
for col in df.columns:
|
| 43 |
+
expanded_col = pd.DataFrame(
|
| 44 |
+
df[col].to_list(), columns=[f"{feature}:{col}" for feature in bands]
|
| 45 |
+
)
|
| 46 |
+
expanded_df = pd.concat([expanded_df, expanded_col], axis=1)
|
| 47 |
+
|
| 48 |
+
return expanded_df
|
openeo_gfmap/utils/catalogue.py
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Functionalities to interract with product catalogues."""
|
| 2 |
+
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
import geojson
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import requests
|
| 8 |
+
from pyproj.crs import CRS
|
| 9 |
+
from rasterio.warp import transform_bounds
|
| 10 |
+
from requests import adapters
|
| 11 |
+
from shapely.geometry import Point, box, shape
|
| 12 |
+
from shapely.ops import unary_union
|
| 13 |
+
|
| 14 |
+
from openeo_gfmap import (
|
| 15 |
+
Backend,
|
| 16 |
+
BackendContext,
|
| 17 |
+
BoundingBoxExtent,
|
| 18 |
+
SpatialContext,
|
| 19 |
+
TemporalContext,
|
| 20 |
+
)
|
| 21 |
+
from openeo_gfmap.utils import _log
|
| 22 |
+
|
| 23 |
+
# Lazily-created, process-wide HTTP session (see `_request_session`).
request_sessions: Optional[requests.Session] = None


def _request_session() -> requests.Session:
    """Return the shared `requests.Session`, creating it on first use.

    The session mounts an HTTPS adapter that retries up to 5 times with
    exponential backoff on HTTP 500/502/503/504 responses.
    """
    global request_sessions

    if request_sessions is None:
        request_sessions = requests.Session()
        retries = adapters.Retry(
            total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504]
        )
        request_sessions.mount("https://", adapters.HTTPAdapter(max_retries=retries))
    return request_sessions
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class UncoveredS1Exception(Exception):
    """Raised when no Sentinel-1 product fully covers spatially a given
    spatio-temporal context."""
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _parse_cdse_products(response: dict):
    """Parses the geometry and timestamps of products from the CDSE catalogue.

    Returns a pair of parallel lists: shapely geometries and the
    corresponding product start timestamps. Products lacking either a
    geometry or a start date are skipped with a warning.
    """
    geometries = []
    timestamps = []

    for product in response["features"]:
        properties = product["properties"]
        if "geometry" not in product or "startDate" not in properties:
            _log.warning(
                "Cannot parse product %s does not have a geometry or timestamp.",
                properties["id"],
            )
            continue
        geometries.append(shape(product["geometry"]))
        timestamps.append(pd.to_datetime(properties["startDate"]))

    return geometries, timestamps
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _query_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> dict:
    """Queries the CDSE OpenSearch catalogue for a given collection,
    spatio-temporal context and additional parameters.

    Parameters
    ----------
    collection : str
        The CDSE collection name to query (for example: Sentinel1 or Sentinel2).
    bounds : list
        Bounding box [minx, miny, maxx, maxy]; presumably in EPSG:4326 as
        expected by the catalogue's `box` parameter.
    temporal_extent : TemporalContext
        The temporal period to query products for.
    additional_parameters : dict
        Extra query parameters, appended verbatim as "&key=value". Values must
        already be URL-encoded by the caller (e.g. "VV%26VH").

    Returns
    -------
    dict
        The parsed JSON body of the catalogue response.

    Raises
    ------
    Exception
        If the catalogue does not answer with a 200 status code.
    """
    minx, miny, maxx, maxy = bounds

    # The catalogue expects full ISO timestamps; dates are given as YYYY-MM-DD.
    start_date = f"{temporal_extent.start_date}T00:00:00Z"
    end_date = f"{temporal_extent.end_date}T00:00:00Z"

    base_url = (
        f"https://catalogue.dataspace.copernicus.eu/resto/api/collections/"
        f"{collection}/search.json?box={minx},{miny},{maxx},{maxy}"
        f"&sortParam=startDate&maxRecords=1000&dataset=ESA-DATASET"
        f"&startDate={start_date}&completionDate={end_date}"
    )
    extra_query = "".join(
        f"&{key}={value}" for key, value in additional_parameters.items()
    )
    url = base_url + extra_query

    # Shared session with retry policy for transient server errors.
    session = _request_session()
    response = session.get(url, timeout=60)

    if response.status_code != 200:
        raise Exception(
            f"Cannot check S1 catalogue on CDSE: Request to {url} failed with "
            f"status code {response.status_code}"
        )

    return response.json()
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def _check_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> bool:
    """Checks if there is at least one product available in the
    given spatio-temporal context for a collection in the CDSE catalogue,
    as there might be issues in the API that sometimes returns empty results
    for a valid query.

    Parameters
    ----------
    collection : str
        The collection name to be checked. (For example: Sentinel1 or Sentinel2)
    bounds : list
        The bounding box [minx, miny, maxx, maxy] to be checked.
    temporal_extent : TemporalContext
        The temporal period to be checked.
    additional_parameters : Optional[dict], optional
        Additional parameters to be passed to the catalogue, by default empty.
        Parameters (key, value) will be passed as "&key=value" in the query,
        for example: {"sortOrder": "ascending"} will be passed as "&sortOrder=ascending"

    Returns
    -------
    bool
        True if there is at least one GRD product, False otherwise.
    """
    body = _query_cdse_catalogue(
        collection, bounds, temporal_extent, **additional_parameters
    )

    # Bugfix: Python `str` has no `.contains()` method — the original
    # `productType.contains("GRD")` raised AttributeError at runtime.
    # Substring membership is tested with the `in` operator instead.
    grd_tiles = [
        feature
        for feature in body["features"]
        if "GRD" in feature["properties"]["productType"]
    ]

    return len(grd_tiles) > 0
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def _compute_max_gap_days(
    temporal_extent: TemporalContext, timestamps: list[pd.DatetimeIndex]
) -> int:
    """Computes the maximum temporal gap in days from the timestamps parsed
    from the catalogue.

    The requested start and end dates are added to the observation
    timestamps so the gaps before the first observation and after the last
    one are included in the computation.

    Parameters
    ----------
    temporal_extent : TemporalContext
        The temporal extent to be checked. Same as used to query the catalogue.
    timestamps : list[pd.DatetimeIndex]
        The list of timestamps parsed from the catalogue and to compute the gap from.

    Returns
    -------
    days : int
        The maximum temporal gap in days.
    """
    # Bracket the observations with the requested extent so leading and
    # trailing gaps are measured as well.
    extent_bounds = [
        pd.to_datetime(temporal_extent.start_date, utc=True),
        pd.to_datetime(temporal_extent.end_date, utc=True),
    ]
    ordered = pd.DatetimeIndex(sorted(timestamps + extent_bounds))

    # The largest consecutive difference is the maximum gap.
    return ordered.to_series().diff().max().days
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def s1_area_per_orbitstate_vvvh(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
) -> dict:
    """
    Evaluates for both the ascending and descending state orbits the area of intersection and
    maximum temporal gap for the available products with a VV&VH polarisation.

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues. Only the CDSE,
        CDSE_STAGING and FED backends are supported.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.

    Returns
    ------
    dict
        Keys containing the orbit state and values containing whether the
        orbit fully covers the extent, the maximum temporal gap in days, and
        the total area of intersection.
        NOTE(review): the intersection area is computed on EPSG:4326
        geometries, so it is expressed in square degrees rather than km^2;
        it is only meaningful for relative comparison between the two orbits.
    """
    if isinstance(spatial_extent, geojson.FeatureCollection):
        # Transform geojson into shapely geometry and compute bounds
        shapely_geometries = [
            shape(feature["geometry"]) for feature in spatial_extent["features"]
        ]
        if len(shapely_geometries) == 1 and isinstance(shapely_geometries[0], Point):
            # A single point has a zero-area bounding box; buffer it slightly
            # so the catalogue `box` query has a non-degenerate extent.
            point = shapely_geometries[0]
            buffer_size = 0.0001
            buffered_geometry = point.buffer(buffer_size)
            bounds = buffered_geometry.bounds
        else:
            geometry = unary_union(shapely_geometries)
            bounds = geometry.bounds
        # GeoJSON geometries are assumed to be in EPSG:4326 already.
        epsg = 4326
    elif isinstance(spatial_extent, BoundingBoxExtent):
        bounds = [
            spatial_extent.west,
            spatial_extent.south,
            spatial_extent.east,
            spatial_extent.north,
        ]
        epsg = spatial_extent.epsg
    else:
        raise ValueError(
            "Provided spatial extent is not a valid GeoJSON or SpatialContext object."
        )
    # Warp the bounds if the epsg is different from 4326
    if epsg != 4326:
        bounds = transform_bounds(CRS.from_epsg(epsg), CRS.from_epsg(4326), *bounds)

    # Queries the products in the catalogues
    if backend.backend in [Backend.CDSE, Backend.CDSE_STAGING, Backend.FED]:
        # "VV%26VH" is the pre-URL-encoded form of "VV&VH" expected by the
        # catalogue; _query_cdse_catalogue appends parameters verbatim.
        ascending_products, ascending_timestamps = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1",
                bounds,
                temporal_extent,
                orbitDirection="ASCENDING",
                polarisation="VV%26VH",
                productType="IW_GRDH_1S-COG",
            )
        )
        descending_products, descending_timestamps = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1",
                bounds,
                temporal_extent,
                orbitDirection="DESCENDING",
                polarisation="VV%26VH",
                productType="IW_GRDH_1S-COG",
            )
        )
    else:
        raise NotImplementedError(
            f"This feature is not supported for backend: {backend.backend}."
        )

    # Builds the shape of the spatial extent and computes the area
    spatial_extent = box(*bounds)

    # Computes if there is the full overlap for each of those states
    union_ascending = unary_union(ascending_products)
    union_descending = unary_union(descending_products)

    ascending_covers = union_ascending.contains(spatial_extent)
    descending_covers = union_descending.contains(spatial_extent)

    # Computes the area of intersection
    return {
        "ASCENDING": {
            "full_overlap": ascending_covers,
            "max_temporal_gap": _compute_max_gap_days(
                temporal_extent, ascending_timestamps
            ),
            "area": sum(
                product.intersection(spatial_extent).area
                for product in ascending_products
            ),
        },
        "DESCENDING": {
            "full_overlap": descending_covers,
            "max_temporal_gap": _compute_max_gap_days(
                temporal_extent, descending_timestamps
            ),
            "area": sum(
                product.intersection(spatial_extent).area
                for product in descending_products
            ),
        },
    }
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def select_s1_orbitstate_vvvh(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
    max_temporal_gap: int = 60,
) -> str:
    """Selects the orbit state based on some predefined rules that
    are checked in sequential order:
    1. prefer an orbit with full coverage over the requested bounds
    2. prefer an orbit with a maximum temporal gap under a
    predefined threshold
    3. prefer the orbit that covers the most area of intersection
    for the available products

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues. Only the CDSE,
        CDSE_STAGING and FED backends are supported.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.
    max_temporal_gap: int, optional, default: 60
        The maximum temporal gap in days to be considered for the orbit state.

    Returns
    ------
    str
        The orbit state that covers the most area of the given spatio-temporal context

    Raises
    ------
    UncoveredS1Exception
        If neither orbit state fully covers the requested area, or if no
        rule produced a choice.
    """

    # Queries the products in the catalogues
    areas = s1_area_per_orbitstate_vvvh(backend, spatial_extent, temporal_extent)

    ascending_overlap = areas["ASCENDING"]["full_overlap"]
    descending_overlap = areas["DESCENDING"]["full_overlap"]
    ascending_gap_too_large = areas["ASCENDING"]["max_temporal_gap"] > max_temporal_gap
    descending_gap_too_large = (
        areas["DESCENDING"]["max_temporal_gap"] > max_temporal_gap
    )

    orbit_choice = None

    if not ascending_overlap and not descending_overlap:
        raise UncoveredS1Exception(
            "No product available to fully cover the requested area in both orbit states."
        )

    # Rule 1: Prefer an orbit with full coverage over the requested bounds
    if ascending_overlap and not descending_overlap:
        orbit_choice = "ASCENDING"
        reason = "Only orbit fully covering the requested area."
    elif descending_overlap and not ascending_overlap:
        orbit_choice = "DESCENDING"
        reason = "Only orbit fully covering the requested area."

    # Rule 2: Prefer an orbit with a maximum temporal gap under a predefined threshold
    elif ascending_gap_too_large and not descending_gap_too_large:
        orbit_choice = "DESCENDING"
        reason = (
            "Only orbit with temporal gap under the threshold. "
            f"{areas['DESCENDING']['max_temporal_gap']} days < {max_temporal_gap} days"
        )
    elif descending_gap_too_large and not ascending_gap_too_large:
        orbit_choice = "ASCENDING"
        reason = (
            "Only orbit with temporal gap under the threshold. "
            f"{areas['ASCENDING']['max_temporal_gap']} days < {max_temporal_gap} days"
        )
    # Rule 3: Prefer the orbit that covers the most area of intersection
    # for the available products
    elif ascending_overlap and descending_overlap:
        ascending_cover_area = areas["ASCENDING"]["area"]
        descending_cover_area = areas["DESCENDING"]["area"]

        # Selects the orbit state that covers the most area
        if ascending_cover_area > descending_cover_area:
            orbit_choice = "ASCENDING"
            reason = (
                "Orbit has more cumulative intersected area. "
                f"{ascending_cover_area} > {descending_cover_area}"
            )
        else:
            # NOTE(review): on an exact tie DESCENDING is chosen and the
            # reason string still claims ">" — confirm this is intended.
            reason = (
                "Orbit has more cumulative intersected area. "
                f"{descending_cover_area} > {ascending_cover_area}"
            )
            orbit_choice = "DESCENDING"

    if orbit_choice is not None:
        _log.info(f"Selected orbit state: {orbit_choice}. Reason: {reason}")
        return orbit_choice
    raise UncoveredS1Exception("Failed to select suitable Sentinel-1 orbit.")
|
openeo_gfmap/utils/intervals.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utilitary function for intervals, useful for temporal aggregation
|
| 2 |
+
methods.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from datetime import timedelta
|
| 6 |
+
|
| 7 |
+
from openeo_gfmap import TemporalContext
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def quintad_intervals(temporal_extent: TemporalContext) -> list:
    """Returns a list of tuples (start_date, end_date) of quintad intervals
    from the input temporal extent. Quintad intervals are intervals of
    generally 5 days, that never overlap two months.

    All months are divided in 6 quintads, where the 6th quintad might
    contain 6 days for months of 31 days.
    For the month of February, the 6th quintad is only of three days, or
    four days for the leap year.
    """
    start_date, end_date = temporal_extent.to_datetime()
    quintads = []

    current_date = start_date

    # Compute the offset of the first day on the start of the last quintad.
    # Rewinding to the quintad boundary keeps the grid aligned to the month;
    # the offset is re-applied to the first interval at the end.
    if start_date.day != 1:
        offset = (start_date - timedelta(days=1)).day % 5
        current_date = current_date - timedelta(days=offset)
    else:
        offset = 0

    while current_date <= end_date:
        # Get the last day of the current month: day 28 + 4 days always lands
        # in the next month, and subtracting that day-of-month steps back to
        # the final day of the current one.
        last_day = current_date.replace(day=28) + timedelta(days=4)
        last_day = last_day - timedelta(days=last_day.day)

        # Get the last day of the current quintad
        last_quintad = current_date + timedelta(days=4)

        # Add a day if the day is the 30th and there is the 31th in the current month
        # (the 6th quintad absorbs the 31st so no 1-day interval is produced).
        if last_quintad.day == 30 and last_day.day == 31:
            last_quintad = last_quintad + timedelta(days=1)

        # If the last quintad is after the last day of the month, then
        # set it to the last day of the month
        # NOTE: month-end clamping deliberately takes precedence over
        # end-date clamping (elif), so the last interval may extend past
        # end_date up to the month boundary.
        if last_quintad > last_day:
            last_quintad = last_day
        # In the case the last quintad is after the end date, then set it to the end date
        elif last_quintad > end_date:
            last_quintad = end_date

        quintads.append((current_date, last_quintad))

        # Set the current date to the next quintad
        current_date = last_quintad + timedelta(days=1)

    # Fixing the offset issue for intervals starting in the middle of a quintad
    quintads[0] = (quintads[0][0] + timedelta(days=offset), quintads[0][1])

    # Returns to string with the YYYY-mm-dd format
    return [
        (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
        for start_date, end_date in quintads
    ]
|
openeo_gfmap/utils/split_stac.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utility function to split a STAC collection into multiple STAC collections based on CRS.
|
| 2 |
+
Requires the "proj:epsg" property to be present in all the STAC items.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Iterator, Union
|
| 8 |
+
|
| 9 |
+
import pystac
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _extract_epsg_from_stac_item(stac_item: pystac.Item) -> int:
    """
    Extract the EPSG code from a STAC item.

    Parameters:
        stac_item (pystac.Item): The STAC item.

    Returns:
        int: The EPSG code.

    Raises:
        KeyError: If the "proj:epsg" property is missing from the STAC item.
    """
    if "proj:epsg" not in stac_item.properties:
        raise KeyError("The 'proj:epsg' property is missing from the STAC item.")
    return stac_item.properties["proj:epsg"]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _get_items_by_epsg(
    collection: pystac.Collection,
) -> Iterator[tuple[int, pystac.Item]]:
    """
    Generator that pairs every item of the collection with its EPSG code.

    Parameters:
        collection (pystac.Collection): The STAC collection.

    Yields:
        tuple[int, pystac.Item]: EPSG code and corresponding STAC item.
    """
    for stac_item in collection.get_all_items():
        yield _extract_epsg_from_stac_item(stac_item), stac_item
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _create_collection_skeleton(
    collection: pystac.Collection, epsg: int
) -> pystac.Collection:
    """
    Create a skeleton for a new STAC collection with a given EPSG code.

    The skeleton copies the metadata of the original collection (extent,
    summaries, license, extensions and item assets) but contains no items.

    Parameters:
        collection (pystac.Collection): The original STAC collection.
        epsg (int): The EPSG code.

    Returns:
        pystac.Collection: The skeleton of the new STAC collection.
    """
    skeleton = pystac.Collection(
        id=f"{collection.id}_{epsg}",
        description=f"{collection.description} Containing only items with EPSG code {epsg}",
        extent=collection.extent.clone(),
        summaries=collection.summaries,
        license=collection.license,
        stac_extensions=collection.stac_extensions,
    )

    # Carry over the item-assets definitions when the source declares them.
    # NOTE(review): relies on `pystac.extensions.item_assets` being reachable
    # via attribute access without an explicit submodule import — confirm the
    # installed pystac version exposes it this way.
    if "item_assets" in collection.extra_fields:
        source_assets = pystac.extensions.item_assets.ItemAssetsExtension.ext(
            collection
        )
        target_assets = pystac.extensions.item_assets.ItemAssetsExtension.ext(
            skeleton, add_if_missing=True
        )
        target_assets.item_assets = source_assets.item_assets

    return skeleton
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def split_collection_by_epsg(
    collection: Union[str, Path, pystac.Collection], output_dir: Union[str, Path]
):
    """
    Split a STAC collection into multiple STAC collections based on EPSG code.

    Each item's "proj:epsg" property decides which sub-collection it lands in;
    the sub-collections are written to ``output_dir/collection-<epsg>``.

    Parameters
    ----------
    collection: Union[str, Path, pystac.Collection]
        A collection of STAC items or a path to a STAC collection.
    output_dir: Union[str, Path]
        The directory where the split STAC collections will be saved.
    """
    # Bugfix: the Path conversion and directory creation previously ran only
    # when `collection` was a path. Passing an in-memory pystac.Collection
    # with a str `output_dir` crashed on `output_dir / ...` and the output
    # directory was never created. They must happen unconditionally.
    output_dir = Path(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    if not isinstance(collection, pystac.Collection):
        try:
            collection = pystac.read_file(Path(collection))
        except pystac.STACError:
            print("Please provide a path to a valid STAC collection.")
            return

    # Group items into one skeleton collection per EPSG code.
    collections_by_epsg = {}

    for epsg, item in _get_items_by_epsg(collection):
        if epsg not in collections_by_epsg:
            collections_by_epsg[epsg] = _create_collection_skeleton(collection, epsg)

        # Add item to the corresponding collection
        collections_by_epsg[epsg].add_item(item)

    # Write each collection to disk
    for epsg, new_collection in collections_by_epsg.items():
        new_collection.update_extent_from_items()  # Update extent based on added items
        collection_path = output_dir / f"collection-{epsg}"
        new_collection.normalize_hrefs(str(collection_path))
        new_collection.save()
|
openeo_gfmap/utils/tile_processing.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utilitaries to process data tiles."""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import xarray as xr
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def normalize_array(inarr: xr.DataArray, percentile: float = 0.99) -> xr.DataArray:
    """Performs normalization between 0.0 and 1.0 using the given
    percentile.

    Values are min-shifted and scaled by the range up to the percentile,
    then anything above 1.0 is clipped.
    """
    upper = inarr.quantile(percentile, dim=["x", "y", "t"])
    lower = inarr.min(dim=["x", "y", "t"])

    scaled = (inarr - lower) / (upper - lower)

    # Perform clipping on values that are higher than the computed quantile
    return scaled.where(scaled < 1.0, 1.0)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def select_optical_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Filters and keep only the optical bands for a given array."""
    optical_names = [
        name
        for name in inarr.coords["bands"].to_numpy()
        if name.startswith("S2-L2A-B")
    ]
    return inarr.sel(bands=optical_names)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def select_sar_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Filters and keep only the SAR bands for a given array."""
    wanted = ["S1-SIGMA0-VV", "S1-SIGMA0-VH", "S1-SIGMA0-HH", "S1-SIGMA0-HV"]
    sar_names = [
        name for name in inarr.coords["bands"].to_numpy() if name in wanted
    ]
    return inarr.sel(bands=sar_names)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def array_bounds(inarr: xr.DataArray) -> tuple:
    """Returns the 4 bounds values for the x and y coordinates of the tile"""
    xs = inarr.coords["x"]
    ys = inarr.coords["y"]
    # (minx, miny, maxx, maxy), matching the usual bounds convention.
    return (
        xs.min().item(),
        ys.min().item(),
        xs.max().item(),
        ys.max().item(),
    )
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def arrays_cosine_similarity(
    first_array: xr.DataArray, second_array: xr.DataArray
) -> float:
    """Returns a similarity score based on normalized cosine distance. The
    input arrays must have similar ranges to obtain a valid score.
    1.0 represents the best score (same tiles), while 0.0 is the worst score.
    """
    inner_product = np.sum(first_array * second_array)
    norm_product = np.linalg.norm(first_array) * np.linalg.norm(second_array)

    return (inner_product / norm_product).item()
|
pyproject.toml
CHANGED
|
@@ -8,9 +8,17 @@ dependencies = [
|
|
| 8 |
"geojson>=3.2.0",
|
| 9 |
"geopandas>=1.1.2",
|
| 10 |
"joblib>=1.5.3",
|
| 11 |
-
"matplotlib
|
|
|
|
| 12 |
"openeo>=0.47.0",
|
| 13 |
-
"
|
|
|
|
|
|
|
|
|
|
| 14 |
"streamlit>=1.53.1",
|
| 15 |
"streamlit-folium>=0.26.1",
|
|
|
|
| 16 |
]
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"geojson>=3.2.0",
|
| 9 |
"geopandas>=1.1.2",
|
| 10 |
"joblib>=1.5.3",
|
| 11 |
+
"matplotlib==3.7.5",
|
| 12 |
+
"numpy<2.0.0",
|
| 13 |
"openeo>=0.47.0",
|
| 14 |
+
"prometheo",
|
| 15 |
+
"pydantic>=2.12.5",
|
| 16 |
+
"rasterio<1.4.0",
|
| 17 |
+
"scipy>=1.17.0",
|
| 18 |
"streamlit>=1.53.1",
|
| 19 |
"streamlit-folium>=0.26.1",
|
| 20 |
+
"torch==2.3.1",
|
| 21 |
]
|
| 22 |
+
|
| 23 |
+
[tool.uv.sources]
|
| 24 |
+
prometheo = { git = "https://github.com/WorldCereal/prometheo.git" }
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|