crop-map / openeo_gfmap /utils /build_df.py
remisek's picture
Fix
4fcc331
"""Utilities to build a `pandas.DataFrame` from the output of a VectorCube
based job. Useful to collect the output of point-based extraction.
"""
from pathlib import Path
import pandas as pd
# Timestamp layout with time of day and UTC offset. It is not referenced in the
# visible part of this module; presumably the raw VectorCube timestamp
# format -- TODO confirm against the callers.
VECTORCUBE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S%z"
# Day-resolution layout used for the date suffix of the output column names.
TIMESTAMP_FORMAT = "%Y-%m-%d"


def load_json(input_file: Path, bands: list) -> pd.DataFrame:
    """Read a VectorCube JSON file and flatten it into a wide DataFrame.

    The JSON document is loaded with `pandas.read_json`. Its columns must be
    parsed by pandas as timestamps, and every cell must hold a sequence with
    one value per entry of `bands`, in the same order.

    Parameters
    ----------
    input_file: PathLike
        The path of the JSON file to read.
    bands: list
        The names of the bands, used to build the column names. They must
        match, in number and order, the bands of the vector cube that
        produced the parsed JSON file.

    Returns
    -------
    df: pd.DataFrame
        A `pandas.DataFrame` with one row per row of the JSON document and
        one column per (timestamp, band) pair, ordered by timestamp first and
        band second. Columns are named `<band>:<date>`, where the date is
        formatted with `TIMESTAMP_FORMAT`.
        For example, the band `S2-L2A-B02` on 1 October 2020 has the column
        name `S2-L2A-B02:2020-10-01`.
        An empty DataFrame is returned when the document has no columns.

    Notes
    -----
    Timestamps are truncated to the day. Two timestamps falling on the same
    day therefore produce identically named columns; both are kept, in
    document order.
    """
    df = pd.read_json(input_file)

    # Nothing to expand; without timestamp columns the axis is not a
    # DatetimeIndex and the date formatting below would fail.
    if len(df.columns) == 0:
        return pd.DataFrame()

    dates = [
        stamp.strftime(TIMESTAMP_FORMAT) for stamp in df.columns.to_pydatetime()
    ]

    # Columns are selected by position rather than by (day-truncated) label,
    # so that several timestamps on the same day do not collide.
    frames = [
        pd.DataFrame(
            df.iloc[:, position].to_list(),
            columns=[f"{band}:{date}" for band in bands],
        )
        for position, date in enumerate(dates)
    ]

    # A single concatenation avoids re-copying the accumulated frame once per
    # timestamp.
    return pd.concat(frames, axis=1)