Spaces:

hackathon-ombrea
/

gaia

Runtime error

App Files Community

lucaordronneau committed on Feb 25, 2025

Commit

aa4cf91

verified ·

1 Parent(s): 38af3d3

Upload forecast.py

Browse files

Update with water deficit

Files changed (1) hide show

forecast.py +87 -28

forecast.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import os
 import xarray as xr
 import pandas as pd
 # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
@@ -18,6 +22,7 @@ VARIABLE_MAPPING = {
 }
 def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataArray:
     """
     Load data for a given variable from the dataset at the nearest latitude and longitude.
@@ -33,17 +38,16 @@ def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataA
     """
     try:
         data = ds[variable].sel(lat=lat, lon=lon, method="nearest")
         # Convert temperature from Kelvin to Celsius for specific variables
         if variable in ["tas", "tasmin", "tasmax"]:
             data = data - 273.15
         return data
     except Exception as e:
         print(f"Error loading {variable}: {e}")
         return None
 def get_forecast_datasets(climate_sub_files: list) -> dict:
     """
     Get the forecast datasets by loading NetCDF files for each variable.
@@ -56,12 +60,10 @@ def get_forecast_datasets(climate_sub_files: list) -> dict:
     """
     datasets = {}
-    # Iterate over each file and check if the variable exists in the filename
     for file_path in climate_sub_files:
         filename = os.path.basename(file_path)
         for long_name, (title, unit, var_key) in VARIABLE_MAPPING.items():
-            if var_key in filename:  # Check for presence of variable in filename
                 if var_key in ["tas", "tasmax", "tasmin"]:
                     if f"_{var_key}_" in f"_{filename}_" or filename.endswith(f"_{var_key}.nc"):
                         datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
@@ -71,6 +73,7 @@ def get_forecast_datasets(climate_sub_files: list) -> dict:
     return datasets
 def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
     """
     Extract climate data from the forecast datasets for a given location and convert to a DataFrame.
@@ -85,41 +88,97 @@ def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
     """
     time_series_data = {'time': []}
-    # Iterate over the variable mapping to load and process data for each variable
     for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
         print(f"Processing {long_name} ({title}, {unit}, {variable})...")
-        # Load the data for the current variable
         data = load_data(variable, datasets[long_name], lat, lon)
-        if data is not None:
-            print(f"Time values: {data.time.values[:5]}")  # Preview first few time values
-            print(f"Data values: {data.values[:5]}")  # Preview first few data values
-            # Add the time values to the 'time' list
             time_series_data['time'] = data.time.values
-            # Format the column name with unit (e.g., "Precipitation (kg m-2 s-1)")
             column_name = f"{title} ({unit})"
             time_series_data[column_name] = data.values
-    # Convert the time series data into a pandas DataFrame
     return pd.DataFrame(time_series_data)
-# Define the directory to parse
-folder_to_parse = "climate_data_pessimist/"
-# Retrieve the subfolders and files to parse
-climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if os.path.isdir(os.path.join(folder_to_parse, e))]
-climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]
-# Load the forecast datasets
-datasets = get_forecast_datasets(climate_sub_files)
-# Get the forecast data for a specific latitude and longitude
-lat, lon = 47.0, 5.0  # Example coordinates
-final_df = get_forecast_data(datasets, lat, lon)
-# Display the resulting DataFrame
-print(final_df.head())

 import os
 import xarray as xr
 import pandas as pd
+from matplotlib import pyplot as plt
+import docs.agro_indicators as agro_indicators
+import numpy as np
+from datetime import datetime
 # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
 }
+# Function to load data for a given variable from the dataset at the nearest latitude and longitude
 def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataArray:
     """
     Select one variable's series at the grid point nearest to a location.

     Args:
         variable (str): Key used to index the dataset.
         ds (xr.Dataset): Dataset that is indexed with ``variable``.
         lat (float): Latitude passed to the nearest-neighbour selection.
         lon (float): Longitude passed to the nearest-neighbour selection.

     Returns:
         The selected data, shifted by -273.15 for "tas", "tasmin" and
         "tasmax". None is returned (after printing the error) when any
         exception is raised during the selection.
     """
     kelvin_variables = ["tas", "tasmin", "tasmax"]
     try:
         point_series = ds[variable].sel(lat=lat, lon=lon, method="nearest")
         if variable not in kelvin_variables:
             return point_series
         # Temperature variables are converted from Kelvin to Celsius.
         return point_series - 273.15
     except Exception as err:
         print(f"Error loading {variable}: {err}")
         return None
+# Function to load forecast datasets from NetCDF files based on variable mapping
 def get_forecast_datasets(climate_sub_files: list) -> dict:
     """
     Get the forecast datasets by loading NetCDF files for each variable.
     """
     datasets = {}
     for file_path in climate_sub_files:
         filename = os.path.basename(file_path)
         for long_name, (title, unit, var_key) in VARIABLE_MAPPING.items():
+            if var_key in filename:
                 if var_key in ["tas", "tasmax", "tasmin"]:
                     if f"_{var_key}_" in f"_{filename}_" or filename.endswith(f"_{var_key}.nc"):
                         datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
     return datasets
+# Function to extract climate data from forecast datasets and convert to a DataFrame
 def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
     """
     Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

     Every entry of VARIABLE_MAPPING is processed in order. A variable is
     skipped (with a printed message) when no dataset was loaded for it, and
     skipped silently here when ``load_data`` returns None, so a single missing
     file no longer aborts the whole extraction with a KeyError.

     Args:
         datasets (dict): Loaded datasets keyed by the long variable name used
             in VARIABLE_MAPPING.
         lat (float): Latitude forwarded to ``load_data``.
         lon (float): Longitude forwarded to ``load_data``.

     Returns:
         pd.DataFrame: A 'time' column plus one "<title> (<unit>)" column per
         variable that could be loaded.
     """
     time_series_data = {'time': []}
     for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
         print(f"Processing {long_name} ({title}, {unit}, {variable})...")
         dataset = datasets.get(long_name)
         if dataset is None:
             # No file matched this variable; skip it like a failed load.
             print(f"No dataset available for {long_name}; skipping.")
             continue
         data = load_data(variable, dataset, lat, lon)
         if data is None:
             continue
         # The time axis of the most recently loaded variable is the one kept.
         time_series_data['time'] = data.time.values
         time_series_data[f"{title} ({unit})"] = data.values
     return pd.DataFrame(time_series_data)
+# Function to compute reference evapotranspiration (ET0)
def compute_et0(df: pd.DataFrame, latitude: float, longitude: float):
    """
    Compute reference evapotranspiration (ET0) from a climate DataFrame.

    Args:
        df (pd.DataFrame): Climate data exposing the columns ``irradiance``,
            ``air_temperature_min``, ``air_temperature_max``,
            ``relative_humidity_min``, ``relative_humidity_max``,
            ``wind_speed`` and ``day_of_year``.
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.

    Returns:
        The value returned by ``agro_indicators.et0`` for these inputs.
    """
    solar = df.irradiance
    t_low = df.air_temperature_min
    t_high = df.air_temperature_max
    # Mean temperature is the midpoint of the daily minimum and maximum.
    t_mean = (t_low + t_high) / 2
    rh_low = df.relative_humidity_min
    rh_high = df.relative_humidity_max
    wind = df.wind_speed
    julian_day = df.day_of_year
    return agro_indicators.et0(
        solar,
        t_mean,
        t_high,
        t_low,
        rh_low,
        rh_high,
        wind,
        julian_day,
        latitude,
        longitude,
    )
+# Main processing workflow
def main(folder_to_parse: str = "../climate_data_pessimist/", lat: float = 47.0, lon: float = 5.0):
    """
    Run the forecast workflow and return the enriched DataFrame.

    NetCDF files found one directory level below ``folder_to_parse`` are
    loaded, the series at (``lat``, ``lon``) are extracted, then ET0,
    precipitation in mm/day and a water-deficit column are added.

    Args:
        folder_to_parse (str): Directory whose sub-folders contain the ``.nc``
            files. Defaults to the path that was previously hard-coded.
        lat (float): Latitude of the location (default: example coordinate).
        lon (float): Longitude of the location (default: example coordinate).

    Returns:
        pd.DataFrame: The extracted data with the derived columns appended.
    """
    # Retrieve the sub-folders, then every NetCDF file they contain.
    climate_sub_folder = [
        os.path.join(folder_to_parse, entry)
        for entry in os.listdir(folder_to_parse)
        if os.path.isdir(os.path.join(folder_to_parse, entry))
    ]
    climate_sub_files = [
        os.path.join(folder, name)
        for folder in climate_sub_folder
        for name in os.listdir(folder)
        if name.endswith('.nc')
    ]

    # Load the forecast datasets and extract the series for the location.
    datasets = get_forecast_datasets(climate_sub_files)
    final_df = get_forecast_data(datasets, lat, lon)
    print(final_df.head())

    # Preprocess: expose the inputs under the column names compute_et0 reads.
    coef = 1  # scaling factor applied to the radiation column
    data_test = final_df.copy()
    data_test["irradiance"] = data_test['Surface Downwelling Shortwave Radiation (W/m²)'] * coef
    data_test["air_temperature_min"] = data_test['Daily Minimum Near Surface Air Temperature (°C)']
    data_test["air_temperature_max"] = data_test['Daily Maximum Near Surface Air Temperature (°C)']
    # Only one humidity series is available, so it serves as both min and max.
    data_test["relative_humidity_min"] = data_test['Relative Humidity (%)']
    data_test["relative_humidity_max"] = data_test['Relative Humidity (%)']
    data_test["wind_speed"] = data_test['Near Surface Wind Speed (m/s)']

    # Convert 'time' to datetime (unparseable values become NaT) and derive
    # the day of the year.
    data_test['time'] = pd.to_datetime(data_test['time'], errors='coerce')
    data_test['day_of_year'] = data_test['time'].dt.dayofyear

    # Compute ET0.
    data_test['Evaporation (mm/day)'] = compute_et0(data_test, lat, lon)

    # Convert precipitation from kg/m²/s to mm/day (86400 seconds per day).
    data_test['Precipitation (mm/day)'] = 86400 * data_test['Precipitation (kg m-2 s-1)']

    # Water Deficit = ET0 - P + M
    # NOTE(review): M is a soil-moisture amount in kg m-2 while the other two
    # terms are daily rates in mm/day - confirm the formula and units.
    data_test['Water Deficit (mm/day)'] = (
        data_test['Evaporation (mm/day)']
        - data_test['Precipitation (mm/day)']
        + data_test['Moisture in Upper Portion of Soil Column (kg m-2)']
    )

    # Display the resulting DataFrame with Water Deficit.
    print(data_test[['Water Deficit (mm/day)', 'Precipitation (mm/day)', 'Evaporation (mm/day)', 'Moisture in Upper Portion of Soil Column (kg m-2)']])
    return data_test
+# Run the main function
+if __name__ == "__main__":
+    main()