# paolog-fbk's picture
# Upload folder using huggingface_hub
# 64ab846 verified
import numpy as np
import pandas as pd
import yaml
import pickle
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import torch
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from pipelines.xgcast_pipeline import normalise_df
# Load the general pipeline configuration and the XGCast model parameters.
# The encoding is given explicitly so that parsing does not depend on the
# platform's default locale encoding.
with open('config/params.yml', encoding='utf-8') as file:
    config = yaml.safe_load(file)
with open('config/xgcast_params.yml', encoding='utf-8') as file:
    model_params = yaml.safe_load(file)

# Route pandas' .plot() calls through the plotly backend.
pd.options.plotting.backend = "plotly"

# Column names and forecasting settings read once from the configuration.
datetime_col = config['datetime_col']
value_col = config['value_col']
datastream_name_col = config['datastream_name_col']
resampling_window = config['resampling_window']
# Integer found before the first 'h' of the window string
# (presumably a number of hours, e.g. '6h' -> 6; confirm against params.yml).
resampling = int(resampling_window.split('h')[0])
num_predictions = config['num_predictions']
### Functions
def get_sensor_data(consortium_name, sensor_forecasted):
    """Load the rows of a single sensor from a consortium's input parquet file.

    Args:
        consortium_name: Name interpolated into the parquet file name.
        sensor_forecasted: Value matched against the datastream-name column.

    Returns:
        The matching rows, indexed by the datetime column and sorted by index.
    """
    all_sensors = pd.read_parquet(f'data//05_xgcast_input//tensiometers_{consortium_name}.parquet')
    is_requested_sensor = all_sensors[datastream_name_col] == sensor_forecasted
    single_sensor = all_sensors[is_requested_sensor]
    return single_sensor.set_index(datetime_col).sort_index()
def get_mean_and_std(consortium_name):
    """Read the mean/std parquet table stored for a consortium.

    Args:
        consortium_name: Name interpolated into the parquet file name.

    Returns:
        The DataFrame read from the file.
    """
    stats_path = f'data//05_xgcast_input//mean_std_{consortium_name}.parquet'
    return pd.read_parquet(stats_path)
def get_feature_cols(consortium_name):
    """Unpickle the feature-column object stored for a consortium.

    Args:
        consortium_name: Name interpolated into the pickle file name.

    Returns:
        Whatever object the pickle file contains.
    """
    pickle_path = f'data//05_xgcast_input//feature_cols_{consortium_name}.pickle'
    # NOTE: unpickling can execute arbitrary code; the file must come from a
    # trusted source.
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)
def get_model(consortium_name, model_name):
    """Build the requested regressor and load its saved state from disk.

    Args:
        consortium_name: Name interpolated into the saved-model file name.
        model_name: Kind of model to load; only 'xgboost' is handled.

    Returns:
        The loaded XGBRegressor, or None (after printing a message) when
        model_name is anything other than 'xgboost'.
    """
    if model_name != 'xgboost':
        print('Model not available.')
        return None

    booster = XGBRegressor(objective='reg:absoluteerror', **model_params['xgb_params'])
    booster.load_model(f'data//06_xgcast_output//xgcast_{consortium_name}.json')
    return booster
def get_predictions(consortium_name, model, sensor_data):
    """Run the model on a sensor's feature columns and rescale its output.

    The model output is multiplied by the stored std of the value column and
    shifted by its stored mean, then joined with the value, 'precipitation'
    and 'irrigation' columns.

    Args:
        consortium_name: Selects which stored mean/std table and feature
            columns are loaded.
        model: Object exposing predict(); its output is used to build a frame
            with num_predictions columns.
        sensor_data: DataFrame holding at least the feature columns. The
            value column, 'precipitation' and 'irrigation' must be among the
            feature columns, because they are selected after the frame has
            been restricted to them.

    Returns:
        DataFrame indexed like the sorted input, with columns
        'prediction_next_{k}periods' for k = 1..num_predictions, followed by
        'real_value' (the renamed value column), 'precipitation' and
        'irrigation'.
    """
    features = sensor_data.copy()
    stats = get_mean_and_std(consortium_name)
    feature_cols = get_feature_cols(consortium_name)
    features = features[feature_cols].sort_index()

    normalised = normalise_df(features, stats)
    raw_output = model.predict(normalised)
    # Undo the normalisation of the target: scale by its std, shift by its mean.
    rescaled = raw_output * stats['std'][value_col] + stats['mean'][value_col]

    horizon_names = [
        f'prediction_next_{step}periods' for step in range(1, num_predictions + 1)
    ]
    forecast = pd.DataFrame(rescaled, columns=horizon_names, index=features.index)
    observed = features[[value_col, 'precipitation', 'irrigation']]
    return forecast.join(observed).rename(columns={value_col: 'real_value'})
def plot_sensor_forecasted(sensor_forecasted, predictions, target_col, thresholds, additional_cols_plot):
    """Build a dual-axis plotly figure of observed values and forecasts.

    Args:
        sensor_forecasted: Sensor identifier shown in the figure title.
        predictions: DataFrame whose index is used as the x axis.
        target_col: Column drawn as the line named "value".
        thresholds: Indexable whose items 0 and 1 are drawn as dashed
            horizontal lines.
        additional_cols_plot: Columns drawn as bars on the secondary y axis.

    Returns:
        The assembled plotly figure.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    timestamps = predictions.index

    def add_line(label, column):
        # Line traces all share the primary y axis.
        fig.add_trace(
            go.Scatter(x=timestamps, y=predictions[column], mode="lines", name=label),
            secondary_y=False,
        )

    # Observed series first, then one line per forecast column.
    add_line("value", target_col)
    for column in predictions.columns:
        if 'prediction_next_' not in column:
            continue
        add_line(column, column)

    # Extra columns are drawn as bars against the secondary y axis.
    for column in additional_cols_plot:
        bar = go.Bar(x=timestamps, y=predictions[column], name=column)
        fig.add_trace(bar, secondary_y=True)

    # Two dashed horizontal lines mark the thresholds.
    for level in (thresholds[0], thresholds[1]):
        fig.add_hline(y=level, line_width=3, line_dash="dash")

    fig.update_layout(
        title=f"Tensiometer values for sensor {sensor_forecasted}",
        font={"family": "Courier New, monospace", "size": 12},
    )

    # Axis titles; the secondary axis is labelled only when bars were requested.
    fig.update_xaxes(title_text="datetime")
    fig.update_yaxes(title_text="Tension (mbar)", secondary_y=False)
    if len(additional_cols_plot) > 0:
        fig.update_yaxes(title_text='Water amount (mm)', secondary_y=True)

    return fig