| import numpy as np
|
| import pandas as pd
|
| import yaml
|
| import pickle
|
| import plotly.graph_objects as go
|
| from plotly.subplots import make_subplots
|
| import torch
|
| from xgboost import XGBRegressor
|
| from sklearn.metrics import mean_absolute_error, mean_squared_error
|
| from pipelines.xgcast_pipeline import normalise_df
|
|
|
|
|
# Load the pipeline-wide settings (column names, resampling window,
# forecast horizon) and the model hyper-parameters from YAML config files.
with open('config/params.yml') as file:

    config = yaml.safe_load(file)

with open('config/xgcast_params.yml') as file:

    model_params = yaml.safe_load(file)


# Render pandas `.plot()` calls through plotly instead of matplotlib.
pd.options.plotting.backend = "plotly"
|
|
|
|
|
# Column names shared across the pipeline, taken from the main config.
datetime_col = config['datetime_col']

value_col = config['value_col']

datastream_name_col = config['datastream_name_col']


# Resampling window string (e.g. '3h'); also keep the integer hour count.
# NOTE(review): assumes the window is always expressed in hours ('<N>h') —
# confirm against how `resampling_window` is produced upstream.
resampling_window = config['resampling_window']

resampling = int(resampling_window.split('h')[0])


# Number of future periods the model forecasts per input row.
num_predictions = config['num_predictions']
|
|
|
|
|
|
|
|
|
def get_sensor_data(consortium_name, sensor_forecasted):
    """Load the tensiometer readings of one sensor for a consortium.

    Reads the consortium's tensiometer parquet file, keeps only the rows
    belonging to the requested datastream, and returns them indexed and
    sorted by datetime.
    """
    all_sensors = pd.read_parquet(f'data//05_xgcast_input//tensiometers_{consortium_name}.parquet')
    is_target = all_sensors[datastream_name_col] == sensor_forecasted
    return all_sensors.loc[is_target].set_index(datetime_col).sort_index()
|
|
|
def get_mean_and_std(consortium_name):
    """Load the per-column mean/std table used for (de)normalisation."""
    return pd.read_parquet(f'data//05_xgcast_input//mean_std_{consortium_name}.parquet')
|
|
|
def get_feature_cols(consortium_name):
    """Load the pickled list of model feature column names for a consortium."""
    pickle_path = f'data//05_xgcast_input//feature_cols_{consortium_name}.pickle'
    with open(pickle_path, 'rb') as fh:
        # NOTE(review): pickle.load is only safe on trusted, locally-produced
        # files — never point this at data from an untrusted source.
        return pickle.load(fh)
|
|
|
def get_model(consortium_name, model_name):
    """Load a trained forecasting model for the given consortium.

    Parameters
    ----------
    consortium_name : str
        Used to locate the serialized model file on disk.
    model_name : str
        Model type to load; currently only 'xgboost' is supported.

    Returns
    -------
    XGBRegressor
        The fitted model restored from disk.

    Raises
    ------
    ValueError
        If `model_name` is not a supported model type.
    """
    if model_name == 'xgboost':
        xgb_params = model_params['xgb_params']
        bst = XGBRegressor(objective='reg:absoluteerror', **xgb_params)
        bst.load_model(f'data//06_xgcast_output//xgcast_{consortium_name}.json')
        return bst
    # Fail loudly instead of printing and implicitly returning None, which
    # previously surfaced later as a confusing AttributeError at the caller.
    raise ValueError(f"Model '{model_name}' not available.")
|
|
|
def get_predictions(consortium_name, model, sensor_data):
    """Run the model over the sensor history and denormalise the outputs.

    Returns a DataFrame with one column per forecast horizon
    ('prediction_next_<k>periods'), the observed target renamed to
    'real_value', plus the 'precipitation' and 'irrigation' features.
    """
    stats = get_mean_and_std(consortium_name)
    features = get_feature_cols(consortium_name)

    # Restrict to the model's feature columns, in time order.
    inputs = sensor_data.copy()[features].sort_index()

    normalised = normalise_df(inputs, stats)
    # Model output is in standardised units — undo the target normalisation.
    raw_preds = model.predict(normalised) * stats['std'][value_col] + stats['mean'][value_col]

    horizon_cols = [f'prediction_next_{i+1}periods' for i in range(num_predictions)]
    preds = pd.DataFrame(raw_preds, columns=horizon_cols, index=inputs.index)
    preds = preds.join(inputs[[value_col, 'precipitation', 'irrigation']])
    return preds.rename(columns={value_col: 'real_value'})
|
|
|
def plot_sensor_forecasted(sensor_forecasted, predictions, target_col, thresholds, additional_cols_plot):
    """Build a plotly figure of observed values and forecasts for one sensor.

    Draws the observed series and every 'prediction_next_*' column as lines
    on the primary (tension) axis, any extra columns as bars on a secondary
    (water amount) axis, and two dashed horizontal threshold lines.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Observed series on the primary axis.
    fig.add_trace(
        go.Scatter(x=predictions.index, y=predictions[target_col], mode="lines", name="value"),
        secondary_y=False,
    )

    # One line per forecast-horizon column.
    forecast_cols = [c for c in predictions.columns if 'prediction_next_' in c]
    for forecast_col in forecast_cols:
        fig.add_trace(
            go.Scatter(x=predictions.index, y=predictions[forecast_col], mode="lines", name=forecast_col),
            secondary_y=False,
        )

    # Extra columns (e.g. precipitation / irrigation) as bars on the secondary axis.
    for extra_col in additional_cols_plot:
        fig.add_trace(
            go.Bar(x=predictions.index, y=predictions[extra_col], name=extra_col),
            secondary_y=True,
        )

    # Dashed guide lines at the two configured tension thresholds.
    for level in (thresholds[0], thresholds[1]):
        fig.add_hline(y=level, line_width=3, line_dash="dash")

    fig.update_layout(
        title=f"Tensiometer values for sensor {sensor_forecasted}",
        font=dict(family="Courier New, monospace", size=12),
    )
    fig.update_xaxes(title_text="datetime")
    fig.update_yaxes(title_text="Tension (mbar)", secondary_y=False)
    if len(additional_cols_plot) > 0:
        fig.update_yaxes(title_text='Water amount (mm)', secondary_y=True)

    return fig
|
|
|