Upload 7 files
Browse files- LSTM_forecaster.joblib +3 -0
- app.py +19 -0
- filterdf.py +121 -0
- mergedf.py +152 -0
- pipeline.pkl +3 -0
- pipelineFinal.py +130 -0
- requirements.txt +11 -0
LSTM_forecaster.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c16873628011f5cc2e860eeaf65437171df8b5da6be69fc7dcd3a57b6e64f233
|
| 3 |
+
size 1893197
|
app.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pipelineFinal import pipeline_final
|
| 3 |
+
|
| 4 |
+
# Gradio UI: a dropdown chooses between forecasting right after the training
# window ("Si") or uploading fresh CSVs ("No"), a slider sets the forecast
# horizon in hours, and five file inputs supply the CSVs for the "No" path.
# Fixed label typos ("tain" -> "train", "histroical" -> "historical") and
# removed a stale copy-pasted comment about audio input.
demo = gr.Interface(
    fn=pipeline_final,
    inputs=[
        gr.Dropdown(["Si", "No"], label="Deseas hacer una prediccion justo luego del tiempo del train"),
        gr.Slider(0, 24, label="Choose a number"),
        gr.File(label="Sube el archivo de train en csv. (Solo si elegiste NO)"),
        gr.File(label="Sube el archivo de client en csv. (Solo si elegiste NO)"),
        gr.File(label="Sube el archivo de historical_weather en csv. (Solo si elegiste NO)"),
        gr.File(label="Sube el archivo de electricity_prices en csv. (Solo si elegiste NO)"),
        gr.File(label="Sube el archivo de gas_prices en csv. (Solo si elegiste NO)"),
    ],
    outputs=[gr.Plot(), gr.DataFrame()],
)
demo.launch()
|
| 19 |
+
|
filterdf.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from geopy.geocoders import Nominatim
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def initialize_geolocator(user_agent="county_locator"):
    """Build and return a Nominatim geocoder tagged with *user_agent*."""
    geocoder = Nominatim(user_agent=user_agent)
    return geocoder
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_county_from_coordinates(latitude, longitude, geolocator):
    """Reverse-geocode (latitude, longitude) and return the county name.

    Falls back to "Unknown" when the lookup returns nothing or the result's
    address carries no county entry.
    """
    place = geolocator.reverse((latitude, longitude), language="en")
    if not place:
        return "Unknown"
    address = place.raw.get("address", {})
    return address.get("county", "Unknown")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def add_county_column(df):
    """Attach a 'county' column derived from each row's coordinates.

    Only the unique (longitude, latitude) pairs are geocoded (one network
    lookup per pair), then the result is merged back onto *df*; the raw
    coordinate columns are dropped afterwards.
    """
    geolocator = initialize_geolocator()
    unique_coords = df[['longitude', 'latitude']].drop_duplicates()
    unique_coords['county'] = unique_coords.apply(
        lambda r: get_county_from_coordinates(r['latitude'], r['longitude'], geolocator),
        axis=1,
    )
    enriched = df.merge(
        unique_coords[['latitude', 'longitude', 'county']],
        on=['latitude', 'longitude'],
    )
    return enriched.drop(['longitude', 'latitude'], axis=1)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def filter_estonian_counties(df):
    """Keep only rows whose 'county' is an Estonian county and encode the
    county name as its integer label."""
    # Mapping doubles as the membership set: isin() iterates the dict keys.
    county_to_int = {
        'Saare County': 10, 'Võru County': 15, 'Pärnu County': 7, 'Valga County': 13, 'Viljandi County': 14,
        'Tartu County': 11, 'Põlva County': 8, 'Jõgeva County': 4, 'Hiiu County': 1, 'Lääne County': 6,
        'Rapla County': 9, 'Järva County': 3, 'Harju County': 0, 'Lääne-Viru County': 5, 'Ida-Viru County': 2
    }
    df = df[df['county'].isin(county_to_int)]
    df.loc[:, 'county'] = df['county'].map(county_to_int)
    return df
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def filter_data(train, client, weather, is_business, product_type, county_code):
    """Restrict the train/client/weather frames to a single segment.

    The segment is identified by (is_business, product_type, county_code);
    in addition, only production rows (is_consumption == 0) are kept in
    *train*.  All filtering columns are dropped from the returned frames.

    Returns the filtered (train, client, weather) tuple.
    """
    segment_mask = (
        (train['is_business'] == is_business)
        & (train['product_type'] == product_type)
        & (train['county'] == county_code)
    )
    train = train[segment_mask].drop(['is_business', 'product_type', 'county'], axis=1)
    # Keep production rows only, then drop the now-constant flag.
    train = train[train['is_consumption'] == 0].drop(['is_consumption'], axis=1)

    client_mask = (
        (client['is_business'] == is_business)
        & (client['product_type'] == product_type)
        & (client['county'] == county_code)
    )
    client = client[client_mask].drop(['is_business', 'product_type', 'county'], axis=1)

    weather = weather[weather['county'] == county_code].drop(['county'], axis=1)

    return train, client, weather
|
| 71 |
+
|
| 72 |
+
def save_datasets_to_pickle(datasets, paths=None):
    """Persist each DataFrame in *datasets* as a pickle file.

    Parameters
    ----------
    datasets : list of DataFrame
        Frames to save, matched positionally with *paths*.
    paths : list of str, optional
        Target file paths.  Defaults to the standard ``process_files``
        layout next to this module.
    """
    # Resolve default paths relative to this file so the function works
    # regardless of the current working directory.
    base_dir = os.path.dirname(os.path.abspath(__file__))

    if paths is None:
        paths = [
            os.path.join(base_dir, 'process_files', 'generation.pkl'),
            os.path.join(base_dir, 'process_files', 'client.pkl'),
            os.path.join(base_dir, 'process_files', 'historical_weather.pkl'),
            os.path.join(base_dir, 'process_files', 'electricity_prices.pkl'),
            os.path.join(base_dir, 'process_files', 'gas_prices.pkl'),
        ]

    # BUG FIX: the target directory was never created, so the first run
    # failed with FileNotFoundError (the mergedf.py counterpart already
    # calls makedirs).  Create each parent directory before writing.
    for dataset, path in zip(datasets, paths):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        dataset.to_pickle(path)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def filter_datasets(train, client, historical_weather, electricity_prices, gas_prices):
    """Clean the raw frames, enrich weather with county labels, filter one
    segment and persist all results as pickle files.

    NOTE(review): the segment is hard-coded to
    (is_business=1, product_type=3, county_code=0).
    """
    is_business, product_type, county_code = 1, 3, 0

    # (frame, columns to drop, columns to parse as datetime)
    cleanup_plan = (
        (train, ['data_block_id', 'row_id', 'prediction_unit_id'], ['datetime']),
        (client, ['data_block_id'], ['date']),
        (historical_weather, ['data_block_id'], ['datetime']),
        (electricity_prices, ['data_block_id', 'origin_date'], ['forecast_date']),
        (gas_prices, ['data_block_id', 'origin_date'], ['forecast_date']),
    )
    for frame, obsolete_cols, datetime_cols in cleanup_plan:
        frame.drop(obsolete_cols, axis=1, inplace=True)
        for column in datetime_cols:
            frame[column] = pd.to_datetime(frame[column])

    # Map coordinates to counties, keep the Estonian ones, then average all
    # weather measurements per (county, datetime).
    historical_weather = add_county_column(historical_weather)
    historical_weather = filter_estonian_counties(historical_weather)
    historical_weather = historical_weather.groupby(['county', 'datetime']).agg('mean').reset_index()

    # Restrict every frame to the chosen segment.
    train, client, historical_weather = filter_data(
        train, client, historical_weather, is_business, product_type, county_code
    )

    # Persist for the downstream merge stage.
    save_datasets_to_pickle([train, client, historical_weather, electricity_prices, gas_prices])
|
| 121 |
+
|
mergedf.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def load_datasets():
    """Load the five pickled datasets from ./process_files.

    Returns
    -------
    tuple of DataFrame
        (train, client, historical_weather, electricity_prices, gas_prices)

    Raises
    ------
    FileNotFoundError
        If any expected pickle file is missing.
    """
    # Paths are resolved against the current working directory.
    stage_dir = os.path.join(os.getcwd(), 'process_files')
    file_names = ['generation.pkl', 'client.pkl', 'historical_weather.pkl',
                  'electricity_prices.pkl', 'gas_prices.pkl']
    paths = [os.path.join(stage_dir, name) for name in file_names]

    # Fail early with a clear message if anything is missing.
    for path in paths:
        if not os.path.exists(path):
            raise FileNotFoundError(f"Archivo no encontrado: {path}")

    train, client, historical_weather, electricity_prices, gas_prices = (
        pd.read_pickle(path) for path in paths
    )
    return train, client, historical_weather, electricity_prices, gas_prices
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def add_time_series_col(client, historical_weather, electricity_prices, gas_prices):
    """Shift each auxiliary frame to the datetime at which its data becomes
    available for forecasting, stored in a 'datetime' column.

    Availability lags: client data +3 days, historical weather +2 days,
    electricity and gas prices +1 day.  The superseded raw date columns
    are dropped.
    """
    client['datetime'] = pd.to_datetime(client['date']) + pd.Timedelta(days=3)
    historical_weather['datetime'] = historical_weather['datetime'] + pd.Timedelta(days=2)
    electricity_prices['datetime'] = pd.to_datetime(electricity_prices['forecast_date']) + pd.Timedelta(days=1)
    gas_prices['datetime'] = pd.to_datetime(gas_prices['forecast_date']) + pd.Timedelta(days=1)

    client = client.drop(['date'], axis=1)
    electricity_prices = electricity_prices.drop(['forecast_date'], axis=1)
    gas_prices = gas_prices.drop(['forecast_date'], axis=1)

    return client, historical_weather, electricity_prices, gas_prices
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def merge_datasets(train, client, historical_weather, electricity_prices, gas_prices):
    """Join all frames into one hourly frame keyed by 'datetime'.

    Hourly frames (weather, electricity prices) join *train* on the exact
    timestamp; daily frames (client, gas prices) join on the calendar day.
    """
    hourly = (
        train
        .merge(historical_weather, on='datetime', how='left')
        .merge(electricity_prices, on='datetime', how='left')
    )

    # Daily join key: timestamps truncated to midnight.
    hourly['date'] = hourly['datetime'].dt.floor('D')
    client['date'] = client['datetime'].dt.floor('D')
    client = client.drop('datetime', axis=1)
    gas_prices['date'] = gas_prices['datetime'].dt.floor('D')
    gas_prices = gas_prices.drop('datetime', axis=1)

    combined = (
        hourly
        .merge(client, on='date', how='outer')
        .merge(gas_prices, on='date', how='outer')
    )

    # The helper 'date' key is no longer needed.
    return combined.drop(['date'], axis=1)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def reorder_columns(df, column_order=None):
    """Return *df* with its columns in the canonical modelling order.

    Parameters
    ----------
    df : DataFrame
    column_order : list of str, optional
        Explicit order to apply; defaults to the project's standard layout.

    Raises
    ------
    KeyError
        If *df* is missing any requested column.
    """
    # BUG FIX: `column_order == None` -> `is None` (PEP 8: identity test for
    # None; `==` can be overridden and misbehaves on array-likes).
    if column_order is None:
        column_order = [
            'datetime', 'target', 'temperature', 'dewpoint', 'rain', 'snowfall',
            'surface_pressure', 'cloudcover_total', 'cloudcover_low', 'cloudcover_mid',
            'cloudcover_high', 'windspeed_10m', 'winddirection_10m',
            'shortwave_radiation', 'direct_solar_radiation', 'diffuse_radiation',
            'lowest_price_per_mwh', 'highest_price_per_mwh', 'euros_per_mwh', 'eic_count', 'installed_capacity'
        ]
    return df[column_order]
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def save_datasets_to_pickle(datasets, paths=None):
    """Persist each DataFrame in *datasets* to the matching path in *paths*.

    Parameters
    ----------
    datasets : list of DataFrame
    paths : list of str, optional
        Defaults to the staged ``merged_df.pkl`` location taken from the
        project's ``root`` module.
    """
    # BUG FIX: `paths == None` -> `is None` (PEP 8 identity test for None).
    if paths is None:
        # Imported lazily: `root` is only needed to build the default path.
        import root
        paths = [
            root.DIR_DATA_STAGE + 'merged_df.pkl',
        ]

    # Create parent folders if they do not exist yet.
    for path in paths:
        os.makedirs(os.path.dirname(path), exist_ok=True)

    # Save each dataset to its respective path.
    for dataset, path in zip(datasets, paths):
        dataset.to_pickle(path)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def drop_first_3_days(df, column, threshold_column, threshold_nans=70):
    """Drop the first 3 days of *df* (by datetime *column*) when
    *threshold_column* has more than *threshold_nans* missing values.

    Returns the (possibly truncated) DataFrame.
    """
    missing_count = df[threshold_column].isna().sum()
    if missing_count <= threshold_nans:
        return df

    # Keep only rows at or after start-of-data + 3 days.
    cutoff = df[column].min() + pd.Timedelta(days=3)
    return df[df[column] >= cutoff]
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def feature_selection(df):
    """Drop the weather/price features discarded during feature selection.

    Mutates *df* in place and returns it.
    """
    discarded = [
        'dewpoint', 'cloudcover_low', 'cloudcover_mid',
        'cloudcover_high', 'direct_solar_radiation',
        'diffuse_radiation', 'lowest_price_per_mwh',
        'highest_price_per_mwh', 'eic_count',
    ]
    df.drop(columns=discarded, inplace=True)
    return df
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def set_datetime_index(df):
    """Index *df* by its 'datetime' column at a fixed hourly frequency.

    `asfreq` inserts NaN rows for any missing hour.
    """
    indexed = df.set_index('datetime')
    return indexed.asfreq('h')
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def merging_datasets():
    """Full merge stage: load the staged pickles, align their timelines,
    merge everything, select features and return the hourly-indexed frame."""
    train, client, historical_weather, electricity_prices, gas_prices = load_datasets()

    # Shift auxiliary frames to their data-availability datetime.
    client, historical_weather, electricity_prices, gas_prices = add_time_series_col(
        client, historical_weather, electricity_prices, gas_prices
    )

    merged = merge_datasets(train, client, historical_weather, electricity_prices, gas_prices)
    merged = reorder_columns(merged)
    merged = feature_selection(merged)
    merged = set_datetime_index(merged)
    return merged
|
pipeline.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6fa69d47a46f823b38783c73e2d36215e1884b6b30742c45c9912ed1542a4be
|
| 3 |
+
size 2283
|
pipelineFinal.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from calendar import c
|
| 2 |
+
from os import pipe
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import pickle
|
| 5 |
+
from skforecast.utils import load_forecaster
|
| 6 |
+
from filterdf import filter_datasets
|
| 7 |
+
from mergedf import merging_datasets
|
| 8 |
+
import numpy as np
|
| 9 |
+
import plotly.graph_objects as go
|
| 10 |
+
|
| 11 |
+
def load_csv(input_file):
    """Read *input_file* as CSV and return the resulting DataFrame.

    Raises
    ------
    ValueError
        If the file cannot be read or contains no rows.
    """
    try:
        df = pd.read_csv(input_file)

        # An empty upload is a user error worth a clear message.
        if df.empty:
            raise ValueError("El archivo subido está vacío o no tiene datos válidos.")

        return df
    except Exception as e:
        # BUG FIX: the original `raise f"..."` raised a bare string, which
        # itself fails with TypeError ("exceptions must derive from
        # BaseException").  Wrap the cause in a real exception instead.
        raise ValueError(f"Error al cargar el archivo CSV:{e}") from e
|
| 25 |
+
|
| 26 |
+
def load_model(name):
    """Load and return a persisted skforecast forecaster from file *name*."""
    return load_forecaster(name, verbose=True)
|
| 30 |
+
|
| 31 |
+
def load_pipeline():
    """Deserialize and return the preprocessing pipeline from ./pipeline.pkl.

    NOTE(review): unpickling executes arbitrary code — only load trusted files.
    """
    with open('pipeline.pkl', 'rb') as fh:
        return pickle.load(fh)
|
| 35 |
+
|
| 36 |
+
def unscale_data(scaler, predictions):
    """Invert the scaler on the 'target' column of *predictions*.

    The scaler was fitted on 11 features, so the target values are placed in
    column 0 of a zero matrix before calling inverse_transform.  Negative
    unscaled values are clipped to 0 (production cannot be negative).
    """
    buffer = np.zeros((len(predictions), 11))
    buffer[:, 0] = predictions['target']
    unscaled = scaler.inverse_transform(buffer)[:, 0]
    unscaled[unscaled < 0] = 0
    return pd.DataFrame(unscaled, columns=predictions.columns, index=predictions.index)
|
| 43 |
+
|
| 44 |
+
def create_plots(predictions):
    """Build a Plotly line figure of the estimated production over time."""
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=predictions.index,
            y=predictions['target'],
            name="Estimado",
            mode="lines",
            line_color="#4EA72E",
        )
    )
    fig.update_layout(
        yaxis_title="Producción (kWh)",
        width=750,
        height=350,
        margin=dict(l=20, r=0, t=35, b=20),
        legend=dict(
            orientation="v",
            yanchor="top",
            xanchor="right",
            x=0.99,
            y=0.99,
        ),
    )
    return fig
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _present_predictions(pred, scaler):
    """Unscale *pred* and return (figure, stringified display DataFrame)."""
    pred = unscale_data(scaler, pred)
    fig = create_plots(pred)
    table = pred.reset_index(drop=False)
    table = table.astype(str)
    table = table.rename(columns={'index': 'fecha'})
    return fig, table


def pipeline_final(texto, steps, train=None, client=None, historical_weather=None,
                   electricity_prices=None, gas_prices=None):
    """Gradio entry point: produce a *steps*-hour production forecast.

    Parameters
    ----------
    texto : str
        'Si' to forecast right after the training window; anything else
        expects the five CSV files below as the new last window.
    steps : int
        Forecast horizon in hours.
    train, client, historical_weather, electricity_prices, gas_prices :
        CSV file paths; only used when ``texto != 'Si'``.

    Returns
    -------
    tuple
        (plotly Figure, DataFrame of stringified predictions)

    Notes
    -----
    Refactor: the identical post-processing duplicated in both branches was
    extracted into ``_present_predictions``; dead commented-out debugging
    values were removed.
    """
    pipeline = load_pipeline()
    scaler = pipeline['scale']
    model = load_model('LSTM_forecaster.joblib')

    if texto == 'Si':
        # Forecast directly from the last window stored with the model.
        pred = model.predict(steps=steps)
        return _present_predictions(pred, scaler)

    # Rebuild the last window from the uploaded CSVs.
    train = load_csv(train)
    client = load_csv(client)
    historical_weather = load_csv(historical_weather)
    electricity_prices = load_csv(electricity_prices)
    gas_prices = load_csv(gas_prices)

    # Filter to the modelled segment (persists intermediate pickles), then
    # merge everything into one hourly frame.
    filter_datasets(train, client, historical_weather, electricity_prices, gas_prices)
    df = merging_datasets()

    # Scale with the fitted preprocessing pipeline, keeping labels intact.
    df_processed = pipeline.transform(df)
    df_processed = pd.DataFrame(df_processed, columns=df.columns, index=df.index)

    pred = model.predict(steps=steps, last_window=df_processed)
    return _present_predictions(pred, scaler)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
astral==3.2
|
| 2 |
+
feature_engine==1.8.2
|
| 3 |
+
geopy==2.4.1
|
| 4 |
+
gradio==5.7.1
|
| 5 |
+
joblib==1.4.2
|
| 6 |
+
numpy==2.1.3
|
| 7 |
+
pandas==2.2.3
|
| 8 |
+
plotly==5.24.1
|
| 9 |
+
root==0.0.1
|
| 10 |
+
scikit_learn==1.5.2
|
| 11 |
+
skforecast==0.14.0
|