Spaces:
Sleeping
Sleeping
Commit · e4ee23f
1
Parent(s): fbb4bb1
update pretrained model
Browse files
- app.py +8 -5
- models/custom_models.py +29 -13
- models/darts_models.py +70 -20
- utils/preprocessing.py +1 -0
app.py
CHANGED
|
@@ -32,8 +32,9 @@ def plot_raw_data(df):
|
|
| 32 |
fig.tight_layout()
|
| 33 |
return fig
|
| 34 |
|
| 35 |
-
def forecast_interface(file, model_type, is_multivariate, horizon, neurons, epochs, batch_size, future_horizon):
|
| 36 |
df, _ = load_and_process_data(file.name, is_multivariate == "Multivariate")
|
|
|
|
| 37 |
raw_plot = plot_raw_data(df)
|
| 38 |
|
| 39 |
if model_type in ["LSTM", "BiLSTM", "GRU"]:
|
|
@@ -42,6 +43,7 @@ def forecast_interface(file, model_type, is_multivariate, horizon, neurons, epoc
|
|
| 42 |
model_type,
|
| 43 |
is_multivariate == "Multivariate",
|
| 44 |
horizon,
|
|
|
|
| 45 |
neurons,
|
| 46 |
epochs,
|
| 47 |
batch_size,
|
|
@@ -71,11 +73,12 @@ with gr.Blocks() as demo:
|
|
| 71 |
is_multivariate = gr.Radio(["Univariate", "Multivariate"], value="Univariate", label="📊 Data Type")
|
| 72 |
|
| 73 |
with gr.Row():
|
| 74 |
-
horizon = gr.Slider(10, 10000, value=300, label="📏 Forecast Horizon")
|
| 75 |
-
|
|
|
|
| 76 |
|
| 77 |
with gr.Row():
|
| 78 |
-
neurons = gr.Slider(10, 12400, value=64, label="🧠 Neurons (Only for LSTM)")
|
| 79 |
epochs = gr.Slider(10, 10000, value=100, label="🔁 Epochs")
|
| 80 |
batch_size = gr.Slider(8, 2048, value=32, step=8, label="📦 Batch Size")
|
| 81 |
|
|
@@ -88,7 +91,7 @@ with gr.Blocks() as demo:
|
|
| 88 |
|
| 89 |
run_btn.click(
|
| 90 |
forecast_interface,
|
| 91 |
-
inputs=[file, model_type, is_multivariate, horizon, neurons, epochs, batch_size, future_horizon],
|
| 92 |
outputs=[raw_plot, output_plot, metrics_out, export_csv]
|
| 93 |
)
|
| 94 |
|
|
|
|
| 32 |
fig.tight_layout()
|
| 33 |
return fig
|
| 34 |
|
| 35 |
+
def forecast_interface(file, model_type, is_multivariate, horizon, lag, neurons, epochs, batch_size, future_horizon):
|
| 36 |
df, _ = load_and_process_data(file.name, is_multivariate == "Multivariate")
|
| 37 |
+
print(df.head())
|
| 38 |
raw_plot = plot_raw_data(df)
|
| 39 |
|
| 40 |
if model_type in ["LSTM", "BiLSTM", "GRU"]:
|
|
|
|
| 43 |
model_type,
|
| 44 |
is_multivariate == "Multivariate",
|
| 45 |
horizon,
|
| 46 |
+
lag,
|
| 47 |
neurons,
|
| 48 |
epochs,
|
| 49 |
batch_size,
|
|
|
|
| 73 |
is_multivariate = gr.Radio(["Univariate", "Multivariate"], value="Univariate", label="📊 Data Type")
|
| 74 |
|
| 75 |
with gr.Row():
|
| 76 |
+
horizon = gr.Slider(10, 10000, value=300, step=100, label="📏 Forecast Horizon")
|
| 77 |
+
lag = gr.Slider(1, 1000, value=10, step=10, label="⏳ Lag (Input Length)")
|
| 78 |
+
future_horizon = gr.Slider(0, 10000, value=0, step=100, label="🔮 Future Horizon (Steps Ahead)")
|
| 79 |
|
| 80 |
with gr.Row():
|
| 81 |
+
neurons = gr.Slider(10, 12400, value=64, step=8, label="🧠 Neurons (Only for LSTM)")
|
| 82 |
epochs = gr.Slider(10, 10000, value=100, label="🔁 Epochs")
|
| 83 |
batch_size = gr.Slider(8, 2048, value=32, step=8, label="📦 Batch Size")
|
| 84 |
|
|
|
|
| 91 |
|
| 92 |
run_btn.click(
|
| 93 |
forecast_interface,
|
| 94 |
+
inputs=[file, model_type, is_multivariate, horizon, lag, neurons, epochs, batch_size, future_horizon],
|
| 95 |
outputs=[raw_plot, output_plot, metrics_out, export_csv]
|
| 96 |
)
|
| 97 |
|
models/custom_models.py
CHANGED
|
@@ -10,12 +10,12 @@ from tensorflow.keras.layers import LSTM, Bidirectional, GRU, Dense
|
|
| 10 |
from tensorflow.keras.callbacks import EarlyStopping
|
| 11 |
|
| 12 |
def prepare_data(df, lag=10):
|
| 13 |
-
values = df.values.astype(
|
| 14 |
X, y = [], []
|
| 15 |
|
| 16 |
for i in range(len(values) - lag):
|
| 17 |
-
X.append(values[i:i+lag])
|
| 18 |
-
y.append(values[i+lag, 0]) # always predict first column (target)
|
| 19 |
|
| 20 |
return np.array(X), np.array(y), values
|
| 21 |
|
|
@@ -30,9 +30,11 @@ def scale_data(X, y):
|
|
| 30 |
|
| 31 |
return X_scaled, y_scaled, scaler_X, scaler_y
|
| 32 |
|
|
|
|
| 33 |
def invert_prediction(scaler, pred):
|
| 34 |
return scaler.inverse_transform(pred.reshape(-1, 1)).flatten()
|
| 35 |
|
|
|
|
| 36 |
def create_model(input_shape, model_type, neurons):
|
| 37 |
model = Sequential()
|
| 38 |
if model_type == "LSTM":
|
|
@@ -43,13 +45,24 @@ def create_model(input_shape, model_type, neurons):
|
|
| 43 |
model.add(GRU(neurons, input_shape=input_shape))
|
| 44 |
else:
|
| 45 |
raise ValueError(f"Unsupported model type: {model_type}")
|
| 46 |
-
|
| 47 |
model.add(Dense(1))
|
| 48 |
model.compile(loss="mse", optimizer="adam")
|
| 49 |
return model
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
X, y, raw_values = prepare_data(df, lag=lag)
|
| 54 |
|
| 55 |
train_size = len(X) - horizon
|
|
@@ -63,12 +76,13 @@ def run_forecast(df, model_type, is_multivariate, horizon, neurons, epochs, batc
|
|
| 63 |
|
| 64 |
early_stop = EarlyStopping(monitor="loss", patience=10, restore_best_weights=True)
|
| 65 |
model.fit(
|
| 66 |
-
X_train,
|
|
|
|
| 67 |
epochs=epochs,
|
| 68 |
batch_size=batch_size,
|
| 69 |
verbose=1,
|
| 70 |
shuffle=False,
|
| 71 |
-
callbacks=[early_stop]
|
| 72 |
)
|
| 73 |
|
| 74 |
y_train_pred = model.predict(X_train)
|
|
@@ -99,10 +113,7 @@ def run_forecast(df, model_type, is_multivariate, horizon, neurons, epochs, batc
|
|
| 99 |
metrics = f"Test RMSE: {rmse:.3f}, Test R2: {r2:.3f}"
|
| 100 |
|
| 101 |
# CSV export
|
| 102 |
-
export_df = pd.DataFrame({
|
| 103 |
-
"Test_Actual": y_test,
|
| 104 |
-
"Test_Predicted": y_test_pred
|
| 105 |
-
})
|
| 106 |
export_path = os.path.join(tempfile.gettempdir(), "forecast_result.csv")
|
| 107 |
export_df.to_csv(export_path, index=False)
|
| 108 |
|
|
@@ -111,7 +122,12 @@ def run_forecast(df, model_type, is_multivariate, horizon, neurons, epochs, batc
|
|
| 111 |
plt.plot(y_test, label="Test Actual")
|
| 112 |
plt.plot(y_test_pred, label="Test Predicted", linestyle="--")
|
| 113 |
if future_pred:
|
| 114 |
-
plt.plot(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
plt.title("Forecast Result")
|
| 116 |
plt.xlabel("Time Step")
|
| 117 |
plt.ylabel("Value")
|
|
|
|
| 10 |
from tensorflow.keras.callbacks import EarlyStopping
|
| 11 |
|
| 12 |
def prepare_data(df, lag=10):
|
| 13 |
+
values = df.values.astype("float32")
|
| 14 |
X, y = [], []
|
| 15 |
|
| 16 |
for i in range(len(values) - lag):
|
| 17 |
+
X.append(values[i : i + lag])
|
| 18 |
+
y.append(values[i + lag, 0]) # always predict first column (target)
|
| 19 |
|
| 20 |
return np.array(X), np.array(y), values
|
| 21 |
|
|
|
|
| 30 |
|
| 31 |
return X_scaled, y_scaled, scaler_X, scaler_y
|
| 32 |
|
| 33 |
+
|
| 34 |
def invert_prediction(scaler, pred):
|
| 35 |
return scaler.inverse_transform(pred.reshape(-1, 1)).flatten()
|
| 36 |
|
| 37 |
+
|
| 38 |
def create_model(input_shape, model_type, neurons):
|
| 39 |
model = Sequential()
|
| 40 |
if model_type == "LSTM":
|
|
|
|
| 45 |
model.add(GRU(neurons, input_shape=input_shape))
|
| 46 |
else:
|
| 47 |
raise ValueError(f"Unsupported model type: {model_type}")
|
| 48 |
+
|
| 49 |
model.add(Dense(1))
|
| 50 |
model.compile(loss="mse", optimizer="adam")
|
| 51 |
return model
|
| 52 |
|
| 53 |
+
|
| 54 |
+
def run_forecast(
|
| 55 |
+
df,
|
| 56 |
+
model_type,
|
| 57 |
+
is_multivariate,
|
| 58 |
+
horizon,
|
| 59 |
+
lag,
|
| 60 |
+
neurons,
|
| 61 |
+
epochs,
|
| 62 |
+
batch_size,
|
| 63 |
+
future_horizon=0,
|
| 64 |
+
):
|
| 65 |
+
# lag = 10
|
| 66 |
X, y, raw_values = prepare_data(df, lag=lag)
|
| 67 |
|
| 68 |
train_size = len(X) - horizon
|
|
|
|
| 76 |
|
| 77 |
early_stop = EarlyStopping(monitor="loss", patience=10, restore_best_weights=True)
|
| 78 |
model.fit(
|
| 79 |
+
X_train,
|
| 80 |
+
y_train,
|
| 81 |
epochs=epochs,
|
| 82 |
batch_size=batch_size,
|
| 83 |
verbose=1,
|
| 84 |
shuffle=False,
|
| 85 |
+
callbacks=[early_stop],
|
| 86 |
)
|
| 87 |
|
| 88 |
y_train_pred = model.predict(X_train)
|
|
|
|
| 113 |
metrics = f"Test RMSE: {rmse:.3f}, Test R2: {r2:.3f}"
|
| 114 |
|
| 115 |
# CSV export
|
| 116 |
+
export_df = pd.DataFrame({"Test_Actual": y_test, "Test_Predicted": y_test_pred})
|
|
|
|
|
|
|
|
|
|
| 117 |
export_path = os.path.join(tempfile.gettempdir(), "forecast_result.csv")
|
| 118 |
export_df.to_csv(export_path, index=False)
|
| 119 |
|
|
|
|
| 122 |
plt.plot(y_test, label="Test Actual")
|
| 123 |
plt.plot(y_test_pred, label="Test Predicted", linestyle="--")
|
| 124 |
if future_pred:
|
| 125 |
+
plt.plot(
|
| 126 |
+
range(len(y_test), len(y_test) + future_horizon),
|
| 127 |
+
future_pred,
|
| 128 |
+
label="Future Forecast",
|
| 129 |
+
linestyle="-.",
|
| 130 |
+
)
|
| 131 |
plt.title("Forecast Result")
|
| 132 |
plt.xlabel("Time Step")
|
| 133 |
plt.ylabel("Value")
|
models/darts_models.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
|
|
@@ -8,10 +9,9 @@ from darts.models import (
|
|
| 8 |
NBEATSModel,
|
| 9 |
TFTModel,
|
| 10 |
TCNModel,
|
| 11 |
-
BlockRNNModel
|
| 12 |
)
|
| 13 |
from darts.metrics import rmse, r2_score
|
| 14 |
-
from darts.utils.timeseries_generation import datetime_attribute_timeseries
|
| 15 |
from darts.dataprocessing.transformers import Scaler
|
| 16 |
|
| 17 |
MODEL_MAP = {
|
|
@@ -22,21 +22,37 @@ MODEL_MAP = {
|
|
| 22 |
"BlockRNN": BlockRNNModel,
|
| 23 |
}
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
series = series.astype("float32")
|
| 28 |
|
|
|
|
| 29 |
if not is_multivariate and series.width > 1:
|
| 30 |
series = series[:, 0]
|
| 31 |
|
| 32 |
scaler = Scaler()
|
| 33 |
series_scaled = scaler.fit_transform(series)
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
|
| 37 |
model_cls = MODEL_MAP[model_name]
|
| 38 |
model = model_cls(
|
| 39 |
-
input_chunk_length=
|
| 40 |
output_chunk_length=horizon,
|
| 41 |
n_epochs=epochs,
|
| 42 |
batch_size=batch_size,
|
|
@@ -45,21 +61,55 @@ def run_darts_forecast(df, model_name, is_multivariate, horizon, epochs, batch_s
|
|
| 45 |
)
|
| 46 |
|
| 47 |
model.fit(train, verbose=False)
|
| 48 |
-
forecast = model.predict(horizon)
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# Plot
|
| 55 |
-
fig = plt.figure()
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
| 59 |
plt.legend()
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
import pandas as pd
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
|
|
|
|
| 9 |
NBEATSModel,
|
| 10 |
TFTModel,
|
| 11 |
TCNModel,
|
| 12 |
+
BlockRNNModel,
|
| 13 |
)
|
| 14 |
from darts.metrics import rmse, r2_score
|
|
|
|
| 15 |
from darts.dataprocessing.transformers import Scaler
|
| 16 |
|
| 17 |
MODEL_MAP = {
|
|
|
|
| 22 |
"BlockRNN": BlockRNNModel,
|
| 23 |
}
|
| 24 |
|
| 25 |
+
|
| 26 |
+
def run_darts_forecast(
|
| 27 |
+
df, model_name, is_multivariate, horizon, epochs, batch_size, future_horizon=0
|
| 28 |
+
):
|
| 29 |
+
df.columns = df.columns.str.strip() # strip spaces/BOMs
|
| 30 |
+
print(df.columns)
|
| 31 |
+
if "datetime" in df.columns:
|
| 32 |
+
df["datetime"] = pd.to_datetime(df["datetime"])
|
| 33 |
+
series = TimeSeries.from_dataframe(df, time_col="datetime")
|
| 34 |
+
elif isinstance(df.index, pd.DatetimeIndex):
|
| 35 |
+
series = TimeSeries.from_dataframe(df)
|
| 36 |
+
else:
|
| 37 |
+
raise ValueError(
|
| 38 |
+
f"DataFrame must have a 'datetime' column or a DatetimeIndex. Columns are: {df.columns}"
|
| 39 |
+
)
|
| 40 |
series = series.astype("float32")
|
| 41 |
|
| 42 |
+
# Use only the first column if univariate
|
| 43 |
if not is_multivariate and series.width > 1:
|
| 44 |
series = series[:, 0]
|
| 45 |
|
| 46 |
scaler = Scaler()
|
| 47 |
series_scaled = scaler.fit_transform(series)
|
| 48 |
|
| 49 |
+
lag = 30
|
| 50 |
+
train_size = len(series_scaled) - horizon
|
| 51 |
+
train, test = series_scaled[:train_size], series_scaled[train_size:]
|
| 52 |
|
| 53 |
model_cls = MODEL_MAP[model_name]
|
| 54 |
model = model_cls(
|
| 55 |
+
input_chunk_length=lag,
|
| 56 |
output_chunk_length=horizon,
|
| 57 |
n_epochs=epochs,
|
| 58 |
batch_size=batch_size,
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
model.fit(train, verbose=False)
|
|
|
|
| 64 |
|
| 65 |
+
# Predict on test horizon
|
| 66 |
+
test_pred = model.predict(n=horizon)
|
| 67 |
+
test_actual = test
|
| 68 |
+
|
| 69 |
+
# Forecast future values
|
| 70 |
+
future_pred = None
|
| 71 |
+
if future_horizon > 0:
|
| 72 |
+
last_series = series_scaled[-lag:]
|
| 73 |
+
future_pred = model.predict(n=future_horizon, series=last_series)
|
| 74 |
+
|
| 75 |
+
# Invert scaling
|
| 76 |
+
test_pred_inv = scaler.inverse_transform(test_pred)
|
| 77 |
+
test_actual_inv = scaler.inverse_transform(test_actual)
|
| 78 |
+
|
| 79 |
+
if future_pred:
|
| 80 |
+
future_pred_inv = scaler.inverse_transform(future_pred)
|
| 81 |
+
future_vals = future_pred_inv.values().flatten()
|
| 82 |
+
else:
|
| 83 |
+
future_vals = []
|
| 84 |
+
|
| 85 |
+
# Calculate metrics
|
| 86 |
+
rmse_val = rmse(test_actual_inv, test_pred_inv)
|
| 87 |
+
r2_val = r2_score(test_actual_inv, test_pred_inv)
|
| 88 |
+
metrics = f"Test RMSE: {rmse_val:.3f}, Test R2: {r2_val:.3f}"
|
| 89 |
+
|
| 90 |
+
# Export CSV
|
| 91 |
+
export_df = pd.DataFrame(
|
| 92 |
+
{
|
| 93 |
+
"Test_Actual": test_actual_inv.values().flatten(),
|
| 94 |
+
"Test_Predicted": test_pred_inv.values().flatten(),
|
| 95 |
+
}
|
| 96 |
+
)
|
| 97 |
+
export_path = os.path.join(tempfile.gettempdir(), "darts_forecast_result.csv")
|
| 98 |
+
export_df.to_csv(export_path, index=False)
|
| 99 |
|
| 100 |
# Plot
|
| 101 |
+
fig = plt.figure(figsize=(12, 6))
|
| 102 |
+
test_actual_inv.plot(label="Test Actual")
|
| 103 |
+
test_pred_inv.plot(label="Test Predicted", linestyle="--")
|
| 104 |
+
if future_pred:
|
| 105 |
+
future_pred_inv.plot(label="Future Forecast", linestyle="-.")
|
| 106 |
+
plt.title(f"{model_name} Forecast Result")
|
| 107 |
plt.legend()
|
| 108 |
|
| 109 |
+
return (
|
| 110 |
+
test_pred_inv.values().flatten(), # test predictions
|
| 111 |
+
future_vals, # future forecast
|
| 112 |
+
fig, # matplotlib figure
|
| 113 |
+
metrics, # metrics string
|
| 114 |
+
export_path, # CSV file path
|
| 115 |
+
)
|
utils/preprocessing.py
CHANGED
|
@@ -22,3 +22,4 @@ def load_and_process_data(file_path, is_multivariate):
|
|
| 22 |
df = df[[numeric_cols[0]]]
|
| 23 |
|
| 24 |
return df, df.shape[1]
|
|
|
|
|
|
| 22 |
df = df[[numeric_cols[0]]]
|
| 23 |
|
| 24 |
return df, df.shape[1]
|
| 25 |
+
|