portfolio-optimizer / src /forecast.py
abnsol's picture
refactor: remove ARIMA forecasting model
f32736b verified
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
def train_test_split_series(series, split_date):
    """Split a time series into non-overlapping train/test sets at a date.

    Args:
        series: pandas Series (or DataFrame) whose index supports label
            slicing with ``split_date``.
        split_date: index label marking the first observation of the test set.

    Returns:
        A ``(train, test)`` tuple with missing values dropped from each part.
        ``test`` holds every row from ``split_date`` onwards; ``train`` holds
        the rows up to ``split_date`` that are not already part of ``test``.
    """
    test_part = series.loc[split_date:]
    train_part = series.loc[:split_date]

    # Label-based slicing includes both endpoints, so the row(s) at
    # ``split_date`` would otherwise appear in train AND test, leaking test
    # observations into the training data. Keep the boundary in test only.
    train_part = train_part.loc[~train_part.index.isin(test_part.index)]

    print("Train shape:", train_part.shape)
    print("Test shape:", test_part.shape)
    print("First few test values:\n", test_part.head())

    return train_part.dropna(), test_part.dropna()
# --- LSTM Functions ---
def create_lstm_sequences(data, sequence_length):
    """Turn column 0 of a 2-D array into sliding windows and next-step targets.

    For every start position ``s`` with a full window available, the window is
    ``data[s:s + sequence_length, 0]`` and its target is the value that
    immediately follows it, ``data[s + sequence_length, 0]``.

    Returns:
        ``(X, y)`` as NumPy arrays built from the windows and the targets.
    """
    window_count = len(data) - sequence_length
    windows = [
        data[start:start + sequence_length, 0]
        for start in range(window_count)
    ]
    targets = [
        data[start + sequence_length, 0]
        for start in range(window_count)
    ]
    return np.array(windows), np.array(targets)
def build_and_train_lstm(X_train, y_train, epochs=100, batch_size=32):
    """Assemble a stacked two-layer LSTM regressor and fit it.

    Architecture: LSTM(50, returning sequences) -> Dropout(0.2) -> LSTM(50)
    -> Dropout(0.2) -> Dense(1), compiled with the Adam optimizer and mean
    squared error loss. Training uses a 0.2 validation split and stops early
    once ``val_loss`` has not improved for 10 epochs, restoring the best
    weights seen.

    Args:
        X_train: training windows; ``X_train.shape[1]`` is used as the number
            of time steps (assumes shape (samples, timesteps, 1) -- TODO
            confirm against the caller).
        y_train: target value for each window.
        epochs: maximum number of training epochs.
        batch_size: mini-batch size passed to ``fit``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    timesteps = X_train.shape[1]
    layers = [
        LSTM(units=50, return_sequences=True, input_shape=(timesteps, 1)),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=1),
    ]
    lstm_model = Sequential(layers)
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')

    # Halt once validation loss plateaus and roll back to the best epoch.
    stop_on_plateau = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )
    lstm_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=[stop_on_plateau],
        verbose=1,
    )
    return lstm_model
def forecast_lstm(model, all_data, test_data, scaler, sequence_length):
    """Generate a forecast for every test point using a trained LSTM model.

    Each prediction is made from the ``sequence_length`` observed values that
    precede the corresponding test point; the windows are built from
    ``all_data``, not from earlier predictions.

    Args:
        model: object exposing ``predict(X)`` for input of shape
            (n_windows, sequence_length, 1).
        all_data: pandas object holding the full series. Assumes its final
            ``len(test_data)`` rows are the test period -- confirm in caller.
        test_data: pandas object whose index labels the forecast.
        scaler: object exposing ``transform`` and ``inverse_transform``.
        sequence_length: number of look-back steps per input window.

    Returns:
        A ``pd.Series`` named ``"LSTM_Forecast"`` indexed like ``test_data``.

    Raises:
        ValueError: if ``all_data`` has fewer than
            ``len(test_data) + sequence_length`` rows.
    """
    start = len(all_data) - len(test_data) - sequence_length
    if start < 0:
        # A negative start would silently wrap around as a from-the-end slice
        # and yield too few inputs; fail with an explicit message instead.
        raise ValueError(
            f"all_data has {len(all_data)} rows but at least "
            f"{len(test_data) + sequence_length} are required "
            f"(len(test_data) + sequence_length)."
        )

    # .iloc makes the positional intent explicit regardless of index type.
    inputs = all_data.iloc[start:].values.reshape(-1, 1)
    inputs_scaled = scaler.transform(inputs)

    # One window per test point: the sequence_length values just before it.
    X_test = np.array([
        inputs_scaled[end - sequence_length:end, 0]
        for end in range(sequence_length, len(inputs_scaled))
    ])
    X_test = X_test.reshape(X_test.shape[0], sequence_length, 1)

    predictions_scaled = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions_scaled)
    return pd.Series(
        predictions.flatten(),
        index=test_data.index,
        name="LSTM_Forecast",
    )
def evaluate_forecast(actual, forecast, model_name):
    """Calculate and print evaluation metrics for a forecast.

    Args:
        actual: observed values.
        forecast: predicted values, in the same order as ``actual``.
        model_name: label used in the printed report header.

    Returns:
        A dict with keys ``"MAE"``, ``"RMSE"`` and ``"MAPE"`` (MAPE in
        percent). MAPE is computed only over points whose actual value is
        non-zero and is NaN when no such point exists.
    """
    mae = mean_absolute_error(actual, forecast)
    rmse = root_mean_squared_error(actual, forecast)

    # Compare element-by-element by position. Subtracting two pandas objects
    # aligns them on their index instead, which can inject NaNs when the
    # indexes differ and would make MAPE disagree with the metrics above.
    actual_values = np.asarray(actual, dtype=float).ravel()
    forecast_values = np.asarray(forecast, dtype=float).ravel()

    # A percentage error is undefined where the actual value is zero; leave
    # those points out rather than letting the division produce inf/NaN.
    nonzero = actual_values != 0
    if nonzero.any():
        relative_errors = np.abs(
            (actual_values[nonzero] - forecast_values[nonzero])
            / actual_values[nonzero]
        )
        mape = np.mean(relative_errors) * 100
    else:
        mape = float("nan")

    print(f"\n--- {model_name} Model Evaluation ---")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    return {"MAE": mae, "RMSE": rmse, "MAPE": mape}