"""Helpers for splitting a time series, training an LSTM forecaster and scoring forecasts."""

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential


def train_test_split_series(series, split_date):
    """Split a time series into train/test sets based on a date.

    The test set starts at ``split_date`` (inclusive). The train set is
    everything up to ``split_date`` that is *not* already in the test set, so
    the two sets never share an observation.

    Args:
        series: pandas Series/DataFrame indexed by the labels to split on.
        split_date: Index label at which the test set begins.

    Returns:
        Tuple ``(train, test)`` with missing values dropped from each part.
    """
    test_slice = series.loc[split_date:]
    train_slice = series.loc[:split_date]
    # Label-based slicing includes both endpoints, so rows at ``split_date``
    # would otherwise appear in both sets and leak test data into training.
    train_slice = train_slice.loc[~train_slice.index.isin(test_slice.index)]

    print("Train shape:", train_slice.shape)
    print("Test shape:", test_slice.shape)
    print("First few test values:\n", test_slice.head())

    return train_slice.dropna(), test_slice.dropna()


# --- LSTM Functions ---
def create_lstm_sequences(data, sequence_length):
    """Create input sequences and corresponding labels for LSTM.

    Args:
        data: 2-D array; only column 0 is read.
        sequence_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[i]`` holds rows
        ``i .. i + sequence_length - 1`` of column 0 and ``y[i]`` is the value
        that immediately follows that window.
    """
    window_count = len(data) - sequence_length
    X = [data[start:start + sequence_length, 0] for start in range(window_count)]
    y = [data[start + sequence_length, 0] for start in range(window_count)]
    return np.array(X), np.array(y)


def build_and_train_lstm(X_train, y_train, epochs=100, batch_size=32):
    """Builds, compiles, and trains an LSTM model.

    The network is two stacked 50-unit LSTM layers, each followed by 20%
    dropout, and a single-unit dense output. It is trained with Adam on mean
    squared error, holding out 20% of the samples for validation and stopping
    early once ``val_loss`` has not improved for 10 epochs (the best weights
    are restored).

    Args:
        X_train: Input windows shaped ``(samples, timesteps)`` or
            ``(samples, timesteps, 1)``.
        y_train: Target value for each window.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The trained Keras model.
    """
    # The first LSTM layer is declared with one feature per timestep, so give
    # 2-D windows an explicit trailing feature axis (forecast_lstm does the
    # same for its inputs).
    if np.ndim(X_train) == 2:
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

    lstm_model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        Dropout(0.2),
        LSTM(units=50, return_sequences=False),
        Dropout(0.2),
        Dense(units=1),
    ])
    lstm_model.compile(optimizer='adam', loss='mean_squared_error')

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    lstm_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1,
    )
    return lstm_model


def forecast_lstm(model, all_data, test_data, scaler, sequence_length):
    """Generate a forecast using a trained LSTM model.

    One prediction is produced per row of ``test_data``; each prediction is
    fed the ``sequence_length`` observed values that precede that row.

    Args:
        model: Trained model exposing ``predict``.
        all_data: Full pandas series. The code takes its last
            ``len(test_data) + sequence_length`` rows, i.e. it assumes
            ``test_data`` is the tail of ``all_data``.
        test_data: Test portion; its index labels the returned forecast.
        scaler: Fitted scaler exposing ``transform`` / ``inverse_transform``.
        sequence_length: Window length the model was trained with.

    Returns:
        pandas Series named ``"LSTM_Forecast"`` indexed like ``test_data``.

    Raises:
        ValueError: If ``all_data`` has fewer than
            ``len(test_data) + sequence_length`` rows.
    """
    start = len(all_data) - len(test_data) - sequence_length
    if start < 0:
        # A negative start would silently slice from the end of the series
        # and produce the wrong number of windows.
        raise ValueError(
            "all_data must contain at least len(test_data) + sequence_length rows"
        )

    history = all_data.iloc[start:].values.reshape(-1, 1)
    history_scaled = scaler.transform(history)

    windows = [
        history_scaled[end - sequence_length:end, 0]
        for end in range(sequence_length, len(history_scaled))
    ]
    X_test = np.array(windows)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    predictions_scaled = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions_scaled)

    forecast_series = pd.Series(predictions.flatten(), index=test_data.index, name="LSTM_Forecast")
    return forecast_series


def evaluate_forecast(actual, forecast, model_name):
    """Calculate and print evaluation metrics for a forecast.

    Args:
        actual: Observed values.
        forecast: Predicted values, in the same order as ``actual``.
        model_name: Label used in the printed report.

    Returns:
        Dict with keys ``"MAE"``, ``"RMSE"`` and ``"MAPE"`` (MAPE in percent).
        MAPE is computed only over observations whose actual value is
        non-zero and is NaN when there are none.
    """
    mae = mean_absolute_error(actual, forecast)
    rmse = root_mean_squared_error(actual, forecast)

    # Work on plain arrays so the percentage error pairs values by position,
    # like the scikit-learn metrics above, instead of aligning on pandas index
    # labels.
    actual_values = np.asarray(actual, dtype=float).ravel()
    forecast_values = np.asarray(forecast, dtype=float).ravel()
    nonzero = actual_values != 0  # a zero actual would make the ratio inf/NaN
    if nonzero.any():
        relative_errors = (actual_values[nonzero] - forecast_values[nonzero]) / actual_values[nonzero]
        mape = np.mean(np.abs(relative_errors)) * 100
    else:
        mape = float("nan")

    print(f"\n--- {model_name} Model Evaluation ---")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    return {"MAE": mae, "RMSE": rmse, "MAPE": mape}