|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from pmdarima.arima import auto_arima |
|
|
from sklearn.metrics import mean_absolute_error, mean_squared_error |
|
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
|
# GOOGL closing prices, indexed by the parsed "Date" column of the CSV.
stock_prices = pd.read_csv(
    "/work/GOOGL.csv",
    index_col="Date",
    parse_dates=["Date"],
)["Close"]

# Log return of every row relative to the row before it. The first row has no
# predecessor, so its NaN result (and any other NaN row) is dropped.
log_returns = np.log(stock_prices.div(stock_prices.shift(1))).dropna()
|
|
|
|
|
|
|
|
def evaluate_window(
    log_returns: pd.Series,
    stock_prices: pd.Series,
    window_size: int,
    test_size: float = 0.2,
) -> dict:
    """Walk-forward evaluate one-step auto-ARIMA forecasts for one window size.

    The last ``test_size`` fraction of ``log_returns`` is held out. For each
    held-out step a fresh non-seasonal, stepwise ``auto_arima`` model is
    fitted on the ``window_size`` returns immediately preceding that step
    (fewer when less history exists) and asked for a single-step forecast.

    The forecast returns are then compounded, starting from the last price
    before the hold-out, into a predicted price path. Each predicted price
    builds on the previous *predicted* price, not on the realised one.

    Args:
        log_returns: Series of log returns, oldest first.
        stock_prices: Series of the prices the returns were derived from.
            It may contain extra leading observations (typically the single
            row lost when the returns were differenced). Return position
            ``i`` is taken to describe the move into price position
            ``i + len(stock_prices) - len(log_returns)``.
        window_size: Maximum number of most recent returns used for each fit.
        test_size: Fraction of ``log_returns`` held out; strictly between
            0 and 1.

    Returns:
        A dict with the scalar metrics ``"MAE_Log"``, ``"RMSE_Log"``,
        ``"MAE_Price"``, ``"RMSE_Price"`` and ``"Direction_Accuracy"``, plus
        ``"Price_Predictions"`` (list of predicted prices) and
        ``"Actual_Prices"`` (the matching slice of ``stock_prices``).

    Raises:
        ValueError: If ``window_size`` is not positive, ``test_size`` is not
            strictly between 0 and 1, the split leaves no training returns,
            or ``stock_prices`` is shorter than ``log_returns``.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")
    if not 0 < test_size < 1:
        raise ValueError(f"test_size must be strictly between 0 and 1, got {test_size!r}")

    n_returns = len(log_returns)
    train_size = int(n_returns * (1 - test_size))
    if train_size < 1:
        raise ValueError(
            f"{n_returns} log returns are too few to leave a training set "
            f"with test_size={test_size!r}"
        )

    # Differencing drops the first price, so return i describes the move into
    # price i + price_offset. Deriving the offset from the lengths keeps an
    # already-aligned price series (offset 0) working as well.
    price_offset = len(stock_prices) - n_returns
    if price_offset < 0:
        raise ValueError("stock_prices must be at least as long as log_returns")

    test = log_returns.iloc[train_size:]
    first_test_price_pos = train_size + price_offset

    predictions = []
    price_predictions = []
    # Last price known before the first held-out return is realised.
    running_price = stock_prices.iloc[first_test_price_pos - 1]

    for t in range(len(test)):
        # Fit only on returns strictly before the step being forecast.
        end_idx = train_size + t
        start_idx = max(0, end_idx - window_size)
        window_data = log_returns.iloc[start_idx:end_idx]

        model = auto_arima(
            window_data.values,
            seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            error_action="ignore",
        )

        forecast = model.predict(n_periods=1)[0]
        predictions.append(forecast)

        # Turn the forecast log return back into a price level.
        running_price = running_price * np.exp(forecast)
        price_predictions.append(running_price)

    predictions = np.array(predictions)
    actual_prices = stock_prices.iloc[
        first_test_price_pos:first_test_price_pos + len(price_predictions)
    ]

    mae_log = mean_absolute_error(test, predictions)
    rmse_log = np.sqrt(mean_squared_error(test, predictions))

    mae_price = mean_absolute_error(actual_prices, price_predictions)
    rmse_price = np.sqrt(mean_squared_error(actual_prices, price_predictions))

    if len(price_predictions) > 1:
        # Share of steps where predicted and realised prices move the same way.
        direction_accuracy = np.mean(
            np.sign(np.diff(actual_prices.values)) == np.sign(np.diff(price_predictions))
        )
    else:
        # A single step has no step-to-step change to compare.
        direction_accuracy = float("nan")

    return {
        "MAE_Log": mae_log,
        "RMSE_Log": rmse_log,
        "MAE_Price": mae_price,
        "RMSE_Price": rmse_price,
        "Direction_Accuracy": direction_accuracy,
        "Price_Predictions": price_predictions,
        "Actual_Prices": actual_prices,
    }
|
|
|
|
|
|
|
|
# Look-back lengths to compare (reported as days in the messages below).
window_sizes = [30, 60, 90, 120, 180, 200, 250]

# Full evaluate_window() output for every candidate, keyed by window size.
results = {}
for w in window_sizes:
    print(f"Evaluating window size: {w}")
    results[w] = evaluate_window(log_returns, stock_prices, w)

# The summary table keeps only scalar metrics; the per-step price series stay
# in `results` for plotting.
series_keys = ("Price_Predictions", "Actual_Prices")
scalar_metrics = {
    size: {name: value for name, value in outcome.items() if name not in series_keys}
    for size, outcome in results.items()
}
# One row per window size, lowest price RMSE first.
results_df = pd.DataFrame(scalar_metrics).T.sort_values("RMSE_Price")

print("\nSliding Window Evaluation Results:")
print(results_df)

# After sorting, the first row is the window with the smallest price RMSE.
best_rmse_window = results_df.index[0]
print(f"\n✅ Best window for RMSE (Price): {best_rmse_window} days")

best_metrics = results[best_rmse_window]
actual_prices = best_metrics['Actual_Prices']
price_predictions = best_metrics['Price_Predictions']

# Overlay the predicted price path on the realised prices for the best window.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actual_prices.index, actual_prices, label='Actual Prices', color='blue')
ax.plot(
    actual_prices.index[:len(price_predictions)],
    price_predictions,
    label='Predicted Prices',
    color='orange',
    linestyle='--',
)
ax.set_title(f'ARIMA Forecast vs Actual Prices (Window Size: {best_rmse_window} days)')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price (USD)')
ax.legend()
ax.grid(True)
plt.show()