AnnNaserNabil committed on
Commit
e845ced
·
verified ·
1 Parent(s): 9c0e56a

Create ARIMA.py

Browse files
Files changed (1) hide show
  1. ARIMA.py +107 -0
ARIMA.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from pmdarima.arima import auto_arima
4
+ from sklearn.metrics import mean_absolute_error, mean_squared_error
5
+ import matplotlib.pyplot as plt
6
+
7
# --- Step 1: Load stock data ---
# Parse the "Date" column as dates, use it as the index, and keep only the
# "Close" column as the price series.
price_table = pd.read_csv("/work/GOOGL.csv", parse_dates=["Date"], index_col="Date")
stock_prices = price_table["Close"]

# --- Step 2: Compute Log Returns ---
# Ratio of each close to the previous row's close. Rows without a valid ratio
# (the first row has no predecessor) are removed by dropna().
price_ratio = stock_prices / stock_prices.shift(1)
log_returns = np.log(price_ratio).dropna()
12
+
13
+ # --- Step 3: Sliding Window Evaluation ---
14
def evaluate_window(log_returns, stock_prices, window_size, test_size=0.2):
    """Walk-forward evaluation of auto-ARIMA fitted on a rolling window.

    The trailing ``test_size`` fraction of ``log_returns`` is held out. For
    every held-out step a model is selected with ``auto_arima`` on at most
    ``window_size`` of the returns immediately preceding that step, a one-step
    log-return forecast is produced, and the forecasts are compounded into a
    price path that starts from the last price preceding the hold-out period.

    Args:
        log_returns: pandas Series of one-period log returns. It is assumed
            to be built as ``np.log(stock_prices / stock_prices.shift(1))
            .dropna()``, i.e. one element shorter than ``stock_prices`` so
            that ``log_returns.iloc[i]`` is the return from
            ``stock_prices.iloc[i]`` to ``stock_prices.iloc[i + 1]``.
        stock_prices: pandas Series of the prices the returns were derived
            from.
        window_size: maximum number of most recent returns used for each fit.
            When fewer returns are available, all of them are used.
        test_size: fraction of ``log_returns`` held out for evaluation.

    Returns:
        dict with the keys ``"MAE_Log"`` and ``"RMSE_Log"`` (errors of the
        forecast log returns), ``"MAE_Price"`` and ``"RMSE_Price"`` (errors of
        the compounded price path), ``"Direction_Accuracy"`` (share of steps
        where the sign of the predicted price change equals the sign of the
        actual price change), ``"Price_Predictions"`` (list of predicted
        prices) and ``"Actual_Prices"`` (Series of the matching realised
        prices).

    Raises:
        ValueError: if the split leaves no training or no test observations.
    """
    n_returns = len(log_returns)
    train_size = int(n_returns * (1 - test_size))
    test = log_returns.iloc[train_size:]
    if train_size < 1 or len(test) < 1:
        raise ValueError(
            "test_size must leave at least one training and one test observation"
        )

    # The leading NaN return was dropped, so return i links price i to
    # price i + 1. The last price known before the first test return is
    # therefore at position train_size (not train_size - 1), and the price
    # realised by the k-th test return is at position train_size + 1 + k.
    running_price = stock_prices.iloc[train_size]

    predictions = []
    price_predictions = []
    for end_idx in range(train_size, n_returns):
        # Rolling window: the window_size returns that end just before the
        # step being forecast, clipped at the start of the series.
        start_idx = max(0, end_idx - window_size)
        window_data = log_returns.iloc[start_idx:end_idx]

        # Re-select and fit a non-seasonal ARIMA on the current window.
        model = auto_arima(
            window_data.values,
            seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            error_action="ignore",
        )

        # Forecast the next log return and compound it onto the price path.
        forecast = model.predict(n_periods=1)[0]
        predictions.append(forecast)
        running_price = running_price * np.exp(forecast)
        price_predictions.append(running_price)

    predictions = np.array(predictions)
    first_actual = train_size + 1
    actual_prices = stock_prices.iloc[first_actual:first_actual + len(price_predictions)]

    # --- Metrics in log-return space ---
    mae_log = mean_absolute_error(test, predictions)
    rmse_log = np.sqrt(mean_squared_error(test, predictions))

    # --- Metrics in price space ---
    mae_price = mean_absolute_error(actual_prices, price_predictions)
    rmse_price = np.sqrt(mean_squared_error(actual_prices, price_predictions))

    # --- Direction accuracy: sign of step-to-step price changes ---
    actual_moves = np.sign(np.diff(actual_prices.values))
    predicted_moves = np.sign(np.diff(price_predictions))
    direction_accuracy = np.mean(actual_moves == predicted_moves)

    return {
        "MAE_Log": mae_log,
        "RMSE_Log": rmse_log,
        "MAE_Price": mae_price,
        "RMSE_Price": rmse_price,
        "Direction_Accuracy": direction_accuracy,
        "Price_Predictions": price_predictions,  # predicted price path
        "Actual_Prices": actual_prices,  # realised prices for the same steps
    }
76
+
77
# --- Step 4: Test multiple window sizes ---
window_sizes = [30, 60, 90, 120, 180, 200, 250]
results = {}

for w in window_sizes:
    print(f"Evaluating window size: {w}")
    results[w] = evaluate_window(log_returns, stock_prices, w)

# Keep only the scalar metrics for the summary table; the stored price series
# are left in `results` for plotting.
series_keys = ['Price_Predictions', 'Actual_Prices']
scalar_metrics = {}
for size, outcome in results.items():
    scalar_metrics[size] = {
        name: value for name, value in outcome.items() if name not in series_keys
    }

# One row per window size, ordered from lowest to highest price RMSE.
results_df = pd.DataFrame(scalar_metrics).T.sort_values("RMSE_Price")
print("\nSliding Window Evaluation Results:")
print(results_df)

best_rmse_window = results_df.index[0]
print(f"\n✅ Best window for RMSE (Price): {best_rmse_window} days")
92
+
93
# --- Step 5: Plot the forecast for the best window ---
winner = results[best_rmse_window]
observed = winner['Actual_Prices']
forecast_path = winner['Price_Predictions']
# Dates for the forecast line: as many leading dates as there are forecasts.
forecast_dates = observed.index[:len(forecast_path)]

# Draw actual prices first, then the forecast path on the same axes.
plt.figure(figsize=(12, 6))
plt.plot(observed.index, observed, label='Actual Prices', color='blue')
plt.plot(
    forecast_dates,
    forecast_path,
    label='Predicted Prices',
    color='orange',
    linestyle='--',
)
plt.title(f'ARIMA Forecast vs Actual Prices (Window Size: {best_rmse_window} days)')
plt.xlabel('Date')
plt.ylabel('Stock Price (USD)')
plt.legend()
plt.grid(True)
plt.show()