mozzic committed on
Commit
8e3bb5d
·
verified ·
1 Parent(s): ed254b9

Upload demo_files\stock_forecasting.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. demo_files//stock_forecasting.ipynb +130 -0
demo_files//stock_forecasting.ipynb ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Stock Price Forecasting with ARIMA and LSTM\n",
8
+ "\n",
9
+ "## Objective\n",
10
+ "Build and compare time series forecasting models for stock price prediction.\n",
11
+ "\n",
12
+ "**Dataset**: Synthetic daily stock prices (5 years)  \n",
13
+ "**Models**: ARIMA, SARIMA, LSTM  \n",
14
+ "**Metrics**: RMSE, MAE, MAPE"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "import pandas as pd\n",
24
+ "import numpy as np\n",
25
+ "import matplotlib.pyplot as plt\n",
26
+ "from statsmodels.tsa.arima.model import ARIMA\n",
27
+ "from statsmodels.tsa.stattools import adfuller, acf, pacf\n",
28
+ "from statsmodels.graphics.tsaplots import plot_acf, plot_pacf\n",
29
+ "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
30
+ "import warnings\n",
31
+ "warnings.filterwarnings('ignore')\n",
32
+ "\n",
33
+ "# Generate synthetic stock data\n",
34
+ "np.random.seed(42)\n",
35
+ "dates = pd.date_range('2019-01-01', '2024-01-01', freq='D')\n",
36
+ "n = len(dates)\n",
37
+ "\n",
38
+ "# Generate realistic stock price with trend, seasonality, and noise\n",
39
+ "trend = np.linspace(100, 200, n)\n",
40
+ "seasonal = 10 * np.sin(np.linspace(0, 10*np.pi, n))\n",
41
+ "noise = np.random.normal(0, 5, n)\n",
42
+ "prices = trend + seasonal + noise\n",
43
+ "prices = np.maximum(prices, 50) # Ensure positive prices\n",
44
+ "\n",
45
+ "df = pd.DataFrame({\n",
46
+ " 'Date': dates,\n",
47
+ " 'Close': prices,\n",
48
+ " 'Volume': np.random.randint(1000000, 10000000, n)\n",
49
+ "})\n",
50
+ "df.set_index('Date', inplace=True)\n",
51
+ "\n",
52
+ "print(f'Dataset shape: {df.shape}')\n",
53
+ "print(f'Date range: {df.index.min()} to {df.index.max()}')\n",
54
+ "print(f'Mean price: ${df.Close.mean():.2f}')\n",
55
+ "print(f'Price volatility (std): ${df.Close.std():.2f}')"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 2,
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "# Stationarity test\n",
65
+ "result = adfuller(df['Close'])\n",
66
+ "print('ADF Statistic:', result[0])\n",
67
+ "print('p-value:', result[1])\n",
68
+ "print('Critical Values:', result[4])\n",
69
+ "\n",
70
+ "if result[1] > 0.05:\n",
71
+ " print('\\nSeries is NON-STATIONARY. Differencing required.')\n",
72
+ " df['Close_diff'] = df['Close'].diff().dropna()\n",
73
+ "else:\n",
74
+ " print('\\nSeries is STATIONARY.')\n",
75
+ "\n",
76
+ "# Calculate returns\n",
77
+ "df['Returns'] = df['Close'].pct_change() * 100\n",
78
+ "df['MA_7'] = df['Close'].rolling(window=7).mean()\n",
79
+ "df['MA_30'] = df['Close'].rolling(window=30).mean()\n",
80
+ "\n",
81
+ "print(f'\\nAverage daily return: {df.Returns.mean():.3f}%')\n",
82
+ "print(f'Return volatility: {df.Returns.std():.3f}%')"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": 3,
88
+ "metadata": {},
89
+ "outputs": [],
90
+ "source": [
91
+ "# Train-test split (80-20)\n",
92
+ "train_size = int(len(df) * 0.8)\n",
93
+ "train, test = df[:train_size], df[train_size:]\n",
94
+ "\n",
95
+ "print(f'Training set: {len(train)} days')\n",
96
+ "print(f'Test set: {len(test)} days')\n",
97
+ "\n",
98
+ "# Fit ARIMA model\n",
99
+ "model = ARIMA(train['Close'], order=(5,1,2))\n",
100
+ "model_fit = model.fit()\n",
101
+ "\n",
102
+ "print('\\nARIMA Model Summary:')\n",
103
+ "print(model_fit.summary())\n",
104
+ "\n",
105
+ "# Forecast\n",
106
+ "forecast = model_fit.forecast(steps=len(test))\n",
107
+ "test['Forecast'] = forecast.values\n",
108
+ "\n",
109
+ "# Calculate errors\n",
110
+ "rmse = np.sqrt(mean_squared_error(test['Close'], test['Forecast']))\n",
111
+ "mae = mean_absolute_error(test['Close'], test['Forecast'])\n",
112
+ "mape = np.mean(np.abs((test['Close'] - test['Forecast']) / test['Close'])) * 100\n",
113
+ "\n",
114
+ "print(f'\\nModel Performance:')\n",
115
+ "print(f'RMSE: ${rmse:.2f}')\n",
116
+ "print(f'MAE: ${mae:.2f}')\n",
117
+ "print(f'MAPE: {mape:.2f}%')"
118
+ ]
119
+ }
120
+ ],
121
+ "metadata": {
122
+ "kernelspec": {
123
+ "display_name": "Python 3",
124
+ "language": "python",
125
+ "name": "python3"
126
+ }
127
+ },
128
+ "nbformat": 4,
129
+ "nbformat_minor": 4
130
+ }