pyroleli committed on
Commit
4b90bc1
·
verified ·
1 Parent(s): 5ebaa03

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +197 -148
src/streamlit_app.py CHANGED
@@ -2,188 +2,237 @@ import streamlit as st
2
  import yfinance as yf
3
  import pandas as pd
4
  import numpy as np
5
- import tensorflow as tf
6
- from tensorflow.keras.models import Sequential
7
- from tensorflow.keras.layers import LSTM, Dense
8
  from sklearn.preprocessing import MinMaxScaler
9
- from sklearn.metrics import mean_squared_error
10
  import plotly.graph_objects as go
11
- from datetime import date, timedelta
12
 
13
  # --- CONFIGURATION ---
14
- st.set_page_config(layout="wide", page_title="AI Stock Predictor")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # --- UI HEADER ---
17
- st.title("📈 Neural Network Stock Predictor")
18
  st.markdown("""
19
- This app uses a **Long Short-Term Memory (LSTM)** neural network to predict stock prices.
20
- It first **simulates** the model against the last year's data to verify accuracy, then predicts the future.
21
  """)
22
 
23
  # --- SIDEBAR DASHBOARD ---
24
  st.sidebar.header("Configuration")
25
- ticker = st.sidebar.text_input("Enter Ticker Symbol", value="^IXIC") # Default to NASDAQ
26
- st.sidebar.caption("Examples: ^IXIC (Nasdaq), AAPL, TSLA, BTC-USD")
27
 
28
- horizon_option = st.sidebar.selectbox(
29
- "Prediction Horizon",
30
- ("Next Day", "Next Week", "Next Month", "Next Year")
 
31
  )
32
 
33
- # Map horizon to days
34
- horizon_mapping = {
35
- "Next Day": 1,
36
- "Next Week": 7,
37
- "Next Month": 30,
38
- "Next Year": 365
39
- }
40
- forecast_days = horizon_mapping[horizon_option]
41
-
42
- # --- FUNCTIONS ---
43
-
44
- @st.cache_data
45
- def load_data(symbol):
46
- """Fetches data from yfinance. We fetch 5 years to ensure enough training data."""
47
- start_date = date.today() - timedelta(days=5*365)
48
- data = yf.download(symbol, start=start_date, end=date.today())
 
 
 
 
 
 
 
 
 
 
49
  data.reset_index(inplace=True)
 
 
 
50
  return data
51
 
52
- def create_dataset(dataset, look_back=60):
53
- """Converts array of values into a dataset matrix for LSTM."""
54
- dataX, dataY = [], []
55
- for i in range(len(dataset) - look_back - 1):
56
- a = dataset[i:(i + look_back), 0]
57
- dataX.append(a)
58
- dataY.append(dataset[i + look_back, 0])
59
- return np.array(dataX), np.array(dataY)
60
-
61
- def train_lstm_model(train_data, look_back=60):
62
- """Builds and trains the LSTM Neural Network."""
63
- # Reshape input to be [samples, time steps, features]
64
- X_train, y_train = create_dataset(train_data, look_back)
65
- X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
66
-
67
- # Build LSTM Architecture
68
- model = Sequential()
69
- model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
70
- model.add(LSTM(50, return_sequences=False))
71
- model.add(Dense(25))
72
- model.add(Dense(1)) # Output layer
73
-
74
- model.compile(optimizer='adam', loss='mean_squared_error')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # Train (Epochs=1 is used here for speed in demo, increase to 20-50 for real accuracy)
77
- model.fit(X_train, y_train, batch_size=1, epochs=1, verbose=0)
78
  return model
79
 
80
- # --- MAIN EXECUTION ---
81
 
82
- data_load_state = st.text('Loading data...')
83
- try:
84
- data = load_data(ticker)
85
- data_load_state.text('Loading data... done!')
86
- except Exception as e:
87
- st.error(f"Error loading data: {e}")
88
- st.stop()
89
 
90
- if len(data) < 500:
91
- st.error("Not enough data to train the model. Please choose a stock with deeper history.")
92
  st.stop()
93
 
94
- # Prepare Data
95
- df_close = data[['Close']].values
96
- scaler = MinMaxScaler(feature_range=(0, 1))
97
- scaled_data = scaler.fit_transform(df_close)
98
-
99
- # --- SIMULATION (BACKTESTING) ---
100
- st.subheader("1. Simulation: Testing against Last Year")
101
- st.write("Training model on past data to verify performance on the last 365 days...")
102
-
103
- # Split data: Train on everything BEFORE the last 365 days, Test on LAST 365 days
104
- training_len = len(scaled_data) - 365
105
- train_data = scaled_data[0:training_len, :]
106
- test_data = scaled_data[training_len - 60:, :] # -60 to handle look_back
107
-
108
- # Train Model
109
- with st.spinner('Training Neural Network... (This may take a moment)'):
110
- model = train_lstm_model(train_data)
111
-
112
- # Predict on the "Last Year" (Simulation)
113
- x_test = []
114
- look_back = 60
115
- for i in range(60, len(test_data)):
116
- x_test.append(test_data[i-60:i, 0])
117
- x_test = np.array(x_test)
118
- x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
119
-
120
- predictions = model.predict(x_test)
121
- predictions = scaler.inverse_transform(predictions) # Scale back to normal price
122
-
123
- # Calculate Accuracy (RMSE)
124
- valid_set = data[training_len:]
125
- valid_set['Predictions'] = predictions
126
- rmse = np.sqrt(np.mean(((predictions - valid_set['Close'].values) ** 2)))
127
-
128
- # Calculate Directional Accuracy (Did it go up/down correctly?)
129
- valid_set['Actual_Change'] = valid_set['Close'].diff()
130
- valid_set['Pred_Change'] = valid_set['Predictions'].diff()
131
- valid_set['Correct_Direction'] = np.sign(valid_set['Actual_Change']) == np.sign(valid_set['Pred_Change'])
132
- accuracy_score = valid_set['Correct_Direction'].mean() * 100
133
 
134
- col1, col2 = st.columns(2)
135
- col1.metric("Simulation RMSE (Price Error)", f"{rmse:.2f}")
136
- col2.metric("Directional Accuracy", f"{accuracy_score:.2f}%")
137
 
138
- if accuracy_score > 50:
139
- st.success(f"Model passed simulation with {accuracy_score:.1f}% directional accuracy.")
140
- else:
141
- st.warning(f"Model accuracy is low ({accuracy_score:.1f}%). Stock markets are volatile!")
 
 
 
 
 
 
 
 
142
 
143
- # Plot Simulation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  fig_sim = go.Figure()
145
- fig_sim.add_trace(go.Scatter(x=data['Date'][:training_len], y=data['Close'][:training_len].values.flatten(), mode='lines', name='Training Data'))
146
- fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Close'].values.flatten(), mode='lines', name='Actual Price (Last Year)'))
147
- fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Predictions'].values.flatten(), mode='lines', name='AI Prediction (Simulation)', line=dict(dash='dot', color='orange')))
 
148
  st.plotly_chart(fig_sim, use_container_width=True)
149
 
150
-
151
  # --- FUTURE PREDICTION ---
152
  st.markdown("---")
153
  st.subheader(f"2. Future Forecast: {horizon_option}")
154
 
155
- # Retrain model on ALL data for best future prediction
156
- with st.spinner('Refining model with full data for future prediction...'):
157
- full_model = train_lstm_model(scaled_data)
158
-
159
- # Predict Future Steps
160
- # We start with the last 60 days of known data
161
- last_60_days = scaled_data[-60:]
162
- current_batch = last_60_days.reshape((1, 60, 1))
163
- future_predictions = []
164
-
165
- for i in range(forecast_days):
166
- # Get prediction (scaled)
167
- current_pred = full_model.predict(current_batch)[0]
168
- future_predictions.append(current_pred)
169
-
170
- # Update batch to include new prediction, remove oldest day
171
- current_pred_reshaped = current_pred.reshape((1, 1, 1))
172
- current_batch = np.append(current_batch[:, 1:, :], current_pred_reshaped, axis=1)
173
-
174
- # Inverse transform to get real prices
175
- future_predictions = scaler.inverse_transform(future_predictions)
 
 
 
 
 
 
 
 
 
 
176
 
177
- # Create Future Dates
178
- last_date = data['Date'].iloc[-1]
179
- future_dates = [last_date + timedelta(days=x) for x in range(1, forecast_days + 1)]
180
 
181
- # Plot Future
182
  fig_future = go.Figure()
183
- # Show last 365 days of context
184
- fig_future.add_trace(go.Scatter(x=data['Date'][-365:], y=data['Close'][-365:].values.flatten(), mode='lines', name='Historical Close (Last Year)'))
185
- fig_future.add_trace(go.Scatter(x=future_dates, y=future_predictions.flatten(), mode='lines', name='AI Future Prediction', line=dict(dash='dot', color='green', width=3)))
186
- fig_future.update_layout(title=f"Prediction for next {forecast_days} days")
187
- st.plotly_chart(fig_future, use_container_width=True)
188
-
189
- st.write("Note: Long-term predictions (Year) usually revert to a trend line as error accumulates. Short-term (Day/Week) is generally more reliable.")
 
2
  import yfinance as yf
3
  import pandas as pd
4
  import numpy as np
5
+ import torch
6
+ import torch.nn as nn
 
7
  from sklearn.preprocessing import MinMaxScaler
 
8
  import plotly.graph_objects as go
9
+ from datetime import datetime, timedelta
10
 
11
  # --- CONFIGURATION ---
12
st.set_page_config(page_title="PyTorch AI Stock Predictor", layout="wide")

# --- DEVICE CONFIG ---
# Run on CUDA when torch reports it as available; otherwise fall back to CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
17
+
18
+ # --- PYTORCH LSTM MODEL ---
19
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_layer_size: Width of the LSTM hidden state.
        output_size: Number of values predicted per input sequence.
    """

    def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        # batch_first=True: inputs are laid out as (batch, time, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_layer_size,
            batch_first=True,
        )
        self.linear = nn.Linear(
            in_features=hidden_layer_size,
            out_features=output_size,
        )

    def forward(self, input_seq):
        """Return one prediction per sequence, shaped (batch, output_size)."""
        hidden_states = self.lstm(input_seq)[0]
        # Only the hidden state of the final time step feeds the read-out.
        final_step = hidden_states[:, -1, :]
        return self.linear(final_step)
31
 
32
  # --- UI HEADER ---
33
st.title(body="🧠 PyTorch Recurrent Neural Network Predictor")
st.markdown(body="""
**Powered by PyTorch.** This app uses a Recurrent Neural Network (LSTM) to learn sequential patterns.
It supports **Intraday (Live)** data and simulates performance before predicting.
""")

# --- SIDEBAR DASHBOARD ---
st.sidebar.header("Configuration")
ticker = st.sidebar.text_input(label="Enter Ticker", value="^IXIC")

# Bar size of the downloaded history; the label is matched verbatim later on.
interval_option = st.sidebar.selectbox(
    label="Time Interval",
    options=("1 Minute (Live)", "1 Hour", "1 Day"),
)

# Forecast length; every label has the form "Next <N> Steps".
horizon_option = st.sidebar.selectbox(
    label="Prediction Horizon",
    options=("Next 30 Steps", "Next 60 Steps", "Next 90 Steps"),
)
# The step count is the second space-separated token of the chosen label.
future_steps = int(horizon_option.split(" ")[1])
55
+
56
+ # --- DATA LOADING ---
57
@st.cache_data(ttl=60)  # Cached results expire after 60 seconds
def load_data(symbol, interval):
    """Download price history for ``symbol`` at the selected bar size.

    The ``interval`` label selects the yfinance request:
    "1 Minute (Live)" -> period 7d / interval 1m,
    "1 Hour" -> period 730d / interval 1h,
    any other value -> period 5y / interval 1d.

    Returns:
        The downloaded frame with its index moved into a column and a
        'Datetime' column renamed to 'Date', or None when the download
        came back empty.
    """
    intraday_requests = {
        "1 Minute (Live)": {"period": "7d", "interval": "1m"},
        "1 Hour": {"period": "730d", "interval": "1h"},
    }
    # Every label that is not an intraday option falls back to daily bars.
    request = intraday_requests.get(interval, {"period": "5y", "interval": "1d"})
    data = yf.download(symbol, **request)

    if data.empty:
        return None

    data = data.reset_index()
    # Standardize the timestamp column name to 'Date' when it arrives as 'Datetime'.
    if 'Datetime' in data.columns:
        data = data.rename(columns={'Datetime': 'Date'})
    return data
80
 
81
+ # --- HELPER FUNCTIONS ---
82
def create_sequences(data, seq_length):
    """Slice a series into sliding input windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``, so ``len(data) - seq_length`` pairs are produced.

    Args:
        data: Array-like indexed along its first axis by time step.
        seq_length: Number of consecutive steps in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays shaped
        ``(n, seq_length, *data.shape[1:])`` and ``(n, *data.shape[1:])``.
        When ``data`` has no more than ``seq_length`` rows, ``n`` is 0 but the
        remaining dimensions are kept, so callers can check ``len(xs) == 0``
        or rely on ``xs.shape`` without special-casing.
    """
    data = np.asarray(data)
    n_samples = max(len(data) - seq_length, 0)
    # Row i holds the positions i .. i + seq_length - 1; indexing with this
    # grid gathers every window in one vectorized step instead of a Python loop.
    window_index = np.arange(n_samples)[:, None] + np.arange(seq_length)
    xs = data[window_index]
    # Copy so the targets never alias the caller's array.
    ys = data[seq_length:seq_length + n_samples].copy()
    return xs, ys
90
+
91
def train_pytorch_model(train_data, seq_length=60, epochs=15):
    """Fit a fresh ``LSTMModel`` to next-step prediction on ``train_data``.

    Each epoch is a single full-batch Adam step (lr=0.001) on the MSE between
    the model output and the value that follows each window. A Streamlit
    progress bar is advanced once per epoch and removed when training ends,
    including when it ends with an exception.

    Args:
        train_data: Series handed to ``create_sequences`` to build windows.
        seq_length: Number of steps in each input window.
        epochs: Number of full-batch optimization steps.

    Returns:
        The trained model, still in training mode (callers switch to
        ``eval()`` themselves).

    Raises:
        ValueError: If ``train_data`` is too short to yield a single window.
    """
    # Prepare Data
    X_train, y_train = create_sequences(train_data, seq_length)
    if len(X_train) == 0:
        # Fail here with a readable message instead of an opaque shape error
        # from inside the LSTM.
        raise ValueError(
            f"Need more than {seq_length} training steps to build a sequence; "
            f"got {len(train_data)}."
        )

    # Convert to PyTorch Tensors
    X_train = torch.from_numpy(X_train).float().to(device)
    y_train = torch.from_numpy(y_train).float().to(device)

    # Initialize Model
    model = LSTMModel().to(device)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training Loop
    model.train()
    progress_bar = st.progress(0)
    try:
        for epoch in range(epochs):
            optimizer.zero_grad()
            y_pred = model(X_train)
            loss = loss_function(y_pred, y_train)
            loss.backward()
            optimizer.step()
            progress_bar.progress((epoch + 1) / epochs)
    finally:
        # Always remove the widget so a failed run leaves no stale bar behind.
        progress_bar.empty()

    return model
121
 
122
+ # --- MAIN LOGIC ---
123
 
124
# 1. Load Data
st.write(f"Fetching data for **{ticker}** ({interval_option})...")
data = load_data(ticker, interval_option)

# Halt the script when the download is missing or has fewer than 100 rows.
has_enough_rows = data is not None and len(data) >= 100
if not has_enough_rows:
    st.error("Not enough data found. For '1 Minute', markets must be open or data must exist within last 7 days.")
    st.stop()
131
 
132
# 2. Preprocessing
df_close = data[['Close']].values.astype(float)
# NOTE(review): the scaler is fitted on the whole series, so the hold-out
# window's min/max leak into the simulation. Consider fitting on the
# training slice only.
scaler = MinMaxScaler(feature_range=(-1, 1))
data_scaled = scaler.fit_transform(df_close)

# 3. Define Simulation Window
seq_length = 60  # Look back 60 steps

# Hold-out sizes are counted in bars of the chosen interval.
if interval_option == "1 Minute (Live)":
    test_size = 390
    sim_title = "Last 24 Trading Hours"
elif interval_option == "1 Hour":
    test_size = 24 * 30
    sim_title = "Last Month (Hourly)"
else:
    test_size = 365
    sim_title = "Last Year (Daily)"

# Training needs more than seq_length rows to form even one window. Without
# this check a short history makes train_size negative (which silently slices
# from the end) or leaves too few rows and crashes inside the model.
total_rows = len(data_scaled)
if total_rows - test_size <= seq_length:
    # Fall back to holding out the most recent fifth of the history.
    test_size = total_rows // 5
    if test_size < 2 or total_rows - test_size <= seq_length:
        st.error(
            f"Not enough data to train and simulate: need more than {seq_length} "
            "training steps plus at least 2 hold-out steps."
        )
        st.stop()
    sim_title = f"Last {test_size} Steps"
    st.warning(
        "History is shorter than the default simulation window; "
        f"holding out the last {test_size} steps instead."
    )

train_size = total_rows - test_size
train_set = data_scaled[:train_size]
test_set = data_scaled[train_size:]

# 4. Train & Simulate
st.subheader(f"1. Simulation: Testing Accuracy on {sim_title}")
st.caption("Training PyTorch Model... (This uses recurrent backpropagation)")

model = train_pytorch_model(train_set, seq_length=seq_length, epochs=20)

# Evaluation
model.eval()
# Prepend the last seq_length training rows so the first hold-out step has a
# full look-back window; this yields exactly one window per hold-out row.
inputs = data_scaled[train_size - seq_length:]
X_test, _ = create_sequences(inputs, seq_length)
X_test = torch.from_numpy(X_test).float().to(device)

with torch.no_grad():
    predictions = model(X_test).cpu().numpy()
predictions = scaler.inverse_transform(predictions)

# Metrics (computed in original price units)
actuals = scaler.inverse_transform(test_set)
rmse = np.sqrt(np.mean((predictions - actuals) ** 2))

# Directional Accuracy: share of steps where the predicted move has the same
# sign as the actual move. test_size >= 2 guarantees at least one step.
diff_actual = np.diff(actuals.flatten())
diff_pred = np.diff(predictions.flatten())
correct_direction = np.sum(np.sign(diff_actual) == np.sign(diff_pred))
acc_score = (correct_direction / len(diff_actual)) * 100

col1, col2 = st.columns(2)
col1.metric("Simulation Error (RMSE)", f"{rmse:.2f}")
col2.metric("Directional Accuracy", f"{acc_score:.2f}%")

# Graph Simulation
fig_sim = go.Figure()
# Plot only the hold-out period to keep the graph clean
sim_dates = data['Date'].iloc[train_size:]
fig_sim.add_trace(go.Scatter(x=sim_dates, y=actuals.flatten(), mode='lines', name='Actual Price'))
fig_sim.add_trace(go.Scatter(x=sim_dates, y=predictions.flatten(), mode='lines', name='AI Prediction', line=dict(dash='dot', color='orange')))
st.plotly_chart(fig_sim, use_container_width=True)
192
 
 
193
# --- FUTURE PREDICTION ---
st.markdown("---")
st.subheader(f"2. Future Forecast: {horizon_option}")

# Retrain on the FULL dataset before forecasting
with st.spinner('Retraining on full dataset for future generation...'):
    full_model = train_pytorch_model(data_scaled, seq_length=seq_length, epochs=25)

# Autoregressive roll-out: each prediction is appended to the window and the
# oldest step dropped, so later steps are fed earlier predictions.
full_model.eval()
current_seq = torch.from_numpy(data_scaled[-seq_length:]).float().to(device).unsqueeze(0)
scaled_forecast = []
with torch.no_grad():
    for _ in range(future_steps):
        next_step = full_model(current_seq)
        scaled_forecast.append(next_step.item())
        # unsqueeze(1) adds the time axis so the prediction can be
        # concatenated onto the window along dim=1.
        current_seq = torch.cat((current_seq[:, 1:, :], next_step.unsqueeze(1)), dim=1)

# Inverse Scale back to price units
future_preds = scaler.inverse_transform(np.array(scaled_forecast).reshape(-1, 1))

# Create Future Dates/Times: one bar width per forecast step; any label that
# is not an intraday option advances by one day.
intraday_steps = {
    "1 Minute (Live)": timedelta(minutes=1),
    "1 Hour": timedelta(hours=1),
}
time_delta = intraday_steps.get(interval_option, timedelta(days=1))
last_time = data['Date'].iloc[-1]
future_dates = [last_time + time_delta * step for step in range(1, future_steps + 1)]

# Graph Future
fig_future = go.Figure()
# Show the tail of the historical data for context
context_points = 100
history_dates = data['Date'].tail(context_points)
history_close = data['Close'].tail(context_points).to_numpy().ravel()
fig_future.add_trace(go.Scatter(x=history_dates, y=history_close, mode='lines', name='History'))
fig_future.add_trace(go.Scatter(x=future_dates, y=future_preds.flatten(), mode='lines', name='Future Forecast', line=dict(dash='dot', color='green', width=3)))
fig_future.update_layout(title=f"Forecast for next {future_steps} intervals")
st.plotly_chart(fig_future, use_container_width=True)