pyroleli committed on
Commit
358704d
·
verified ·
1 Parent(s): 4b90bc1

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +148 -197
src/streamlit_app.py CHANGED
@@ -2,237 +2,188 @@ import streamlit as st
2
  import yfinance as yf
3
  import pandas as pd
4
  import numpy as np
5
- import torch
6
- import torch.nn as nn
 
7
  from sklearn.preprocessing import MinMaxScaler
 
8
  import plotly.graph_objects as go
9
- from datetime import datetime, timedelta
10
 
11
  # --- CONFIGURATION ---
12
- st.set_page_config(layout="wide", page_title="PyTorch AI Stock Predictor")
13
-
14
- # --- DEVICE CONFIG ---
15
- # Use GPU if available (on Hugging Face, this usually defaults to CPU unless paid)
16
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
17
-
18
- # --- PYTORCH LSTM MODEL ---
19
- class LSTMModel(nn.Module):
20
- def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
21
- super().__init__()
22
- self.hidden_layer_size = hidden_layer_size
23
- self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
24
- self.linear = nn.Linear(hidden_layer_size, output_size)
25
-
26
- def forward(self, input_seq):
27
- lstm_out, _ = self.lstm(input_seq)
28
- # We only care about the last time step output
29
- predictions = self.linear(lstm_out[:, -1, :])
30
- return predictions
31
 
32
  # --- UI HEADER ---
33
- st.title("🧠 PyTorch Recurrent Neural Network Predictor")
34
  st.markdown("""
35
- **Powered by PyTorch.** This app uses a Recurrent Neural Network (LSTM) to learn sequential patterns.
36
- It supports **Intraday (Live)** data and simulates performance before predicting.
37
  """)
38
 
39
  # --- SIDEBAR DASHBOARD ---
40
  st.sidebar.header("Configuration")
41
- ticker = st.sidebar.text_input("Enter Ticker", value="^IXIC")
 
42
 
43
- # Interval Selection (Live/Intraday options added)
44
- interval_option = st.sidebar.selectbox(
45
- "Time Interval",
46
- ("1 Minute (Live)", "1 Hour", "1 Day")
47
- )
48
-
49
- # Horizon Selection
50
  horizon_option = st.sidebar.selectbox(
51
- "Prediction Horizon",
52
- ("Next 30 Steps", "Next 60 Steps", "Next 90 Steps")
53
  )
54
- future_steps = int(horizon_option.split(" ")[1])
55
-
56
- # --- DATA LOADING ---
57
- @st.cache_data(ttl=60) # Cache clears every 60 seconds for "Live" feel
58
- def load_data(symbol, interval):
59
- """
60
- Dynamic data loader.
61
- - 1m: Max 7 days history (Yahoo limit)
62
- - 1h: Max 730 days history
63
- - 1d: Max 5 years
64
- """
65
- if interval == "1 Minute (Live)":
66
- data = yf.download(symbol, period="7d", interval="1m")
67
- elif interval == "1 Hour":
68
- data = yf.download(symbol, period="730d", interval="1h")
69
- else: # 1 Day
70
- data = yf.download(symbol, period="5y", interval="1d")
71
-
72
- if data.empty:
73
- return None
74
-
75
  data.reset_index(inplace=True)
76
- # Standardize column name for Date/Time
77
- if 'Datetime' in data.columns:
78
- data.rename(columns={'Datetime': 'Date'}, inplace=True)
79
  return data
80
 
81
- # --- HELPER FUNCTIONS ---
82
- def create_sequences(data, seq_length):
83
- xs, ys = [], []
84
- for i in range(len(data) - seq_length):
85
- x = data[i:(i + seq_length)]
86
- y = data[i + seq_length]
87
- xs.append(x)
88
- ys.append(y)
89
- return np.array(xs), np.array(ys)
90
-
91
- def train_pytorch_model(train_data, seq_length=60, epochs=15):
92
- """
93
- Trains the PyTorch LSTM model.
94
- Using more epochs = better accuracy but slower speed.
95
- """
96
- # Prepare Data
97
- X_train, y_train = create_sequences(train_data, seq_length)
98
-
99
- # Convert to PyTorch Tensors
100
- X_train = torch.from_numpy(X_train).float().to(device)
101
- y_train = torch.from_numpy(y_train).float().to(device)
102
-
103
- # Initialize Model
104
- model = LSTMModel().to(device)
105
- loss_function = nn.MSELoss()
106
- optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
107
-
108
- # Training Loop
109
- model.train()
110
- progress_bar = st.progress(0)
111
- for i in range(epochs):
112
- optimizer.zero_grad()
113
- y_pred = model(X_train)
114
- single_loss = loss_function(y_pred, y_train)
115
- single_loss.backward()
116
- optimizer.step()
117
- progress_bar.progress((i + 1) / epochs)
118
 
119
- progress_bar.empty()
 
120
  return model
121
 
122
- # --- MAIN LOGIC ---
123
 
124
- # 1. Load Data
125
- st.write(f"Fetching data for **{ticker}** ({interval_option})...")
126
- data = load_data(ticker, interval_option)
127
-
128
- if data is None or len(data) < 100:
129
- st.error("Not enough data found. For '1 Minute', markets must be open or data must exist within last 7 days.")
130
  st.stop()
131
 
132
- # 2. Preprocessing
133
- df_close = data[['Close']].values.astype(float)
134
- scaler = MinMaxScaler(feature_range=(-1, 1)) # LSTM often prefers -1 to 1 or 0 to 1
135
- data_scaled = scaler.fit_transform(df_close)
136
-
137
- # 3. Define Simulation Window
138
- # If 1 min data, we can't simulate "Last Year". We simulate "Last 24 Hours" (approx 390 trading minutes)
139
- if interval_option == "1 Minute (Live)":
140
- test_size = 390 # Last trading day
141
- sim_title = "Last 24 Trading Hours"
142
- elif interval_option == "1 Hour":
143
- test_size = 24 * 30 # Approx 1 month
144
- sim_title = "Last Month (Hourly)"
145
- else:
146
- test_size = 365 # Last Year
147
- sim_title = "Last Year (Daily)"
148
-
149
- train_size = len(data_scaled) - test_size
150
- train_set = data_scaled[:train_size]
151
- test_set = data_scaled[train_size:]
152
-
153
- # 4. Train & Simulate
154
- st.subheader(f"1. Simulation: Testing Accuracy on {sim_title}")
155
- st.caption("Training PyTorch Model... (This uses recurrent backpropagation)")
156
-
157
- seq_length = 60 # Look back 60 steps
158
- model = train_pytorch_model(train_set, seq_length=seq_length, epochs=20) # Increased epochs for better accuracy
159
-
160
- # Evaluation
161
- model.eval()
162
- inputs = data_scaled[len(data_scaled) - len(test_set) - seq_length:]
163
- X_test, y_test = create_sequences(inputs, seq_length)
164
- X_test = torch.from_numpy(X_test).float().to(device)
165
-
166
- with torch.no_grad():
167
- predictions = model(X_test).cpu().numpy()
168
- predictions = scaler.inverse_transform(predictions)
169
-
170
- # Metrics
171
- actuals = scaler.inverse_transform(test_set)
172
- mse = np.mean((predictions - actuals) ** 2)
173
- rmse = np.sqrt(mse)
174
 
175
- # Directional Accuracy
176
- diff_actual = np.diff(actuals.flatten())
177
- diff_pred = np.diff(predictions.flatten())
178
- correct_direction = np.sum(np.sign(diff_actual) == np.sign(diff_pred))
179
- acc_score = (correct_direction / len(diff_actual)) * 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  col1, col2 = st.columns(2)
182
- col1.metric("Simulation Error (RMSE)", f"{rmse:.2f}")
183
- col2.metric("Directional Accuracy", f"{acc_score:.2f}%")
184
 
185
- # Graph Simulation
 
 
 
 
 
186
  fig_sim = go.Figure()
187
- # Plot only the relevant simulation period to keep graph clean
188
- sim_dates = data['Date'][train_size:]
189
- fig_sim.add_trace(go.Scatter(x=sim_dates, y=actuals.flatten(), mode='lines', name='Actual Price'))
190
- fig_sim.add_trace(go.Scatter(x=sim_dates, y=predictions.flatten(), mode='lines', name='AI Prediction', line=dict(dash='dot', color='orange')))
191
  st.plotly_chart(fig_sim, use_container_width=True)
192
 
 
193
  # --- FUTURE PREDICTION ---
194
  st.markdown("---")
195
  st.subheader(f"2. Future Forecast: {horizon_option}")
196
 
197
- # Retrain on FULL dataset
198
- with st.spinner('Retraining on full dataset for future generation...'):
199
- full_model = train_pytorch_model(data_scaled, seq_length=seq_length, epochs=25)
200
-
201
- # Generate Future Steps
202
- future_preds = []
203
- current_seq = torch.from_numpy(data_scaled[-seq_length:]).float().to(device).unsqueeze(0) # Shape: [1, 60, 1]
204
-
205
- full_model.eval()
206
- for _ in range(future_steps):
207
- with torch.no_grad():
208
- pred = full_model(current_seq)
209
- future_preds.append(pred.item())
210
-
211
- # Update sequence: remove first item, add new prediction
212
- # Ensure pred is shaped [1, 1, 1] to match dims
213
- pred_reshaped = pred.unsqueeze(1)
214
- current_seq = torch.cat((current_seq[:, 1:, :], pred_reshaped), dim=1)
215
-
216
- # Inverse Scale
217
- future_preds = np.array(future_preds).reshape(-1, 1)
218
- future_preds = scaler.inverse_transform(future_preds)
219
-
220
- # Create Future Dates/Times
221
- last_time = data['Date'].iloc[-1]
222
- if interval_option == "1 Minute (Live)":
223
- time_delta = timedelta(minutes=1)
224
- elif interval_option == "1 Hour":
225
- time_delta = timedelta(hours=1)
226
- else:
227
- time_delta = timedelta(days=1)
228
 
229
- future_dates = [last_time + i * time_delta for i in range(1, future_steps + 1)]
 
 
230
 
231
- # Graph Future
232
  fig_future = go.Figure()
233
- # Show tail of historical data for context
234
- context_points = 100
235
- fig_future.add_trace(go.Scatter(x=data['Date'][-context_points:], y=data['Close'][-context_points:].values.flatten(), mode='lines', name='History'))
236
- fig_future.add_trace(go.Scatter(x=future_dates, y=future_preds.flatten(), mode='lines', name='Future Forecast', line=dict(dash='dot', color='green', width=3)))
237
- fig_future.update_layout(title=f"Forecast for next {future_steps} intervals")
238
- st.plotly_chart(fig_future, use_container_width=True)
 
 
2
  import yfinance as yf
3
  import pandas as pd
4
  import numpy as np
5
+ import tensorflow as tf
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.layers import LSTM, Dense
8
  from sklearn.preprocessing import MinMaxScaler
9
+ from sklearn.metrics import mean_squared_error
10
  import plotly.graph_objects as go
11
+ from datetime import date, timedelta
12
 
13
  # --- CONFIGURATION ---
14
+ st.set_page_config(layout="wide", page_title="AI Stock Predictor")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # --- UI HEADER ---
17
+ st.title("📈 Neural Network Stock Predictor")
18
  st.markdown("""
19
+ This app uses a **Long Short-Term Memory (LSTM)** neural network to predict stock prices.
20
+ It first **simulates** the model against the last year's data to verify accuracy, then predicts the future.
21
  """)
22
 
23
  # --- SIDEBAR DASHBOARD ---
24
  st.sidebar.header("Configuration")
25
+ ticker = st.sidebar.text_input("Enter Ticker Symbol", value="^IXIC") # Default to NASDAQ
26
+ st.sidebar.caption("Examples: ^IXIC (Nasdaq), AAPL, TSLA, BTC-USD")
27
 
 
 
 
 
 
 
 
28
  horizon_option = st.sidebar.selectbox(
29
+ "Prediction Horizon",
30
+ ("Next Day", "Next Week", "Next Month", "Next Year")
31
  )
32
+
33
+ # Map horizon to days
34
+ horizon_mapping = {
35
+ "Next Day": 1,
36
+ "Next Week": 7,
37
+ "Next Month": 30,
38
+ "Next Year": 365
39
+ }
40
+ forecast_days = horizon_mapping[horizon_option]
41
+
42
+ # --- FUNCTIONS ---
43
+
44
+ @st.cache_data
45
+ def load_data(symbol):
46
+ """Fetches data from yfinance. We fetch 5 years to ensure enough training data."""
47
+ start_date = date.today() - timedelta(days=5*365)
48
+ data = yf.download(symbol, start=start_date, end=date.today())
 
 
 
 
49
  data.reset_index(inplace=True)
 
 
 
50
  return data
51
 
52
+ def create_dataset(dataset, look_back=60):
53
+ """Converts array of values into a dataset matrix for LSTM."""
54
+ dataX, dataY = [], []
55
+ for i in range(len(dataset) - look_back - 1):
56
+ a = dataset[i:(i + look_back), 0]
57
+ dataX.append(a)
58
+ dataY.append(dataset[i + look_back, 0])
59
+ return np.array(dataX), np.array(dataY)
60
+
61
+ def train_lstm_model(train_data, look_back=60):
62
+ """Builds and trains the LSTM Neural Network."""
63
+ # Reshape input to be [samples, time steps, features]
64
+ X_train, y_train = create_dataset(train_data, look_back)
65
+ X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
66
+
67
+ # Build LSTM Architecture
68
+ model = Sequential()
69
+ model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
70
+ model.add(LSTM(50, return_sequences=False))
71
+ model.add(Dense(25))
72
+ model.add(Dense(1)) # Output layer
73
+
74
+ model.compile(optimizer='adam', loss='mean_squared_error')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
+ # Train (Epochs=1 is used here for speed in demo, increase to 20-50 for real accuracy)
77
+ model.fit(X_train, y_train, batch_size=1, epochs=1, verbose=0)
78
  return model
79
 
80
+ # --- MAIN EXECUTION ---
81
 
82
+ data_load_state = st.text('Loading data...')
83
+ try:
84
+ data = load_data(ticker)
85
+ data_load_state.text('Loading data... done!')
86
+ except Exception as e:
87
+ st.error(f"Error loading data: {e}")
88
  st.stop()
89
 
90
+ if len(data) < 500:
91
+ st.error("Not enough data to train the model. Please choose a stock with deeper history.")
92
+ st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
+ # Prepare Data
95
+ df_close = data[['Close']].values
96
+ scaler = MinMaxScaler(feature_range=(0, 1))
97
+ scaled_data = scaler.fit_transform(df_close)
98
+
99
+ # --- SIMULATION (BACKTESTING) ---
100
+ st.subheader("1. Simulation: Testing against Last Year")
101
+ st.write("Training model on past data to verify performance on the last 365 days...")
102
+
103
+ # Split data: Train on everything BEFORE the last 365 days, Test on LAST 365 days
104
+ training_len = len(scaled_data) - 365
105
+ train_data = scaled_data[0:training_len, :]
106
+ test_data = scaled_data[training_len - 60:, :] # -60 to handle look_back
107
+
108
+ # Train Model
109
+ with st.spinner('Training Neural Network... (This may take a moment)'):
110
+ model = train_lstm_model(train_data)
111
+
112
+ # Predict on the "Last Year" (Simulation)
113
+ x_test = []
114
+ look_back = 60
115
+ for i in range(60, len(test_data)):
116
+ x_test.append(test_data[i-60:i, 0])
117
+ x_test = np.array(x_test)
118
+ x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
119
+
120
+ predictions = model.predict(x_test)
121
+ predictions = scaler.inverse_transform(predictions) # Scale back to normal price
122
+
123
+ # Calculate Accuracy (RMSE)
124
+ valid_set = data[training_len:]
125
+ valid_set['Predictions'] = predictions
126
+ rmse = np.sqrt(np.mean(((predictions - valid_set['Close'].values) ** 2)))
127
+
128
+ # Calculate Directional Accuracy (Did it go up/down correctly?)
129
+ valid_set['Actual_Change'] = valid_set['Close'].diff()
130
+ valid_set['Pred_Change'] = valid_set['Predictions'].diff()
131
+ valid_set['Correct_Direction'] = np.sign(valid_set['Actual_Change']) == np.sign(valid_set['Pred_Change'])
132
+ accuracy_score = valid_set['Correct_Direction'].mean() * 100
133
 
134
  col1, col2 = st.columns(2)
135
+ col1.metric("Simulation RMSE (Price Error)", f"{rmse:.2f}")
136
+ col2.metric("Directional Accuracy", f"{accuracy_score:.2f}%")
137
 
138
+ if accuracy_score > 50:
139
+ st.success(f"Model passed simulation with {accuracy_score:.1f}% directional accuracy.")
140
+ else:
141
+ st.warning(f"Model accuracy is low ({accuracy_score:.1f}%). Stock markets are volatile!")
142
+
143
+ # Plot Simulation
144
  fig_sim = go.Figure()
145
+ fig_sim.add_trace(go.Scatter(x=data['Date'][:training_len], y=data['Close'][:training_len].values.flatten(), mode='lines', name='Training Data'))
146
+ fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Close'].values.flatten(), mode='lines', name='Actual Price (Last Year)'))
147
+ fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Predictions'].values.flatten(), mode='lines', name='AI Prediction (Simulation)', line=dict(dash='dot', color='orange')))
 
148
  st.plotly_chart(fig_sim, use_container_width=True)
149
 
150
+
151
  # --- FUTURE PREDICTION ---
152
  st.markdown("---")
153
  st.subheader(f"2. Future Forecast: {horizon_option}")
154
 
155
+ # Retrain model on ALL data for best future prediction
156
+ with st.spinner('Refining model with full data for future prediction...'):
157
+ full_model = train_lstm_model(scaled_data)
158
+
159
+ # Predict Future Steps
160
+ # We start with the last 60 days of known data
161
+ last_60_days = scaled_data[-60:]
162
+ current_batch = last_60_days.reshape((1, 60, 1))
163
+ future_predictions = []
164
+
165
+ for i in range(forecast_days):
166
+ # Get prediction (scaled)
167
+ current_pred = full_model.predict(current_batch)[0]
168
+ future_predictions.append(current_pred)
169
+
170
+ # Update batch to include new prediction, remove oldest day
171
+ current_pred_reshaped = current_pred.reshape((1, 1, 1))
172
+ current_batch = np.append(current_batch[:, 1:, :], current_pred_reshaped, axis=1)
173
+
174
+ # Inverse transform to get real prices
175
+ future_predictions = scaler.inverse_transform(future_predictions)
 
 
 
 
 
 
 
 
 
 
176
 
177
+ # Create Future Dates
178
+ last_date = data['Date'].iloc[-1]
179
+ future_dates = [last_date + timedelta(days=x) for x in range(1, forecast_days + 1)]
180
 
181
+ # Plot Future
182
  fig_future = go.Figure()
183
+ # Show last 365 days of context
184
+ fig_future.add_trace(go.Scatter(x=data['Date'][-365:], y=data['Close'][-365:].values.flatten(), mode='lines', name='Historical Close (Last Year)'))
185
+ fig_future.add_trace(go.Scatter(x=future_dates, y=future_predictions.flatten(), mode='lines', name='AI Future Prediction', line=dict(dash='dot', color='green', width=3)))
186
+ fig_future.update_layout(title=f"Prediction for next {forecast_days} days")
187
+ st.plotly_chart(fig_future, use_container_width=True)
188
+
189
+ st.write("Note: Long-term predictions (Year) usually revert to a trend line as error accumulates. Short-term (Day/Week) is generally more reliable.")