Div0013 committed on
Commit
a9bcd08
·
1 Parent(s): 9f578b5

Add complete project files for stock predictor

Browse files
app.py DELETED
@@ -1,92 +0,0 @@
1
- # app.py
2
- import gradio as gr
3
- import json
4
- import traceback
5
-
6
- # Import necessary functions and the model class from your original script
7
- # Make sure predict_stock_prices.py is in the same directory
8
- from predict_stock_prices import (
9
- BiLSTMModel, # Need to import the class for joblib/torch to load model correctly
10
- predict_stock_prices,
11
- batch_predict_to_json # Assuming this function takes the list and paths
12
- )
13
-
14
- # --- Model Configuration ---
15
- # These paths should correspond to the files uploaded to your Hugging Face Space
16
- MODEL_PATH = "bilstm_stock_model.pth"
17
- SCALER_PATH = "scaler_diff.pkl"
18
- METADATA_PATH = "model_metadata.pkl"
19
-
20
- # --- Gradio Interface Function ---
21
def run_prediction(ticker_string):
    """
    Gradio callback: turn a comma-separated ticker string into predictions.

    The input is split on commas; each piece is stripped and upper-cased, and
    empty pieces are dropped. The cleaned list is handed to
    ``batch_predict_to_json`` together with the module-level artifact paths.

    Returns the dictionary produced by ``batch_predict_to_json`` on success,
    or a ``{"error": ...}`` dictionary when the input is empty or when the
    prediction call raises.
    """
    # Guard: nothing typed at all (also covers None).
    if not ticker_string:
        return {"error": "Please enter at least one ticker symbol."}

    # Normalise the user input into a clean list of symbols.
    symbols = []
    for piece in ticker_string.split(','):
        cleaned = piece.strip()
        if cleaned:
            symbols.append(cleaned.upper())

    # Guard: only commas / whitespace were typed.
    if not symbols:
        return {"error": "No valid ticker symbols entered."}

    print(f"Received request for tickers: {symbols}")  # Log received tickers

    try:
        predictions = batch_predict_to_json(
            ticker_symbols=symbols,
            model_path=MODEL_PATH,
            scaler_path=SCALER_PATH,
            metadata_path=METADATA_PATH,
        )
        print(f"Prediction successful for: {list(predictions.keys())}")  # Log success

        # Per-ticker failures are reported inside the payload; log them but
        # still return the complete dictionary to the caller.
        errors = {}
        for symbol, payload in predictions.items():
            if isinstance(payload, dict) and 'error' in payload:
                errors[symbol] = payload
        if errors:
            print(f"Errors occurred during prediction: {errors}")  # Log errors
        return predictions

    except FileNotFoundError as e:
        print(f"Error: Model file not found - {e}")
        return {"error": f"Required file not found: {e}. Ensure model, scaler, and metadata files are uploaded correctly."}
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        traceback.print_exc()  # Full traceback goes to the logs
        return {"error": f"An unexpected error occurred: {str(e)}"}
60
-
61
- # --- Build Gradio Interface ---
62
- # Use Markdown for a richer description
63
# Markdown shown above the input box in the Gradio UI.
description = """
## BiLSTM Stock Price Predictor (-15y / +15y)

Enter one or more stock ticker symbols (e.g., `AAPL`, `MSFT`, `GOOGL`), separated by commas.
The model will fetch historical data, predict future prices for the next 15 years using a BiLSTM model combined with Geometric Brownian Motion (GBM),
and return the historical data for the past 15 years (or less if unavailable) combined with the predictions.

**Note:**
* Predictions are based on historical 'Close' prices and involve inherent uncertainty. **This is not financial advice.**
* Fetching data and running predictions might take a moment, especially for multiple tickers.
* Ensure ticker symbols are valid on Yahoo Finance.
"""

# Single-line text input that receives the comma-separated ticker list.
_ticker_input = gr.Textbox(
    lines=1,
    placeholder="Enter Ticker Symbols (e.g., AAPL, MSFT, GOOGL)",
    label="Ticker Symbols (comma-separated)",
)

# JSON viewer for the dictionary returned by run_prediction.
_results_view = gr.JSON(label="Prediction Results (Historical + Future Prices)")

iface = gr.Interface(
    fn=run_prediction,
    inputs=_ticker_input,
    outputs=_results_view,
    title="Stock Price Prediction",
    description=description,
    examples=[["AAPL"], ["MSFT,GOOGL,NVDA"]],
    allow_flagging='never',  # Optional: disable flagging
)

# --- Launch the App ---
if __name__ == "__main__":
    iface.launch()  # Share=True is not needed when deploying on Spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bilstm_stock_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f05bab113734f62c3b0cfbeb7ff04c0327c3005fff294baae440280c2babf46
3
+ size 538337
model_metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5bf4b66d3a14b21c90a6c155f39f22294bbb17b67b8c856301e08ac8b86a825
3
+ size 149
predict_stock_prices.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import datetime
6
+ import yfinance as yf
7
+ import joblib
8
+ from sklearn.preprocessing import MinMaxScaler
9
+ import json
10
+ from tqdm import tqdm
11
+ import os
12
+ from typing import List, Dict, Any, Union, Tuple
13
+
14
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM followed by a linear head on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    # NOTE: the constructor must be spelled ``__init__`` (double underscores).
    # With the single-underscore spelling ``_init_`` Python never calls it, so
    # the instance ends up without ``lstm``/``fc`` layers.
    def __init__(self, input_size=1, hidden_size=64, num_layers=2, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # BiLSTM layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )

        # Fully connected layer; *2 because the forward and backward
        # directions are concatenated.
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor laid out as (batch, seq_len, input_size), matching
                ``batch_first=True`` on the LSTM.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # nn.LSTM starts from zero hidden/cell states when none are passed,
        # which is what the explicit zero tensors used to provide.
        out, _ = self.lstm(x)

        # Use only the output of the last time step.
        return self.fc(out[:, -1, :])
45
+
46
def predict_future(
    model,
    last_sequence,
    steps,
    scaler_diff,
    current_price,
    *,
    daily_mu=0.0002,
    daily_sigma=0.02,
    blend_weight=0.7,
):
    """Roll the model forward ``steps`` times, blending it with a GBM step.

    At every step the model predicts the next (scaled) price difference from
    the current window. That difference is inverse-transformed with
    ``scaler_diff`` and blended with a Geometric Brownian Motion increment
    computed from the current price. The blended difference is added to the
    price, which is floored at 0.01. The model's scaled prediction (not the
    blended value) is fed back into the window for the next step.

    Args:
        model: Torch module called on a (1, seq_len, 1)-shaped window; it must
            have at least one parameter (used to find its device) and return a
            single value.
        last_sequence: Tensor holding the most recent scaled differences, one
            row per time step. It is cloned, never modified.
        steps: Number of future prices to generate.
        scaler_diff: Object exposing ``inverse_transform`` for the model output.
        current_price: Price the simulation starts from.
        daily_mu: Per-step drift of the GBM component.
        daily_sigma: Per-step volatility of the GBM component.
        blend_weight: Weight of the model's predicted difference; the GBM
            increment receives ``1 - blend_weight``.

    Returns:
        ``numpy.ndarray`` of shape (steps, 1) with the simulated prices (the
        starting price is not included).

    Note:
        Draws from NumPy's global random state, one normal sample per step.
    """
    model.eval()
    device = next(model.parameters()).device

    # Work on a copy so the caller's tensor is left untouched.
    window = last_sequence.clone()

    # Loop-invariant GBM terms (dt is one step, i.e. one day).
    dt = 1
    drift = (daily_mu - 0.5 * daily_sigma**2) * dt
    vol_step = daily_sigma * np.sqrt(dt)

    future_prices = []
    with torch.no_grad():
        for _ in range(steps):
            # Model prediction for the next scaled difference.
            pred_diff_scaled = model(window.unsqueeze(0).to(device))

            # Back to price units.
            pred_diff = scaler_diff.inverse_transform(
                pred_diff_scaled.detach().cpu().numpy()
            )[0][0]

            # Stochastic GBM increment expressed as a price change.
            diffusion = vol_step * np.random.normal(0, 1)
            adjustment = current_price * (np.exp(drift + diffusion) - 1)

            # Blend model prediction with the GBM increment.
            blended_diff = (blend_weight * pred_diff) + ((1 - blend_weight) * adjustment)

            # Advance the price, never letting it reach zero or below.
            current_price = max(0.01, current_price + blended_diff)
            future_prices.append(current_price)

            # Slide the window: drop the oldest entry, append the new scaled
            # difference on the same device/dtype as the window itself.
            new_diff_scaled = pred_diff_scaled.detach().reshape(1, 1).to(
                device=window.device, dtype=window.dtype
            )
            window = torch.cat([window[1:], new_diff_scaled], dim=0)

    return np.array(future_prices, dtype=float).reshape(-1, 1)
105
+
106
def fetch_and_prepare_data(
    ticker_symbol: str, seq_length: int
) -> Tuple[np.ndarray, float, pd.DatetimeIndex, pd.DataFrame]:
    """Fetch daily price history for a ticker and prepare it for prediction.

    Args:
        ticker_symbol: Symbol passed to ``yfinance.Ticker``.
        seq_length: Minimum number of differenced rows to return; shorter
            histories are left-padded with zeros.

    Returns:
        A 4-tuple ``(diff_close_prices, last_price, dates, df)``:
        ``diff_close_prices`` is an (n, 1) array of day-over-day 'Close'
        differences, ``last_price`` is the most recent 'Close' value,
        ``dates`` is the index of the fetched frame, and ``df`` is the frame
        itself.

    Raises:
        ValueError: If the fetched frame has no 'Close' column or no rows.
    """
    # Fetch the full daily history using yfinance.
    df = yf.Ticker(ticker_symbol).history(period="max", interval='1d')

    # Make sure the data has a Close column.
    if 'Close' not in df.columns:
        raise ValueError(f"No 'Close' price data available for {ticker_symbol}")

    # An empty frame would otherwise fail below with an opaque IndexError
    # when the last price is read.
    if df.empty:
        raise ValueError(f"No price history returned for {ticker_symbol}")

    # Closing prices as a column vector.
    close_prices = df['Close'].values.astype(float).reshape(-1, 1)

    # Starting point for the forecast.
    last_price = close_prices[-1][0]

    # Day-over-day differences (one row fewer than the prices).
    diff_close_prices = np.diff(close_prices, axis=0)

    # Left-pad with zeros when the history is shorter than the model window.
    shortfall = seq_length - len(diff_close_prices)
    if shortfall > 0:
        diff_close_prices = np.vstack([np.zeros((shortfall, 1)), diff_close_prices])

    return diff_close_prices, last_price, df.index, df
134
+
135
def predict_stock_prices(
    ticker_symbols: List[str],
    model_path: str,
    scaler_path: str,
    metadata_path: str
) -> Dict[str, Any]:
    """
    Predict stock prices for multiple ticker symbols for -15 to +15 years.

    For every symbol the available 'Close' history (at most the last
    15 * 252 rows) is combined with 15 * 252 simulated future prices. Future
    prices are dated on consecutive business days (Mon-Fri) after the last
    historical date, so the trading-day horizon really spans about 15
    calendar years. Exchange holidays are not excluded.

    Args:
        ticker_symbols: List of ticker symbols to predict
        model_path: Path to the trained BiLSTM model (a state dict)
        scaler_path: Path to the saved scaler for differences
        metadata_path: Path to the saved model metadata; must contain
            the key 'seq_length'

    Returns:
        Dictionary keyed by ticker symbol. Each value is either a list of
        ``{"date": "YYYY-MM-DD", "value": float}`` records (history followed
        by predictions) or ``{"error": message}`` if that symbol failed.
    """
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(security): joblib.load unpickles its input; only point these
    # paths at trusted artifacts.
    model_metadata = joblib.load(metadata_path)
    seq_length = model_metadata['seq_length']

    # Initialize and load the model
    model = BiLSTMModel().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    # Load the scaler
    scaler_diff = joblib.load(scaler_path)

    # Trading days per year (approximately) and the 15-year horizon, used
    # both for the forecast length and for the history window.
    trading_days_per_year = 252
    horizon_days = trading_days_per_year * 15

    result = {}

    for symbol in tqdm(ticker_symbols, desc="Processing tickers"):
        # Best effort per ticker: one failing symbol must not abort the batch.
        try:
            diff_close_prices, last_price, historical_dates, df = fetch_and_prepare_data(symbol, seq_length)

            # Scale the most recent window of differences and wrap it as a tensor.
            diff_scaled = scaler_diff.transform(diff_close_prices[-seq_length:])
            last_diff_sequence = torch.tensor(diff_scaled, dtype=torch.float32)

            # Predict future prices
            future_prices = predict_future(model, last_diff_sequence, horizon_days, scaler_diff, last_price)

            # Future dates: the horizon is counted in trading days, so step
            # through business days rather than calendar days.
            future_index = pd.bdate_range(
                start=historical_dates[-1] + pd.Timedelta(days=1),
                periods=horizon_days,
            )
            future_dates_str = future_index.strftime('%Y-%m-%d').tolist()

            # Historical data for the past 15 years or as much as is available
            past_days = min(len(historical_dates), horizon_days)
            historical_subset = historical_dates[-past_days:]
            historical_prices = df['Close'].values[-past_days:]
            historical_dates_str = [date.strftime('%Y-%m-%d') for date in historical_subset]

            # Combine historical and future data
            all_dates = historical_dates_str + future_dates_str
            all_prices = np.concatenate([historical_prices, future_prices.flatten()])

            result[symbol] = [
                {"date": date, "value": float(value)} for date, value in zip(all_dates, all_prices)
            ]

        except Exception as e:
            print(f"Error processing {symbol}: {str(e)}")
            result[symbol] = {"error": str(e)}

    return result
226
+
227
def batch_predict_to_json(
    ticker_symbols: List[str],
    model_path: str,
    scaler_path: str,
    metadata_path: str,
    output_path: str = "stock_predictions.json"
) -> Dict[str, Any]:
    """
    Batch predict stock prices and return them as a JSON-serializable dict.

    This is a thin wrapper around ``predict_stock_prices``. Despite its name
    it does not write anything to disk.

    Args:
        ticker_symbols: List of ticker symbols
        model_path: Path to the trained model
        scaler_path: Path to the saved scaler
        metadata_path: Path to the saved metadata
        output_path: Currently unused; kept so existing callers that pass it
            keep working. No file is created at this path.

    Returns:
        The dictionary returned by ``predict_stock_prices`` (ticker symbol ->
        prediction records or an error entry).
    """
    return predict_stock_prices(ticker_symbols, model_path, scaler_path, metadata_path)
251
+
252
+ # Example usage
253
def get_stock_predictions(tickers):
    """Example entry point: predict for ``tickers`` using the default artifacts.

    Calls ``batch_predict_to_json`` with the model, scaler and metadata file
    names expected in the working directory and returns its result unchanged.
    For example, ``tickers`` may be ["AAPL", "MSFT", "GOOGL", "AMZN", "META"].
    """
    # Saved artifacts, in the positional order batch_predict_to_json expects:
    # model, scaler, metadata.
    artifact_paths = (
        "bilstm_stock_model.pth",
        "scaler_diff.pkl",
        "model_metadata.pkl",
    )

    print('ok')
    return batch_predict_to_json(tickers, *artifact_paths)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+ pandas
3
+ numpy
4
+ torch
5
+ yfinance
6
+ joblib
7
+ scikit-learn
8
+ tqdm
9
+ gradio
scaler_diff.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:428e2c3222ff72b35ee62b049f68e8b0774041481452c2f9a0929474543b6995
3
+ size 719