Spaces:

Div0013
/

hackfest_biLSTM

Sleeping

App Files Files Community

Div0013 committed on Apr 12, 2025

Commit

a9bcd08

1 Parent(s): 9f578b5

Add complete project files for stock predictor

Browse files

Files changed (6) hide show

app.py +0 -92
bilstm_stock_model.pth +3 -0
model_metadata.pkl +3 -0
predict_stock_prices.py +265 -0
requirements.txt +9 -0
scaler_diff.pkl +3 -0

app.py DELETED Viewed

@@ -1,92 +0,0 @@
-# app.py
-import gradio as gr
-import json
-import traceback
-# Import necessary functions and the model class from your original script
-# Make sure predict_stock_prices.py is in the same directory
-from predict_stock_prices import (
-    BiLSTMModel, # Need to import the class for joblib/torch to load model correctly
-    predict_stock_prices,
-    batch_predict_to_json # Assuming this function takes the list and paths
-)
-# --- Model Configuration ---
-# These paths should correspond to the files uploaded to your Hugging Face Space
-MODEL_PATH = "bilstm_stock_model.pth"
-SCALER_PATH = "scaler_diff.pkl"
-METADATA_PATH = "model_metadata.pkl"
-# --- Gradio Interface Function ---
-def run_prediction(ticker_string):
-    """
-    Takes a comma-separated string of tickers, runs prediction,
-    and returns the result as a JSON object or error string.
-    """
-    if not ticker_string:
-        return {"error": "Please enter at least one ticker symbol."}
-    # Split string into a list of tickers, removing whitespace
-    tickers = [ticker.strip().upper() for ticker in ticker_string.split(',') if ticker.strip()]
-    if not tickers:
-        return {"error": "No valid ticker symbols entered."}
-    print(f"Received request for tickers: {tickers}") # Log received tickers
-    try:
-        # Call your existing batch prediction function
-        # It already returns a dictionary suitable for JSON output
-        predictions = batch_predict_to_json(
-            ticker_symbols=tickers,
-            model_path=MODEL_PATH,
-            scaler_path=SCALER_PATH,
-            metadata_path=METADATA_PATH
-        )
-        print(f"Prediction successful for: {list(predictions.keys())}") # Log success
-        # Check for errors within the prediction results
-        errors = {k:v for k,v in predictions.items() if isinstance(v, dict) and 'error' in v}
-        if errors:
-             print(f"Errors occurred during prediction: {errors}") # Log errors
-        return predictions # Return the entire dictionary
-    except FileNotFoundError as e:
-         print(f"Error: Model file not found - {e}")
-         return {"error": f"Required file not found: {e}. Ensure model, scaler, and metadata files are uploaded correctly."}
-    except Exception as e:
-        print(f"An unexpected error occurred: {e}")
-        traceback.print_exc() # Print detailed traceback to logs
-        return {"error": f"An unexpected error occurred: {str(e)}"}
-# --- Build Gradio Interface ---
-# Use Markdown for a richer description
-description = """
-## BiLSTM Stock Price Predictor (-15y / +15y)
-Enter one or more stock ticker symbols (e.g., `AAPL`, `MSFT`, `GOOGL`), separated by commas.
-The model will fetch historical data, predict future prices for the next 15 years using a BiLSTM model combined with Geometric Brownian Motion (GBM),
-and return the historical data for the past 15 years (or less if unavailable) combined with the predictions.
-**Note:**
-*   Predictions are based on historical 'Close' prices and involve inherent uncertainty. **This is not financial advice.**
-*   Fetching data and running predictions might take a moment, especially for multiple tickers.
-*   Ensure ticker symbols are valid on Yahoo Finance.
-"""
-iface = gr.Interface(
-    fn=run_prediction,
-    inputs=gr.Textbox(
-        lines=1,
-        placeholder="Enter Ticker Symbols (e.g., AAPL, MSFT, GOOGL)",
-        label="Ticker Symbols (comma-separated)"
-    ),
-    outputs=gr.JSON(label="Prediction Results (Historical + Future Prices)"),
-    title="Stock Price Prediction",
-    description=description,
-    examples=[["AAPL"], ["MSFT,GOOGL,NVDA"]],
-    allow_flagging='never' # Optional: Disable flagging
-)
-# --- Launch the App ---
-if __name__ == "__main__":
-    iface.launch() # Share=True is not needed when deploying on Spaces

bilstm_stock_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f05bab113734f62c3b0cfbeb7ff04c0327c3005fff294baae440280c2babf46
+size 538337

model_metadata.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5bf4b66d3a14b21c90a6c155f39f22294bbb17b67b8c856301e08ac8b86a825
+size 149

predict_stock_prices.py ADDED Viewed

	@@ -0,0 +1,265 @@

+import pandas as pd
+import numpy as np
+import torch
+import torch.nn as nn
+import datetime
+import yfinance as yf
+import joblib
+from sklearn.preprocessing import MinMaxScaler
+import json
+from tqdm import tqdm
+import os
+from typing import List, Dict, Any, Union, Tuple
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM followed by a linear head on the last time step.

    The attribute names ``lstm`` and ``fc`` determine the keys of the
    module's ``state_dict`` and must stay as they are so that previously
    saved weights keep loading.
    """

    # NOTE: the constructor must be spelled ``__init__``. With any other name
    # Python never calls it, the layers below are never created, and both
    # ``load_state_dict`` and ``forward`` fail.
    def __init__(self, input_size=1, hidden_size=64, num_layers=2, output_size=1):
        """Build the network.

        Args:
            input_size: Number of features per time step.
            hidden_size: Hidden units per LSTM direction.
            num_layers: Number of stacked LSTM layers.
            output_size: Number of values produced by the linear head.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # BiLSTM layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Fully connected layer; *2 because the forward and backward
        # directions are concatenated in the LSTM output.
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Return the head output for the last time step of ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``
                (``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        batch_size = x.size(0)
        # Zero initial hidden/cell state, one slice per layer and direction,
        # created directly on the input's device and dtype.
        state_shape = (self.num_layers * 2, batch_size, self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        # Only the last time step feeds the linear head
        return self.fc(out[:, -1, :])
def predict_future(model, last_sequence, steps, scaler_diff, current_price):
    """Roll the model forward ``steps`` times, blending its output with GBM noise.

    Each step predicts the next (scaled) price difference from the current
    window, un-scales it, mixes it 70/30 with a Geometric Brownian Motion
    increment, and feeds the model's own scaled prediction back into the
    window.

    Args:
        model: Trained network; called with a ``(1, seq_len, 1)`` tensor and
            expected to return a single scaled difference.
        last_sequence: Tensor of shape ``(seq_len, 1)`` holding the most
            recent scaled differences. It is cloned, not modified.
        steps: Number of future steps to generate.
        scaler_diff: Object exposing ``inverse_transform`` for the scaled
            differences.
        current_price: Last known price, used as the starting point.

    Returns:
        ``numpy.ndarray`` of shape ``(steps, 1)`` with the simulated prices
        (the starting price itself is not included).

    Note:
        Draws one sample per step from NumPy's global random state, so the
        caller controls reproducibility through ``np.random.seed``.
    """
    model.eval()

    # GBM parameters: fixed defaults, not estimated from the ticker's history.
    daily_mu = 0.0002    # daily drift
    daily_sigma = 0.02   # daily volatility
    dt = 1               # one day per step
    # Loop-invariant parts of the GBM increment, computed once.
    drift = (daily_mu - 0.5 * daily_sigma**2) * dt
    diffusion_scale = daily_sigma * np.sqrt(dt)
    blend_weight = 0.7   # weight given to the model's prediction

    device = next(model.parameters()).device
    current_sequence = last_sequence.clone()
    future_prices = []

    with torch.no_grad():
        for _ in range(steps):
            # Model prediction for the next scaled difference
            pred_diff_scaled = model(current_sequence.unsqueeze(0).to(device))
            # Back to price units
            pred_diff = scaler_diff.inverse_transform(
                pred_diff_scaled.cpu().numpy()
            )[0][0]

            # Stochastic GBM move expressed as an absolute price change
            shock = np.random.normal(0, 1)
            stochastic_factor = np.exp(drift + diffusion_scale * shock)
            adjustment = current_price * (stochastic_factor - 1)

            blended_diff = (blend_weight * pred_diff) + ((1 - blend_weight) * adjustment)
            # Floor at one cent so the path can never go non-positive
            current_price = max(0.01, current_price + blended_diff)
            future_prices.append(current_price)

            # Slide the window: drop the oldest entry, append the model's own
            # scaled prediction. It is moved to the window's device/dtype so
            # the concatenation works wherever the caller placed the sequence.
            new_diff_scaled = pred_diff_scaled.reshape(1, 1).to(
                device=current_sequence.device, dtype=current_sequence.dtype
            )
            current_sequence = torch.cat([current_sequence[1:], new_diff_scaled], dim=0)

    return np.array(future_prices).reshape(-1, 1)
def fetch_and_prepare_data(
    ticker_symbol: str, seq_length: int
) -> Tuple[np.ndarray, float, pd.DatetimeIndex, pd.DataFrame]:
    """Download a ticker's daily history and derive the model inputs.

    Args:
        ticker_symbol: Symbol passed to ``yfinance.Ticker``.
        seq_length: Minimum number of difference rows the caller needs;
            shorter histories are left-padded with zeros.

    Returns:
        A 4-tuple ``(diff_close_prices, last_price, dates, df)``:
        first differences of the close prices with shape ``(n, 1)`` and
        ``n >= seq_length``; the most recent close as a float; the index of
        the price frame; and the price frame itself (rows without a close
        price removed).

    Raises:
        ValueError: If the download has no 'Close' column or contains no
            usable close prices.
    """
    # Fetch the full daily history using yfinance
    ticker = yf.Ticker(ticker_symbol)
    df = ticker.history(period="max", interval='1d')

    # Make sure the data has a Close column
    if 'Close' not in df.columns:
        raise ValueError(f"No 'Close' price data available for {ticker_symbol}")

    # A NaN close would turn the differences, and every prediction derived
    # from them, into NaN, so such rows are dropped up front.
    df = df.dropna(subset=['Close'])
    if df.empty:
        # Presumably what an unknown/delisted symbol produces; verify against
        # the yfinance version in use. Without this guard the indexing below
        # fails with an uninformative IndexError.
        raise ValueError(f"No 'Close' price data available for {ticker_symbol}")

    close_prices = df['Close'].values.astype(float).reshape(-1, 1)
    # Day-over-day differences: this is the series the model consumes
    diff_close_prices = np.diff(close_prices, axis=0)
    # Starting point for the forecast
    last_price = float(close_prices[-1, 0])
    dates = df.index

    # If we don't have enough data for the sequence length, pad with zeros
    missing = seq_length - len(diff_close_prices)
    if missing > 0:
        diff_close_prices = np.vstack([np.zeros((missing, 1)), diff_close_prices])

    return diff_close_prices, last_price, dates, df
def predict_stock_prices(
    ticker_symbols: List[str],
    model_path: str,
    scaler_path: str,
    metadata_path: str
) -> Dict[str, Any]:
    """
    Build a combined historical + forecast price series for each ticker.

    For every symbol, up to 15 years (252 rows per year) of historical closes
    are returned, followed by a 15-trading-year forecast.

    Args:
        ticker_symbols: List of ticker symbols to predict
        model_path: Path to the trained BiLSTM model weights (state dict)
        scaler_path: Path to the saved scaler for differences
        metadata_path: Path to the saved model metadata; must contain
            the key ``'seq_length'``
    Returns:
        Dictionary keyed by ticker symbol. Each value is either a list of
        ``{"date": "YYYY-MM-DD", "value": float}`` records (history first,
        then forecast) or ``{"error": message}`` if that ticker failed.
    """
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): joblib.load / torch.load unpickle the files; only point
    # these paths at artifacts you trust.
    model_metadata = joblib.load(metadata_path)
    seq_length = model_metadata['seq_length']

    # Initialize and load the model
    model = BiLSTMModel().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    scaler_diff = joblib.load(scaler_path)

    # Trading days per year (approximately); the horizon is the same for
    # every ticker, so it is computed once.
    trading_days_per_year = 252
    horizon_days = trading_days_per_year * 15

    result = {}
    for symbol in tqdm(ticker_symbols, desc="Processing tickers"):
        try:
            diff_close_prices, last_price, historical_dates, df = fetch_and_prepare_data(symbol, seq_length)

            # Scale the most recent window of differences and wrap it as a tensor
            diff_scaled = scaler_diff.transform(diff_close_prices[-seq_length:])
            last_diff_sequence = torch.tensor(diff_scaled, dtype=torch.float32)

            future_prices = predict_future(model, last_diff_sequence, horizon_days, scaler_diff, last_price)

            # The horizon is counted in trading days, so the forecast must be
            # dated with business days. Consecutive calendar days would put
            # prices on weekends and compress 15 trading years into ~10
            # calendar years. Market holidays are not excluded.
            last_date = pd.Timestamp(historical_dates[-1]).tz_localize(None).normalize()
            future_dates = pd.bdate_range(
                start=last_date + pd.offsets.BDay(1), periods=horizon_days
            )
            future_dates_str = future_dates.strftime('%Y-%m-%d').tolist()

            # Historical window: past 15 years or as many rows as available
            past_days = min(len(historical_dates), horizon_days)
            historical_subset = historical_dates[-past_days:]
            historical_prices = df['Close'].values[-past_days:]
            historical_dates_str = [date.strftime('%Y-%m-%d') for date in historical_subset]

            # Combine historical and future data
            all_dates = historical_dates_str + future_dates_str
            all_prices = np.concatenate([historical_prices, future_prices.flatten()])

            # float() converts NumPy scalars so the records are JSON-serializable
            result[symbol] = [
                {"date": date, "value": float(value)} for date, value in zip(all_dates, all_prices)
            ]
        except Exception as e:
            # Deliberate best-effort: one failing ticker must not abort the batch
            print(f"Error processing {symbol}: {str(e)}")
            result[symbol] = {"error": str(e)}

    return result
def batch_predict_to_json(
    ticker_symbols: List[str],
    model_path: str,
    scaler_path: str,
    metadata_path: str,
    output_path: str = "stock_predictions.json"
) -> Dict[str, Any]:
    """
    Batch predict stock prices and return them as a JSON-ready dictionary.

    Despite its name, this function does not write any file: it returns the
    result of ``predict_stock_prices`` unchanged.

    Args:
        ticker_symbols: List of ticker symbols
        model_path: Path to the trained model
        scaler_path: Path to the saved scaler
        metadata_path: Path to the saved metadata
        output_path: Currently unused; kept so existing call sites that pass
            it keep working.
    Returns:
        The predictions dictionary produced by ``predict_stock_prices``.
    """
    # TODO(review): either persist the predictions to ``output_path`` or
    # deprecate the parameter; today it is silently ignored.
    return predict_stock_prices(ticker_symbols, model_path, scaler_path, metadata_path)
# Convenience entry point using the artifact file names bundled with the project
def get_stock_predictions(tickers):
    """Run the batch predictor on ``tickers`` with the default artifact paths.

    Args:
        tickers: Ticker symbols, e.g. ``["AAPL", "MSFT", "GOOGL", "AMZN", "META"]``.

    Returns:
        Whatever ``batch_predict_to_json`` returns for these tickers.
    """
    # Model weights, difference scaler, and metadata, in the positional order
    # expected by batch_predict_to_json (paths are relative).
    artifact_paths = (
        "bilstm_stock_model.pth",
        "scaler_diff.pkl",
        "model_metadata.pkl",
    )
    print('ok')
    return batch_predict_to_json(tickers, *artifact_paths)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# requirements.txt
+pandas
+numpy
+torch
+yfinance
+joblib
+scikit-learn
+tqdm
+gradio

scaler_diff.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:428e2c3222ff72b35ee62b049f68e8b0774041481452c2f9a0929474543b6995
+size 719