Spaces:

pyroleli
/

predictor

Running

App Files Files Community

pyroleli committed on Dec 31, 2025

Commit

358704d

verified ·

1 Parent(s): 4b90bc1

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +148 -197

src/streamlit_app.py CHANGED Viewed

@@ -2,237 +2,188 @@ import streamlit as st
 import yfinance as yf
 import pandas as pd
 import numpy as np
-import torch
-import torch.nn as nn
 from sklearn.preprocessing import MinMaxScaler
 import plotly.graph_objects as go
-from datetime import datetime, timedelta
 # --- CONFIGURATION ---
-st.set_page_config(layout="wide", page_title="PyTorch AI Stock Predictor")
-# --- DEVICE CONFIG ---
-# Use GPU if available (on Hugging Face, this usually defaults to CPU unless paid)
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-# --- PYTORCH LSTM MODEL ---
-class LSTMModel(nn.Module):
-    def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
-        super().__init__()
-        self.hidden_layer_size = hidden_layer_size
-        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
-        self.linear = nn.Linear(hidden_layer_size, output_size)
-    def forward(self, input_seq):
-        lstm_out, _ = self.lstm(input_seq)
-        # We only care about the last time step output
-        predictions = self.linear(lstm_out[:, -1, :])
-        return predictions
 # --- UI HEADER ---
-st.title("🧠 PyTorch Recurrent Neural Network Predictor")
 st.markdown("""
-**Powered by PyTorch.** This app uses a Recurrent Neural Network (LSTM) to learn sequential patterns.
-It supports **Intraday (Live)** data and simulates performance before predicting.
 """)
 # --- SIDEBAR DASHBOARD ---
 st.sidebar.header("Configuration")
-ticker = st.sidebar.text_input("Enter Ticker", value="^IXIC")
-# Interval Selection (Live/Intraday options added)
-interval_option = st.sidebar.selectbox(
-    "Time Interval",
-    ("1 Minute (Live)", "1 Hour", "1 Day")
-)
-# Horizon Selection
 horizon_option = st.sidebar.selectbox(
-    "Prediction Horizon",
-    ("Next 30 Steps", "Next 60 Steps", "Next 90 Steps")
 )
-future_steps = int(horizon_option.split(" ")[1])
-# --- DATA LOADING ---
-@st.cache_data(ttl=60) # Cache clears every 60 seconds for "Live" feel
-def load_data(symbol, interval):
-    """
-    Dynamic data loader.
-    - 1m: Max 7 days history (Yahoo limit)
-    - 1h: Max 730 days history
-    - 1d: Max 5 years
-    """
-    if interval == "1 Minute (Live)":
-        data = yf.download(symbol, period="7d", interval="1m")
-    elif interval == "1 Hour":
-        data = yf.download(symbol, period="730d", interval="1h")
-    else: # 1 Day
-        data = yf.download(symbol, period="5y", interval="1d")
-    if data.empty:
-        return None
     data.reset_index(inplace=True)
-    # Standardize column name for Date/Time
-    if 'Datetime' in data.columns:
-        data.rename(columns={'Datetime': 'Date'}, inplace=True)
     return data
-# --- HELPER FUNCTIONS ---
-def create_sequences(data, seq_length):
-    xs, ys = [], []
-    for i in range(len(data) - seq_length):
-        x = data[i:(i + seq_length)]
-        y = data[i + seq_length]
-        xs.append(x)
-        ys.append(y)
-    return np.array(xs), np.array(ys)
-def train_pytorch_model(train_data, seq_length=60, epochs=15):
-    """
-    Trains the PyTorch LSTM model.
-    Using more epochs = better accuracy but slower speed.
-    """
-    # Prepare Data
-    X_train, y_train = create_sequences(train_data, seq_length)
-    # Convert to PyTorch Tensors
-    X_train = torch.from_numpy(X_train).float().to(device)
-    y_train = torch.from_numpy(y_train).float().to(device)
-    # Initialize Model
-    model = LSTMModel().to(device)
-    loss_function = nn.MSELoss()
-    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-    # Training Loop
-    model.train()
-    progress_bar = st.progress(0)
-    for i in range(epochs):
-        optimizer.zero_grad()
-        y_pred = model(X_train)
-        single_loss = loss_function(y_pred, y_train)
-        single_loss.backward()
-        optimizer.step()
-        progress_bar.progress((i + 1) / epochs)
-    progress_bar.empty()
     return model
-# --- MAIN LOGIC ---
-# 1. Load Data
-st.write(f"Fetching data for **{ticker}** ({interval_option})...")
-data = load_data(ticker, interval_option)
-if data is None or len(data) < 100:
-    st.error("Not enough data found. For '1 Minute', markets must be open or data must exist within last 7 days.")
     st.stop()
-# 2. Preprocessing
-df_close = data[['Close']].values.astype(float)
-scaler = MinMaxScaler(feature_range=(-1, 1)) # LSTM often prefers -1 to 1 or 0 to 1
-data_scaled = scaler.fit_transform(df_close)
-# 3. Define Simulation Window
-# If 1 min data, we can't simulate "Last Year". We simulate "Last 24 Hours" (approx 390 trading minutes)
-if interval_option == "1 Minute (Live)":
-    test_size = 390 # Last trading day
-    sim_title = "Last 24 Trading Hours"
-elif interval_option == "1 Hour":
-    test_size = 24 * 30 # Approx 1 month
-    sim_title = "Last Month (Hourly)"
-else:
-    test_size = 365 # Last Year
-    sim_title = "Last Year (Daily)"
-train_size = len(data_scaled) - test_size
-train_set = data_scaled[:train_size]
-test_set = data_scaled[train_size:]
-# 4. Train & Simulate
-st.subheader(f"1. Simulation: Testing Accuracy on {sim_title}")
-st.caption("Training PyTorch Model... (This uses recurrent backpropagation)")
-seq_length = 60 # Look back 60 steps
-model = train_pytorch_model(train_set, seq_length=seq_length, epochs=20) # Increased epochs for better accuracy
-# Evaluation
-model.eval()
-inputs = data_scaled[len(data_scaled) - len(test_set) - seq_length:]
-X_test, y_test = create_sequences(inputs, seq_length)
-X_test = torch.from_numpy(X_test).float().to(device)
-with torch.no_grad():
-    predictions = model(X_test).cpu().numpy()
-    predictions = scaler.inverse_transform(predictions)
-# Metrics
-actuals = scaler.inverse_transform(test_set)
-mse = np.mean((predictions - actuals) ** 2)
-rmse = np.sqrt(mse)
-# Directional Accuracy
-diff_actual = np.diff(actuals.flatten())
-diff_pred = np.diff(predictions.flatten())
-correct_direction = np.sum(np.sign(diff_actual) == np.sign(diff_pred))
-acc_score = (correct_direction / len(diff_actual)) * 100
 col1, col2 = st.columns(2)
-col1.metric("Simulation Error (RMSE)", f"{rmse:.2f}")
-col2.metric("Directional Accuracy", f"{acc_score:.2f}%")
-# Graph Simulation
 fig_sim = go.Figure()
-# Plot only the relevant simulation period to keep graph clean
-sim_dates = data['Date'][train_size:]
-fig_sim.add_trace(go.Scatter(x=sim_dates, y=actuals.flatten(), mode='lines', name='Actual Price'))
-fig_sim.add_trace(go.Scatter(x=sim_dates, y=predictions.flatten(), mode='lines', name='AI Prediction', line=dict(dash='dot', color='orange')))
 st.plotly_chart(fig_sim, use_container_width=True)
 # --- FUTURE PREDICTION ---
 st.markdown("---")
 st.subheader(f"2. Future Forecast: {horizon_option}")
-# Retrain on FULL dataset
-with st.spinner('Retraining on full dataset for future generation...'):
-    full_model = train_pytorch_model(data_scaled, seq_length=seq_length, epochs=25)
-# Generate Future Steps
-future_preds = []
-current_seq = torch.from_numpy(data_scaled[-seq_length:]).float().to(device).unsqueeze(0) # Shape: [1, 60, 1]
-full_model.eval()
-for _ in range(future_steps):
-    with torch.no_grad():
-        pred = full_model(current_seq)
-        future_preds.append(pred.item())
-        # Update sequence: remove first item, add new prediction
-        # Ensure pred is shaped [1, 1, 1] to match dims
-        pred_reshaped = pred.unsqueeze(1)
-        current_seq = torch.cat((current_seq[:, 1:, :], pred_reshaped), dim=1)
-# Inverse Scale
-future_preds = np.array(future_preds).reshape(-1, 1)
-future_preds = scaler.inverse_transform(future_preds)
-# Create Future Dates/Times
-last_time = data['Date'].iloc[-1]
-if interval_option == "1 Minute (Live)":
-    time_delta = timedelta(minutes=1)
-elif interval_option == "1 Hour":
-    time_delta = timedelta(hours=1)
-else:
-    time_delta = timedelta(days=1)
-future_dates = [last_time + i * time_delta for i in range(1, future_steps + 1)]
-# Graph Future
 fig_future = go.Figure()
-# Show tail of historical data for context
-context_points = 100
-fig_future.add_trace(go.Scatter(x=data['Date'][-context_points:], y=data['Close'][-context_points:].values.flatten(), mode='lines', name='History'))
-fig_future.add_trace(go.Scatter(x=future_dates, y=future_preds.flatten(), mode='lines', name='Future Forecast', line=dict(dash='dot', color='green', width=3)))
-fig_future.update_layout(title=f"Forecast for next {future_steps} intervals")
-st.plotly_chart(fig_future, use_container_width=True)

 import yfinance as yf
 import pandas as pd
 import numpy as np
+import tensorflow as tf
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
 from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error
 import plotly.graph_objects as go
+from datetime import date, timedelta
 # --- CONFIGURATION ---
+# Wide layout so the charts further down can use the full page width.
+st.set_page_config(layout="wide", page_title="AI Stock Predictor")
 # --- UI HEADER ---
+st.title("📈 Neural Network Stock Predictor")
 st.markdown("""
+This app uses a **Long Short-Term Memory (LSTM)** neural network to predict stock prices.
+It first **simulates** the model against the last year's data to verify accuracy, then predicts the future.
 """)
 # --- SIDEBAR DASHBOARD ---
 st.sidebar.header("Configuration")
+ticker = st.sidebar.text_input("Enter Ticker Symbol", value="^IXIC") # Default to NASDAQ
+st.sidebar.caption("Examples: ^IXIC (Nasdaq), AAPL, TSLA, BTC-USD")
 horizon_option = st.sidebar.selectbox(
+    "Prediction Horizon",
+    ("Next Day", "Next Week", "Next Month", "Next Year")
 )
+# Map horizon to days
+# Each value is the number of one-step forecasts the rollout loop later runs.
+horizon_mapping = {
+    "Next Day": 1,
+    "Next Week": 7,
+    "Next Month": 30,
+    "Next Year": 365
+}
+# The selectbox only offers the keys above, so this lookup cannot miss.
+forecast_days = horizon_mapping[horizon_option]
+# --- FUNCTIONS ---
+@st.cache_data(ttl=3600)
+def load_data(symbol):
+    """Fetch about five years of price history for ``symbol`` from yfinance.
+
+    The result is cached per symbol for one hour. The download window ends
+    at today's date, so a cache entry without an expiry would keep serving
+    an ever-older window for as long as the app process stays up.
+
+    Args:
+        symbol: Ticker string handed to ``yf.download``.
+
+    Returns:
+        The downloaded DataFrame with its index moved into a regular column.
+    """
+    # Read the clock once so start and end are derived from the same day.
+    today = date.today()
+    start_date = today - timedelta(days=5*365)
+    data = yf.download(symbol, start=start_date, end=today)
     data.reset_index(inplace=True)
     return data
+def create_dataset(dataset, look_back=60):
+    """Slice a 2-D series into (window, next value) pairs for the LSTM.
+
+    Args:
+        dataset: 2-D array; only column 0 is read.
+        look_back: Number of consecutive rows in each input window.
+
+    Returns:
+        ``(dataX, dataY)`` where ``dataX`` has shape ``(samples, look_back)``
+        and ``dataY`` has shape ``(samples,)``, with
+        ``samples = max(len(dataset) - look_back, 0)``.
+    """
+    # Every row from index look_back through the last row is a valid target.
+    # The previous bound (len - look_back - 1) silently dropped the newest one.
+    n_samples = max(len(dataset) - look_back, 0)
+    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
+    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
+    # Reshape explicitly so an empty result is still (0, look_back) and the
+    # caller can read shape[1] without an IndexError.
+    return np.array(dataX, dtype=float).reshape(n_samples, look_back), np.array(dataY, dtype=float)
+def train_lstm_model(train_data, look_back=60):
+    """Build a two-layer LSTM regressor and fit it on windows of train_data.
+
+    Args:
+        train_data: 2-D array passed to create_dataset, which reads column 0.
+        look_back: Window length; also the time-step dimension of the input.
+
+    Returns:
+        The fitted Keras Sequential model.
+    """
+    # Reshape input to be [samples, time steps, features]
+    X_train, y_train = create_dataset(train_data, look_back)
+    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
+    # Build LSTM Architecture
+    # The first LSTM returns the whole sequence so the second LSTM can read
+    # it; the second returns only its final output for the dense layers.
+    model = Sequential()
+    model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
+    model.add(LSTM(50, return_sequences=False))
+    model.add(Dense(25))
+    model.add(Dense(1)) # Output layer
+    model.compile(optimizer='adam', loss='mean_squared_error')
+    # Train (Epochs=1 is used here for speed in demo, increase to 20-50 for real accuracy)
+    # NOTE(review): batch_size=1 means one weight update per sample, which is
+    # slow on multi-year histories; confirm this is intended.
+    model.fit(X_train, y_train, batch_size=1, epochs=1, verbose=0)
     return model
+# --- MAIN EXECUTION ---
+# Placeholder text element; its content is replaced once loading succeeds.
+data_load_state = st.text('Loading data...')
+try:
+    data = load_data(ticker)
+    data_load_state.text('Loading data... done!')
+except Exception as e:
+    # Show the failure on the page and end this script run.
+    st.error(f"Error loading data: {e}")
     st.stop()
+# The backtest below holds out 365 rows and builds 60-row windows, so a
+# short history cannot be split into usable train and test sets.
+if len(data) < 500:
+    st.error("Not enough data to train the model. Please choose a stock with deeper history.")
+    st.stop()
+# Prepare Data
+# Double brackets keep Close as a 2-D column, the layout the scaler is fed.
+df_close = data[['Close']].values
+# NOTE(review): the scaler is fit on the full series, including the 365
+# rows later held out for the backtest; confirm this look-ahead is acceptable.
+scaler = MinMaxScaler(feature_range=(0, 1))
+scaled_data = scaler.fit_transform(df_close)
+# --- SIMULATION (BACKTESTING) ---
+st.subheader("1. Simulation: Testing against Last Year")
+st.write("Training model on past data to verify performance on the last 365 days...")
+# Split data: Train on everything BEFORE the last 365 days, Test on LAST 365 days
+# NOTE(review): the split counts 365 rows, not calendar days; if the feed
+# only has trading-day rows this covers more than one year. Confirm.
+training_len = len(scaled_data) - 365
+train_data = scaled_data[0:training_len, :]
+test_data = scaled_data[training_len - 60:, :] # -60 to handle look_back
+# Train Model
+with st.spinner('Training Neural Network... (This may take a moment)'):
+    model = train_lstm_model(train_data)
+# Predict on the "Last Year" (Simulation)
+# One 60-row window per held-out row; window i ends just before row i.
+x_test = []
+# NOTE(review): look_back is assigned here but the loop below hard-codes 60.
+look_back = 60
+for i in range(60, len(test_data)):
+    x_test.append(test_data[i-60:i, 0])
+x_test = np.array(x_test)
+# Add the trailing feature axis the LSTM input expects: (samples, 60, 1).
+x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
+predictions = model.predict(x_test)
+predictions = scaler.inverse_transform(predictions) # Scale back to normal price
+# Calculate Accuracy (RMSE)
+# Copy the slice so adding a column never writes into a view of `data`.
+valid_set = data[training_len:].copy()
+# Flatten both sides to 1-D before subtracting. predictions is (n, 1); if
+# Close comes back 1-D, the raw difference would broadcast to an (n, n)
+# matrix and the RMSE would average over every mismatched pair of days.
+predicted_prices = np.asarray(predictions, dtype=float).reshape(-1)
+actual_prices = np.asarray(valid_set['Close'], dtype=float).reshape(-1)
+# Kept as a column because the simulation chart reads valid_set['Predictions'].
+valid_set['Predictions'] = predicted_prices
+rmse = np.sqrt(np.mean((predicted_prices - actual_prices) ** 2))
+# Calculate Directional Accuracy (Did it go up/down correctly?)
+# np.diff yields only the n-1 real row-to-row moves, so the undefined change
+# on the first row is no longer counted as a wrong call.
+actual_change = np.diff(actual_prices)
+pred_change = np.diff(predicted_prices)
+accuracy_score = np.mean(np.sign(actual_change) == np.sign(pred_change)) * 100
 col1, col2 = st.columns(2)
+# One metric per column so the two numbers sit side by side.
+col1.metric("Simulation RMSE (Price Error)", f"{rmse:.2f}")
+col2.metric("Directional Accuracy", f"{accuracy_score:.2f}%")
+# 50% is what a coin flip would score on an up/down call.
+if accuracy_score > 50:
+    st.success(f"Model passed simulation with {accuracy_score:.1f}% directional accuracy.")
+else:
+    st.warning(f"Model accuracy is low ({accuracy_score:.1f}%). Stock markets are volatile!")
+# Plot Simulation
+# Three traces: the training history, the held-out actual prices, and the
+# model's predictions over the same held-out dates.
 fig_sim = go.Figure()
+fig_sim.add_trace(go.Scatter(x=data['Date'][:training_len], y=data['Close'][:training_len].values.flatten(), mode='lines', name='Training Data'))
+fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Close'].values.flatten(), mode='lines', name='Actual Price (Last Year)'))
+fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Predictions'].values.flatten(), mode='lines', name='AI Prediction (Simulation)', line=dict(dash='dot', color='orange')))
 st.plotly_chart(fig_sim, use_container_width=True)
 # --- FUTURE PREDICTION ---
 st.markdown("---")
 st.subheader(f"2. Future Forecast: {horizon_option}")
+# Retrain model on ALL data for best future prediction
+with st.spinner('Refining model with full data for future prediction...'):
+    full_model = train_lstm_model(scaled_data)
+# Predict Future Steps
+# We start with the last 60 days of known data
+last_60_days = scaled_data[-60:]
+# Shape (1, 60, 1): a batch holding one window with one feature per step.
+current_batch = last_60_days.reshape((1, 60, 1))
+future_predictions = []
+# Roll forward one step at a time: each prediction is appended to the window
+# and becomes part of the input for the next step.
+for i in range(forecast_days):
+    # Get prediction (scaled)
+    current_pred = full_model.predict(current_batch)[0]
+    future_predictions.append(current_pred)
+    # Update batch to include new prediction, remove oldest day
+    current_pred_reshaped = current_pred.reshape((1, 1, 1))
+    current_batch = np.append(current_batch[:, 1:, :], current_pred_reshaped, axis=1)
+# Inverse transform to get real prices
+future_predictions = scaler.inverse_transform(future_predictions)
+# Create Future Dates
+# NOTE(review): forecast steps are stamped on consecutive calendar days,
+# weekends included; confirm this matches the spacing of the training rows.
+last_date = data['Date'].iloc[-1]
+future_dates = [last_date + timedelta(days=x) for x in range(1, forecast_days + 1)]
+# Plot Future
 fig_future = go.Figure()
+# Show last 365 days of context
+fig_future.add_trace(go.Scatter(x=data['Date'][-365:], y=data['Close'][-365:].values.flatten(), mode='lines', name='Historical Close (Last Year)'))
+fig_future.add_trace(go.Scatter(x=future_dates, y=future_predictions.flatten(), mode='lines', name='AI Future Prediction', line=dict(dash='dot', color='green', width=3)))
+fig_future.update_layout(title=f"Prediction for next {forecast_days} days")
+st.plotly_chart(fig_future, use_container_width=True)
+st.write("Note: Long-term predictions (Year) usually revert to a trend line as error accumulates. Short-term (Day/Week) is generally more reliable.")