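# File: predictor/src/streamlit_app.py
# Hosting-page header kept for provenance (not part of the program):
#   uploader avatar: pyroleli
#   last change: "Update src/streamlit_app.py", revision 358704d (verified)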
import streamlit as st
import yfinance as yf
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go
from datetime import date, timedelta
# --- CONFIGURATION ---
# Page-level settings have to be issued before any other Streamlit element.
st.set_page_config(layout="wide", page_title="AI Stock Predictor")

# --- UI HEADER ---
st.title("📈 Neural Network Stock Predictor")
st.markdown("""
This app uses a **Long Short-Term Memory (LSTM)** neural network to predict stock prices.
It first **simulates** the model against the last year's data to verify accuracy, then predicts the future.
""")

# --- SIDEBAR DASHBOARD ---
# Human-readable horizon label -> number of forecast steps to generate.
# The dict's insertion order is also the order shown in the select box.
horizon_mapping = {
    "Next Day": 1,
    "Next Week": 7,
    "Next Month": 30,
    "Next Year": 365,
}

with st.sidebar:
    st.header("Configuration")
    # Defaults to the NASDAQ Composite index.
    ticker = st.text_input("Enter Ticker Symbol", value="^IXIC")
    st.caption("Examples: ^IXIC (Nasdaq), AAPL, TSLA, BTC-USD")
    horizon_option = st.selectbox("Prediction Horizon", tuple(horizon_mapping))

# Number of steps the forecast loop will roll forward.
forecast_days = horizon_mapping[horizon_option]
# --- FUNCTIONS ---
# The download window ends at "today", but the cache key is only `symbol`.
# Without a TTL a long-running app would keep serving the very first download
# forever; expire entries hourly so new sessions pick up fresh prices.
@st.cache_data(ttl=3600)
def load_data(symbol):
    """Download roughly five years of price history for *symbol* via yfinance.

    Five years are requested so that enough rows remain for training after
    the caller holds out its backtest window.

    Args:
        symbol: Ticker symbol understood by Yahoo Finance (e.g. ``"AAPL"``).

    Returns:
        The DataFrame produced by ``yf.download`` with its index moved into a
        regular column by ``reset_index`` (the rest of this file reads that
        column as ``'Date'``).
    """
    # Read the clock once so start and end are derived from the same day.
    today = date.today()
    start_date = today - timedelta(days=5 * 365)
    data = yf.download(symbol, start=start_date, end=today)
    return data.reset_index()
def create_dataset(dataset, look_back=60):
    """Slice a 2-D series into supervised (window, next value) pairs.

    Sample ``i`` uses ``dataset[i:i + look_back, 0]`` as input and
    ``dataset[i + look_back, 0]`` as target. Every position that has a full
    window *and* a following value is used, including the very last row of
    ``dataset`` as a target (the previous ``- 1`` in the range silently
    dropped that most recent observation).

    Args:
        dataset: 2-D array-like indexed as ``[row, column]``; only column 0
            is read.
        look_back: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` has shape
        ``(n_samples, look_back)`` and ``y`` has shape ``(n_samples,)`` with
        ``n_samples = len(dataset) - look_back``. Both are empty when the
        series is not longer than ``look_back``.
    """
    # A non-positive count simply yields an empty range below.
    n_samples = len(dataset) - look_back
    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)
def train_lstm_model(train_data, look_back=60):
    """Fit a small stacked-LSTM regressor on windows cut from *train_data*.

    The series is turned into (window, next value) pairs by
    ``create_dataset``. The network is two 50-unit LSTM layers followed by
    Dense(25) and a single-unit output, trained with Adam on mean squared
    error.

    Args:
        train_data: 2-D array of (already scaled) values; column 0 is used.
        look_back: Window length, i.e. time steps per training sample.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    windows, targets = create_dataset(train_data, look_back)
    # LSTM layers consume [samples, time steps, features]; there is one feature.
    windows = windows.reshape((windows.shape[0], windows.shape[1], 1))

    network = Sequential([
        LSTM(50, return_sequences=True, input_shape=(look_back, 1)),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1),  # Output layer: the next scaled value
    ])
    network.compile(optimizer='adam', loss='mean_squared_error')

    # A single epoch keeps the demo responsive; raise to 20-50 for real accuracy.
    network.fit(windows, targets, batch_size=1, epochs=1, verbose=0)
    return network
# --- MAIN EXECUTION ---
# Rows of history per model input. One name is shared by training, the
# backtest windows and the rolling forecast so they cannot drift apart.
look_back = 60
# Rows held out for the backtest. The UI calls this the "last year"; note that
# it counts rows returned by the data source, not calendar days.
backtest_rows = 365
# Smallest history that still leaves a usable training set after the hold-out.
min_rows = 500
not_enough_data_msg = "Not enough data to train the model. Please choose a stock with deeper history."

data_load_state = st.text('Loading data...')
try:
    data = load_data(ticker)
    data_load_state.text('Loading data... done!')
except Exception as e:
    # Top-level boundary: report the failure to the user and end this run.
    st.error(f"Error loading data: {e}")
    st.stop()

if len(data) < min_rows:
    st.error(not_enough_data_msg)
    st.stop()

# Prepare Data
# `data['Close']` may be a Series (flat columns) or a one-column DataFrame
# (ticker-keyed columns). Normalise to a 1-D float array so later arithmetic
# never broadcasts an (n, 1) array against an (n,) one.
close_prices = np.asarray(data['Close'], dtype=float).reshape(len(data), -1)[:, 0]

# Rows without a close price would turn every downstream value into NaN.
has_price = ~np.isnan(close_prices)
if not has_price.all():
    data = data.loc[has_price].reset_index(drop=True)
    close_prices = close_prices[has_price]
    if len(data) < min_rows:
        st.error(not_enough_data_msg)
        st.stop()

dates = data['Date'].reset_index(drop=True)
df_close = close_prices.reshape(-1, 1)

# --- SIMULATION (BACKTESTING) ---
st.subheader("1. Simulation: Testing against Last Year")
st.write("Training model on past data to verify performance on the last 365 days...")

# Split data: train on everything BEFORE the hold-out, test on the hold-out.
training_len = len(df_close) - backtest_rows

# Fit the backtest scaler on the training rows only. Fitting it on the full
# series would leak the hold-out's min/max into training and flatter the result.
backtest_scaler = MinMaxScaler(feature_range=(0, 1))
train_data = backtest_scaler.fit_transform(df_close[:training_len])
# Prepend `look_back` rows so the first held-out day has a full input window.
test_data = backtest_scaler.transform(df_close[training_len - look_back:])

# Train Model
with st.spinner('Training Neural Network... (This may take a moment)'):
    model = train_lstm_model(train_data, look_back)

# Predict on the "Last Year" (Simulation): one window per held-out row.
x_test = np.array([
    test_data[i - look_back:i, 0] for i in range(look_back, len(test_data))
])
x_test = x_test.reshape((x_test.shape[0], look_back, 1))
predictions = model.predict(x_test, verbose=0)
predictions = backtest_scaler.inverse_transform(predictions)  # Scale back to normal price

# Collect the hold-out in its own frame instead of writing into a slice of `data`.
actual_prices = close_prices[training_len:]
predicted_prices = predictions.reshape(-1)
valid_set = pd.DataFrame({
    'Date': dates.iloc[training_len:].to_numpy(),
    'Close': actual_prices,
    'Predictions': predicted_prices,
})

# Calculate Accuracy (RMSE) on matching 1-D arrays.
rmse = float(np.sqrt(np.mean((predicted_prices - actual_prices) ** 2)))

# Calculate Directional Accuracy (Did it go up/down correctly?)
# np.diff yields n-1 changes, so the undefined "change" of the first row is
# not counted as a miss.
actual_direction = np.sign(np.diff(actual_prices))
predicted_direction = np.sign(np.diff(predicted_prices))
accuracy_score = float(np.mean(actual_direction == predicted_direction) * 100)

col1, col2 = st.columns(2)
col1.metric("Simulation RMSE (Price Error)", f"{rmse:.2f}")
col2.metric("Directional Accuracy", f"{accuracy_score:.2f}%")
if accuracy_score > 50:
    st.success(f"Model passed simulation with {accuracy_score:.1f}% directional accuracy.")
else:
    st.warning(f"Model accuracy is low ({accuracy_score:.1f}%). Stock markets are volatile!")

# Plot Simulation
fig_sim = go.Figure()
fig_sim.add_trace(go.Scatter(x=dates.iloc[:training_len], y=close_prices[:training_len], mode='lines', name='Training Data'))
fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Close'], mode='lines', name='Actual Price (Last Year)'))
fig_sim.add_trace(go.Scatter(x=valid_set['Date'], y=valid_set['Predictions'], mode='lines', name='AI Prediction (Simulation)', line=dict(dash='dot', color='orange')))
st.plotly_chart(fig_sim, use_container_width=True)

# --- FUTURE PREDICTION ---
st.markdown("---")
st.subheader(f"2. Future Forecast: {horizon_option}")

# For the real forecast all history is known, so scale and train on all of it.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df_close)

# Retrain model on ALL data for best future prediction
with st.spinner('Refining model with full data for future prediction...'):
    full_model = train_lstm_model(scaled_data, look_back)

# Predict Future Steps
# Start from the last `look_back` known rows and roll forward one step at a
# time, feeding each prediction back in as the newest observation.
current_batch = scaled_data[-look_back:].reshape((1, look_back, 1))
future_scaled = []
for _ in range(forecast_days):
    # verbose=0: this runs up to 365 times and would otherwise log a progress
    # bar on every call.
    current_pred = full_model.predict(current_batch, verbose=0)[0]
    future_scaled.append(current_pred)
    # Update batch to include new prediction, remove oldest day
    current_batch = np.append(current_batch[:, 1:, :], current_pred.reshape((1, 1, 1)), axis=1)

# Inverse transform to get real prices
future_predictions = scaler.inverse_transform(np.array(future_scaled))

# Create Future Dates
# Each forecast step corresponds to one data row. If the history has no weekend
# rows, step by business day so predictions are not plotted on Saturdays and
# Sundays; assets with weekend rows (e.g. crypto) keep calendar days.
# Exchange holidays are not modelled.
last_date = dates.iloc[-1]
has_weekend_rows = bool((dates.dt.dayofweek >= 5).any())
date_step = 'D' if has_weekend_rows else 'B'
# The range starts at the last known date; drop it to keep only future dates.
future_dates = pd.date_range(start=last_date, periods=forecast_days + 1, freq=date_step)[1:]

# Plot Future
fig_future = go.Figure()
# Show last 365 rows of context
fig_future.add_trace(go.Scatter(x=dates.iloc[-backtest_rows:], y=close_prices[-backtest_rows:], mode='lines', name='Historical Close (Last Year)'))
fig_future.add_trace(go.Scatter(x=future_dates, y=future_predictions.flatten(), mode='lines', name='AI Future Prediction', line=dict(dash='dot', color='green', width=3)))
fig_future.update_layout(title=f"Prediction for next {forecast_days} days")
st.plotly_chart(fig_future, use_container_width=True)
st.write("Note: Long-term predictions (Year) usually revert to a trend line as error accumulates. Short-term (Day/Week) is generally more reliable.")