"""Streamlit dashboard: next-day UP/DOWN stock prediction from technical indicators."""

import pickle
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import yfinance as yf
from curl_cffi import requests

# HTTP session created with impersonate="chrome"; handed to yfinance downloads.
session = requests.Session(impersonate="chrome")
warnings.filterwarnings('ignore')

# Page config
st.set_page_config(
    page_title="Stock Price Prediction App",
    page_icon="📈",
    layout="wide"
)

# Title and description
st.title("📈 Stock Price Prediction App")
st.markdown(
    "This app uses a trained Logistic Regression model to predict whether "
    "a stock will go **UP** ⬆️ or **DOWN** ⬇️ the next day."
)

# Sidebar for user inputs
st.sidebar.header("🔧 Configuration")

# Stock symbols from your model
STOCK_SYMBOLS = [
    'ADANIENT.NS', 'ADANIPORTS.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS',
    'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS',
    'BEL.NS', 'BHARTIARTL.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DRREDDY.NS',
    'EICHERMOT.NS', 'GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS',
    'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS', 'ICICIBANK.NS',
    'INDUSINDBK.NS', 'INFY.NS', 'ITC.NS', 'JIOFIN.NS', 'JSWSTEEL.NS',
    'KOTAKBANK.NS', 'LT.NS', 'M&M.NS', 'MARUTI.NS', 'NESTLEIND.NS',
    'NTPC.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS',
    'SHRIRAMFIN.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TATACONSUM.NS', 'TCS.NS',
    'TATAMOTORS.NS', 'TATASTEEL.NS', 'TECHM.NS', 'TITAN.NS', 'TRENT.NS',
    'ULTRACEMCO.NS', 'WIPRO.NS', 'ETERNAL.NS',
]

# Symbol pre-selected in the sidebar; looked up by name so that editing
# STOCK_SYMBOLS cannot silently change the default.
DEFAULT_SYMBOL = 'RELIANCE.NS'

# User inputs
selected_stock = st.sidebar.selectbox(
    "Select Stock Symbol",
    STOCK_SYMBOLS,
    index=STOCK_SYMBOLS.index(DEFAULT_SYMBOL),
)
start_date = st.sidebar.date_input("Start Date", value=datetime(2020, 1, 1))
end_date = st.sidebar.date_input("End Date", value=datetime.now())
# NOTE(review): the button's return value is not read by the code below it in
# this script; the analysis runs on every rerun regardless.
prediction_mode = st.sidebar.button("Start Analysis")
rsi_period = st.sidebar.slider("RSI Period", min_value=5, max_value=30, value=14, step=1)
short_period = st.sidebar.slider("Short-term", min_value=5, max_value=50, value=20, step=1)
long_period = st.sidebar.slider("Long-term", min_value=50, max_value=200, value=50, step=1)


# Helper functions (same as in your original code)
def SMA(series, period):
    """Return the simple moving average of `series` over `period` rows."""
    return series.rolling(window=period).mean()


def EMA(series, period):
    """Return the exponential moving average of `series` with span `period`."""
    return series.ewm(span=period, adjust=False).mean()


def MACD(series, fast=12, slow=26, signal=9):
    """Return `(macd, macd_signal, macd_hist)` for `series`.

    `macd` is EMA(fast) - EMA(slow), `macd_signal` is the EMA of `macd` over
    `signal` rows, and `macd_hist` is their difference.
    """
    ema_fast = EMA(series, fast)
    ema_slow = EMA(series, slow)
    macd = ema_fast - ema_slow
    macd_signal = EMA(macd, signal)
    macd_hist = macd - macd_signal
    return macd, macd_signal, macd_hist


def RSI(series, period=14):
    """Return the relative strength index of `series`.

    Gains and losses are smoothed with an exponentially weighted mean using
    alpha = 1 / period; the first `period` values are NaN (min_periods).
    """
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).ewm(alpha=1 / period, min_periods=period).mean()
    loss = (-delta.where(delta < 0, 0)).ewm(alpha=1 / period, min_periods=period).mean()
    RS = gain / loss
    return 100 - (100 / (1 + RS))


def create_volatility_features(df):
    """Add rolling-volatility columns to `df` in place and return it.

    Creates `return_1d` when missing, then the rolling standard deviation of
    daily returns over 5/10/20/30 rows, two volatility ratios, and the
    percentile rank of 5-day volatility within 20- and 50-row windows.
    """
    if 'return_1d' not in df.columns:
        df['return_1d'] = df['Close'].pct_change()

    for period in [5, 10, 20, 30]:
        df[f'volatility_{period}d'] = df['return_1d'].rolling(period).std()

    df['vol_ratio_5_20'] = df['volatility_5d'] / df['volatility_20d']
    df['vol_ratio_10_20'] = df['volatility_10d'] / df['volatility_20d']
    df['vol_rank_20'] = df['volatility_5d'].rolling(20).rank(pct=True)
    df['vol_rank_50'] = df['volatility_5d'].rolling(50).rank(pct=True)
    return df


def create_enhanced_lag_features(df):
    """Add lagged return / RSI / MACD / volume-ratio columns in place.

    RSI, MACD and volume-ratio lags are only added when their source column
    (`RSI14`, `MACD`, `volume_ratio_20`) already exists in `df`.
    """
    # Same guard as create_volatility_features, so this helper does not raise
    # KeyError when it is called before the volatility step.
    if 'return_1d' not in df.columns:
        df['return_1d'] = df['Close'].pct_change()

    for lag in [1, 2, 3, 5, 10]:
        df[f'return_lag_{lag}'] = df['return_1d'].shift(lag)

    for lag in [1, 2, 3]:
        if 'RSI14' in df.columns:
            df[f'rsi_lag_{lag}'] = df['RSI14'].shift(lag)
        if 'MACD' in df.columns:
            df[f'macd_lag_{lag}'] = df['MACD'].shift(lag)

    if 'volume_ratio_20' in df.columns:
        for lag in [1, 2]:
            df[f'volume_ratio_lag_{lag}'] = df['volume_ratio_20'].shift(lag)
    return df


def create_volume_features(df):
    """Add volume averages, volume ratios and price*volume columns in place."""
    df['volume_sma_10'] = df['Volume'].rolling(10).mean()
    df['volume_sma_20'] = df['Volume'].rolling(20).mean()
    df['volume_sma_50'] = df['Volume'].rolling(50).mean()
    df['volume_ratio_10'] = df['Volume'] / df['volume_sma_10']
    df['volume_ratio_20'] = df['Volume'] / df['volume_sma_20']
    df['volume_ratio_50'] = df['Volume'] / df['volume_sma_50']
    df['price_volume'] = df['Close'] * df['Volume']
    df['pv_sma_5'] = df['price_volume'].rolling(5).mean()
    df['volume_momentum_5'] = df['Volume'] / df['Volume'].shift(5)
    return df


def create_momentum_features(df):
    """Add price momentum and rate-of-change columns in place."""
    for period in [3, 5, 10, 20]:
        df[f'momentum_{period}d'] = df['Close'] / df['Close'].shift(period) - 1

    for period in [5, 10]:
        earlier_close = df['Close'].shift(period)
        df[f'roc_{period}d'] = (df['Close'] - earlier_close) / earlier_close
    return df


def create_position_features(df):
    """Add rolling high/low, price-position and Bollinger-band columns in place.

    The Bollinger columns are only added when `SMA20` already exists; the band
    width is two rolling-20 standard deviations of `Close` around `SMA20`.
    """
    for period in [10, 20, 50]:
        high_col = f'high_{period}d'
        low_col = f'low_{period}d'
        df[high_col] = df['High'].rolling(period).max()
        df[low_col] = df['Low'].rolling(period).min()
        df[f'price_position_{period}'] = (
            (df['Close'] - df[low_col]) / (df[high_col] - df[low_col])
        )

    if 'SMA20' in df.columns:
        bb_std = df['Close'].rolling(20).std()
        df['bb_upper'] = df['SMA20'] + (bb_std * 2)
        df['bb_lower'] = df['SMA20'] - (bb_std * 2)
        df['bb_position'] = (
            (df['Close'] - df['bb_lower']) / (df['bb_upper'] - df['bb_lower'])
        )
    return df


def process_stock_data(df):
    """Process stock data to create all features.

    Works on a copy of `df` (expects `Close`, `High`, `Low`, `Volume`
    columns) and returns it with indicator, feature and target columns added.
    The moving-average and RSI windows come from the sidebar sliders
    (`short_period`, `long_period`, `rsi_period`) even though the column
    names are fixed (`SMA20`, `SMA50`, `RSI14`, ...).
    """
    df = df.copy()

    # Basic technical indicators
    df['SMA20'] = SMA(df['Close'], short_period)
    df['SMA50'] = SMA(df['Close'], long_period)
    df['EMA20'] = EMA(df['Close'], short_period)
    df['EMA50'] = EMA(df['Close'], long_period)
    df['RSI14'] = RSI(df['Close'], rsi_period)
    df['RSI20'] = RSI(df['Close'], rsi_period + 6)  # Example for another RSI period
    df['MACD'], df['MACD_signal'], df['MACD_hist'] = MACD(df['Close'])

    # Create feature sets
    df = create_volatility_features(df)
    df = create_enhanced_lag_features(df)
    df = create_volume_features(df)
    df = create_momentum_features(df)
    df = create_position_features(df)

    # Ratio features divide by values that can be zero (e.g. a zero-volume
    # day, or a flat high/low range), which yields +/-inf. inf is not removed
    # by dropna(), so turn it into NaN to let callers filter those rows.
    df = df.replace([np.inf, -np.inf], np.nan)

    # Additional features
    df['SMA_crossover'] = (df['SMA20'] > df['SMA50']).astype(int)
    df['RSI_oversold'] = (df['RSI14'] < 30).astype(int)

    # Target: next-day up/down. The final row has no next close (NaN), so the
    # comparison is False there and its target is 0, not a real label.
    df['next_close'] = df['Close'].shift(-1)
    df['target'] = (df['next_close'] > df['Close']).astype(int)
    return df
@st.cache_data
def _download_stock_data(symbol, start_date, end_date):
    """Download price history for `symbol` and return it with flat columns.

    Raises instead of returning an empty result so that st.cache_data only
    stores successful downloads; a cached failure would otherwise be served
    for the same inputs on every later rerun.
    """
    data = yf.download(symbol, start=start_date, end=end_date, session=session)
    if data is None or data.empty:
        raise ValueError(f"no price data returned for {symbol}")

    # Flatten the MultiIndex columns. Guarded because indexing col[0] on plain
    # string labels would truncate every column name to its first character.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = [col[0] for col in data.columns]
    return data


def load_stock_data(symbol, start_date, end_date):
    """Load stock data from Yahoo Finance.

    Returns the downloaded DataFrame, or None after showing an error in the
    page when the download fails or yields no rows.
    """
    try:
        return _download_stock_data(symbol, start_date, end_date)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"Error loading data: {e}")
        return None


# Feature list (same as in your model)
FEATURES = [
    'Close', 'Volume', 'SMA20', 'SMA50', 'EMA20', 'EMA50', 'RSI14', 'MACD',
    'MACD_signal', 'MACD_hist', 'SMA_crossover', 'RSI_oversold', 'return_1d',
    'volatility_5d', 'volatility_10d', 'volatility_20d', 'volatility_30d',
    'vol_ratio_5_20', 'vol_ratio_10_20', 'vol_rank_20', 'vol_rank_50',
    'return_lag_1', 'return_lag_2', 'return_lag_3', 'return_lag_5',
    'return_lag_10', 'rsi_lag_1', 'macd_lag_1', 'rsi_lag_2', 'macd_lag_2',
    'rsi_lag_3', 'macd_lag_3', 'volume_sma_10', 'volume_sma_20',
    'volume_sma_50', 'volume_ratio_10', 'volume_ratio_20', 'volume_ratio_50',
    'price_volume', 'pv_sma_5', 'volume_momentum_5', 'momentum_3d',
    'momentum_5d', 'momentum_10d', 'momentum_20d', 'roc_5d', 'roc_10d',
    'high_10d', 'low_10d', 'price_position_10', 'high_20d', 'low_20d',
    'price_position_20', 'high_50d', 'low_50d', 'price_position_50',
    'bb_upper', 'bb_lower', 'bb_position', 'target'
]

# Columns actually fed to the model. 'target' is the label (it is derived from
# the NEXT day's close), so it cannot be an input at prediction time; removing
# it leaves the 59 features quoted in the sidebar.
MODEL_FEATURES = [name for name in FEATURES if name != 'target']


def _recent_line(frame, column, days, name, **line_style):
    """Return a line trace of the last `days` rows of `frame[column]`."""
    window = frame[column].iloc[-days:]
    return go.Scatter(
        x=window.index, y=window, mode='lines', name=name, line=line_style
    )


def _render_prediction(latest_data):
    """Load the model artifacts, predict for `latest_data` and draw the result.

    Shows an error and returns without predicting when a pickle is missing.
    """
    # SECURITY: unpickling executes arbitrary code; both files must come from
    # a trusted source (your own training run).
    try:
        with open('logistic_regression_model.pkl', 'rb') as model_file:
            model = pickle.load(model_file)
        with open('scaler.pkl', 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)
    except FileNotFoundError as exc:
        st.error(f"Model file not found: {exc.filename}. Prediction is unavailable.")
        return

    # Create feature vector (one row) and scale it the same way as in training
    feature_vector = latest_data[MODEL_FEATURES].to_numpy(dtype=float).reshape(1, -1)
    feature_vector_scaled = scaler.transform(feature_vector)

    # Make prediction; confidence is the probability of the predicted class
    prediction = model.predict(feature_vector_scaled)[0]
    probability = model.predict_proba(feature_vector_scaled)[0].max()

    # Display prediction
    st.header("🔮 Prediction")
    col1, col2 = st.columns(2)

    with col1:
        if prediction == 1:
            st.success("📈 **PREDICTION: UP**")
            st.write(f"The model predicts the stock will go **UP** tomorrow with {probability:.1%} confidence.")
        else:
            st.error("📉 **PREDICTION: DOWN**")
            st.write(f"The model predicts the stock will go **DOWN** tomorrow with {probability:.1%} confidence.")

    with col2:
        # Confidence gauge
        fig_gauge = go.Figure(go.Indicator(
            mode="gauge+number",
            value=probability * 100,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': "Confidence %"},
            gauge={
                'axis': {'range': [None, 100]},
                'bar': {'color': "darkgreen" if prediction == 1 else "darkred"},
                'steps': [
                    {'range': [0, 50], 'color': "lightgray"},
                    {'range': [50, 80], 'color': "yellow"},
                    {'range': [80, 100], 'color': "lightgreen"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 90
                }
            }
        ))
        fig_gauge.update_layout(height=300)
        st.plotly_chart(fig_gauge, use_container_width=True)


# Main app logic
st.header(f"📊 Latest Data Prediction for {selected_stock}")

with st.spinner("Loading stock data..."):
    stock_data = load_stock_data(selected_stock, start_date, end_date)

if stock_data is not None and not stock_data.empty:
    # Process the data. Only the model inputs are required to be present:
    # the newest row never has 'next_close', so an unrestricted dropna()
    # would always discard the most recent trading day.
    processed_data = process_stock_data(stock_data)
    processed_data = processed_data.dropna(subset=MODEL_FEATURES)

    # Two rows are needed: the latest one plus the previous close for the
    # daily-change metric.
    if len(processed_data) >= 2:
        # Get the latest row for prediction
        latest_data = processed_data.iloc[-1]
        previous_close = processed_data.iloc[-2]['Close']

        # Display current stock info
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Current Price", f"₹{latest_data['Close']:.2f}")
        with col2:
            daily_change = ((latest_data['Close'] - previous_close) / previous_close) * 100
            st.metric("Daily Change", f"{daily_change:.2f}%")
        with col3:
            st.metric("Volume", f"{latest_data['Volume']:,.0f}")
        with col4:
            st.metric("RSI14", f"{latest_data['RSI14']:.2f}")

        _render_prediction(latest_data)

        # Technical indicators chart
        st.header("📈 Technical Analysis")

        # Price and Simple moving averages
        fig_price = go.Figure()
        fig_price.add_trace(_recent_line(processed_data, 'Close', 60, 'Close Price', color='blue', width=2))
        fig_price.add_trace(_recent_line(processed_data, 'SMA20', 60, 'SMA20', color='orange', width=1))
        fig_price.add_trace(_recent_line(processed_data, 'SMA50', 60, 'SMA50', color='red', width=1))
        fig_price.update_layout(
            title=f"{selected_stock} - Price and Simple Moving Averages (Last 60 Days)",
            xaxis_title="Date",
            yaxis_title="Price (₹)",
            height=400
        )
        st.plotly_chart(fig_price, use_container_width=True)

        # Price and Exponential moving averages (30-row window, so the title
        # says 30 rather than the 60 copied from the chart above)
        fig_price = go.Figure()
        fig_price.add_trace(_recent_line(processed_data, 'Close', 30, 'Close Price', color='blue', width=2))
        fig_price.add_trace(_recent_line(processed_data, 'EMA20', 30, 'EMA20', color='orange', width=1))
        fig_price.add_trace(_recent_line(processed_data, 'EMA50', 30, 'EMA50', color='red', width=1))
        fig_price.update_layout(
            title=f"{selected_stock} - Price and Exponential Moving Averages (Last 30 Days)",
            xaxis_title="Date",
            yaxis_title="Price (₹)",
            height=400
        )
        st.plotly_chart(fig_price, use_container_width=True)

        # RSI chart
        col1, col2 = st.columns(2)

        with col1:
            fig_rsi = go.Figure()
            fig_rsi.add_trace(_recent_line(processed_data, 'RSI14', 30, 'RSI14', color='purple'))
            fig_rsi.add_hline(y=70, line_dash="dash", line_color="red", annotation_text="Overbought")
            fig_rsi.add_hline(y=30, line_dash="dash", line_color="green", annotation_text="Oversold")
            fig_rsi.update_layout(
                title="RSI (14-day)",
                xaxis_title="Date",
                yaxis_title="RSI",
                height=300
            )
            st.plotly_chart(fig_rsi, use_container_width=True)

        with col2:
            # MACD chart
            fig_macd = go.Figure()
            fig_macd.add_trace(_recent_line(processed_data, 'MACD', 30, 'MACD', color='blue'))
            fig_macd.add_trace(_recent_line(processed_data, 'MACD_signal', 30, 'Signal', color='red'))
            fig_macd.update_layout(
                title="MACD",
                xaxis_title="Date",
                yaxis_title="MACD",
                height=300
            )
            st.plotly_chart(fig_macd, use_container_width=True)

        # Feature importance (mock data for demo: these numbers are
        # hard-coded, not read from the loaded model)
        st.header("🎯 Key Factors")
        st.write("Most important features affecting the prediction:")

        mock_features = ['RSI14', 'return_lag_1', 'volatility_5d', 'MACD', 'volume_ratio_20']
        mock_importance = [0.15, 0.12, 0.10, 0.08, 0.07]

        fig_importance = px.bar(
            x=mock_importance,
            y=mock_features,
            orientation='h',
            title="Feature Importance"
        )
        fig_importance.update_layout(height=300)
        st.plotly_chart(fig_importance, use_container_width=True)

    else:
        st.error("Not enough data to make a prediction. Please try a different stock or date range.")
else:
    st.error("Unable to load stock data. Please check the symbol and try again.")

# Sidebar information
st.sidebar.markdown("---")
st.sidebar.header("ℹ️ About")
st.sidebar.write("""
This app uses a Logistic Regression model trained on:
- **50 Indian stocks** from NSE
- **59 technical features** including RSI, MACD, moving averages, volatility measures, and lag features
- **Historical data** for pattern recognition

**Disclaimer**: This is for educational purposes only. Always do your own research before making investment decisions.
""")

st.sidebar.markdown("---")
st.sidebar.write("**Model Performance:**")
st.sidebar.write("• Accuracy: 55%")
st.sidebar.write("• F1 Score: 0.4839")
st.sidebar.write("• AUC: 0.5370")
st.sidebar.write("Average Precision (AP): 0.5300")

# Footer
st.markdown("---")
st.markdown(
    "**⚠️ Disclaimer**: This prediction model is for research purposes only. "
    "Stock market investments are subject to market risks. "
    "Please consult with a financial advisor before making investment decisions."
)