import os
import re
from datetime import datetime, timedelta
from typing import Tuple

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Define stock tickers
STOCK_TICKERS = [
    "AAPL",   # Apple
    "GOOGL",  # Alphabet
    "MSFT",   # Microsoft
    "AMZN",   # Amazon
    "TSLA",   # Tesla
    "META",   # Meta Platforms
    "NVDA",   # NVIDIA
    "JPM",    # JPMorgan Chase
    "V",      # Visa
    "NFLX",   # Netflix
]

# Number of previous closing prices used as model features. Shared by
# preprocess_data() and make_prediction() so training and inference always
# build the feature vector the same way.
N_LAGS = 5

# Strict YYYY-MM-DD shape; strptime alone would also accept e.g. "2020-1-1".
_DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2}")


def fetch_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Fetches historical stock data from Yahoo Finance.

    Parameters:
    - ticker (str): Stock ticker symbol.
    - start_date (str): Start date in 'YYYY-MM-DD' format.
    - end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
    - pd.DataFrame: DataFrame containing stock data (empty if nothing was found).
    """
    stock = yf.Ticker(ticker)
    return stock.history(start=start_date, end=end_date)


def preprocess_data(data: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
    """
    Builds the lagged-close feature matrix and target vector for the regressor.

    For every day t with at least N_LAGS earlier closes, the feature row is
    [Close[t-1], Close[t-2], ..., Close[t-N_LAGS]] and the target is Close[t].
    This is exactly the layout make_prediction() feeds the model (most recent
    close first), so the model's output there is the close of the *next* day.

    The input DataFrame is not modified.

    Parameters:
    - data (pd.DataFrame): DataFrame containing a 'Close' column.

    Returns:
    - X (np.ndarray): Feature array of shape (n_samples, N_LAGS).
    - y (np.ndarray): Target array of shape (n_samples,).
    """
    close = data["Close"]
    feature_cols = [f"Close_{i}" for i in range(1, N_LAGS + 1)]

    # Work on a fresh frame so the caller's DataFrame keeps its columns/rows.
    lagged = pd.DataFrame(
        {f"Close_{i}": close.shift(i) for i in range(1, N_LAGS + 1)}
    )
    lagged["Target"] = close
    # The first N_LAGS rows lack a full history and are dropped here.
    lagged = lagged.dropna()

    X = lagged[feature_cols].to_numpy()
    y = lagged["Target"].to_numpy()
    return X, y


def train_model(X: np.ndarray, y: np.ndarray) -> RandomForestRegressor:
    """
    Trains the Random Forest Regressor model.

    The data is split chronologically (no shuffling) into 80% training and
    20% test; the test-set mean squared error is printed.

    Parameters:
    - X (np.ndarray): Feature array.
    - y (np.ndarray): Target array.

    Returns:
    - model (RandomForestRegressor): Trained Random Forest model.

    Raises:
    - ValueError: If there are too few samples to split or fit
      (raised by scikit-learn).
    """
    # shuffle=False keeps the time order so the test set is the most recent data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    print(f"Model Mean Squared Error: {mse}")

    return model


def make_prediction(model: RandomForestRegressor, recent_data: pd.DataFrame) -> float:
    """
    Makes a prediction for the next day's closing price.

    Parameters:
    - model (RandomForestRegressor): Trained Random Forest model.
    - recent_data (pd.DataFrame): Recent stock data with a 'Close' column.

    Returns:
    - predicted_price (float): Predicted closing price.

    Raises:
    - ValueError: If fewer than N_LAGS closing prices are available.
    """
    recent_close = recent_data["Close"].to_numpy()[-N_LAGS:]
    if len(recent_close) < N_LAGS:
        raise ValueError("Not enough data to make a prediction.")

    # Reverse so the most recent close comes first, matching Close_1..Close_N.
    X_new = recent_close[::-1].reshape(1, -1)
    return float(model.predict(X_new)[0])


def buy_or_sell(current_price: float, predicted_price: float) -> str:
    """
    Determines whether to buy or sell based on price prediction.

    Parameters:
    - current_price (float): Current closing price.
    - predicted_price (float): Predicted closing price.

    Returns:
    - decision (str): 'Buy' if predicted price is higher, else 'Sell'.
    """
    return "Buy" if predicted_price > current_price else "Sell"


def validate_date_format(date_text: str) -> bool:
    """
    Validates that the input string is a date in 'YYYY-MM-DD' format.

    Parameters:
    - date_text (str): Date string to validate.

    Returns:
    - bool: True if valid, False otherwise (including non-string input).
    """
    if not isinstance(date_text, str) or not _DATE_PATTERN.fullmatch(date_text):
        return False
    try:
        # Rejects well-shaped but impossible dates such as 2023-02-30.
        datetime.strptime(date_text, "%Y-%m-%d")
    except ValueError:
        return False
    return True


def _error_result(message, highest=None, lowest=None, decision="Error"):
    """Builds the 5-tuple returned to the UI when no plot can be produced."""
    # The two price outputs are gr.Number components, so "not available" is
    # expressed as None rather than a placeholder string.
    return message, highest, lowest, decision, None


def _next_business_day(last_date):
    """Returns the first Monday-Friday date strictly after last_date."""
    candidate = last_date + timedelta(days=1)
    while candidate.weekday() >= 5:  # Saturday=5, Sunday=6
        candidate += timedelta(days=1)
    return candidate


def _build_plot(ticker: str, data: pd.DataFrame, predicted_price: float):
    """Plots historical closes plus the predicted next close; returns the figure."""
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(data.index, data["Close"], label="Historical Close Price")
    ax.scatter(
        _next_business_day(data.index[-1]),
        predicted_price,
        color="red",
        label="Predicted Close Price (Tomorrow)",
    )
    ax.set_title(f"{ticker} Price Prediction for Tomorrow")
    ax.set_xlabel("Date")
    ax.set_ylabel("Price ($)")
    ax.legend()
    fig.tight_layout()
    # Deregister from pyplot so figures do not accumulate across requests;
    # the Figure object itself remains usable for rendering.
    plt.close(fig)
    return fig


def stock_prediction_app(ticker: str, start_date: str, end_date: str):
    """
    Main function to handle stock prediction and return outputs.

    Parameters:
    - ticker (str): Selected stock ticker.
    - start_date (str): Training start date in 'YYYY-MM-DD' format.
    - end_date (str): Training end date in 'YYYY-MM-DD' format.

    Returns:
    - percentage_change (str): Percentage change from start to end date,
      or an error message when something went wrong.
    - highest_price (float | None): Highest closing price in the period.
    - lowest_price (float | None): Lowest closing price in the period.
    - decision (str): 'Buy' / 'Sell', or a status such as 'Error',
      'No Data Available' or 'No Prediction'.
    - plot (matplotlib.figure.Figure | None): Plot of historical prices with
      tomorrow's prediction.
    """
    if not ticker:
        return _error_result("Please select a stock ticker.")

    # validate_date_format guarantees the strptime calls below succeed.
    if not (validate_date_format(start_date) and validate_date_format(end_date)):
        return _error_result("Invalid date format. Please use YYYY-MM-DD.")

    start_dt = datetime.strptime(start_date, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    if start_dt >= end_dt:
        return _error_result("Start date must be before end date.")

    # UI boundary: report download failures instead of crashing the request.
    try:
        data = fetch_stock_data(ticker, start_date, end_date)
    except Exception as e:
        return _error_result(f"Error fetching data: {e}")

    if data.empty:
        return _error_result("N/A", decision="No Data Available")

    # Summary statistics over the requested period.
    close = data["Close"]
    start_price = close.iloc[0]
    current_price = close.iloc[-1]  # the last close doubles as "current" price
    percentage_change_str = (
        f"{((current_price - start_price) / start_price) * 100:.2f}%"
    )
    highest_price = float(close.max())
    lowest_price = float(close.min())

    try:
        X, y = preprocess_data(data)
    except Exception as e:
        return _error_result(f"Error in preprocessing data: {e}")

    if len(X) == 0:
        return _error_result(
            percentage_change_str, highest_price, lowest_price, "No Prediction"
        )

    try:
        model = train_model(X, y)
    except Exception as e:
        return _error_result(
            f"Error in training model: {e}", highest_price, lowest_price
        )

    try:
        predicted_price = make_prediction(model, data)
    except Exception as e:
        return _error_result(
            f"Error in making prediction: {e}", highest_price, lowest_price
        )

    decision = buy_or_sell(current_price, predicted_price)
    fig = _build_plot(ticker, data, predicted_price)

    return percentage_change_str, highest_price, lowest_price, decision, fig


# Define the Gradio interface
iface = gr.Interface(
    fn=stock_prediction_app,
    inputs=[
        gr.Dropdown(choices=STOCK_TICKERS, label="Select Stock Ticker"),
        gr.Textbox(label="Enter Start Date (YYYY-MM-DD)", placeholder="e.g., 2020-01-01"),
        gr.Textbox(label="Enter End Date (YYYY-MM-DD)", placeholder="e.g., 2023-12-31"),
    ],
    outputs=[
        gr.Textbox(label="Percentage Change"),
        gr.Number(label="Highest Closing Price"),
        gr.Number(label="Lowest Closing Price"),
        gr.Textbox(label="Decision (Buy/Sell)"),
        gr.Plot(label="Stock Performance"),
    ],
    title="Stock Prediction App",
    description="Predict whether to buy or sell a stock based on historical data. Please enter dates in YYYY-MM-DD format.",
)

# Launch the interface
iface.launch()