# Spaces: geethareddy / ggg (status: Sleeping)
# File: ggg
# File size: 8,694 Bytes

import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import gradio as gr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import joblib
import os
import re

# Ticker symbols offered by the app; the trailing comment names each company
STOCK_TICKERS = [
    "AAPL",   # Apple
    "GOOGL",  # Alphabet
    "MSFT",   # Microsoft
    "AMZN",   # Amazon
    "TSLA",   # Tesla
    "META",   # Meta Platforms
    "NVDA",   # NVIDIA
    "JPM",    # JPMorgan Chase
    "V",      # Visa
    "NFLX",   # Netflix
]

def fetch_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Retrieves the price history of a single ticker from Yahoo Finance.

    Parameters:
    - ticker (str): Stock ticker symbol.
    - start_date (str): Start date in 'YYYY-MM-DD' format.
    - end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
    - pd.DataFrame: The frame returned by `yf.Ticker(ticker).history(...)`
      for the requested range.
    """
    # Both date strings are forwarded to yfinance unchanged
    return yf.Ticker(ticker).history(start=start_date, end=end_date)

def preprocess_data(data: pd.DataFrame) -> (np.ndarray, np.ndarray):
    """
    Builds the supervised-learning arrays for the Random Forest Regressor.

    Each sample describes one trading day `t`:
    - features `Close_1 .. Close_5` are the five most recent closes *including*
      day `t` itself (`Close_1` = close of day `t`, `Close_5` = close of day `t-4`),
    - the target is the close of day `t+1`.

    This is the layout `make_prediction` feeds the model (latest close first),
    so the trained model really predicts the *next* day's close.

    The input frame is left untouched; all work happens on a copy that only
    contains the 'Close' column, so NaNs in unrelated columns do not discard
    otherwise usable rows.

    Parameters:
    - data (pd.DataFrame): DataFrame containing stock data with a 'Close' column.

    Returns:
    - X (np.ndarray): Feature array of shape (n_samples, 5).
    - y (np.ndarray): Target array of shape (n_samples,).
      Both are empty when `data` has fewer than 6 rows.
    """
    n_lags = 5

    # Work on a private copy so the caller's DataFrame gains no extra columns
    frame = data[['Close']].copy()

    # Next day's close is what we want to predict
    frame['Target'] = frame['Close'].shift(-1)

    # Close_1 is the current day's close (shift 0), Close_2 the day before, ...
    feature_cols = []
    for lag in range(n_lags):
        col = f'Close_{lag + 1}'
        frame[col] = frame['Close'].shift(lag)
        feature_cols.append(col)

    # Drops the first n_lags - 1 rows (incomplete window) and the last row (no target)
    frame = frame.dropna()

    X = frame[feature_cols].values
    y = frame['Target'].values

    return X, y

def train_model(X: np.ndarray, y: np.ndarray) -> RandomForestRegressor:
    """
    Fits a Random Forest Regressor and reports its hold-out error.

    Parameters:
    - X (np.ndarray): Feature array.
    - y (np.ndarray): Target array.

    Returns:
    - model (RandomForestRegressor): Forest fitted on the training part of the split.
    """
    # 80/20 split without shuffling, so the rows keep their original order
    features_fit, features_eval, target_fit, target_eval = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Fixed seed so repeated runs on the same data build the same forest
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(features_fit, target_fit)

    # Report the error on the held-out rows
    holdout_mse = mean_squared_error(target_eval, forest.predict(features_eval))
    print(f"Model Mean Squared Error: {holdout_mse}")

    return forest

def make_prediction(model: RandomForestRegressor, recent_data: pd.DataFrame) -> float:
    """
    Predicts the next closing price from the five latest closes.

    Parameters:
    - model (RandomForestRegressor): Trained Random Forest model.
    - recent_data (pd.DataFrame): Recent stock data with a 'Close' column.

    Returns:
    - predicted_price (float): Predicted closing price.

    Raises:
    - ValueError: If fewer than five closing prices are available.
    """
    closes = recent_data['Close'].values
    if len(closes) < 5:
        raise ValueError("Not enough data to make a prediction.")

    # Latest close first, shaped as a single sample for `predict`
    newest_first = closes[-5:][::-1]
    sample = newest_first.reshape(1, -1)
    return model.predict(sample)[0]

def buy_or_sell(current_price: float, predicted_price: float) -> str:
    """
    Turns a price forecast into a trading decision.

    Parameters:
    - current_price (float): Current closing price.
    - predicted_price (float): Predicted closing price.

    Returns:
    - decision (str): 'Buy' only when the predicted price is strictly higher
      than the current price; 'Sell' in every other case (including a tie).
    """
    return "Buy" if predicted_price > current_price else "Sell"

def validate_date_format(date_text: str) -> bool:
    """
    Checks whether a string is a real calendar date written as 'YYYY-MM-DD'.

    Parameters:
    - date_text (str): Date string to validate.

    Returns:
    - bool: True if the string has the expected shape and parses as a date,
      False otherwise.
    """
    # Shape check first: four digits, dash, two digits, dash, two digits
    if re.match(r'^\d{4}-\d{2}-\d{2}$', date_text) is None:
        return False

    # Shape is right; let strptime reject impossible dates such as month 13
    try:
        datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        return False
    return True

def _plot_prediction(ticker: str, data: pd.DataFrame, predicted_price: float):
    """Draws the historical closes plus the predicted next close and returns the figure."""
    # Explicit figure/axes objects: nothing is drawn through pyplot's implicit
    # "current figure", which is shared global state inside a web server.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(data.index, data['Close'], label='Historical Close Price')

    # Place the prediction on the day after the last data point,
    # skipping Saturday (5) and Sunday (6)
    tomorrow_date = data.index[-1] + timedelta(days=1)
    while tomorrow_date.weekday() >= 5:
        tomorrow_date += timedelta(days=1)

    ax.scatter(tomorrow_date, predicted_price, color='red', label='Predicted Close Price (Tomorrow)')
    ax.set_title(f'{ticker} Price Prediction for Tomorrow')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price ($)')
    ax.legend()
    fig.tight_layout()

    # Unregister the figure from pyplot; the Figure object itself stays usable
    plt.close(fig)
    return fig


def stock_prediction_app(ticker: str, start_date: str, end_date: str):
    """
    Main function to handle stock prediction and return outputs.

    Parameters:
    - ticker (str): Selected stock ticker.
    - start_date (str): Training start date in 'YYYY-MM-DD' format.
    - end_date (str): Training end date in 'YYYY-MM-DD' format.

    Returns a 5-tuple:
    - percentage_change (str): Percentage change from start to end date,
      or an error message when something went wrong.
    - highest_price (float | None): Highest closing price in the period,
      None when it could not be computed.
    - lowest_price (float | None): Lowest closing price in the period,
      None when it could not be computed.
    - decision (str): 'Buy' / 'Sell', or a status such as 'Error',
      'No Data Available' or 'No Prediction'.
    - plot (matplotlib.figure.Figure | None): Plot of historical prices with
      tomorrow's prediction, None when no prediction was made.
    """
    # The numeric outputs are bound to number widgets, so failures report None
    # (an empty field) there instead of a non-numeric placeholder string.
    if not ticker:
        return "Please select a stock ticker.", None, None, "Error", None

    # Tolerate missing values and stray whitespace around the dates
    start_date = (start_date or "").strip()
    end_date = (end_date or "").strip()

    # Validate date formats
    if not (validate_date_format(start_date) and validate_date_format(end_date)):
        return "Invalid date format. Please use YYYY-MM-DD.", None, None, "Error", None

    # Validation already proved both strings parse, so this cannot raise
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')

    if start_dt >= end_dt:
        return "Start date must be before end date.", None, None, "Error", None

    # Fetch data; network/provider failures are reported like every other stage
    try:
        data = fetch_stock_data(ticker, start_date, end_date)
    except Exception as e:
        return f"Error in fetching data: {e}", None, None, "Error", None

    if data.empty:
        return "N/A", None, None, "No Data Available", None

    # Calculate percentage change, highest and lowest
    start_price = data['Close'].iloc[0]
    end_price = data['Close'].iloc[-1]
    percentage_change = ((end_price - start_price) / start_price) * 100
    percentage_change_str = f"{percentage_change:.2f}%"
    highest_price = float(data['Close'].max())
    lowest_price = float(data['Close'].min())

    # Preprocess data
    try:
        X, y = preprocess_data(data)
    except Exception as e:
        return f"Error in preprocessing data: {e}", None, None, "Error", None

    # Too few rows to build even one training sample
    if len(X) == 0:
        return percentage_change_str, highest_price, lowest_price, "No Prediction", None

    # Train the model
    try:
        model = train_model(X, y)
    except Exception as e:
        return f"Error in training model: {e}", highest_price, lowest_price, "Error", None

    # Make prediction
    try:
        predicted_price = make_prediction(model, data)
    except Exception as e:
        return f"Error in making prediction: {e}", highest_price, lowest_price, "Error", None

    # Current price is the last closing price
    decision = buy_or_sell(end_price, predicted_price)

    # Plot historical prices together with the predicted next close
    fig = _plot_prediction(ticker, data, predicted_price)

    return percentage_change_str, highest_price, lowest_price, decision, fig

# Gradio UI: the three inputs are passed to stock_prediction_app in order,
# and its five return values fill the five outputs in order.
iface = gr.Interface(
    title="Stock Prediction App",
    description="Predict whether to buy or sell a stock based on historical data. Please enter dates in YYYY-MM-DD format.",
    fn=stock_prediction_app,
    inputs=[
        gr.Dropdown(choices=STOCK_TICKERS, label="Select Stock Ticker"),
        gr.Textbox(label="Enter Start Date (YYYY-MM-DD)", placeholder="e.g., 2020-01-01"),
        gr.Textbox(label="Enter End Date (YYYY-MM-DD)", placeholder="e.g., 2023-12-31"),
    ],
    outputs=[
        gr.Textbox(label="Percentage Change"),
        gr.Number(label="Highest Closing Price"),
        gr.Number(label="Lowest Closing Price"),
        gr.Textbox(label="Decision (Buy/Sell)"),
        gr.Plot(label="Stock Performance"),
    ],
)

# Launch the interface when this file is executed
iface.launch()