# ggg / app.py
# Page header captured with this file: author avatar "geethareddy's picture";
# latest commit "Update app.py" (209bf86, verified).
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import gradio as gr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import joblib
import os
import re
# Ticker symbols offered in the UI dropdown. Each symbol is paired with the
# company it stands for; only the symbols end up in STOCK_TICKERS.
STOCK_TICKERS = [
    symbol
    for symbol, _company in (
        ("AAPL", "Apple"),
        ("GOOGL", "Alphabet"),
        ("MSFT", "Microsoft"),
        ("AMZN", "Amazon"),
        ("TSLA", "Tesla"),
        ("META", "Meta Platforms"),
        ("NVDA", "NVIDIA"),
        ("JPM", "JPMorgan Chase"),
        ("V", "Visa"),
        ("NFLX", "Netflix"),
    )
]
def fetch_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Downloads the price history of a single ticker from Yahoo Finance.
    Parameters:
    - ticker (str): Stock ticker symbol.
    - start_date (str): Start date in 'YYYY-MM-DD' format.
    - end_date (str): End date in 'YYYY-MM-DD' format.
    Returns:
    - pd.DataFrame: The frame returned by yfinance's `Ticker.history` for the
      requested window.
    """
    # Single expression: build the Ticker handle and query its history directly.
    return yf.Ticker(ticker).history(start=start_date, end=end_date)
def preprocess_data(data: pd.DataFrame) -> "tuple[np.ndarray, np.ndarray]":
    """
    Builds the supervised-learning arrays for the Random Forest Regressor.

    Each sample describes one trading day t:
    - features Close_1..Close_5 are the closes of day t, t-1, ..., t-4
      (newest first), and
    - the target is the close of day t+1.

    Close_1 is deliberately the *current* day's close: make_prediction feeds
    the model the latest five closes, newest first, so training rows must have
    the same layout for the output to be the next day's close.

    The input frame is never modified; all derived columns live in a separate
    frame.

    Parameters:
    - data (pd.DataFrame): DataFrame containing a 'Close' column.
    Returns:
    - X (np.ndarray): Feature array of shape (n_samples, 5).
    - y (np.ndarray): Target array of shape (n_samples,).
      Both are empty when fewer than six rows are available.
    Raises:
    - KeyError: If `data` has no 'Close' column.
    """
    close = data['Close']
    feature_cols = [f'Close_{i}' for i in range(1, 6)]

    # Lag 0..4 -> Close_1..Close_5; built in a fresh frame so `data` stays untouched.
    frame = pd.DataFrame({f'Close_{i}': close.shift(i - 1) for i in range(1, 6)})
    frame['Target'] = close.shift(-1)  # next day's close

    # Drops the first four rows (incomplete lag window) and the last row
    # (no next-day close to learn from), plus any row with a missing close.
    frame = frame.dropna()

    X = frame[feature_cols].values
    y = frame['Target'].values
    return X, y
def train_model(X: np.ndarray, y: np.ndarray) -> RandomForestRegressor:
    """
    Fits a Random Forest Regressor and reports its hold-out error.
    Parameters:
    - X (np.ndarray): Feature array.
    - y (np.ndarray): Target array.
    Returns:
    - model (RandomForestRegressor): The fitted model. It is trained on the
      training portion of the split only; the remaining 20% is used solely to
      print the mean squared error.
    """
    # shuffle=False: the split is taken without reordering the rows.
    features_fit, features_eval, target_fit, target_eval = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Fixed seed so repeated runs on the same data build the same forest.
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(features_fit, target_fit)

    # Score on the held-out rows and log the result to stdout.
    holdout_error = mean_squared_error(target_eval, forest.predict(features_eval))
    print(f"Model Mean Squared Error: {holdout_error}")
    return forest
def make_prediction(model: RandomForestRegressor, recent_data: pd.DataFrame) -> float:
    """
    Predicts a closing price from the five most recent closes.
    Parameters:
    - model (RandomForestRegressor): Trained Random Forest model.
    - recent_data (pd.DataFrame): Recent stock data with a 'Close' column.
    Returns:
    - predicted_price (float): The model's output for the latest window.
    Raises:
    - ValueError: If fewer than five closing prices are available.
    """
    all_closes = recent_data['Close'].values
    window = all_closes[-5:]
    if len(window) < 5:
        raise ValueError("Not enough data to make a prediction.")

    # Newest close first, as a single-row 2-D array; this is meant to mirror
    # the Close_1..Close_5 column order of the training features.
    features = window[::-1].reshape(1, -1)
    return model.predict(features)[0]
def buy_or_sell(current_price: float, predicted_price: float) -> str:
    """
    Turns a price forecast into a trading signal.
    Parameters:
    - current_price (float): Current closing price.
    - predicted_price (float): Predicted closing price.
    Returns:
    - decision (str): 'Buy' only when the predicted price is strictly higher
      than the current price; 'Sell' in every other case (including a tie).
    """
    return "Buy" if predicted_price > current_price else "Sell"
def validate_date_format(date_text: str) -> bool:
    """
    Checks that a string is a real calendar date written as 'YYYY-MM-DD'.
    Parameters:
    - date_text (str): Date string to validate.
    Returns:
    - bool: True when the text has the 4-2-2 digit layout and also parses as
      an actual date; False otherwise.
    """
    # Shape check first: strptime alone would also accept e.g. '2020-1-1'.
    if re.match(r'^\d{4}-\d{2}-\d{2}$', date_text) is None:
        return False

    # Value check: rejects impossible dates such as month 13 or February 30.
    try:
        datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        return False
    return True
def stock_prediction_app(ticker: str, start_date: str, end_date: str):
    """
    Main function to handle stock prediction and return outputs.

    Every exit path returns the same 5-tuple so it can be wired straight to
    the five output components of the interface. On failure the first element
    carries the explanation and the plot slot is None.

    Parameters:
    - ticker (str): Selected stock ticker.
    - start_date (str): Training start date in 'YYYY-MM-DD' format.
    - end_date (str): Training end date in 'YYYY-MM-DD' format.
    Returns:
    - percentage_change (str): Percentage change from start to end date
      (or an error message).
    - highest_price (float): Highest closing price in the period ("N/A" when unknown).
    - lowest_price (float): Lowest closing price in the period ("N/A" when unknown).
    - decision (str): Buy or Sell decision, or a status such as "Error",
      "No Data Available" or "No Prediction".
    - plot (matplotlib.figure.Figure): Plot of historical prices with the
      predicted next close, or None when no plot could be produced.
    """
    # The ticker dropdown is created without a default value, so this handler
    # can be invoked before anything has been selected.
    if not ticker:
        return "Please select a stock ticker.", "N/A", "N/A", "Error", None

    # Validate date formats
    if not (validate_date_format(start_date) and validate_date_format(end_date)):
        return "Invalid date format. Please use YYYY-MM-DD.", "N/A", "N/A", "Error", None

    # Parsing cannot fail here: validate_date_format already ran the same
    # strptime call successfully on both strings.
    start_dt = datetime.strptime(start_date, '%Y-%m-%d')
    end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    if start_dt >= end_dt:
        return "Start date must be before end date.", "N/A", "N/A", "Error", None

    # Fetch data. Handled like the other pipeline steps so a download failure
    # becomes an error row instead of an unhandled exception.
    try:
        data = fetch_stock_data(ticker, start_date, end_date)
    except Exception as e:
        return f"Error in fetching data: {e}", "N/A", "N/A", "Error", None
    if data.empty:
        return "N/A", "N/A", "N/A", "No Data Available", None

    # Summary statistics over the requested window
    closes = data['Close']
    start_price = closes.iloc[0]
    end_price = closes.iloc[-1]
    percentage_change = ((end_price - start_price) / start_price) * 100
    percentage_change_str = f"{percentage_change:.2f}%"
    highest_price = closes.max()
    lowest_price = closes.min()

    # Preprocess data
    try:
        X, y = preprocess_data(data)
    except Exception as e:
        return f"Error in preprocessing data: {e}", "N/A", "N/A", "Error", None
    if len(X) == 0:
        # Window too short to build a single training sample.
        return percentage_change_str, highest_price, lowest_price, "No Prediction", None

    # Train the model
    try:
        model = train_model(X, y)
    except Exception as e:
        return f"Error in training model: {e}", highest_price, lowest_price, "Error", None

    # Make prediction
    try:
        predicted_price = make_prediction(model, data)
    except Exception as e:
        return f"Error in making prediction: {e}", highest_price, lowest_price, "Error", None

    # The current price is the last closing price, i.e. end_price.
    decision = buy_or_sell(end_price, predicted_price)

    # Date used to place the prediction marker: the day after the last data
    # point, pushed past the weekend (holidays are not taken into account).
    tomorrow_date = data.index[-1] + timedelta(days=1)
    while tomorrow_date.weekday() >= 5:  # Saturday=5, Sunday=6
        tomorrow_date += timedelta(days=1)

    # Draw on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", so the figure returned is always the one drawn here.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(closes, label='Historical Close Price')
    ax.scatter(tomorrow_date, predicted_price, color='red', label='Predicted Close Price (Tomorrow)')
    ax.set_title(f'{ticker} Price Prediction for Tomorrow')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price ($)')
    ax.legend()
    fig.tight_layout()
    # Unregister the figure from pyplot so figures do not pile up across
    # requests; the Figure object itself is still returned to the caller.
    plt.close(fig)

    return percentage_change_str, highest_price, lowest_price, decision, fig
# Input widgets, in the order stock_prediction_app takes its arguments:
# ticker, start date, end date.
input_components = [
    gr.Dropdown(choices=STOCK_TICKERS, label="Select Stock Ticker"),
    gr.Textbox(label="Enter Start Date (YYYY-MM-DD)", placeholder="e.g., 2020-01-01"),
    gr.Textbox(label="Enter End Date (YYYY-MM-DD)", placeholder="e.g., 2023-12-31"),
]

# Output widgets, in the order of the 5-tuple returned by stock_prediction_app.
output_components = [
    gr.Textbox(label="Percentage Change"),
    gr.Number(label="Highest Closing Price"),
    gr.Number(label="Lowest Closing Price"),
    gr.Textbox(label="Decision (Buy/Sell)"),
    gr.Plot(label="Stock Performance"),
]

# Define the Gradio interface
iface = gr.Interface(
    fn=stock_prediction_app,
    inputs=input_components,
    outputs=output_components,
    title="Stock Prediction App",
    description="Predict whether to buy or sell a stock based on historical data. Please enter dates in YYYY-MM-DD format.",
)

# Launch the interface (runs as soon as this module is executed)
iface.launch()