Spaces:
Sleeping
Sleeping
File size: 8,694 Bytes
e72394f 209bf86 e72394f 40241c9 e72394f 40241c9 e72394f 40241c9 e72394f 209bf86 e72394f 209bf86 e72394f 014da9e e72394f 209bf86 e72394f 209bf86 e72394f 209bf86 e72394f 40241c9 e72394f 209bf86 e72394f 209bf86 e72394f 209bf86 e72394f 209bf86 014da9e 2c4da9a 40241c9 209bf86 014da9e 209bf86 014da9e e72394f 40241c9 e72394f 40241c9 e72394f 209bf86 40241c9 209bf86 40241c9 209bf86 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 |
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import gradio as gr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import joblib
import os
import re
# Ticker symbols the application works with (symbol -> company in the trailing comment).
STOCK_TICKERS = [
    "AAPL",   # Apple
    "GOOGL",  # Alphabet
    "MSFT",   # Microsoft
    "AMZN",   # Amazon
    "TSLA",   # Tesla
    "META",   # Meta Platforms
    "NVDA",   # NVIDIA
    "JPM",    # JPMorgan Chase
    "V",      # Visa
    "NFLX",   # Netflix
]
def fetch_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Downloads the price history of a single ticker from Yahoo Finance.

    Parameters:
    - ticker (str): Stock ticker symbol.
    - start_date (str): Start of the requested range in 'YYYY-MM-DD' format.
    - end_date (str): End of the requested range in 'YYYY-MM-DD' format.

    Returns:
    - pd.DataFrame: The frame returned by `Ticker.history` for that range.
    """
    # Both dates are forwarded unchanged; any interpretation of the range
    # bounds is left to yfinance.
    return yf.Ticker(ticker).history(start=start_date, end=end_date)
def preprocess_data(data: pd.DataFrame) -> "tuple[np.ndarray, np.ndarray]":
    """
    Builds lagged-close features and next-day targets for the regressor.

    Every sample describes one trading day: its features are the five most
    recent closing prices known at the end of that day (that day's close
    first, then the four preceding closes) and its target is the following
    day's close. This is the same "newest close first" layout that
    `make_prediction` passes to the model, so what the model learns and what
    it is asked at prediction time refer to the same one-day horizon.

    The input frame is left untouched; all work happens on a new frame that
    only contains the columns needed here.

    Parameters:
    - data (pd.DataFrame): DataFrame containing stock data with a 'Close' column.

    Returns:
    - X (np.ndarray): Feature array of shape (n_samples, 5).
    - y (np.ndarray): Target array of shape (n_samples,).
      Both are empty when there are too few rows to build a full sample.

    Raises:
    - KeyError: If `data` has no 'Close' column.
    """
    n_lags = 5
    close = data['Close']

    # Work on a fresh frame so the caller's DataFrame never gains helper columns.
    frame = pd.DataFrame({'Target': close.shift(-1)})  # next day's close
    feature_cols = []
    for i in range(1, n_lags + 1):
        col = f'Close_{i}'
        # Close_1 is the current day's close, Close_2 the day before, and so on.
        frame[col] = close.shift(i - 1)
        feature_cols.append(col)

    # Rows without a complete window (the first four) and the last row
    # (which has no next-day target) contain NaN and are removed here.
    frame = frame.dropna()

    X = frame[feature_cols].to_numpy()
    y = frame['Target'].to_numpy()
    return X, y
def train_model(X: np.ndarray, y: np.ndarray) -> RandomForestRegressor:
    """
    Fits a Random Forest Regressor and reports its hold-out error.

    The samples are split without shuffling, so the first 80% of the rows
    are used for fitting and the final 20% for evaluation. The mean squared
    error on the evaluation rows is printed to stdout.

    Parameters:
    - X (np.ndarray): Feature array.
    - y (np.ndarray): Target array.

    Returns:
    - model (RandomForestRegressor): The forest fitted on the first 80% of rows.
    """
    # shuffle=False keeps row order, so the evaluation set is the tail of the data.
    features_fit, features_eval, targets_fit, targets_eval = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Fixed seed so repeated runs on the same data give the same forest.
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(features_fit, targets_fit)

    holdout_mse = mean_squared_error(targets_eval, forest.predict(features_eval))
    print(f"Model Mean Squared Error: {holdout_mse}")

    return forest
def make_prediction(model: RandomForestRegressor, recent_data: pd.DataFrame) -> float:
    """
    Asks the trained model for one closing-price prediction.

    The last five values of the 'Close' column are arranged newest first
    and passed to the model as a single sample.

    Parameters:
    - model (RandomForestRegressor): Trained Random Forest model.
    - recent_data (pd.DataFrame): Recent stock data with a 'Close' column.

    Returns:
    - predicted_price (float): Predicted closing price.

    Raises:
    - ValueError: If fewer than five closing prices are available.
    """
    all_closes = recent_data['Close'].values
    # Guard first: a shorter window cannot fill the five feature slots.
    if len(all_closes) < 5:
        raise ValueError("Not enough data to make a prediction.")

    last_five = all_closes[-5:]
    newest_first = last_five[::-1]  # feature order is most recent close first
    sample = newest_first.reshape(1, -1)  # one row, five columns

    return model.predict(sample)[0]
def buy_or_sell(current_price: float, predicted_price: float) -> str:
    """
    Turns a price prediction into a trading decision.

    Parameters:
    - current_price (float): Current closing price.
    - predicted_price (float): Predicted closing price.

    Returns:
    - decision (str): 'Buy' when the predicted price is strictly higher than
      the current price; 'Sell' in every other case (including equal prices).
    """
    return "Buy" if predicted_price > current_price else "Sell"
def validate_date_format(date_text: str) -> bool:
    """
    Validates that the input is a real calendar date written as 'YYYY-MM-DD'.

    The check has two stages: the text must consist of exactly four, two and
    two ASCII digits separated by hyphens, and the resulting date must exist
    (so '2023-02-30' is rejected).

    Parameters:
    - date_text (str): Date string to validate. Non-string values (for
      example None) are treated as invalid rather than raising.

    Returns:
    - bool: True if valid, False otherwise.
    """
    if not isinstance(date_text, str):
        return False

    # fullmatch rejects any trailing characters (including a newline), and
    # re.ASCII limits \d to 0-9 so digits from other scripts are not accepted.
    if re.fullmatch(r'\d{4}-\d{2}-\d{2}', date_text, flags=re.ASCII) is None:
        return False

    # The shape is right; let strptime decide whether the date really exists.
    try:
        datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        return False
    return True
def _no_result(message: str, status: str = "Error"):
    """Builds the five-value output used when no statistics or plot can be shown."""
    # The two price outputs are numeric fields, so they are left empty (None)
    # instead of being filled with placeholder text.
    return message, None, None, status, None


def _build_prediction_figure(ticker: str, data: pd.DataFrame, predicted_price: float):
    """Plots the historical closes plus one red marker for the predicted close."""
    # An explicit figure/axes pair avoids relying on pyplot's "current figure".
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(data['Close'], label='Historical Close Price')

    # Place the marker on the day after the last row, skipping the weekend.
    tomorrow_date = data.index[-1] + timedelta(days=1)
    while tomorrow_date.weekday() >= 5:  # Saturday=5, Sunday=6
        tomorrow_date += timedelta(days=1)

    ax.scatter(tomorrow_date, predicted_price, color='red', label='Predicted Close Price (Tomorrow)')
    ax.set_title(f'{ticker} Price Prediction for Tomorrow')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price ($)')
    ax.legend()
    fig.tight_layout()

    # Unregister this figure from pyplot so figures do not pile up across
    # calls; the Figure object itself stays usable for the caller.
    plt.close(fig)
    return fig


def stock_prediction_app(ticker: str, start_date: str, end_date: str):
    """
    Main function to handle stock prediction and return outputs.

    Validates the inputs, fetches the price history, computes summary
    statistics, trains the model, predicts the next close and plots it.

    Parameters:
    - ticker (str): Selected stock ticker.
    - start_date (str): Training start date in 'YYYY-MM-DD' format.
    - end_date (str): Training end date in 'YYYY-MM-DD' format.

    Returns a 5-tuple:
    - percentage_change (str): Percentage change from first to last close,
      or an explanatory message when something went wrong.
    - highest_price (float | None): Highest closing price in the period;
      None when it could not be computed.
    - lowest_price (float | None): Lowest closing price in the period;
      None when it could not be computed.
    - decision (str): 'Buy' or 'Sell', or a status such as 'Error',
      'No Data Available' or 'No Prediction'.
    - plot (matplotlib.figure.Figure | None): Historical closes with the
      predicted price, or None when no prediction was made.
    """
    if not ticker:
        return _no_result("Please select a stock ticker.")

    # Validate date formats
    if not (validate_date_format(start_date) and validate_date_format(end_date)):
        return _no_result("Invalid date format. Please use YYYY-MM-DD.")

    # Convert strings to datetime objects
    try:
        start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')
    except ValueError:
        return _no_result("Invalid date values. Please ensure dates are correct.")

    if start_dt >= end_dt:
        return _no_result("Start date must be before end date.")

    # Fetch data; failures are reported like those of every other stage
    # instead of surfacing as an unhandled exception.
    try:
        data = fetch_stock_data(ticker, start_date, end_date)
    except Exception as e:
        return _no_result(f"Error fetching data: {e}")
    if data.empty:
        return _no_result("N/A", "No Data Available")

    # Calculate percentage change, highest and lowest
    close = data['Close']
    start_price = close.iloc[0]
    end_price = close.iloc[-1]
    percentage_change = ((end_price - start_price) / start_price) * 100
    percentage_change_str = f"{percentage_change:.2f}%"
    highest_price = float(close.max())
    lowest_price = float(close.min())

    # Preprocess a copy so `data` keeps exactly the fetched columns for the
    # prediction and plotting steps below.
    try:
        X, y = preprocess_data(data.copy())
    except Exception as e:
        return _no_result(f"Error in preprocessing data: {e}")
    if len(X) == 0:
        return percentage_change_str, highest_price, lowest_price, "No Prediction", None

    # Train the model
    try:
        model = train_model(X, y)
    except Exception as e:
        return f"Error in training model: {e}", highest_price, lowest_price, "Error", None

    # Make prediction
    try:
        predicted_price = make_prediction(model, data)
    except Exception as e:
        return f"Error in making prediction: {e}", highest_price, lowest_price, "Error", None

    # Current price is the last closing price
    decision = buy_or_sell(end_price, predicted_price)

    fig = _build_prediction_figure(ticker, data, predicted_price)
    return percentage_change_str, highest_price, lowest_price, decision, fig
# Input widgets, in the positional order of stock_prediction_app's parameters
# (ticker, start date, end date).
_input_components = [
    gr.Dropdown(choices=STOCK_TICKERS, label="Select Stock Ticker"),
    gr.Textbox(label="Enter Start Date (YYYY-MM-DD)", placeholder="e.g., 2020-01-01"),
    gr.Textbox(label="Enter End Date (YYYY-MM-DD)", placeholder="e.g., 2023-12-31"),
]

# Output widgets, one per value returned by stock_prediction_app.
_output_components = [
    gr.Textbox(label="Percentage Change"),
    gr.Number(label="Highest Closing Price"),
    gr.Number(label="Lowest Closing Price"),
    gr.Textbox(label="Decision (Buy/Sell)"),
    gr.Plot(label="Stock Performance"),
]

# Wire the prediction function to the widgets above.
iface = gr.Interface(
    fn=stock_prediction_app,
    inputs=_input_components,
    outputs=_output_components,
    title="Stock Prediction App",
    description="Predict whether to buy or sell a stock based on historical data. Please enter dates in YYYY-MM-DD format.",
)

# Start serving the interface.
iface.launch()
|