"""Gradio app that forecasts stock closing prices with gradient-boosted models.

Pipeline: download closing prices with yfinance, derive calendar features,
fit several regressors, predict the next ``forecast_days`` calendar days and
render two PNG charts (history and forecasts).
"""

import logging
from datetime import date, timedelta

import yfinance as yf
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import HistGradientBoostingRegressor
# import catboost as cb  # CatBoost is currently disabled
import matplotlib.pyplot as plt

logger = logging.getLogger(__name__)


def fetch_data(symbol, start, end):
    """Download closing prices for ``symbol`` between ``start`` and ``end``.

    Args:
        symbol: Ticker passed straight to ``yf.download``.
        start: Start of the requested period.
        end: End of the requested period.

    Returns:
        A DataFrame with exactly two columns, ``Date`` and ``Close``.
    """
    df = yf.download(symbol, start=start, end=end)
    df = df[['Close']].reset_index()
    # NOTE(review): assumes a single ticker, i.e. exactly one 'Close' column
    # next to the date index; with several tickers this assignment would fail.
    df.columns = ['Date', 'Close']
    return df


def preprocess_data(df, n_days):
    """Build the feature matrix and target for an ``n_days``-ahead forecast.

    The target of each row is the ``Close`` value ``n_days`` rows later, so
    the trailing ``n_days`` rows (which have no target) are excluded.

    The input frame is NOT modified: the original implementation dropped rows
    and added columns in place, which silently truncated the caller's data.

    Args:
        df: Frame with at least ``Date`` and ``Close`` columns.
        n_days: How many rows ahead the target lies.

    Returns:
        ``(X, y)`` where ``X`` has the columns ``Year``, ``Month``, ``Day``,
        ``Close`` and ``y`` is the shifted close price.
    """
    data = df.copy()
    data['Target'] = data['Close'].shift(-n_days)
    data = data.dropna()

    dates = pd.to_datetime(data['Date'])
    X = pd.DataFrame({
        'Year': dates.dt.year,
        'Month': dates.dt.month,
        'Day': dates.dt.day,
        'Close': data['Close'],
    })
    y = data['Target']
    return X, y


def _future_dates(end_date, forecast_days):
    """Return the ``forecast_days`` calendar days that follow ``end_date``."""
    return [end_date + timedelta(days=offset)
            for offset in range(1, forecast_days + 1)]


def train_predict(symbol, start_date, end_date, forecast_days):
    """Fit every model on the history of ``symbol`` and forecast ahead.

    Args:
        symbol: Ticker to download.
        start_date: First day of the training period.
        end_date: Last day of the training period; forecasts start the day
            after.
        forecast_days: Number of calendar days to forecast.

    Returns:
        ``(df, results)``. ``df`` is the full downloaded history (``Date``,
        ``Close``). ``results`` maps a model name to a dict with ``'mae'``
        (mean absolute error on the held-out split) and ``'future_preds'``
        (list of predicted prices, one per forecast day).

    Raises:
        ValueError: If the download yields too few usable rows to split into
            a train and a test set.
    """
    forecast_days = int(forecast_days)

    # Fetch and preprocess data
    df = fetch_data(symbol, start_date, end_date)
    X, y = preprocess_data(df, forecast_days)
    if len(X) < 2:
        raise ValueError(
            f"Not enough price history for {symbol!r} between {start_date} "
            f"and {end_date} to forecast {forecast_days} day(s) ahead."
        )

    # Train-test split
    # NOTE(review): this is a shuffled split of time-ordered rows, so the
    # reported MAE may be optimistic; kept as in the original.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Define models
    models = {
        'XGBoost': xgb.XGBRegressor(),
        'LightGBM': lgb.LGBMRegressor(),
        # 'CatBoost': cb.CatBoostRegressor(),  # disabled, see import above
        'HistGradientBoosting': HistGradientBoostingRegressor(),
    }

    # The future feature frame is identical for every model, so build it once.
    # `df` is the untouched history, hence iloc[-1] is the most recent close.
    future_dates = _future_dates(end_date, forecast_days)
    last_close = df['Close'].iloc[-1]
    future_df = pd.DataFrame({
        'Year': [d.year for d in future_dates],
        'Month': [d.month for d in future_dates],
        'Day': [d.day for d in future_dates],
        'Close': [last_close] * forecast_days,
    })

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        mae = mean_absolute_error(y_test, model.predict(X_test))
        future_preds = model.predict(future_df)
        results[name] = {'mae': mae, 'future_preds': future_preds.tolist()}

    return df, results


def plot_results(df, end_date, forecast_days, results):
    """Render the history and forecast charts and save them as PNG files.

    Args:
        df: Frame with ``Date`` and ``Close`` columns.
        end_date: Last day of the history; forecasts start the day after.
        forecast_days: Number of forecast days per model.
        results: Mapping as returned by ``train_predict``.

    Returns:
        ``(historical_path, prediction_path)``: the two file names written
        into the current working directory.
    """
    # Explicit figure/axes objects avoid relying on pyplot's implicit
    # "current figure" state.

    # Plot historical data
    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(df['Date'], df['Close'], label='Historical Data')
    ax.set_xlabel('Date')
    ax.set_ylabel('Close Price')
    ax.set_title('Historical Stock Data')
    ax.grid(True)
    fig.tight_layout()
    historical_path = 'historical_data.png'
    fig.savefig(historical_path)
    plt.close(fig)

    # Plot future predictions
    fig, ax = plt.subplots(figsize=(14, 7))
    future_dates = _future_dates(end_date, int(forecast_days))
    for model, result in results.items():
        ax.plot(future_dates, result['future_preds'],
                label=f'{model} Predictions')
    ax.set_xlabel('Date')
    ax.set_ylabel('Close Price')
    ax.set_title('Future Stock Price Predictions')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    prediction_path = 'future_predictions.png'
    fig.savefig(prediction_path)
    plt.close(fig)

    return historical_path, prediction_path


def _years_before(day, years):
    """Return the date ``years`` years before ``day``.

    Feb 29 falls back to Feb 28 when the target year is not a leap year;
    building the date naively raises ``ValueError`` in that case.
    """
    try:
        return day.replace(year=day.year - years)
    except ValueError:
        return day.replace(year=day.year - years, day=28)


def _format_summary(symbol, start_date, end_date, results):
    """Return a Markdown report of the MAE and forecasts of every model."""
    parts = [
        f"## Prediction Results for {symbol}\n",
        f"Training Period: {start_date} to {end_date}\n\n",
    ]
    for model, result in results.items():
        parts.append(f"### {model}\n")
        parts.append(f"Mean Absolute Error: {result['mae']}\n")
        parts.append(f"Future Predictions: {result['future_preds']}\n\n")
    return "".join(parts)


def gradio_interface(symbol, years_of_data, forecast_days):
    """Gradio callback: train on recent history and return both chart paths.

    Args:
        symbol: Ticker entered by the user.
        years_of_data: Number of years of history to train on.
        forecast_days: Number of calendar days to forecast.

    Returns:
        ``(historical_path, prediction_path)`` for the two image outputs.
    """
    # Coerce defensively: the values are used in date() and range(), which
    # reject floats.
    years_of_data = int(years_of_data)
    forecast_days = int(forecast_days)

    end_date = date.today()
    start_date = _years_before(end_date, years_of_data)

    df, results = train_predict(symbol, start_date, end_date, forecast_days)
    historical_path, prediction_path = plot_results(
        df, end_date, forecast_days, results
    )

    # The textual report used to be built and then discarded. The return
    # shape is kept (two images), so the report goes to the log instead.
    logger.info("%s", _format_summary(symbol, start_date, end_date, results))

    return historical_path, prediction_path


interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Stock Symbol (e.g., ^NSEI for Nifty 50, ^BSESN for Sensex)"),
        gr.Slider(label="Years of Data", minimum=1, maximum=10, step=1),
        gr.Slider(label="Forecast Days", minimum=1, maximum=30, step=1),
    ],
    outputs=["image", "image"],
    title="Stock Price Prediction",
    # CatBoost is disabled above, so it is not advertised here.
    description="Predict future stock prices using XGBoost, LightGBM, and HistGradientBoosting models.",
)

if __name__ == "__main__":
    interface.launch()