File size: 4,456 Bytes
d511723
 
 
 
 
 
 
 
 
 
2c4cd2e
d511723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b11d2e6
d511723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7eb9cb
d511723
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import yfinance as yf
import gradio as gr
import pandas as pd
import numpy as np
from datetime import date, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import HistGradientBoostingRegressor
#import catboost as cb
import matplotlib.pyplot as plt

def fetch_data(symbol, start, end):
    """Download closing prices for ``symbol`` as a two-column DataFrame.

    Calls ``yf.download`` for the ``start``..``end`` window, keeps only the
    ``Close`` column, turns the index into a regular column, and labels the
    two resulting columns ``Date`` and ``Close``.
    """
    raw = yf.download(symbol, start=start, end=end)
    closes = raw[['Close']]
    table = closes.reset_index()
    # Positional relabel: first column is the former index, second is Close.
    table.columns = ['Date', 'Close']
    return table

def preprocess_data(df, n_days):
    """Build the feature matrix and target vector for an ``n_days``-ahead model.

    The target of each row is the ``Close`` value found ``n_days`` rows later.
    Rows containing any missing value are dropped, which includes the trailing
    ``n_days`` rows that have no target. All work is done on a copy, so the
    caller's frame keeps its original rows and columns.

    Args:
        df: DataFrame with at least ``Date`` and ``Close`` columns.
        n_days: Number of rows to look ahead when building the target.

    Returns:
        A tuple ``(X, y)``: ``X`` holds the ``Year``, ``Month``, ``Day`` and
        ``Close`` columns, ``y`` is the shifted close (``Target``).
    """
    # Copy first: adding columns / dropping rows in place would silently
    # truncate and widen the frame the caller still uses afterwards.
    data = df.copy()
    data['Target'] = data['Close'].shift(-n_days)
    data.dropna(inplace=True)

    # Calendar components of the date are the only time features used.
    data['Date'] = pd.to_datetime(data['Date'])
    data['Year'] = data['Date'].dt.year
    data['Month'] = data['Date'].dt.month
    data['Day'] = data['Date'].dt.day

    X = data[['Year', 'Month', 'Day', 'Close']]
    y = data['Target']
    return X, y

def train_predict(symbol, start_date, end_date, forecast_days):
    """Fetch prices, fit each regressor, and predict the next ``forecast_days`` days.

    Args:
        symbol: Ticker passed to ``fetch_data``.
        start_date: First date of the history window.
        end_date: Last date of the history window; forecasts start the day after.
        forecast_days: Look-ahead used for the training target and the number
            of future calendar days to predict.

    Returns:
        A tuple ``(df, results)``. ``df`` is the fetched ``Date``/``Close``
        frame. ``results`` maps each model name to a dict with ``'mae'``
        (mean absolute error on the held-out split) and ``'future_preds'``
        (list of predictions, one per future day).

    Raises:
        ValueError: If no rows remain for training after preprocessing.
    """
    # Fetch and preprocess data. A copy is handed to preprocess_data so that
    # `df` keeps every fetched row: the "latest close" used below and the
    # frame returned for plotting must not lose the trailing rows.
    df = fetch_data(symbol, start_date, end_date)
    X, y = preprocess_data(df.copy(), forecast_days)

    if X.empty:
        raise ValueError(
            f"No usable training rows for {symbol!r} between {start_date} "
            f"and {end_date} with a {forecast_days}-day horizon."
        )

    # Train-test split.
    # NOTE(review): train_test_split shuffles by default, so rows later in
    # time can end up in the training set; the MAE is therefore optimistic
    # for a time series. Left unchanged to keep the reported metric stable.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define models (CatBoost is not included; its import is commented out
    # at the top of the file).
    models = {
        'XGBoost': xgb.XGBRegressor(),
        'LightGBM': lgb.LGBMRegressor(),
        'HistGradientBoosting': HistGradientBoostingRegressor()
    }

    # The future feature frame is the same for every model, so build it once.
    # Each future day carries the most recent known close as its 'Close' feature.
    future_dates = [end_date + timedelta(days=i) for i in range(1, forecast_days + 1)]
    last_close = df['Close'].iloc[-1]
    future_df = pd.DataFrame({
        'Year': [day.year for day in future_dates],
        'Month': [day.month for day in future_dates],
        'Day': [day.day for day in future_dates],
        'Close': [last_close] * forecast_days
    })

    results = {}

    for name, model in models.items():
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        mae = mean_absolute_error(y_test, preds)

        future_preds = model.predict(future_df)
        results[name] = {'mae': mae, 'future_preds': future_preds.tolist()}

    return df, results

def plot_results(df, end_date, forecast_days, results):
    """Render the history chart and the forecast chart, saving both as PNGs.

    The first figure plots ``df['Close']`` against ``df['Date']``. The second
    plots every model's ``'future_preds'`` against the ``forecast_days``
    calendar days following ``end_date``.

    Returns:
        A tuple of the two file paths written:
        ``('historical_data.png', 'future_predictions.png')``.
    """
    historical_path = 'historical_data.png'
    prediction_path = 'future_predictions.png'

    # Figure 1: historical closing prices
    hist_fig, hist_ax = plt.subplots(figsize=(14, 7))
    hist_ax.plot(df['Date'], df['Close'], label='Historical Data')
    hist_ax.set_xlabel('Date')
    hist_ax.set_ylabel('Close Price')
    hist_ax.set_title('Historical Stock Data')
    hist_ax.grid(True)
    hist_fig.tight_layout()
    hist_fig.savefig(historical_path)
    plt.close(hist_fig)

    # Figure 2: one line per model over the forecast horizon
    horizon = [end_date + timedelta(days=offset) for offset in range(1, forecast_days + 1)]
    pred_fig, pred_ax = plt.subplots(figsize=(14, 7))
    for model_name, outcome in results.items():
        pred_ax.plot(horizon, outcome['future_preds'], label=f'{model_name} Predictions')
    pred_ax.set_xlabel('Date')
    pred_ax.set_ylabel('Close Price')
    pred_ax.set_title('Future Stock Price Predictions')
    pred_ax.legend()
    pred_ax.grid(True)
    pred_fig.tight_layout()
    pred_fig.savefig(prediction_path)
    plt.close(pred_fig)

    return historical_path, prediction_path

def gradio_interface(symbol, years_of_data, forecast_days):
    """Gradio callback: train on recent history and return the two chart paths.

    Args:
        symbol: Ticker symbol entered by the user.
        years_of_data: How many years back from today the history starts.
        forecast_days: Number of future days to predict.

    Returns:
        A tuple ``(historical_path, prediction_path)`` of image file paths,
        matching the interface's two image outputs.
    """
    # Slider values may arrive as floats; date arithmetic and the row shift
    # used downstream both require integers.
    years_of_data = int(years_of_data)
    forecast_days = int(forecast_days)

    end_date = date.today()
    start_year = end_date.year - years_of_data
    try:
        start_date = end_date.replace(year=start_year)
    except ValueError:
        # Today is Feb 29 and the start year is not a leap year.
        start_date = end_date.replace(year=start_year, day=28)

    df, results = train_predict(symbol, start_date, end_date, forecast_days)
    historical_path, prediction_path = plot_results(df, end_date, forecast_days, results)

    return historical_path, prediction_path

# Gradio UI: a ticker textbox plus two sliders feed gradio_interface, which
# returns two image paths (history chart, forecast chart).
interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Stock Symbol (e.g., ^NSEI for Nifty 50, ^BSESN for Sensex)"),
        gr.Slider(label="Years of Data", minimum=1, maximum=10, step=1),
        gr.Slider(label="Forecast Days", minimum=1, maximum=30, step=1)
    ],
    outputs=["image", "image"],
    title="Stock Price Prediction",
    # Lists only the models that are actually trained; CatBoost is disabled
    # in this file.
    description="Predict future stock prices using XGBoost, LightGBM, and HistGradientBoosting models."
)

# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    interface.launch()