Update ml_model.py
Browse files- ml_model.py +34 -8
ml_model.py
CHANGED
|
@@ -2,16 +2,26 @@ import yfinance as yf
|
|
| 2 |
import pandas as pd
|
| 3 |
import ta
|
| 4 |
import lightgbm as lgb
|
|
|
|
|
|
|
| 5 |
from nifty100 import NIFTY100
|
| 6 |
|
|
|
|
| 7 |
model = None
|
| 8 |
features_list = ["rsi", "macd", "ma20", "ma50", "volatility", "ticker_code"]
|
| 9 |
|
| 10 |
-
def
|
| 11 |
global model
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
|
|
|
| 15 |
ticker_mapping = {ticker: idx for idx, ticker in enumerate(NIFTY100)}
|
| 16 |
|
| 17 |
for ticker in NIFTY100:
|
|
@@ -21,44 +31,57 @@ def train_model():
|
|
| 21 |
if len(df) < 100:
|
| 22 |
continue
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
close = df["Close"].squeeze()
|
| 25 |
|
| 26 |
df["return"] = close.pct_change()
|
| 27 |
-
df["rsi"] = ta.momentum.RSIIndicator(close).rsi()
|
| 28 |
df["macd"] = ta.trend.MACD(close).macd()
|
| 29 |
df["ma20"] = close.rolling(20).mean()
|
| 30 |
df["ma50"] = close.rolling(50).mean()
|
| 31 |
df["volatility"] = df["return"].rolling(10).std()
|
| 32 |
|
| 33 |
df["target"] = (df["return"].shift(-1) > 0).astype(int)
|
| 34 |
-
|
| 35 |
df["ticker_code"] = ticker_mapping[ticker]
|
| 36 |
|
| 37 |
df = df.dropna()
|
| 38 |
all_data.append(df)
|
| 39 |
|
| 40 |
-
except:
|
| 41 |
pass
|
| 42 |
|
| 43 |
full_df = pd.concat(all_data)
|
| 44 |
|
|
|
|
|
|
|
|
|
|
| 45 |
X = full_df[features_list]
|
| 46 |
y = full_df["target"]
|
| 47 |
|
|
|
|
| 48 |
model = lgb.LGBMClassifier(
|
| 49 |
n_estimators=300,
|
| 50 |
max_depth=6,
|
| 51 |
-
learning_rate=0.05
|
| 52 |
-
categorical_feature=[len(features_list) - 1]
|
| 53 |
)
|
| 54 |
|
| 55 |
model.fit(X, y)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def predict_probability(df, ticker):
|
| 58 |
global model
|
| 59 |
|
| 60 |
if model is None:
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
close = df["Close"].squeeze()
|
| 64 |
|
|
@@ -74,6 +97,9 @@ def predict_probability(df, ticker):
|
|
| 74 |
|
| 75 |
X_pred = pd.DataFrame([[rsi, macd, ma20, ma50, vol, ticker_code]], columns=features_list)
|
| 76 |
|
|
|
|
|
|
|
|
|
|
| 77 |
prob = model.predict_proba(X_pred)[0][1]
|
| 78 |
|
| 79 |
return round(prob * 100, 2)
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import ta
|
| 4 |
import lightgbm as lgb
|
| 5 |
+
import joblib
|
| 6 |
+
import os
|
| 7 |
from nifty100 import NIFTY100
|
| 8 |
|
| 9 |
+
MODEL_FILE = "lightgbm_model.joblib"
|
| 10 |
model = None
|
| 11 |
features_list = ["rsi", "macd", "ma20", "ma50", "volatility", "ticker_code"]
|
| 12 |
|
| 13 |
+
def load_or_train_model():
|
| 14 |
global model
|
| 15 |
|
| 16 |
+
# Check if we already have a trained model saved
|
| 17 |
+
if os.path.exists(MODEL_FILE):
|
| 18 |
+
print("Loading pre-trained LightGBM model...")
|
| 19 |
+
model = joblib.load(MODEL_FILE)
|
| 20 |
+
return
|
| 21 |
+
|
| 22 |
+
print("Training LightGBM model from scratch. This will take a few minutes...")
|
| 23 |
|
| 24 |
+
all_data = []
|
| 25 |
ticker_mapping = {ticker: idx for idx, ticker in enumerate(NIFTY100)}
|
| 26 |
|
| 27 |
for ticker in NIFTY100:
|
|
|
|
| 31 |
if len(df) < 100:
|
| 32 |
continue
|
| 33 |
|
| 34 |
+
# FIX 1: Flatten the multi-index columns from yfinance
|
| 35 |
+
df.columns = df.columns.get_level_values(0)
|
| 36 |
+
|
| 37 |
close = df["Close"].squeeze()
|
| 38 |
|
| 39 |
df["return"] = close.pct_change()
|
| 40 |
+
df["rsi"] = ta.momentum.RSIIndicator(close).rsi().rsi() if hasattr(ta.momentum.RSIIndicator(close).rsi(), 'rsi') else ta.momentum.RSIIndicator(close).rsi()
|
| 41 |
df["macd"] = ta.trend.MACD(close).macd()
|
| 42 |
df["ma20"] = close.rolling(20).mean()
|
| 43 |
df["ma50"] = close.rolling(50).mean()
|
| 44 |
df["volatility"] = df["return"].rolling(10).std()
|
| 45 |
|
| 46 |
df["target"] = (df["return"].shift(-1) > 0).astype(int)
|
|
|
|
| 47 |
df["ticker_code"] = ticker_mapping[ticker]
|
| 48 |
|
| 49 |
df = df.dropna()
|
| 50 |
all_data.append(df)
|
| 51 |
|
| 52 |
+
except Exception as e:
|
| 53 |
pass
|
| 54 |
|
| 55 |
full_df = pd.concat(all_data)
|
| 56 |
|
| 57 |
+
# FIX 2: Convert ticker_code to pandas category type (removes the warning)
|
| 58 |
+
full_df["ticker_code"] = full_df["ticker_code"].astype("category")
|
| 59 |
+
|
| 60 |
X = full_df[features_list]
|
| 61 |
y = full_df["target"]
|
| 62 |
|
| 63 |
+
# Removed the deprecated categorical_feature argument
|
| 64 |
model = lgb.LGBMClassifier(
|
| 65 |
n_estimators=300,
|
| 66 |
max_depth=6,
|
| 67 |
+
learning_rate=0.05
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
model.fit(X, y)
|
| 71 |
+
|
| 72 |
+
# Save the model to disk
|
| 73 |
+
joblib.dump(model, MODEL_FILE)
|
| 74 |
+
print("Model saved successfully as", MODEL_FILE)
|
| 75 |
|
| 76 |
def predict_probability(df, ticker):
|
| 77 |
global model
|
| 78 |
|
| 79 |
if model is None:
|
| 80 |
+
load_or_train_model()
|
| 81 |
+
|
| 82 |
+
# Ensure columns are flat before making predictions
|
| 83 |
+
if isinstance(df.columns, pd.MultiIndex):
|
| 84 |
+
df.columns = df.columns.get_level_values(0)
|
| 85 |
|
| 86 |
close = df["Close"].squeeze()
|
| 87 |
|
|
|
|
| 97 |
|
| 98 |
X_pred = pd.DataFrame([[rsi, macd, ma20, ma50, vol, ticker_code]], columns=features_list)
|
| 99 |
|
| 100 |
+
# Cast it to category here as well
|
| 101 |
+
X_pred["ticker_code"] = X_pred["ticker_code"].astype("category")
|
| 102 |
+
|
| 103 |
prob = model.predict_proba(X_pred)[0][1]
|
| 104 |
|
| 105 |
return round(prob * 100, 2)
|