nitishkarvekar committed on
Commit
4bfd4e3
·
verified ·
1 Parent(s): 9bf6040

Update ml_model.py

Browse files
Files changed (1) hide show
  1. ml_model.py +34 -8
ml_model.py CHANGED
@@ -2,16 +2,26 @@ import yfinance as yf
2
  import pandas as pd
3
  import ta
4
  import lightgbm as lgb
 
 
5
  from nifty100 import NIFTY100
6
 
 
7
  model = None
8
  features_list = ["rsi", "macd", "ma20", "ma50", "volatility", "ticker_code"]
9
 
10
- def train_model():
11
  global model
12
 
13
- all_data = []
 
 
 
 
 
 
14
 
 
15
  ticker_mapping = {ticker: idx for idx, ticker in enumerate(NIFTY100)}
16
 
17
  for ticker in NIFTY100:
@@ -21,44 +31,57 @@ def train_model():
21
  if len(df) < 100:
22
  continue
23
 
 
 
 
24
  close = df["Close"].squeeze()
25
 
26
  df["return"] = close.pct_change()
27
- df["rsi"] = ta.momentum.RSIIndicator(close).rsi()
28
  df["macd"] = ta.trend.MACD(close).macd()
29
  df["ma20"] = close.rolling(20).mean()
30
  df["ma50"] = close.rolling(50).mean()
31
  df["volatility"] = df["return"].rolling(10).std()
32
 
33
  df["target"] = (df["return"].shift(-1) > 0).astype(int)
34
-
35
  df["ticker_code"] = ticker_mapping[ticker]
36
 
37
  df = df.dropna()
38
  all_data.append(df)
39
 
40
- except:
41
  pass
42
 
43
  full_df = pd.concat(all_data)
44
 
 
 
 
45
  X = full_df[features_list]
46
  y = full_df["target"]
47
 
 
48
  model = lgb.LGBMClassifier(
49
  n_estimators=300,
50
  max_depth=6,
51
- learning_rate=0.05,
52
- categorical_feature=[len(features_list) - 1]
53
  )
54
 
55
  model.fit(X, y)
 
 
 
 
56
 
57
  def predict_probability(df, ticker):
58
  global model
59
 
60
  if model is None:
61
- train_model()
 
 
 
 
62
 
63
  close = df["Close"].squeeze()
64
 
@@ -74,6 +97,9 @@ def predict_probability(df, ticker):
74
 
75
  X_pred = pd.DataFrame([[rsi, macd, ma20, ma50, vol, ticker_code]], columns=features_list)
76
 
 
 
 
77
  prob = model.predict_proba(X_pred)[0][1]
78
 
79
  return round(prob * 100, 2)
 
2
  import pandas as pd
3
  import ta
4
  import lightgbm as lgb
5
+ import joblib
6
+ import os
7
  from nifty100 import NIFTY100
8
 
9
+ MODEL_FILE = "lightgbm_model.joblib"
10
  model = None
11
  features_list = ["rsi", "macd", "ma20", "ma50", "volatility", "ticker_code"]
12
 
13
+ def load_or_train_model():
14
  global model
15
 
16
+ # Check if we already have a trained model saved
17
+ if os.path.exists(MODEL_FILE):
18
+ print("Loading pre-trained LightGBM model...")
19
+ model = joblib.load(MODEL_FILE)
20
+ return
21
+
22
+ print("Training LightGBM model from scratch. This will take a few minutes...")
23
 
24
+ all_data = []
25
  ticker_mapping = {ticker: idx for idx, ticker in enumerate(NIFTY100)}
26
 
27
  for ticker in NIFTY100:
 
31
  if len(df) < 100:
32
  continue
33
 
34
+ # FIX 1: Flatten the multi-index columns from yfinance
35
+ df.columns = df.columns.get_level_values(0)
36
+
37
  close = df["Close"].squeeze()
38
 
39
  df["return"] = close.pct_change()
40
+ df["rsi"] = ta.momentum.RSIIndicator(close).rsi().rsi() if hasattr(ta.momentum.RSIIndicator(close).rsi(), 'rsi') else ta.momentum.RSIIndicator(close).rsi()
41
  df["macd"] = ta.trend.MACD(close).macd()
42
  df["ma20"] = close.rolling(20).mean()
43
  df["ma50"] = close.rolling(50).mean()
44
  df["volatility"] = df["return"].rolling(10).std()
45
 
46
  df["target"] = (df["return"].shift(-1) > 0).astype(int)
 
47
  df["ticker_code"] = ticker_mapping[ticker]
48
 
49
  df = df.dropna()
50
  all_data.append(df)
51
 
52
+ except Exception as e:
53
  pass
54
 
55
  full_df = pd.concat(all_data)
56
 
57
+ # FIX 2: Convert ticker_code to pandas category type (removes the warning)
58
+ full_df["ticker_code"] = full_df["ticker_code"].astype("category")
59
+
60
  X = full_df[features_list]
61
  y = full_df["target"]
62
 
63
+ # Removed the deprecated categorical_feature argument
64
  model = lgb.LGBMClassifier(
65
  n_estimators=300,
66
  max_depth=6,
67
+ learning_rate=0.05
 
68
  )
69
 
70
  model.fit(X, y)
71
+
72
+ # Save the model to disk
73
+ joblib.dump(model, MODEL_FILE)
74
+ print("Model saved successfully as", MODEL_FILE)
75
 
76
  def predict_probability(df, ticker):
77
  global model
78
 
79
  if model is None:
80
+ load_or_train_model()
81
+
82
+ # Ensure columns are flat before making predictions
83
+ if isinstance(df.columns, pd.MultiIndex):
84
+ df.columns = df.columns.get_level_values(0)
85
 
86
  close = df["Close"].squeeze()
87
 
 
97
 
98
  X_pred = pd.DataFrame([[rsi, macd, ma20, ma50, vol, ticker_code]], columns=features_list)
99
 
100
+ # Cast it to category here as well
101
+ X_pred["ticker_code"] = X_pred["ticker_code"].astype("category")
102
+
103
  prob = model.predict_proba(X_pred)[0][1]
104
 
105
  return round(prob * 100, 2)