# train/train.py — LightGBM multi-class training script
# Update train/train.py (commit cb3a3a4, verified; uploaded by iqranaz)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

# Import LightGBM, installing it on the fly if it is missing.
# NOTE: the original used "!pip install lightgbm", which is IPython/Jupyter-only
# syntax and a SyntaxError in a plain Python script; invoke pip through the
# current interpreter instead.
try:
    import lightgbm as lgb
except ImportError:  # only the missing-module case — never a bare except
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "lightgbm"])
    import lightgbm as lgb
# Load the dataset from an Excel workbook.
file_path = 'datasets'  # Update with your dataset path
df = pd.read_excel(file_path + '/dataset.xlsx')

# Separate features from the target column.
X = df.drop('target', axis=1)
y = df['target']

# Encode the target labels as integers 0..n_classes-1 (required by LightGBM).
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Split into training and validation sets (80/20, reproducible via the seed).
X_train, X_val, y_train, y_val = train_test_split(
    X, y_enc, test_size=0.2, random_state=42
)

# Oversample the minority class with SMOTE on the TRAINING split only —
# resampling before the split would leak synthetic samples into validation.
# random_state makes the synthetic samples reproducible; the deprecated
# n_jobs argument is dropped (removed in recent imbalanced-learn releases).
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
# Hyper-parameters for the LightGBM booster.
params = dict(
    objective='multiclass',        # multi-class classification task
    num_class=len(le.classes_),    # one output per encoded label
    metric='multi_logloss',        # validation metric: multi-class log loss
    boosting_type='gbdt',          # standard gradient-boosted decision trees
    num_leaves=31,                 # cap on leaves per tree (complexity)
    learning_rate=0.05,            # shrinkage applied each boosting round
    feature_fraction=0.9,          # fraction of columns sampled per tree
    bagging_fraction=0.8,          # fraction of rows sampled when bagging
    bagging_freq=5,                # perform bagging every 5 iterations
    verbose=-1,                    # silence LightGBM's console output
)
# Build LightGBM datasets; the validation set references the training set so
# both share the same feature binning.
train_data = lgb.Dataset(X_train_res, label=y_train_res)
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

# Train with early stopping: halt when validation multi_logloss has not
# improved for 10 rounds. NOTE: the early_stopping_rounds keyword argument
# was removed in LightGBM 4.0 — the callbacks API below works on 3.x and 4.x.
num_round = 1000
bst = lgb.train(
    params,
    train_data,
    num_round,
    valid_sets=[val_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)

# Save the trained model, creating the 'inference' folder first so
# save_model does not fail on a missing directory.
import os
os.makedirs('inference', exist_ok=True)
bst.save_model('inference/lgbm_model.txt')