"""Train a multiclass LightGBM classifier on an Excel dataset.

Pipeline: load data -> label-encode target -> train/val split -> SMOTE
oversampling of the minority class -> LightGBM training with early
stopping -> save the booster to inference/lgbm_model.txt.
"""

import subprocess
import sys
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

try:
    import lightgbm as lgb
except ImportError:
    # `!pip install` is IPython magic and is a SyntaxError in a plain .py
    # file; install via the current interpreter's pip instead.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "lightgbm"])
    import lightgbm as lgb

# Load the dataset.
data_dir = Path("datasets")  # update with your dataset path
df = pd.read_excel(data_dir / "dataset.xlsx")

X = df.drop("target", axis=1)
y = df["target"]

# Encode the (possibly non-numeric) target labels to 0..n_classes-1.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Split the data into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    X, y_enc, test_size=0.2, random_state=42
)

# Apply SMOTE for handling imbalanced classes.
# NOTE: `n_jobs` was deprecated/removed in imbalanced-learn >= 0.10, so it
# is not passed here.
smote = SMOTE(sampling_strategy="minority")
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# LightGBM parameters for multiclass classification.
params = {
    "objective": "multiclass",
    "num_class": len(le.classes_),
    "metric": "multi_logloss",
    "boosting_type": "gbdt",
    "num_leaves": 31,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "verbose": -1,
}

# Create LightGBM datasets; SMOTE is applied only to the training split so
# the validation metric reflects the true class distribution.
train_data = lgb.Dataset(X_train_res, label=y_train_res)
val_data = lgb.Dataset(X_val, label=y_val)

# Train with early stopping. The `early_stopping_rounds` keyword was
# removed in LightGBM 4.x; the callback API is the supported form.
num_round = 1000
bst = lgb.train(
    params,
    train_data,
    num_round,
    valid_sets=[val_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)

# Save the trained model, creating the output folder if it doesn't exist.
out_dir = Path("inference")
out_dir.mkdir(parents=True, exist_ok=True)
bst.save_model(str(out_dir / "lgbm_model.txt"))