Spaces:
Sleeping
Sleeping
import subprocess
import sys

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

# Import LightGBM, installing it on the fly if it is missing.
# NOTE: the original used the IPython magic `!pip install lightgbm`, which is
# a SyntaxError outside a notebook. Invoking pip through the current
# interpreter works in plain Python and notebooks alike.
try:
    import lightgbm as lgb
except ImportError:  # catch only the missing-module case, not every error
    print("lightgbm not found - installing it")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "lightgbm"])
    import lightgbm as lgb
# ---- Load the dataset -------------------------------------------------------
file_path = 'datasets'  # Update with your dataset path
df = pd.read_excel(file_path + '/dataset.xlsx')

# Features / target split; 'target' is assumed to be the label column.
X = df.drop('target', axis=1)
y = df['target']

# Encode the target variable into integers 0..n_classes-1.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Split into training and validation sets.
# stratify=y_enc keeps the class proportions identical in both splits, which
# matters here because the classes are imbalanced (SMOTE is applied below).
X_train, X_val, y_train, y_val = train_test_split(
    X, y_enc, test_size=0.2, random_state=42, stratify=y_enc
)

# Oversample rare class(es) in the TRAINING set only -- never resample the
# validation set, or the validation metrics would be biased.
# NOTE(review): 'minority' oversamples only the single smallest class; for a
# multiclass problem 'not majority' may be what was intended -- confirm.
# The deprecated `n_jobs` argument (removed in recent imbalanced-learn
# releases) was dropped; random_state added for reproducibility.
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
# LightGBM hyper-parameters for multiclass classification.
params = dict(
    objective='multiclass',
    num_class=len(le.classes_),   # one softmax output per encoded label
    metric='multi_logloss',
    boosting_type='gbdt',
    num_leaves=31,
    learning_rate=0.05,
    feature_fraction=0.9,         # column subsampling per tree
    bagging_fraction=0.8,         # row subsampling ...
    bagging_freq=5,               # ... performed every 5 iterations
    verbose=-1,                   # silence per-iteration logging
)
from pathlib import Path

# ---- Train the LightGBM model ----------------------------------------------
train_data = lgb.Dataset(X_train_res, label=y_train_res)
# `reference=` makes the validation set reuse the training set's feature bins,
# as LightGBM recommends for validation Datasets.
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

num_round = 1000
# LightGBM >= 4.0 removed the `early_stopping_rounds` keyword from lgb.train;
# early stopping is configured through the callbacks mechanism instead.
bst = lgb.train(
    params,
    train_data,
    num_boost_round=num_round,
    valid_sets=[val_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)

# Save the trained model in the 'inference' folder, creating it first --
# save_model raises if the target directory does not exist.
Path('inference').mkdir(parents=True, exist_ok=True)
bst.save_model('inference/lgbm_model.txt')