"""Grid-search XGBoost hyperparameters and save the best validation model.

The script loads pre-extracted train/validation feature arrays from ``.npy``
files, fits one ``XGBClassifier`` per combination in ``param_grid``, keeps the
model with the highest validation accuracy, prints a classification report
for it, and writes it to disk as both XGBoost JSON and a joblib pickle.

NOTE: the final report is computed on the same validation split that was used
to pick the hyperparameters, so the reported numbers are optimistic; use a
separate held-out set for an unbiased estimate.
"""
import os

import joblib
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import ParameterGrid

FEATURE_DIR = "featureextraction/final_features"
MODEL_DIR = "classifier"

X_train = np.load(f"{FEATURE_DIR}/train_X.npy")
y_train = np.load(f"{FEATURE_DIR}/train_y.npy")
X_val = np.load(f"{FEATURE_DIR}/val_X.npy")
y_val = np.load(f"{FEATURE_DIR}/val_y.npy")

print("Train shape:", X_train.shape)
print("Val shape:", X_val.shape)

# Create the output directory up front so a missing/unwritable path fails
# before the (long) grid search instead of after it.
os.makedirs(MODEL_DIR, exist_ok=True)

param_grid = {
    "n_estimators": [300, 500],
    "max_depth": [4, 6, 8],
    "learning_rate": [0.03, 0.05, 0.1],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0],
}

# Start below any attainable accuracy so that the first trial is always
# recorded; starting at 0.0 would leave ``best_model`` as None if every trial
# scored exactly 0.0.
best_acc = float("-inf")
best_model = None
best_params = None

print("\n Starting XGBoost Hyperparameter Tuning...\n")

for idx, params in enumerate(ParameterGrid(param_grid), start=1):
    print(f"Trial {idx}: {params}")

    model = xgb.XGBClassifier(
        objective="binary:logistic",
        eval_metric="logloss",
        random_state=42,
        **params,
    )
    model.fit(X_train, y_train)

    val_preds = model.predict(X_val)
    acc = accuracy_score(y_val, val_preds)
    print("Validation Accuracy:", round(acc, 4))

    # Strict ">" means ties keep the earliest configuration in grid order.
    if acc > best_acc:
        best_acc = acc
        best_model = model
        best_params = params
        print("New best model found")

    print("-" * 60)

if best_model is None:
    # Only reachable if the grid produced no trials at all.
    raise RuntimeError("Hyperparameter search produced no model")

print("\nBEST VALIDATION ACCURACY:", round(best_acc, 4))
print("BEST PARAMETERS:", best_params)

final_preds = best_model.predict(X_val)

print("\nFINAL VALIDATION REPORT\n")
# target_names are applied to the sorted label values; this presumably maps
# label 0 -> "Human" and label 1 -> "AI" -- TODO confirm against the
# feature-extraction step.
print(
    classification_report(
        y_val,
        final_preds,
        target_names=["Human", "AI"],
    )
)

# JSON is XGBoost's own model format; the joblib pickle stores the whole
# Python estimator object as well.
best_model.save_model(f"{MODEL_DIR}/xgboost_final_model.json")
joblib.dump(best_model, f"{MODEL_DIR}/xgboost_final_model.pkl")

print("\n Tuned XGBoost model saved successfully")