"""Train an XGBoost binary classifier on pre-extracted features and save it.

The script loads the train/validation arrays from
``<project root>/featureextraction/final_features``, fits an
``xgb.XGBClassifier`` with a fixed seed, prints accuracy and a
classification report for the validation split, and writes the fitted model
to ``<project root>/models`` both as XGBoost JSON and as a joblib pickle.

Everything runs at import time, exactly like the original script, so the
module-level names (``X_train``, ``model``, ``val_preds``, ...) remain
available to anything that imports this file.
"""

import os

import joblib
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

# The project root is the parent of the directory that contains this file.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

FEATURE_DIR = os.path.join(
    PROJECT_ROOT,
    "featureextraction",
    "final_features",
)


def _load_feature_array(filename):
    """Load one ``.npy`` file from ``FEATURE_DIR`` and return its contents."""
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
    # which can execute code if the file is not trusted. It is kept because
    # the saved arrays may be object-dtype; if the "Feature dtype" printed
    # below is numeric, consider switching to allow_pickle=False.
    return np.load(os.path.join(FEATURE_DIR, filename), allow_pickle=True)


# --- Load data ---------------------------------------------------------------
X_train = _load_feature_array("train_X.npy")
y_train = _load_feature_array("train_y.npy")

X_val = _load_feature_array("val_X.npy")
y_val = _load_feature_array("val_y.npy")

print("Train shape:", X_train.shape)
print("Validation shape:", X_val.shape)
print("Feature dtype:", X_train.dtype)

# --- Train -------------------------------------------------------------------
model = xgb.XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    objective="binary:logistic",
    eval_metric="logloss",
    random_state=42,
)

model.fit(X_train, y_train)

# --- Evaluate on the validation split ----------------------------------------
val_preds = model.predict(X_val)

print("\nVALIDATION RESULTS\n")
print("Accuracy:", accuracy_score(y_val, val_preds))
print(
    classification_report(
        y_val,
        val_preds,
        # Names are applied to the labels in sorted order; presumably
        # 0 = Human and 1 = AI -- confirm against the feature-extraction step.
        target_names=["Human", "AI"],
    )
)

# --- Persist the fitted model ------------------------------------------------
MODEL_DIR = os.path.join(PROJECT_ROOT, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

# Saved twice: XGBoost's own JSON format and a joblib pickle of the
# scikit-learn wrapper object.
model.save_model(os.path.join(MODEL_DIR, "xgboost_final_model.json"))
joblib.dump(model, os.path.join(MODEL_DIR, "xgboost_final_model.pkl"))

print("\n XGBoost model saved successfully")