# ai-code-detection / classifier / tune_xgboost.py
# joshnavip's picture
# Initial commit: AI code detection project (without binary files)
# b144cb7
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import ParameterGrid
import joblib
# Load the pre-extracted feature matrices (X) and label vectors (y) for the
# training and validation splits. Paths are relative to the working
# directory the script is launched from.
X_train, y_train = (
    np.load("featureextraction/final_features/train_X.npy"),
    np.load("featureextraction/final_features/train_y.npy"),
)
X_val, y_val = (
    np.load("featureextraction/final_features/val_X.npy"),
    np.load("featureextraction/final_features/val_y.npy"),
)

# Quick sanity check of what was loaded.
print("Train shape:", X_train.shape)
print("Val shape:", X_val.shape)
# Hyperparameter search space: every combination of these values is tried
# (2 * 3 * 3 * 2 * 2 = 72 candidate configurations).
param_grid = dict(
    n_estimators=[300, 500],
    max_depth=[4, 6, 8],
    learning_rate=[0.03, 0.05, 0.1],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)
# Exhaustive grid search: fit one XGBoost classifier per parameter
# combination on the training split and keep the one with the highest
# accuracy on the validation split.
best_acc = 0.0
best_model = None
best_params = None

print("\n Starting XGBoost Hyperparameter Tuning...\n")

for idx, params in enumerate(ParameterGrid(param_grid), start=1):
    print(f"Trial {idx}: {params}")

    model = xgb.XGBClassifier(
        objective="binary:logistic",
        eval_metric="logloss",
        random_state=42,  # fixed seed so trials are reproducible
        **params,
    )
    model.fit(X_train, y_train)

    val_preds = model.predict(X_val)
    acc = accuracy_score(y_val, val_preds)
    print("Validation Accuracy:", round(acc, 4))

    # Always accept the first trial so that a model is selected even when
    # every candidate scores 0.0; afterwards only strict improvements
    # replace the incumbent (ties keep the earlier trial).
    if best_model is None or acc > best_acc:
        best_acc = acc
        best_model = model
        best_params = params
        print("New best model found")

    print("-" * 60)
# Summarise the search result, print a per-class report for the winning
# model, and persist it in two formats.
print("\n🏆 BEST VALIDATION ACCURACY:", round(best_acc, 4))
print("🏆 BEST PARAMETERS:", best_params)

# Fail with a clear message instead of an AttributeError on None if the
# search did not select any model.
if best_model is None:
    raise RuntimeError(
        "Hyperparameter search did not select a model; nothing to report or save."
    )

# NOTE: this report uses the same validation split that was used to pick
# the best model, so the numbers are optimistic; a held-out test split is
# needed for an unbiased estimate.
final_preds = best_model.predict(X_val)

print("\nFINAL VALIDATION REPORT\n")
print(
    classification_report(
        y_val,
        final_preds,
        # Pin the label set so the report does not raise when one class is
        # absent from y_val. Assumes 0 = Human and 1 = AI, matching the
        # target_names order — confirm against the feature-extraction step.
        labels=[0, 1],
        target_names=["Human", "AI"],
    )
)

# Native XGBoost JSON format (portable across XGBoost versions) ...
best_model.save_model("classifier/xgboost_final_model.json")
# ... plus a pickle of the full scikit-learn wrapper. Only load this file
# from trusted sources: unpickling executes arbitrary code.
joblib.dump(best_model, "classifier/xgboost_final_model.pkl")

print("\n Tuned XGBoost model saved successfully")