# ai-code-detection/basemodel/evaluate_randomforest.py
# Committed by joshnavip
# Initial commit: AI code detection project (without binary files)
# Commit b144cb7
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Load the test-set feature table.
test_df = pd.read_csv("basemodel/test_features.csv")

# Columns that must not reach the classifier: the target column itself and,
# when the table carries one, the "language" column.
# NOTE(review): "language" is presumably metadata rather than a feature the
# model was trained on -- confirm against the training script.
drop_cols = ["Label"]
if "language" in test_df.columns:
    drop_cols.append("language")

# Split the table into the feature matrix and the ground-truth labels.
X_test = test_df.drop(columns=drop_cols)
y_test = test_df["Label"]
# Load the persisted baseline model.
# NOTE: joblib.load unpickles arbitrary Python objects, so this path must
# only ever point at a trusted, locally produced model file.
rf = joblib.load("basemodel/random_forest_baseline.pkl")

# Predict on the test features and score against the ground truth.
test_preds = rf.predict(X_test)
accuracy = accuracy_score(y_test, test_preds)

# Pin the reported label set to the classes the model was fitted on. Without
# an explicit `labels`, scikit-learn derives the classes from the values that
# happen to occur in y_test/test_preds; if only one class shows up, the two
# entries in `target_names` no longer match and classification_report raises,
# and the confusion matrix silently changes shape.
# NOTE(review): the ["Human", "AI"] naming assumes the first (lowest-sorted)
# class is the human label -- confirm against the training labels.
class_labels = rf.classes_

print("\n🧪 TEST SET EVALUATION (Statistical Baseline)\n")
print("Test Accuracy:", round(accuracy, 4))

print("\nClassification Report:\n")
print(
    classification_report(
        y_test,
        test_preds,
        labels=class_labels,
        target_names=["Human", "AI"],
        zero_division=0,
    )
)

print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, test_preds, labels=class_labels))