Spaces:
Runtime error
Runtime error
File size: 1,086 Bytes
b144cb7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
# Load the pre-computed feature tables for the training and validation splits.
train_df = pd.read_csv("basemodel/train_features.csv")
val_df = pd.read_csv("basemodel/val_features.csv")

# Columns that are not model inputs: the target column, plus the optional
# "language" column when the training table carries one.
drop_cols = ["Label"]
if "language" in train_df.columns:
    drop_cols.append("language")

# Separate the training features from the training target.
X_train = train_df.drop(columns=drop_cols)
y_train = train_df["Label"]

# Build the validation drop list from the validation table itself.  Reusing
# the training-derived list raises KeyError when only the training table has
# "language", and leaves "language" in X_val as a stray feature when only the
# validation table has it.
val_drop_cols = ["Label"]
if "language" in val_df.columns:
    val_drop_cols.append("language")

X_val = val_df.drop(columns=val_drop_cols)
y_val = val_df["Label"]
# Hyper-parameters of the baseline forest, gathered in one mapping so the
# configuration can be read (or logged) separately from the constructor call.
rf_params = {
    "n_estimators": 200,
    "max_depth": 8,
    "min_samples_split": 5,
    "min_samples_leaf": 3,
    "random_state": 42,  # fixed seed
    "class_weight": "balanced",
}

# Build the classifier from the mapping and fit it on the training split.
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)
# Predict on the validation features and compare against the true labels.
val_preds = rf.predict(X_val)
accuracy = accuracy_score(y_val, val_preds)
rounded_accuracy = round(accuracy, 4)
print("\nValidation Accuracy:", rounded_accuracy)

# Per-class report for the same predictions.
# NOTE(review): the display names are passed without an explicit `labels`
# argument — presumably the smaller Label value means "Human" and the larger
# one "AI"; confirm against how the Label column is encoded.
print("\nValidation Classification Report:\n")
class_names = ["Human", "AI"]
report = classification_report(y_val, val_preds, target_names=class_names)
print(report)

# Persist the fitted model next to the feature files it was trained on.
model_path = "basemodel/random_forest_baseline.pkl"
joblib.dump(rf, model_path)
print("\n✅ Random Forest baseline model saved to basemodel/random_forest_baseline.pkl")
|