Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import joblib | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import accuracy_score, classification_report | |
# Locations of the input feature tables and of the serialized model.
TRAIN_CSV = "basemodel/train_features.csv"
VAL_CSV = "basemodel/val_features.csv"
MODEL_PATH = "basemodel/random_forest_baseline.pkl"

# Column holding the class label, and every column that must never be fed
# to the model as a feature ("language" is optional in the CSVs).
LABEL_COL = "Label"
NON_FEATURE_COLS = (LABEL_COL, "language")


def split_features(df, feature_cols=None):
    """Split a feature table into the model inputs ``X`` and the labels ``y``.

    Args:
        df: DataFrame containing a ``Label`` column plus feature columns.
        feature_cols: Optional iterable of column names to use as features.
            When ``None`` (training data), every column except those in
            ``NON_FEATURE_COLS`` is used.  When given (validation data),
            exactly these columns are selected, in this order, so the
            result lines up with the frame the model was fitted on.

    Returns:
        A ``(X, y)`` tuple: the feature DataFrame and the label Series.

    Raises:
        KeyError: If ``Label`` or any requested feature column is missing.
    """
    if LABEL_COL not in df.columns:
        raise KeyError(f"required column {LABEL_COL!r} is missing")
    y = df[LABEL_COL]

    if feature_cols is None:
        # Only drop the non-feature columns that are actually present, so
        # the optional "language" column never triggers a KeyError.
        present = [col for col in NON_FEATURE_COLS if col in df.columns]
        return df.drop(columns=present), y

    wanted = list(feature_cols)
    missing = [col for col in wanted if col not in df.columns]
    if missing:
        raise KeyError(f"feature columns missing from data: {missing}")
    return df[wanted], y


def main():
    """Train the Random Forest baseline, report validation metrics, save it.

    Reads the training and validation CSVs, fits the classifier on the
    training features, prints accuracy and a per-class report for the
    validation split, and writes the fitted model to ``MODEL_PATH``.

    Returns:
        The fitted classifier.
    """
    train_df = pd.read_csv(TRAIN_CSV)
    val_df = pd.read_csv(VAL_CSV)

    X_train, y_train = split_features(train_df)
    # Reuse the training feature list so a differing column set or column
    # order in the validation CSV cannot silently (or loudly) misalign.
    X_val, y_val = split_features(val_df, feature_cols=X_train.columns)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=8,
        min_samples_split=5,
        min_samples_leaf=3,
        random_state=42,  # fixed seed for reproducible runs
        class_weight="balanced",
    )
    rf.fit(X_train, y_train)

    val_preds = rf.predict(X_val)
    accuracy = accuracy_score(y_val, val_preds)
    print("\nValidation Accuracy:", round(accuracy, 4))
    print("\nValidation Classification Report:\n")
    # Passing the classes seen during fit keeps the report well-defined even
    # when the validation split (or the predictions) lack one of them.
    # NOTE(review): the names assume the sorted labels map to Human, then AI
    # (e.g. 0 = Human, 1 = AI) -- confirm against the dataset.
    print(
        classification_report(
            y_val,
            val_preds,
            labels=rf.classes_,
            target_names=["Human", "AI"],
        )
    )

    joblib.dump(rf, MODEL_PATH)
    print("\n✅ Random Forest baseline model saved to basemodel/random_forest_baseline.pkl")
    return rf


if __name__ == "__main__":
    main()