File size: 1,967 Bytes
30a2ab4 cdb8e3e 30a2ab4 cdb8e3e 30a2ab4 cdb8e3e 30a2ab4 cdb8e3e 30a2ab4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import joblib
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from src.preprocessing import preprocess_texts
import json
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Serialized model file read by load_model() via joblib.
MODEL_PATH = "model/saved_model.joblib"
# Serialized vectorizer file read by load_model() via joblib.
VECT_PATH = "model/vectorizer.joblib"
# CSV read by evaluate(); rows missing "text" or "label" are dropped there.
DATA_PATH = "data/comments.csv"
def load_model():
    """Load the persisted model and vectorizer from disk.

    Returns:
        A ``(model, vectorizer)`` tuple, read with joblib from
        ``MODEL_PATH`` and ``VECT_PATH`` respectively (in that order).
    """
    # Tuple elements are evaluated left to right, so the model file is
    # read before the vectorizer file.
    return joblib.load(MODEL_PATH), joblib.load(VECT_PATH)
def _write_json(path, payload):
    """Serialize *payload* to *path* as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def _save_confusion_matrix_plot(cm_array, labels, path):
    """Render *cm_array* as an annotated heatmap and save it to *path*."""
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.heatmap(cm_array, annot=True, fmt="d", cmap="Blues",
                    xticklabels=labels, yticklabels=labels, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
        ax.set_title("Confusion Matrix")
        fig.savefig(path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


def evaluate():
    """Evaluate the saved model on the labelled CSV and persist the results.

    Reads ``DATA_PATH``, drops rows lacking ``text`` or ``label``, runs the
    texts through ``preprocess_texts`` and the loaded vectorizer/model, and
    writes three artifacts into the ``model`` directory:

    * ``eval_report.json`` - full classification report and confusion matrix
    * ``metrics_summary.json`` - accuracy plus precision/recall/F1 of one label
    * ``confusion_matrix.png`` - heatmap of the confusion matrix

    The summarized label is ``"judi"`` when the report contains it, otherwise
    the first label in the report.

    Returns:
        The summary dict that was written to ``metrics_summary.json``.

    Raises:
        ValueError: If no rows with both ``text`` and ``label`` remain.
    """
    df = pd.read_csv(DATA_PATH).dropna(subset=["text", "label"])
    if df.empty:
        # Fail early with a clear message instead of an opaque sklearn error.
        raise ValueError(
            f"No rows with both 'text' and 'label' found in {DATA_PATH}"
        )

    texts = preprocess_texts(df["text"].tolist())
    y_true = df["label"].astype(str).tolist()

    model, vect = load_model()
    X = vect.transform(texts)
    # Ground truth is compared as strings, so predictions must be too;
    # otherwise a model trained on non-string labels yields mixed types.
    y_pred = [str(label) for label in model.predict(X)]

    # The confusion matrix spans every label seen in either vector. Fix that
    # order explicitly so the matrix rows/columns and the heatmap tick labels
    # are guaranteed to line up, even when the model predicts a label that
    # never occurs in the ground truth.
    labels = sorted(set(y_true) | set(y_pred))

    report = classification_report(y_true, y_pred, output_dict=True)
    cm_array = confusion_matrix(y_true, y_pred, labels=labels)

    os.makedirs("model", exist_ok=True)

    # full report
    _write_json(
        "model/eval_report.json",
        {"report": report, "confusion_matrix": cm_array.tolist()},
    )

    target_label = "judi" if "judi" in report else next(iter(report))
    target_metrics = report[target_label]
    summary = {
        "accuracy": report["accuracy"],
        f"precision_{target_label}": target_metrics["precision"],
        f"recall_{target_label}": target_metrics["recall"],
        f"f1_{target_label}": target_metrics["f1-score"],
    }
    _write_json("model/metrics_summary.json", summary)

    # confusion matrix heatmap
    _save_confusion_matrix_plot(cm_array, labels, "model/confusion_matrix.png")

    print("Saved evaluation reports & confusion matrix.")
    return summary
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    evaluate()
|