File size: 1,967 Bytes
30a2ab4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdb8e3e
 
30a2ab4
 
 
 
 
 
 
cdb8e3e
30a2ab4
 
cdb8e3e
 
 
30a2ab4
 
 
 
 
 
 
cdb8e3e
30a2ab4
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import joblib
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from src.preprocessing import preprocess_texts
import json
import matplotlib.pyplot as plt
import seaborn as sns
import os

MODEL_PATH = "model/saved_model.joblib"
VECT_PATH = "model/vectorizer.joblib"
DATA_PATH = "data/comments.csv"

def load_model():
    """Load the persisted classifier and its TF-IDF vectorizer from disk.

    Returns:
        tuple: (model, vectorizer) as deserialized by joblib from
        MODEL_PATH and VECT_PATH respectively.
    """
    return joblib.load(MODEL_PATH), joblib.load(VECT_PATH)

def evaluate():
    """Evaluate the saved model on the labeled dataset and persist reports.

    Writes three artifacts under model/:
      - eval_report.json: full classification report + confusion matrix
      - metrics_summary.json: accuracy plus precision/recall/f1 for the
        target label ("judi" if present, otherwise the first class label)
      - confusion_matrix.png: heatmap of the confusion matrix

    Returns:
        dict: the metrics summary that was written to disk.
    """
    df = pd.read_csv(DATA_PATH).dropna(subset=["text", "label"])
    texts = preprocess_texts(df["text"].tolist())
    y_true = df["label"].astype(str).tolist()

    model, vect = load_model()
    X = vect.transform(texts)
    y_pred = model.predict(X)

    # Fix the label order explicitly so the confusion-matrix rows/columns
    # match the heatmap tick labels even when the model predicts a class
    # that never appears in y_true (sklearn otherwise sorts the union
    # internally, which previously could desync from sorted(set(y_true))).
    labels = sorted(set(y_true) | set(str(p) for p in y_pred))

    report = classification_report(y_true, y_pred, output_dict=True)
    cm_array = confusion_matrix(y_true, y_pred, labels=labels)
    cm = cm_array.tolist()

    os.makedirs("model", exist_ok=True)

    # full report
    with open("model/eval_report.json", "w") as f:
        json.dump({"report": report, "confusion_matrix": cm}, f, indent=2)

    # classification_report's dict also contains aggregate entries
    # ("accuracy", "macro avg", "weighted avg"); the old fallback
    # list(report.keys())[0] could pick one of those instead of a real
    # class label, so filter them out before choosing.
    aggregate_keys = {"accuracy", "macro avg", "weighted avg"}
    class_labels = [k for k in report if k not in aggregate_keys]
    target_label = "judi" if "judi" in report else class_labels[0]
    summary = {
        "accuracy": report["accuracy"],
        f"precision_{target_label}": report[target_label]["precision"],
        f"recall_{target_label}": report[target_label]["recall"],
        f"f1_{target_label}": report[target_label]["f1-score"]
    }
    with open("model/metrics_summary.json", "w") as f:
        json.dump(summary, f, indent=2)

    # confusion matrix heatmap
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm_array, annot=True, fmt="d", cmap="Blues",
                xticklabels=labels, yticklabels=labels)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.savefig("model/confusion_matrix.png")
    plt.close()

    print("Saved evaluation reports & confusion matrix.")
    return summary

if __name__ == "__main__":
    # Script entry point: run the full evaluation pipeline.
    evaluate()