File size: 2,674 Bytes
62a3be1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python
import argparse
import json
from pathlib import Path

import joblib
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report
)

# Project root: two directory levels up from this script's location.
BASE_DIR = Path(__file__).resolve().parent.parent
# Where serialized model artifacts are stored.
MODELS_DIR = BASE_DIR / "models"
# Where datasets are stored.
DATA_DIR = BASE_DIR / "data"


def load_model():
    """Deserialize and return the trained pipeline from MODELS_DIR.

    Raises:
        FileNotFoundError: if the serialized pipeline file is absent.
    """
    path = MODELS_DIR / "trained_pipeline.joblib"
    if path.exists():
        return joblib.load(path)
    raise FileNotFoundError(f"Model not found: {path}")


def load_dataset(dataset_path: Path):
    """Load an evaluation dataset from a JSON file.

    Supported layouts: a top-level list of samples, or an object with a
    "samples" key holding that list. Each sample must be an object with
    "text" and "label" keys.

    Args:
        dataset_path: Path to the JSON dataset file.

    Returns:
        A ``(texts, labels)`` pair of parallel lists.

    Raises:
        FileNotFoundError: if the file does not exist.
        RuntimeError: if the file name matches a known training dataset.
        ValueError: if the JSON layout or any sample is malformed.
    """
    if not dataset_path.exists():
        raise FileNotFoundError(f"Dataset not found: {dataset_path}")

    # Hard guard: never evaluate on training data
    if dataset_path.name in {"training_data.json", "train.json"}:
        raise RuntimeError(
            f"Refusing to evaluate on training dataset: {dataset_path.name}"
        )

    with dataset_path.open("r", encoding="utf-8") as f:
        raw = json.load(f)

    if isinstance(raw, list):
        samples = raw
    elif isinstance(raw, dict) and "samples" in raw:
        samples = raw["samples"]
    else:
        raise ValueError("Unsupported JSON dataset format")

    texts = []
    labels = []

    for i, item in enumerate(samples):
        # A non-dict entry (e.g. a bare int) would make the membership test
        # below raise an unhelpful TypeError; fail with a clear message.
        if not isinstance(item, dict):
            raise ValueError(f"Invalid sample at index {i}: {item!r}")
        if "text" not in item or "label" not in item:
            raise ValueError(f"Invalid sample at index {i}: {item}")
        texts.append(item["text"])
        labels.append(item["label"])

    return texts, labels


def evaluate(model, X, y):
    """Predict on ``X`` and print weighted metrics plus a per-class report.

    Args:
        model: A fitted estimator exposing ``predict``.
        X: Input samples to score.
        y: Gold labels aligned with ``X``.
    """
    predictions = model.predict(X)

    accuracy = accuracy_score(y, predictions)
    # Weighted averaging so class imbalance is reflected in the summary.
    prec, rec, f1, _ = precision_recall_fscore_support(
        y, predictions, average="weighted", zero_division=0
    )

    banner = "===================================="
    print(banner)
    print("Offline Evaluation Results")
    print(banner)
    for line in (
        f"Samples  : {len(y)}",
        f"Accuracy : {accuracy:.4f}",
        f"Precision: {prec:.4f}",
        f"Recall   : {rec:.4f}",
        f"F1-score : {f1:.4f}",
    ):
        print(line)
    print()
    print("Detailed Classification Report")
    print("------------------------------------")
    print(classification_report(y, predictions, zero_division=0))


def main():
    """CLI entry point: parse arguments, then load and evaluate the model."""
    default_data = DATA_DIR / "samples" / "eval.json"
    parser = argparse.ArgumentParser(
        description="Offline evaluation using held-out JSON dataset"
    )
    parser.add_argument(
        "--data",
        default=str(default_data),
        help="Path to evaluation dataset (default: data/samples/eval.json)",
    )
    args = parser.parse_args()

    pipeline = load_model()
    texts, labels = load_dataset(Path(args.data))
    evaluate(pipeline, texts, labels)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()