shivam2k3
/

opensoc-env

Model card Files Files and versions

opensoc-env / eval /metrics.py

shivam2k3's picture

OpenSOC v1

bb6a031 about 1 month ago

history blame contribute delete

3.49 kB

	"""Pure-python evaluation metrics for OpenSOC.

	Exposes:
	* `confusion_matrix(predictions, truths)` — 5x5 dict-of-dicts
	* `per_class_f1(cm)` — macro F1 plus per-class precision/recall/F1
	* `dismiss_on_malicious_rate(predictions, truths)` — the cardinal SOC
	metric. This is what we publish in the headline plot.
	* `over_react_rate(predictions, truths)` — how often the model
	quarantines or blocks on a benign incident.

	We deliberately don't pull in scikit-learn — keeping eval dependency-free
	makes it easy to run inside the OpenEnv container and from a Hugging
	Face Space build log.
	"""

	from __future__ import annotations

	from typing import Dict, Iterable, List, Tuple

	from schema import CONTAINMENT_ACTIONS, TriageAction


	ALL_ACTIONS: List[str] = [a.value for a in TriageAction]


	def confusion_matrix(predictions: Iterable[str], truths: Iterable[str]) -> Dict[str, Dict[str, int]]:
	cm: Dict[str, Dict[str, int]] = {gt: {p: 0 for p in ALL_ACTIONS} for gt in ALL_ACTIONS}
	for p, gt in zip(predictions, truths):
	if gt not in cm:
	cm[gt] = {a: 0 for a in ALL_ACTIONS}
	if p not in cm[gt]:
	cm[gt][p] = 0
	cm[gt][p] = cm[gt].get(p, 0) + 1
	return cm


	def per_class_f1(cm: Dict[str, Dict[str, int]]) -> Tuple[float, Dict[str, Dict[str, float]]]:
	per_class: Dict[str, Dict[str, float]] = {}
	f1_sum = 0.0
	n_classes = 0
	for cls in ALL_ACTIONS:
	tp = cm.get(cls, {}).get(cls, 0)
	fp = sum(cm.get(other, {}).get(cls, 0) for other in ALL_ACTIONS if other != cls)
	fn = sum(cm.get(cls, {}).get(other, 0) for other in ALL_ACTIONS if other != cls)
	precision = tp / (tp + fp) if (tp + fp) else 0.0
	recall = tp / (tp + fn) if (tp + fn) else 0.0
	f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
	per_class[cls] = {"precision": precision, "recall": recall, "f1": f1, "support": tp + fn}
	f1_sum += f1
	n_classes += 1
	macro_f1 = f1_sum / n_classes if n_classes else 0.0
	return macro_f1, per_class


	def dismiss_on_malicious_rate(predictions: Iterable[str], truths: Iterable[str]) -> float:
	"""Fraction of malicious incidents the model wrongly dismissed."""
	malicious_total = 0
	dismissed = 0
	for p, gt in zip(predictions, truths):
	if gt != TriageAction.DISMISS.value:
	malicious_total += 1
	if p == TriageAction.DISMISS.value:
	dismissed += 1
	return dismissed / malicious_total if malicious_total else 0.0


	def over_react_rate(predictions: Iterable[str], truths: Iterable[str]) -> float:
	"""Fraction of benign-or-low incidents the model over-reacted on."""
	benign_total = 0
	over_reacted = 0
	containment = {a.value for a in CONTAINMENT_ACTIONS}
	for p, gt in zip(predictions, truths):
	if gt in (TriageAction.DISMISS.value, TriageAction.MONITOR.value):
	benign_total += 1
	if p in containment:
	over_reacted += 1
	return over_reacted / benign_total if benign_total else 0.0


	def accuracy(predictions: Iterable[str], truths: Iterable[str]) -> float:
	correct = 0
	n = 0
	for p, gt in zip(predictions, truths):
	n += 1
	if p == gt:
	correct += 1
	return correct / n if n else 0.0


	__all__ = [
	"ALL_ACTIONS",
	"confusion_matrix",
	"per_class_f1",
	"dismiss_on_malicious_rate",
	"over_react_rate",
	"accuracy",
	]