Spaces:
Sleeping
Sleeping
File size: 1,573 Bytes
761f203 dc990fa 761f203 dc990fa 761f203 dc990fa 761f203 dc990fa 761f203 dc990fa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | from __future__ import annotations
import json
from pathlib import Path
from env.models import FlakySleuthAction
# Location of the category-similarity table: the "dataset" directory that sits
# next to this file's parent directory.
_SIM_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "dataset", "category_similarity.json"
)

# Parsed once at import time; a missing or malformed file fails the import.
with _SIM_PATH.open("r", encoding="utf-8") as handle:
    _RAW_SIM = json.loads(handle.read())
_CANONICAL = {
"OD": "OD",
"OD-BRIT": "OD-Brit",
"OD-VIC": "OD-Vic",
"NIO": "NIO",
"NOD": "NOD",
"UD": "UD",
"TD": "TD",
"TZD": "TZD",
"ID": "ID",
"NDOI": "NDOI",
"NDOD": "NDOD",
"OSD": "OSD",
}
VALID_CATEGORIES = set(_CANONICAL.values())
def _normalize_category(value: str) -> str:
text = value.strip().replace("_", "-").replace(" ", "-")
upper = text.upper()
return _CANONICAL.get(upper, "")
def _get_similarity(predicted: str, truth: str) -> float:
if predicted == truth:
return 0.999
key_a = f"{predicted},{truth}"
key_b = f"{truth},{predicted}"
return float(_RAW_SIM.get(key_a, _RAW_SIM.get(key_b, 0.0)))
def grade(action: FlakySleuthAction, task: dict) -> float:
    """Score a root-cause classification, with partial credit for near misses.

    Args:
        action: The action to score; its ``action_type`` and ``argument``
            attributes are read.
        task: Task record; only the first ``;``-separated entry of its
            ``"category"`` value is used as the ground truth.

    Returns:
        0.001 when the action is not ``"classify_root_cause"`` or when either
        the predicted or the ground-truth label is not a recognised category;
        otherwise the table similarity clamped to the range [0.001, 0.999].
    """
    floor, ceiling = 0.001, 0.999

    if action.action_type != "classify_root_cause":
        return floor

    guess = _normalize_category(action.argument)
    if guess not in VALID_CATEGORIES:
        return floor

    # Only the first label of a ";"-joined ground truth is considered.
    first_label, _, _ = str(task.get("category", "")).partition(";")
    expected = _normalize_category(first_label)
    if expected not in VALID_CATEGORIES:
        return floor

    score = _get_similarity(guess, expected)
    return max(floor, min(ceiling, score))
|