File size: 529 Bytes
761f203
 
 
 
 
 
 
 
dc990fa
761f203
 
 
dc990fa
761f203
 
dc990fa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from __future__ import annotations

from env.models import FlakySleuthAction


def grade(action: FlakySleuthAction, task: dict) -> float:
    """Score a binary flaky/stable classification by exact match.

    Args:
        action: The agent's action; only ``action_type`` and ``argument``
            are read.
        task: Task record whose ``"label"`` entry is the ground truth.
            A missing, ``None`` or blank label falls back to ``"flaky"``.

    Returns:
        ``0.999`` when the normalized prediction equals the normalized
        label; ``0.001`` for a wrong action type, a non-string or
        unrecognized argument, or a mismatch.
    """
    # NOTE(review): scores are 0.001 / 0.999 rather than 0 / 1 — presumably
    # the consumer needs values strictly inside (0, 1); confirm with caller.
    if action.action_type != "classify_flakiness":
        return 0.001

    raw_argument = action.argument
    if not isinstance(raw_argument, str):
        # A missing or non-text argument is an invalid answer, not a crash.
        return 0.001

    predicted = raw_argument.strip().lower()
    if predicted not in ("flaky", "stable"):
        return 0.001

    # Treat an explicit None like a missing label; str(None) would yield
    # "none" and bypass the "flaky" fallback applied to blank labels.
    label = task.get("label")
    ground_truth = ("" if label is None else str(label)).strip().lower() or "flaky"
    return 0.999 if predicted == ground_truth else 0.001