Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
bug_triage_env/graders/task1_grader.py
CHANGED
|
@@ -19,13 +19,13 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
|
|
| 19 |
Score in [0.0, 1.0].
|
| 20 |
"""
|
| 21 |
if not episode_log:
|
| 22 |
-
return 0.
|
| 23 |
|
| 24 |
last_action = episode_log[-1]
|
| 25 |
predicted = (last_action.get("bug_type") or "").strip().lower()
|
| 26 |
expected = (ground_truth.get("bug_type") or "").strip().lower()
|
| 27 |
|
| 28 |
if not predicted:
|
| 29 |
-
return 0.
|
| 30 |
|
| 31 |
-
return 1.0 if predicted == expected else 0.0
|
|
|
|
| 19 |
Score in [0.0, 1.0].
|
| 20 |
"""
|
| 21 |
if not episode_log:
|
| 22 |
+
return 0.01
|
| 23 |
|
| 24 |
last_action = episode_log[-1]
|
| 25 |
predicted = (last_action.get("bug_type") or "").strip().lower()
|
| 26 |
expected = (ground_truth.get("bug_type") or "").strip().lower()
|
| 27 |
|
| 28 |
if not predicted:
|
| 29 |
+
return 0.01
|
| 30 |
|
| 31 |
+
return 0.99 if predicted == expected else 0.01
|
bug_triage_env/graders/task2_grader.py
CHANGED
|
@@ -27,17 +27,17 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
|
|
| 27 |
float score in [0.0, 1.0].
|
| 28 |
"""
|
| 29 |
if not episode_log:
|
| 30 |
-
return 0.
|
| 31 |
|
| 32 |
last_action = episode_log[-1]
|
| 33 |
predicted = (last_action.get("priority") or "").strip().lower()
|
| 34 |
expected = (ground_truth.get("priority") or "").strip().lower()
|
| 35 |
|
| 36 |
if not predicted or predicted not in PRIORITY_ORDER:
|
| 37 |
-
return 0.
|
| 38 |
if expected not in PRIORITY_ORDER:
|
| 39 |
-
return 0.
|
| 40 |
|
| 41 |
diff = abs(PRIORITY_ORDER[predicted] - PRIORITY_ORDER[expected])
|
| 42 |
score = 1.0 - diff * (1.0 / 3.0)
|
| 43 |
-
return max(0.0, min(1.0, score))
|
|
|
|
| 27 |
float score in [0.0, 1.0].
|
| 28 |
"""
|
| 29 |
if not episode_log:
|
| 30 |
+
return 0.01
|
| 31 |
|
| 32 |
last_action = episode_log[-1]
|
| 33 |
predicted = (last_action.get("priority") or "").strip().lower()
|
| 34 |
expected = (ground_truth.get("priority") or "").strip().lower()
|
| 35 |
|
| 36 |
if not predicted or predicted not in PRIORITY_ORDER:
|
| 37 |
+
return 0.01
|
| 38 |
if expected not in PRIORITY_ORDER:
|
| 39 |
+
return 0.01
|
| 40 |
|
| 41 |
diff = abs(PRIORITY_ORDER[predicted] - PRIORITY_ORDER[expected])
|
| 42 |
score = 1.0 - diff * (1.0 / 3.0)
|
| 43 |
+
return max(0.01, min(0.99, score))
|
bug_triage_env/graders/task3_grader.py
CHANGED
|
@@ -85,7 +85,7 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
|
|
| 85 |
float score in [0.0, 1.0].
|
| 86 |
"""
|
| 87 |
if not episode_log:
|
| 88 |
-
return 0.
|
| 89 |
|
| 90 |
action = episode_log[-1]
|
| 91 |
|
|
@@ -111,4 +111,4 @@ def grade(episode_log: List[Dict[str, Any]], ground_truth: Dict[str, Any]) -> fl
|
|
| 111 |
+ WEIGHTS["action"] * s_act
|
| 112 |
)
|
| 113 |
|
| 114 |
-
return max(0.0, min(1.0, round(score, 4)))
|
|
|
|
| 85 |
float score in [0.0, 1.0].
|
| 86 |
"""
|
| 87 |
if not episode_log:
|
| 88 |
+
return 0.01
|
| 89 |
|
| 90 |
action = episode_log[-1]
|
| 91 |
|
|
|
|
| 111 |
+ WEIGHTS["action"] * s_act
|
| 112 |
)
|
| 113 |
|
| 114 |
+
return max(0.01, min(0.99, round(score, 4)))
|