BAIBHAV1234 committed on
Commit
8017cc7
·
verified ·
1 Parent(s): a9609ac

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. graders.py +33 -13
graders.py CHANGED
@@ -6,20 +6,40 @@ from tasks import TaskConfig
6
 
7
 
8
  SCORE_EPS = 1e-3
 
9
 
10
 
11
  def _clamp(value: float, low: float = SCORE_EPS, high: float = 1.0 - SCORE_EPS) -> float:
12
- return max(low, min(high, value))
 
 
 
 
 
 
 
 
13
 
14
 
15
  def _strict_score(value: float) -> float:
16
  return _clamp(value, SCORE_EPS, 1.0 - SCORE_EPS)
17
 
18
 
 
 
 
 
 
 
 
 
 
 
 
19
  def grade_episode(task: TaskConfig, metrics: dict[str, Any]) -> float:
20
- weights = task.score_weights
21
- score = sum(weights.get(metric_name, 0.0) * _clamp(metrics.get(metric_name, 0.0)) for metric_name in weights)
22
- return round(_strict_score(score), 4)
23
 
24
 
25
  def summarize_episode(total_reward: float, state_history: list[dict[str, Any]], terminal_outcome: str) -> dict[str, Any]:
@@ -51,16 +71,16 @@ def summarize_episode(total_reward: float, state_history: list[dict[str, Any]],
51
  timeliness = _clamp(1.0 - (first_meaningful_step / step_count))
52
  stability = _clamp(sum(item.get("stability_score", 0.0) for item in state_history) / step_count)
53
  safety = _clamp(1.0 - (safety_violations / step_count))
54
- outcome = 1.0 - SCORE_EPS if terminal_outcome == "survived" else SCORE_EPS
55
  return {
56
  "steps": step_count,
57
- "avg_reward": _clamp(total_reward / step_count),
58
- "detection": round(_clamp(detection), 4),
59
- "lab_workup": round(_clamp(lab_workup), 4),
60
- "treatment": round(_clamp(treatment), 4),
61
- "timeliness": round(_clamp(timeliness), 4),
62
- "stability": round(_clamp(stability), 4),
63
- "safety": round(_clamp(safety), 4),
64
- "safety_violation_rate": _clamp(safety_violations / step_count),
65
  "outcome": outcome,
66
  }
 
6
 
7
 
8
  SCORE_EPS = 1e-3
9
+ SCORE_MARGIN = 1e-6
10
 
11
 
12
  def _clamp(value: float, low: float = SCORE_EPS, high: float = 1.0 - SCORE_EPS) -> float:
13
+ try:
14
+ numeric_value = float(value)
15
+ except (TypeError, ValueError):
16
+ numeric_value = low
17
+ if numeric_value <= low:
18
+ return low + SCORE_MARGIN
19
+ if numeric_value >= high:
20
+ return high - SCORE_MARGIN
21
+ return numeric_value
22
 
23
 
24
  def _strict_score(value: float) -> float:
25
  return _clamp(value, SCORE_EPS, 1.0 - SCORE_EPS)
26
 
27
 
28
+ def _normalize_weights(weights: dict[str, float]) -> dict[str, float]:
29
+ total = sum(float(weight) for weight in weights.values())
30
+ if total <= 0:
31
+ return weights
32
+ return {metric_name: float(weight) / total for metric_name, weight in weights.items()}
33
+
34
+
35
def _format_metric(value: float) -> float:
    """Clamp ``value`` with ``_clamp`` and return it rounded to six decimal places.

    The rounding goes through a fixed-point string with six fractional digits
    and is parsed back to ``float``.
    """
    clamped = _clamp(value)
    six_decimal_text = format(clamped, ".6f")
    return float(six_decimal_text)
37
+
38
+
39
  def grade_episode(task: TaskConfig, metrics: dict[str, Any]) -> float:
40
+ weights = _normalize_weights(task.score_weights)
41
+ score = sum(weight * _clamp(metrics.get(metric_name, 0.0)) for metric_name, weight in weights.items())
42
+ return float(f"{_strict_score(score):.6f}")
43
 
44
 
45
  def summarize_episode(total_reward: float, state_history: list[dict[str, Any]], terminal_outcome: str) -> dict[str, Any]:
 
71
  timeliness = _clamp(1.0 - (first_meaningful_step / step_count))
72
  stability = _clamp(sum(item.get("stability_score", 0.0) for item in state_history) / step_count)
73
  safety = _clamp(1.0 - (safety_violations / step_count))
74
+ outcome = _format_metric(1.0 if terminal_outcome == "survived" else 0.0)
75
  return {
76
  "steps": step_count,
77
+ "avg_reward": _format_metric(total_reward / step_count),
78
+ "detection": _format_metric(detection),
79
+ "lab_workup": _format_metric(lab_workup),
80
+ "treatment": _format_metric(treatment),
81
+ "timeliness": _format_metric(timeliness),
82
+ "stability": _format_metric(stability),
83
+ "safety": _format_metric(safety),
84
+ "safety_violation_rate": _format_metric(safety_violations / step_count),
85
  "outcome": outcome,
86
  }