Veer15 committed on
Commit
81547b4
·
verified ·
1 Parent(s): 0da1902

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. __init__.py +1 -13
  2. server/graders.py +15 -14
  3. tests/test_graders.py +11 -11
__init__.py CHANGED
@@ -1,13 +1 @@
1
- """Distributed systems debug OpenEnv package."""
2
-
3
- from .client import DistributedSystemsDebugEnv
4
- from .models import Action, Observation, Reward, StepResult, SystemMetrics
5
-
6
- __all__ = [
7
- "DistributedSystemsDebugEnv",
8
- "Action",
9
- "Observation",
10
- "Reward",
11
- "StepResult",
12
- "SystemMetrics",
13
- ]
 
1
+ """Package marker for OpenEnv structure validation."""
 
 
 
 
 
 
 
 
 
 
 
 
server/graders.py CHANGED
@@ -5,13 +5,14 @@ from .models import SystemMetrics
5
 
6
 
7
  def _clamp(score: float) -> float:
8
- return max(0.0, min(1.0, score))
 
9
 
10
 
11
  def grade_cascading_timeout(metrics: SystemMetrics, context: dict[str, Any]) -> float:
12
  timeout_resolved = bool(context.get("cascading_timeout_resolved", False))
13
  if timeout_resolved and metrics.gateway_success_rate >= 0.99:
14
- return 1.0
15
  if not timeout_resolved:
16
  # Prevent instant pass while the injected timeout fault is still active.
17
  return _clamp(metrics.gateway_success_rate * 0.25)
@@ -25,9 +26,9 @@ def grade_byzantine_queue_fault(
25
  restart_delta = max(0, metrics.worker_restart_count - baseline_restart)
26
 
27
  if metrics.queue_depth == 0 and restart_delta <= 1:
28
- return 1.0
29
  if metrics.queue_depth == 0:
30
- return 0.6
31
 
32
  queue_component = max(0.0, 1.0 - metrics.queue_depth / 50.0)
33
  stability_penalty = min(0.4, restart_delta * 0.05)
@@ -42,12 +43,12 @@ def grade_distributed_lock_starvation(
42
  stall_delta = max(0, metrics.consumer_stall_count - baseline_stall)
43
 
44
  if not lock_exists and metrics.queue_depth <= 3:
45
- return 1.0
46
  if not lock_exists:
47
- return 0.6
48
 
49
  # If lock still exists, reward slight progress only when stalls don't explode.
50
- return 0.2 if stall_delta <= 1 else 0.0
51
 
52
 
53
  def grade_backpressure_cascade(metrics: SystemMetrics, _: dict[str, Any]) -> float:
@@ -57,10 +58,10 @@ def grade_backpressure_cascade(metrics: SystemMetrics, _: dict[str, Any]) -> flo
57
  def grade_route_partition(metrics: SystemMetrics, context: dict[str, Any]) -> float:
58
  route_blocked = bool(context.get("route_blocked", True))
59
  if not route_blocked and metrics.gateway_success_rate >= 0.95:
60
- return 1.0
61
  if not route_blocked:
62
  return _clamp(metrics.gateway_success_rate)
63
- return 0.0
64
 
65
 
66
  def grade_registry_corruption(metrics: SystemMetrics, context: dict[str, Any]) -> float:
@@ -68,7 +69,7 @@ def grade_registry_corruption(metrics: SystemMetrics, context: dict[str, Any]) -
68
  context.get("registry_auth_matches_default", False)
69
  )
70
  if registry_auth_matches_default and metrics.gateway_success_rate >= 0.99:
71
- return 1.0
72
  if registry_auth_matches_default:
73
  return _clamp(0.5 + metrics.gateway_success_rate * 0.5)
74
  return _clamp(metrics.gateway_success_rate * 0.3)
@@ -79,12 +80,12 @@ def grade_job_generator_runaway(
79
  ) -> float:
80
  rate_resolved = bool(context.get("job_generator_rate_resolved", False))
81
  if rate_resolved and metrics.queue_depth <= 5:
82
- return 1.0
83
  if rate_resolved and metrics.queue_depth <= 30:
84
- return 0.7
85
  if rate_resolved:
86
  return _clamp(0.7 - (metrics.queue_depth - 30) / 100.0)
87
- return 0.2 if metrics.queue_depth <= 30 else 0.0
88
 
89
 
90
  def grade_task(
@@ -106,4 +107,4 @@ def grade_task(
106
  return grade_registry_corruption(metrics, context)
107
  if task is TaskName.JOB_GENERATOR_RUNAWAY:
108
  return grade_job_generator_runaway(metrics, context)
109
- return 0.0
 
5
 
6
 
7
  def _clamp(score: float) -> float:
8
+ """Clamp score to the open interval (0, 1), as required by the evaluation pipeline."""
9
+ return max(0.01, min(0.99, score))
10
 
11
 
12
  def grade_cascading_timeout(metrics: SystemMetrics, context: dict[str, Any]) -> float:
13
  timeout_resolved = bool(context.get("cascading_timeout_resolved", False))
14
  if timeout_resolved and metrics.gateway_success_rate >= 0.99:
15
+ return _clamp(1.0)
16
  if not timeout_resolved:
17
  # Prevent instant pass while the injected timeout fault is still active.
18
  return _clamp(metrics.gateway_success_rate * 0.25)
 
26
  restart_delta = max(0, metrics.worker_restart_count - baseline_restart)
27
 
28
  if metrics.queue_depth == 0 and restart_delta <= 1:
29
+ return _clamp(1.0)
30
  if metrics.queue_depth == 0:
31
+ return _clamp(0.6)
32
 
33
  queue_component = max(0.0, 1.0 - metrics.queue_depth / 50.0)
34
  stability_penalty = min(0.4, restart_delta * 0.05)
 
43
  stall_delta = max(0, metrics.consumer_stall_count - baseline_stall)
44
 
45
  if not lock_exists and metrics.queue_depth <= 3:
46
+ return _clamp(1.0)
47
  if not lock_exists:
48
+ return _clamp(0.6)
49
 
50
  # If lock still exists, reward slight progress only when stalls don't explode.
51
+ return _clamp(0.2) if stall_delta <= 1 else _clamp(0.0)
52
 
53
 
54
  def grade_backpressure_cascade(metrics: SystemMetrics, _: dict[str, Any]) -> float:
 
58
  def grade_route_partition(metrics: SystemMetrics, context: dict[str, Any]) -> float:
59
  route_blocked = bool(context.get("route_blocked", True))
60
  if not route_blocked and metrics.gateway_success_rate >= 0.95:
61
+ return _clamp(1.0)
62
  if not route_blocked:
63
  return _clamp(metrics.gateway_success_rate)
64
+ return _clamp(0.0)
65
 
66
 
67
  def grade_registry_corruption(metrics: SystemMetrics, context: dict[str, Any]) -> float:
 
69
  context.get("registry_auth_matches_default", False)
70
  )
71
  if registry_auth_matches_default and metrics.gateway_success_rate >= 0.99:
72
+ return _clamp(1.0)
73
  if registry_auth_matches_default:
74
  return _clamp(0.5 + metrics.gateway_success_rate * 0.5)
75
  return _clamp(metrics.gateway_success_rate * 0.3)
 
80
  ) -> float:
81
  rate_resolved = bool(context.get("job_generator_rate_resolved", False))
82
  if rate_resolved and metrics.queue_depth <= 5:
83
+ return _clamp(1.0)
84
  if rate_resolved and metrics.queue_depth <= 30:
85
+ return _clamp(0.7)
86
  if rate_resolved:
87
  return _clamp(0.7 - (metrics.queue_depth - 30) / 100.0)
88
+ return _clamp(0.2) if metrics.queue_depth <= 30 else _clamp(0.0)
89
 
90
 
91
  def grade_task(
 
107
  return grade_registry_corruption(metrics, context)
108
  if task is TaskName.JOB_GENERATOR_RUNAWAY:
109
  return grade_job_generator_runaway(metrics, context)
110
+ return _clamp(0.0)
tests/test_graders.py CHANGED
@@ -32,7 +32,7 @@ def test_grade_cascading_timeout_boundaries() -> None:
32
  grade_cascading_timeout(
33
  _metrics(success_rate=1.0), {"cascading_timeout_resolved": True}
34
  )
35
- == 1.0
36
  )
37
  assert (
38
  grade_cascading_timeout(
@@ -50,9 +50,9 @@ def test_grade_cascading_timeout_boundaries() -> None:
50
 
51
  def test_grade_byzantine_queue_fault_cases() -> None:
52
  ctx = {"baseline_worker_restart_count": 3}
53
- assert grade_byzantine_queue_fault(_metrics(depth=0, restarts=3), ctx) == 1.0
54
  assert grade_byzantine_queue_fault(_metrics(depth=0, restarts=8), ctx) == 0.6
55
- assert grade_byzantine_queue_fault(_metrics(depth=40, restarts=10), ctx) == 0.0
56
 
57
 
58
  def test_grade_distributed_lock_starvation_cases() -> None:
@@ -61,7 +61,7 @@ def test_grade_distributed_lock_starvation_cases() -> None:
61
 
62
  assert (
63
  grade_distributed_lock_starvation(_metrics(depth=2, stalls=0), ctx_unlocked)
64
- == 1.0
65
  )
66
  assert (
67
  grade_distributed_lock_starvation(_metrics(depth=10, stalls=0), ctx_unlocked)
@@ -69,24 +69,24 @@ def test_grade_distributed_lock_starvation_cases() -> None:
69
  )
70
  assert (
71
  grade_distributed_lock_starvation(_metrics(depth=10, stalls=3), ctx_locked)
72
- == 0.0
73
  )
74
 
75
 
76
  def test_grade_backpressure_cascade_continuous() -> None:
77
- assert grade_backpressure_cascade(_metrics(depth=0), {}) == 1.0
78
  assert grade_backpressure_cascade(_metrics(depth=100), {}) == 0.5
79
- assert grade_backpressure_cascade(_metrics(depth=200), {}) == 0.0
80
 
81
 
82
  def test_grade_route_partition_threshold() -> None:
83
  assert (
84
  grade_route_partition(_metrics(success_rate=0.96), {"route_blocked": False})
85
- == 1.0
86
  )
87
  assert (
88
  grade_route_partition(_metrics(success_rate=0.8), {"route_blocked": True})
89
- == 0.0
90
  )
91
 
92
 
@@ -95,7 +95,7 @@ def test_grade_registry_corruption_thresholds() -> None:
95
  grade_registry_corruption(
96
  _metrics(success_rate=0.99), {"registry_auth_matches_default": True}
97
  )
98
- == 1.0
99
  )
100
  assert (
101
  grade_registry_corruption(
@@ -116,7 +116,7 @@ def test_grade_job_generator_runaway_thresholds() -> None:
116
  grade_job_generator_runaway(
117
  _metrics(depth=4), {"job_generator_rate_resolved": True}
118
  )
119
- == 1.0
120
  )
121
  assert (
122
  grade_job_generator_runaway(
 
32
  grade_cascading_timeout(
33
  _metrics(success_rate=1.0), {"cascading_timeout_resolved": True}
34
  )
35
+ == 0.99
36
  )
37
  assert (
38
  grade_cascading_timeout(
 
50
 
51
  def test_grade_byzantine_queue_fault_cases() -> None:
52
  ctx = {"baseline_worker_restart_count": 3}
53
+ assert grade_byzantine_queue_fault(_metrics(depth=0, restarts=3), ctx) == 0.99
54
  assert grade_byzantine_queue_fault(_metrics(depth=0, restarts=8), ctx) == 0.6
55
+ assert grade_byzantine_queue_fault(_metrics(depth=40, restarts=10), ctx) == 0.01
56
 
57
 
58
  def test_grade_distributed_lock_starvation_cases() -> None:
 
61
 
62
  assert (
63
  grade_distributed_lock_starvation(_metrics(depth=2, stalls=0), ctx_unlocked)
64
+ == 0.99
65
  )
66
  assert (
67
  grade_distributed_lock_starvation(_metrics(depth=10, stalls=0), ctx_unlocked)
 
69
  )
70
  assert (
71
  grade_distributed_lock_starvation(_metrics(depth=10, stalls=3), ctx_locked)
72
+ == 0.01
73
  )
74
 
75
 
76
  def test_grade_backpressure_cascade_continuous() -> None:
77
+ assert grade_backpressure_cascade(_metrics(depth=0), {}) == 0.99
78
  assert grade_backpressure_cascade(_metrics(depth=100), {}) == 0.5
79
+ assert grade_backpressure_cascade(_metrics(depth=200), {}) == 0.01
80
 
81
 
82
  def test_grade_route_partition_threshold() -> None:
83
  assert (
84
  grade_route_partition(_metrics(success_rate=0.96), {"route_blocked": False})
85
+ == 0.99
86
  )
87
  assert (
88
  grade_route_partition(_metrics(success_rate=0.8), {"route_blocked": True})
89
+ == 0.01
90
  )
91
 
92
 
 
95
  grade_registry_corruption(
96
  _metrics(success_rate=0.99), {"registry_auth_matches_default": True}
97
  )
98
+ == 0.99
99
  )
100
  assert (
101
  grade_registry_corruption(
 
116
  grade_job_generator_runaway(
117
  _metrics(depth=4), {"job_generator_rate_resolved": True}
118
  )
119
+ == 0.99
120
  )
121
  assert (
122
  grade_job_generator_runaway(