Update openenv.yaml
Browse files - openenv.yaml +4 -4
openenv.yaml
CHANGED
|
@@ -74,7 +74,7 @@ tasks:
|
|
| 74 |
description: "Stable, balanced traffic. Minimal emergencies. Ideal for learning."
|
| 75 |
config_key: easy
|
| 76 |
max_steps: 50
|
| 77 |
-
score_range: [0.
|
| 78 |
params:
|
| 79 |
arrival_rate: [0, 1]
|
| 80 |
discharge_rate: [4, 5]
|
|
@@ -86,7 +86,7 @@ tasks:
|
|
| 86 |
description: "Random traffic bursts, moderate congestion, occasional emergencies."
|
| 87 |
config_key: medium
|
| 88 |
max_steps: 100
|
| 89 |
-
score_range: [0.
|
| 90 |
params:
|
| 91 |
arrival_rate: [1, 3]
|
| 92 |
discharge_rate: [3, 5]
|
|
@@ -98,7 +98,7 @@ tasks:
|
|
| 98 |
description: "High-intensity traffic, frequent emergencies, strict fairness constraints."
|
| 99 |
config_key: hard
|
| 100 |
max_steps: 200
|
| 101 |
-
score_range: [0.
|
| 102 |
params:
|
| 103 |
arrival_rate: [2, 5]
|
| 104 |
discharge_rate: [2, 4]
|
|
@@ -112,7 +112,7 @@ tasks:
|
|
| 112 |
# ─────────────────────────────────────────────────────────────────────
|
| 113 |
reward:
|
| 114 |
range: [-0.999, 0.999]
|
| 115 |
-
|
| 116 |
components:
|
| 117 |
efficiency:
|
| 118 |
sign: "+"
|
|
|
|
| 74 |
description: "Stable, balanced traffic. Minimal emergencies. Ideal for learning."
|
| 75 |
config_key: easy
|
| 76 |
max_steps: 50
|
| 77 |
+
score_range: [0.001, 0.999]
|
| 78 |
params:
|
| 79 |
arrival_rate: [0, 1]
|
| 80 |
discharge_rate: [4, 5]
|
|
|
|
| 86 |
description: "Random traffic bursts, moderate congestion, occasional emergencies."
|
| 87 |
config_key: medium
|
| 88 |
max_steps: 100
|
| 89 |
+
score_range: [0.001, 0.999]
|
| 90 |
params:
|
| 91 |
arrival_rate: [1, 3]
|
| 92 |
discharge_rate: [3, 5]
|
|
|
|
| 98 |
description: "High-intensity traffic, frequent emergencies, strict fairness constraints."
|
| 99 |
config_key: hard
|
| 100 |
max_steps: 200
|
| 101 |
+
score_range: [0.001, 0.999]
|
| 102 |
params:
|
| 103 |
arrival_rate: [2, 5]
|
| 104 |
discharge_rate: [2, 4]
|
|
|
|
| 112 |
# ─────────────────────────────────────────────────────────────────────
|
| 113 |
reward:
|
| 114 |
range: [-0.999, 0.999]
|
| 115 |
+
score_normalisation: "(reward + 1) / 2, clamped to [0.001, 0.999]"
|
| 116 |
components:
|
| 117 |
efficiency:
|
| 118 |
sign: "+"
|