arrow072 committed on
Commit
36c3c58
·
verified ·
1 Parent(s): 4bbf0fc

Update openenv.yaml

Browse files
Files changed (1) hide show
  1. openenv.yaml +4 -4
openenv.yaml CHANGED
@@ -74,7 +74,7 @@ tasks:
74
  description: "Stable, balanced traffic. Minimal emergencies. Ideal for learning."
75
  config_key: easy
76
  max_steps: 50
77
- score_range: [0.0, 1.0] # open interval (0,1) enforced by grader
78
  params:
79
  arrival_rate: [0, 1]
80
  discharge_rate: [4, 5]
@@ -86,7 +86,7 @@ tasks:
86
  description: "Random traffic bursts, moderate congestion, occasional emergencies."
87
  config_key: medium
88
  max_steps: 100
89
- score_range: [0.0, 1.0]
90
  params:
91
  arrival_rate: [1, 3]
92
  discharge_rate: [3, 5]
@@ -98,7 +98,7 @@ tasks:
98
  description: "High-intensity traffic, frequent emergencies, strict fairness constraints."
99
  config_key: hard
100
  max_steps: 200
101
- score_range: [0.0, 1.0]
102
  params:
103
  arrival_rate: [2, 5]
104
  discharge_rate: [2, 4]
@@ -112,7 +112,7 @@ tasks:
112
  # ─────────────────────────────────────────────────────────────────────
113
  reward:
114
  range: [-0.999, 0.999]
115
- score_normalisation: "(reward + 1) / 2 → (0.0005, 0.9995)"
116
  components:
117
  efficiency:
118
  sign: "+"
 
74
  description: "Stable, balanced traffic. Minimal emergencies. Ideal for learning."
75
  config_key: easy
76
  max_steps: 50
77
+ score_range: [0.001, 0.999]
78
  params:
79
  arrival_rate: [0, 1]
80
  discharge_rate: [4, 5]
 
86
  description: "Random traffic bursts, moderate congestion, occasional emergencies."
87
  config_key: medium
88
  max_steps: 100
89
+ score_range: [0.001, 0.999]
90
  params:
91
  arrival_rate: [1, 3]
92
  discharge_rate: [3, 5]
 
98
  description: "High-intensity traffic, frequent emergencies, strict fairness constraints."
99
  config_key: hard
100
  max_steps: 200
101
+ score_range: [0.001, 0.999]
102
  params:
103
  arrival_rate: [2, 5]
104
  discharge_rate: [2, 4]
 
112
  # ─────────────────────────────────────────────────────────────────────
113
  reward:
114
  range: [-0.999, 0.999]
115
+ score_normalisation: "(reward + 1) / 2, clamped to [0.001, 0.999]"
116
  components:
117
  efficiency:
118
  sign: "+"