AE-Shree committed on
Commit
a3ecae0
·
1 Parent(s): 31a6ed9

Bhagavan mera madad karo 🙏

Browse files
Files changed (1) hide show
  1. models.py +13 -10
models.py CHANGED
@@ -78,29 +78,32 @@ def grader(trajectory: dict) -> float:
78
 
79
  def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
80
  """
81
- A deterministic grader returning 0.0-1.0 based on:
82
  - completion rate
83
- - deadline adherence
84
  - energy efficiency
 
 
85
  """
86
  if not tasks:
87
- return 0.0
88
-
89
  completion_rate = sum(t.progress for t in tasks) / len(tasks)
90
-
91
  # penalty for missed deadlines
92
  missed_deadlines = 0
93
  for t in tasks:
94
  if t.deadline and time_step > t.deadline and t.progress < 1.0:
95
  missed_deadlines += 1
96
-
97
  deadline_penalty = min(0.3, missed_deadlines * 0.1)
98
-
99
  # energy efficiency
100
  energy_score = max(0.0, (final_energy - 0.1) * 0.2)
101
-
102
  score = completion_rate * 0.8 - deadline_penalty + energy_score
103
- return max(0.0, min(1.0, score))
 
104
 
105
 
106
  # ==========================================
@@ -201,7 +204,7 @@ class CLMEnvironment:
201
  else:
202
  reward += 1.0
203
 
204
- reward = max(0.0, min(0.99, float(reward)))
205
 
206
  return self._get_observation(), reward, done, self.state.model_dump()
207
 
 
78
 
79
  def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
80
  """
81
+ A deterministic grader returning a score strictly between 0 and 1 based on:
82
  - completion rate
83
+ - deadline adherence
84
  - energy efficiency
85
+
86
+ Score is clamped to [0.01, 0.99] — never exactly 0.0 or 1.0.
87
  """
88
  if not tasks:
89
+ return 0.01
90
+
91
  completion_rate = sum(t.progress for t in tasks) / len(tasks)
92
+
93
  # penalty for missed deadlines
94
  missed_deadlines = 0
95
  for t in tasks:
96
  if t.deadline and time_step > t.deadline and t.progress < 1.0:
97
  missed_deadlines += 1
98
+
99
  deadline_penalty = min(0.3, missed_deadlines * 0.1)
100
+
101
  # energy efficiency
102
  energy_score = max(0.0, (final_energy - 0.1) * 0.2)
103
+
104
  score = completion_rate * 0.8 - deadline_penalty + energy_score
105
+ # Clamp strictly between 0 and 1 — validator requires score in (0, 1) exclusive
106
+ return round(max(0.01, min(0.99, score)), 4)
107
 
108
 
109
  # ==========================================
 
204
  else:
205
  reward += 1.0
206
 
207
+ reward = round(max(0.01, min(0.99, float(reward))), 4)
208
 
209
  return self._get_observation(), reward, done, self.state.model_dump()
210