AE-Shree commited on
Commit ·
a3ecae0
1
Parent(s): 31a6ed9
Bhagavan mera madad karo 🙏
Browse files
models.py
CHANGED
|
@@ -78,29 +78,32 @@ def grader(trajectory: dict) -> float:
|
|
| 78 |
|
| 79 |
def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
|
| 80 |
"""
|
| 81 |
-
A deterministic grader returning 0
|
| 82 |
- completion rate
|
| 83 |
-
- deadline adherence
|
| 84 |
- energy efficiency
|
|
|
|
|
|
|
| 85 |
"""
|
| 86 |
if not tasks:
|
| 87 |
-
return 0.
|
| 88 |
-
|
| 89 |
completion_rate = sum(t.progress for t in tasks) / len(tasks)
|
| 90 |
-
|
| 91 |
# penalty for missed deadlines
|
| 92 |
missed_deadlines = 0
|
| 93 |
for t in tasks:
|
| 94 |
if t.deadline and time_step > t.deadline and t.progress < 1.0:
|
| 95 |
missed_deadlines += 1
|
| 96 |
-
|
| 97 |
deadline_penalty = min(0.3, missed_deadlines * 0.1)
|
| 98 |
-
|
| 99 |
# energy efficiency
|
| 100 |
energy_score = max(0.0, (final_energy - 0.1) * 0.2)
|
| 101 |
-
|
| 102 |
score = completion_rate * 0.8 - deadline_penalty + energy_score
|
| 103 |
-
|
|
|
|
| 104 |
|
| 105 |
|
| 106 |
# ==========================================
|
|
@@ -201,7 +204,7 @@ class CLMEnvironment:
|
|
| 201 |
else:
|
| 202 |
reward += 1.0
|
| 203 |
|
| 204 |
-
reward = max(0.
|
| 205 |
|
| 206 |
return self._get_observation(), reward, done, self.state.model_dump()
|
| 207 |
|
|
|
|
| 78 |
|
| 79 |
def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
|
| 80 |
"""
|
| 81 |
+
A deterministic grader returning a score strictly between 0 and 1 based on:
|
| 82 |
- completion rate
|
| 83 |
+
- deadline adherence
|
| 84 |
- energy efficiency
|
| 85 |
+
|
| 86 |
+
Score is clamped to (0.01, 0.99) — never exactly 0.0 or 1.0.
|
| 87 |
"""
|
| 88 |
if not tasks:
|
| 89 |
+
return 0.01
|
| 90 |
+
|
| 91 |
completion_rate = sum(t.progress for t in tasks) / len(tasks)
|
| 92 |
+
|
| 93 |
# penalty for missed deadlines
|
| 94 |
missed_deadlines = 0
|
| 95 |
for t in tasks:
|
| 96 |
if t.deadline and time_step > t.deadline and t.progress < 1.0:
|
| 97 |
missed_deadlines += 1
|
| 98 |
+
|
| 99 |
deadline_penalty = min(0.3, missed_deadlines * 0.1)
|
| 100 |
+
|
| 101 |
# energy efficiency
|
| 102 |
energy_score = max(0.0, (final_energy - 0.1) * 0.2)
|
| 103 |
+
|
| 104 |
score = completion_rate * 0.8 - deadline_penalty + energy_score
|
| 105 |
+
# Clamp strictly between 0 and 1 — validator requires score in (0, 1) exclusive
|
| 106 |
+
return round(max(0.01, min(0.99, score)), 4)
|
| 107 |
|
| 108 |
|
| 109 |
# ==========================================
|
|
|
|
| 204 |
else:
|
| 205 |
reward += 1.0
|
| 206 |
|
| 207 |
+
reward = round(max(0.01, min(0.99, float(reward))), 4)
|
| 208 |
|
| 209 |
return self._get_observation(), reward, done, self.state.model_dump()
|
| 210 |
|