AE-Shree committed on
Commit
41595ac
·
1 Parent(s): 7896686

Bhagavan mera madad karo 🙏

Browse files
Files changed (5) hide show
  1. backend/main.py +144 -171
  2. models.py +13 -9
  3. openenv.yaml +28 -10
  4. tasks/__init__.py +0 -1
  5. tasks/graders.py +0 -158
backend/main.py CHANGED
@@ -1,198 +1,171 @@
1
  import os
2
  import sys
3
- import math
4
  from typing import Any, Dict, List, Optional
5
 
6
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
 
8
- from fastapi import FastAPI, Body
9
  from fastapi.middleware.cors import CORSMiddleware
 
 
 
 
 
 
 
 
 
 
10
 
11
  from models import (
12
  Action as ModelAction,
 
13
  generate_tasks,
14
  deterministic_grader,
15
  CLMEnvironment,
16
  )
17
 
18
- _SCORE_MIN = 0.01
19
- _SCORE_MAX = 0.99
20
-
21
- def _safe_score(raw) -> float:
22
- if raw is None or (isinstance(raw, float) and math.isnan(raw)):
23
- return _SCORE_MIN
24
- r = float(raw)
25
- return round(max(_SCORE_MIN, min(_SCORE_MAX, r)), 4)
26
-
27
- _session: Dict[str, Any] = {
28
- "env": None, "task_id": "easy",
29
- "done": False, "final_score": _SCORE_MIN, "step_count": 0,
30
- }
31
-
32
- def _run_grader_for_task(task_id: str) -> float:
33
- if task_id not in ("easy", "medium", "hard"):
34
- task_id = "easy"
35
- tasks = generate_tasks(task_id)
36
- env = CLMEnvironment(tasks=tasks, max_steps=50)
37
- env.reset()
38
- for _ in range(50):
39
- state = env.state
40
- incomplete = [t for t in state.tasks if t.progress < 1.0]
41
- if not incomplete:
42
- break
43
- if state.energy < 0.3 or state.stress > 0.7:
44
- action = ModelAction(type="break")
45
- else:
46
- action = ModelAction(type="work", task_id=incomplete[0].id)
47
- _, _, done, _ = env.step(action)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  if done:
49
- break
50
- score = deterministic_grader(env.state.tasks, env.state.time_step, env.state.energy)
51
- return _safe_score(score)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  def build_app() -> FastAPI:
 
 
 
 
 
 
 
54
  _app = FastAPI(
55
  title="Cognitive Load Manager (CLM) Environment API",
56
  version="1.0.0",
57
- description="OpenEnv-compliant environment for the Meta PyTorch Hackathon.",
 
 
 
58
  )
59
- _app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
60
- allow_methods=["*"], allow_headers=["*"])
61
-
62
- @_app.get("/")
63
- @_app.get("/health")
64
- @_app.get("/healthz")
65
- async def health():
66
- return {"status": "healthy", "name": "cognitive-load-manager", "version": "1.0.0"}
67
-
68
- @_app.get("/metadata")
69
- async def metadata():
70
- return {
71
- "name": "cognitive-load-manager",
72
- "description": "Cognitive Load Manager simulates human cognitive load while managing tasks with deadlines.",
73
- "version": "1.0.0", "author": "Team Innovators",
74
- }
75
-
76
- @_app.get("/schema")
77
- async def schema():
78
- return {
79
- "action": {"type": "object", "properties": {
80
- "type": {"type": "string", "enum": ["work", "break", "switch", "delay"]},
81
- "task_id": {"type": "string", "nullable": True},
82
- }},
83
- "observation": {"type": "object", "properties": {
84
- "tasks": {"type": "array"},
85
- "visible_state": {"type": "object"},
86
- "time_step": {"type": "integer"},
87
- }},
88
- "state": {"type": "object", "properties": {
89
- "energy": {"type": "number"}, "stress": {"type": "number"},
90
- "fatigue": {"type": "number"}, "time_step": {"type": "integer"},
91
- }},
92
- }
93
-
94
- @_app.post("/reset")
95
- async def reset(body: dict = Body(default={})):
96
- task_id = body.get("task_id", "easy")
97
- if task_id not in ("easy", "medium", "hard"):
98
- task_id = "easy"
99
- tasks = generate_tasks(task_id)
100
- env = CLMEnvironment(tasks=tasks, max_steps=50)
101
- obs = env.reset()
102
- _session.update({"env": env, "task_id": task_id, "done": False,
103
- "final_score": _SCORE_MIN, "step_count": 0})
104
- return {
105
- "observation": {"tasks": [t.model_dump() for t in obs.tasks],
106
- "visible_state": obs.visible_state.model_dump(),
107
- "time_step": obs.time_step},
108
- "reward": None, "done": False, "info": {},
109
- }
110
-
111
- @_app.post("/step")
112
- async def step(body: dict = Body(default={})):
113
- env = _session.get("env")
114
- if env is None:
115
- tasks = generate_tasks("easy")
116
- env = CLMEnvironment(tasks=tasks, max_steps=50)
117
- env.reset()
118
- _session.update({"env": env, "task_id": "easy"})
119
-
120
- raw = body.get("action") or body
121
- if isinstance(raw, dict):
122
- action_type = raw.get("type", "delay")
123
- task_id_action = raw.get("task_id")
124
- else:
125
- action_type = "delay"
126
- task_id_action = None
127
- if action_type not in ("work", "break", "switch", "delay"):
128
- action_type = "delay"
129
-
130
- action = ModelAction(type=action_type, task_id=task_id_action)
131
- obs, raw_reward, done, info = env.step(action)
132
- _session["step_count"] = _session.get("step_count", 0) + 1
133
 
134
- if done:
135
- final_score = _safe_score(
136
- deterministic_grader(env.state.tasks, env.state.time_step, env.state.energy)
137
- )
138
- _session.update({"done": True, "final_score": final_score})
139
- info["final_score"] = final_score
140
- reward = final_score
141
- else:
142
- reward = _safe_score(raw_reward)
143
-
144
- return {
145
- "observation": {"tasks": [t.model_dump() for t in obs.tasks],
146
- "visible_state": obs.visible_state.model_dump(),
147
- "time_step": obs.time_step},
148
- "reward": reward, "score": reward, "done": done, "info": info,
149
- }
150
-
151
- @_app.get("/state")
152
- async def state():
153
- env = _session.get("env")
154
- if env is None:
155
- return {"energy": 1.0, "stress": 0.0, "fatigue": 0.0, "time_step": 0, "tasks": []}
156
- return env.state_dict()
157
-
158
- @_app.get("/grader")
159
- @_app.post("/grader")
160
- async def grader_endpoint(body: dict = Body(default={})):
161
- env = _session.get("env")
162
- if _session.get("done"):
163
- score = _session.get("final_score", _SCORE_MIN)
164
- elif env is not None:
165
- score = _safe_score(
166
- deterministic_grader(env.state.tasks, env.state.time_step, env.state.energy)
167
- )
168
- else:
169
- score = _run_grader_for_task(_session.get("task_id", "easy"))
170
- return {"task_id": _session.get("task_id", "easy"), "reward": score,
171
- "score": score, "done": _session.get("done", False),
172
- "step_count": _session.get("step_count", 0)}
173
-
174
- @_app.get("/grade/easy")
175
- @_app.get("/grade/t1_easy")
176
- async def grade_easy():
177
- score = _run_grader_for_task("easy")
178
- return {"task_id": "easy", "score": score, "reward": score}
179
-
180
- @_app.get("/grade/medium")
181
- @_app.get("/grade/t2_medium")
182
- async def grade_medium():
183
- score = _run_grader_for_task("medium")
184
- return {"task_id": "medium", "score": score, "reward": score}
185
-
186
- @_app.get("/grade/hard")
187
- @_app.get("/grade/t3_hard")
188
- async def grade_hard():
189
- score = _run_grader_for_task("hard")
190
- return {"task_id": "hard", "score": score, "reward": score}
191
-
192
- @_app.post("/mcp")
193
- async def mcp(body: dict = Body(default={})):
194
- return {"jsonrpc": "2.0", "id": body.get("id", 1), "result": {"status": "ok"}}
195
 
 
196
  return _app
197
 
 
198
  app = build_app()
 
1
  import os
2
  import sys
 
3
  from typing import Any, Dict, List, Optional
4
 
5
  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
 
7
+ from fastapi import FastAPI
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from pydantic import Field
10
+
11
+ from openenv.core.env_server.interfaces import Environment
12
+ from openenv.core.env_server.types import (
13
+ Action as OEAction,
14
+ Observation as OEObservation,
15
+ State as OEState,
16
+ EnvironmentMetadata,
17
+ )
18
+ from openenv.core.env_server.http_server import HTTPEnvServer
19
 
20
  from models import (
21
  Action as ModelAction,
22
+ Observation as ModelObservation,
23
  generate_tasks,
24
  deterministic_grader,
25
  CLMEnvironment,
26
  )
27
 
28
+
29
+ # ── OpenEnv-compatible Action / Observation / State models ──────────────────
30
+
31
+ class CLMAction(OEAction):
32
+ """Action for the Cognitive Load Manager environment."""
33
+ type: str = Field(description="Action type: work, break, switch, or delay")
34
+ task_id: Optional[str] = Field(default=None, description="Task ID to act on")
35
+
36
+ model_config = {"extra": "allow"}
37
+
38
+
39
+ class CLMObservation(OEObservation):
40
+ """Observation from the Cognitive Load Manager environment."""
41
+ tasks: List[Dict[str, Any]] = Field(default_factory=list)
42
+ visible_state: Dict[str, Any] = Field(default_factory=dict)
43
+ time_step: int = Field(default=0)
44
+
45
+ model_config = {"extra": "allow"}
46
+
47
+
48
+ class CLMState(OEState):
49
+ """State for the Cognitive Load Manager environment."""
50
+ energy: float = Field(default=1.0)
51
+ stress: float = Field(default=0.0)
52
+ fatigue: float = Field(default=0.0)
53
+ current_task_id: Optional[str] = Field(default=None)
54
+ tasks: List[Dict[str, Any]] = Field(default_factory=list)
55
+
56
+ model_config = {"extra": "allow"}
57
+
58
+
59
+ # ── OpenEnv Environment wrapper ─────────────────────────────────────────────
60
+
61
+ class CLMEnvWrapper(Environment):
62
+ """
63
+ Cognitive Load Manager wrapped as an OpenEnv-compliant environment.
64
+
65
+ Three difficulty levels via the task_id reset parameter:
66
+ - easy: 2 tasks, no deadlines
67
+ - medium: 5 tasks with deadlines
68
+ - hard: 8 tasks with tight deadlines
69
+ """
70
+
71
+ SUPPORTS_CONCURRENT_SESSIONS = True
72
+
73
+ def __init__(self):
74
+ super().__init__()
75
+ level = os.getenv("CLM_LEVEL", "easy")
76
+ tasks = generate_tasks(level)
77
+ self._env = CLMEnvironment(tasks=tasks, max_steps=50)
78
+ self._final_score: float = 0.0
79
+
80
+ def _to_oe_obs(self, obs: ModelObservation, done: bool = False, reward: Optional[float] = None, info: Optional[dict] = None) -> CLMObservation:
81
+ return CLMObservation(
82
+ tasks=[t.model_dump() for t in obs.tasks],
83
+ visible_state=obs.visible_state.model_dump(),
84
+ time_step=obs.time_step,
85
+ done=done,
86
+ reward=reward,
87
+ metadata=info or {},
88
+ )
89
+
90
+ def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, task_id: str = "easy", **kwargs) -> CLMObservation:
91
+ if task_id not in ("easy", "medium", "hard"):
92
+ task_id = "easy"
93
+ tasks = generate_tasks(task_id)
94
+ self._env = CLMEnvironment(tasks=tasks, max_steps=50)
95
+ self._final_score = 0.0
96
+ obs = self._env.reset()
97
+ return self._to_oe_obs(obs)
98
+
99
+ def step(self, action: CLMAction, timeout_s: Optional[float] = None, **kwargs) -> CLMObservation:
100
+ model_action = ModelAction(type=action.type, task_id=action.task_id)
101
+ obs, reward, done, info = self._env.step(model_action)
102
  if done:
103
+ self._final_score = deterministic_grader(
104
+ self._env.state.tasks,
105
+ self._env.state.time_step,
106
+ self._env.state.energy,
107
+ )
108
+ info["final_score"] = self._final_score
109
+ return self._to_oe_obs(obs, done=done, reward=float(reward), info=info)
110
+
111
+ @property
112
+ def state(self) -> CLMState:
113
+ raw = self._env.state_dict()
114
+ return CLMState(
115
+ energy=raw.get("energy", 1.0),
116
+ stress=raw.get("stress", 0.0),
117
+ fatigue=raw.get("fatigue", 0.0),
118
+ current_task_id=raw.get("current_task_id"),
119
+ tasks=raw.get("tasks", []),
120
+ step_count=raw.get("time_step", 0),
121
+ )
122
+
123
+ def get_metadata(self) -> EnvironmentMetadata:
124
+ return EnvironmentMetadata(
125
+ name="cognitive-load-manager",
126
+ description=(
127
+ "Cognitive Load Manager (CLM) simulates human cognitive load "
128
+ "(energy, stress, fatigue) while managing tasks with deadlines. "
129
+ "Three difficulty levels: easy (2 tasks, no deadlines), "
130
+ "medium (5 tasks with deadlines), hard (8 tasks with tight deadlines)."
131
+ ),
132
+ version="1.0.0",
133
+ author="Team Innovators",
134
+ )
135
+
136
+ def close(self) -> None:
137
+ pass
138
+
139
+
140
+ # ── Build FastAPI app via OpenEnv HTTPEnvServer ──────────────────────────────
141
 
142
  def build_app() -> FastAPI:
143
+ server = HTTPEnvServer(
144
+ env=CLMEnvWrapper,
145
+ action_cls=CLMAction,
146
+ observation_cls=CLMObservation,
147
+ max_concurrent_envs=10,
148
+ )
149
+
150
  _app = FastAPI(
151
  title="Cognitive Load Manager (CLM) Environment API",
152
  version="1.0.0",
153
+ description=(
154
+ "OpenEnv-compliant environment for the Meta PyTorch Hackathon. "
155
+ "Simulates cognitive load management with three difficulty levels."
156
+ ),
157
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ _app.add_middleware(
160
+ CORSMiddleware,
161
+ allow_origins=["*"],
162
+ allow_credentials=True,
163
+ allow_methods=["*"],
164
+ allow_headers=["*"],
165
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ server.register_routes(_app)
168
  return _app
169
 
170
+
171
  app = build_app()
models.py CHANGED
@@ -68,6 +68,7 @@ def grader(trajectory: dict) -> float:
68
 
69
  Wraps deterministic_grader for use with the openenv-core task evaluation
70
  framework. The trajectory dict should contain keys: tasks, time_step, energy.
 
71
  """
72
  raw_tasks = trajectory.get("tasks", [])
73
  time_step_val = trajectory.get("time_step", 50)
@@ -78,19 +79,21 @@ def grader(trajectory: dict) -> float:
78
 
79
  def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
80
  """
81
- A deterministic grader returning a score STRICTLY between 0 and 1 based on:
82
  - completion rate
83
  - deadline adherence
84
  - energy efficiency
85
 
86
- Score is clamped to [0.01, 0.99] — never exactly 0.0 or 1.0.
 
87
  """
 
88
  if not tasks:
89
  return 0.01
90
 
91
  completion_rate = sum(t.progress for t in tasks) / len(tasks)
92
 
93
- # penalty for missed deadlines
94
  missed_deadlines = 0
95
  for t in tasks:
96
  if t.deadline and time_step > t.deadline and t.progress < 1.0:
@@ -98,12 +101,13 @@ def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float)
98
 
99
  deadline_penalty = min(0.3, missed_deadlines * 0.1)
100
 
101
- # energy efficiency
102
- energy_score = max(0.0, (final_energy - 0.1) * 0.2)
103
 
104
- score = completion_rate * 0.8 - deadline_penalty + energy_score
105
- # Validator requires score strictly in (0, 1) — clamp to [0.01, 0.99]
106
- return round(max(0.01, min(0.99, score)), 4)
 
107
 
108
 
109
  # ==========================================
@@ -204,7 +208,7 @@ class CLMEnvironment:
204
  else:
205
  reward += 1.0
206
 
207
- reward = round(max(0.01, min(0.99, float(reward))), 4)
208
 
209
  return self._get_observation(), reward, done, self.state.model_dump()
210
 
 
68
 
69
  Wraps deterministic_grader for use with the openenv-core task evaluation
70
  framework. The trajectory dict should contain keys: tasks, time_step, energy.
71
+ Score is always within the closed interval [0.01, 0.99] — never 0.0 or 1.0.
72
  """
73
  raw_tasks = trajectory.get("tasks", [])
74
  time_step_val = trajectory.get("time_step", 50)
 
79
 
80
  def deterministic_grader(tasks: list[Task], time_step: int, final_energy: float) -> float:
81
  """
82
+ A deterministic grader returning a score clamped to [0.01, 0.99] based on:
83
  - completion rate
84
  - deadline adherence
85
  - energy efficiency
86
 
87
+ Score is NEVER exactly 0.0 or 1.0 — always strictly between 0 and 1
88
+ to satisfy openenv Phase 2 validation requirements.
89
  """
90
+ # Guard: no tasks → minimal score (not zero)
91
  if not tasks:
92
  return 0.01
93
 
94
  completion_rate = sum(t.progress for t in tasks) / len(tasks)
95
 
96
+ # Penalty for missed deadlines
97
  missed_deadlines = 0
98
  for t in tasks:
99
  if t.deadline and time_step > t.deadline and t.progress < 1.0:
 
101
 
102
  deadline_penalty = min(0.3, missed_deadlines * 0.1)
103
 
104
+ # Energy efficiency bonus (capped so total can't reach 1.0)
105
+ energy_score = max(0.0, (final_energy - 0.1) * 0.18)
106
 
107
+ raw = completion_rate * 0.78 - deadline_penalty + energy_score
108
+
109
+ # Clamp to the closed interval [0.01, 0.99] — never 0.0 or 1.0
110
+ return round(max(0.01, min(0.99, raw)), 4)
111
 
112
 
113
  # ==========================================
 
208
  else:
209
  reward += 1.0
210
 
211
+ reward = max(0.01, min(0.99, float(reward)))
212
 
213
  return self._get_observation(), reward, done, self.state.model_dump()
214
 
openenv.yaml CHANGED
@@ -1,5 +1,5 @@
1
  spec_version: 1
2
- name: clm-env
3
  type: space
4
  runtime: fastapi
5
  app: server.app:app
@@ -8,11 +8,10 @@ description: Cognitive Load Manager (CLM) simulates human cognitive load (energy
8
  version: "1.0.0"
9
 
10
  endpoints:
11
- health: /health
12
  reset: /reset
13
  step: /step
14
  state: /state
15
- grade: /grader
16
 
17
  schema:
18
  observation:
@@ -41,26 +40,45 @@ schema:
41
  reward:
42
  type: number
43
 
44
- scoring:
45
- reward_range: [0.01, 0.99]
46
- success_threshold: 0.5
47
- score_formula: mean(step_rewards)
48
 
49
  tasks:
50
  - id: easy
51
  difficulty: easy
52
  description: "2 easy tasks with no deadlines. Agent must complete both tasks without burning out."
53
  max_steps: 50
54
- grader: "models:grader"
 
 
55
 
56
  - id: medium
57
  difficulty: medium
58
  description: "5 medium tasks with deadlines. Agent must balance speed and energy to meet deadlines."
59
  max_steps: 50
60
- grader: "models:grader"
 
 
61
 
62
  - id: hard
63
  difficulty: hard
64
  description: "8 hard tasks with tight deadlines and hidden fatigue mechanics. Agent must manage stress and interruptions."
65
  max_steps: 50
66
- grader: "models:grader"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  spec_version: 1
2
+ name: cognitive-load-manager
3
  type: space
4
  runtime: fastapi
5
  app: server.app:app
 
8
  version: "1.0.0"
9
 
10
  endpoints:
11
+ health: /
12
  reset: /reset
13
  step: /step
14
  state: /state
 
15
 
16
  schema:
17
  observation:
 
40
  reward:
41
  type: number
42
 
43
+ graders:
44
+ deterministic_grader:
45
+ description: "Evaluates agent performance based on task completion, deadline adherence, and energy efficiency. Score strictly in (0.01, 0.99)."
46
+ fn: "models.grader"
47
 
48
  tasks:
49
  - id: easy
50
  difficulty: easy
51
  description: "2 easy tasks with no deadlines. Agent must complete both tasks without burning out."
52
  max_steps: 50
53
+ grader:
54
+ fn: "models.grader"
55
+ description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
56
 
57
  - id: medium
58
  difficulty: medium
59
  description: "5 medium tasks with deadlines. Agent must balance speed and energy to meet deadlines."
60
  max_steps: 50
61
+ grader:
62
+ fn: "models.grader"
63
+ description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
64
 
65
  - id: hard
66
  difficulty: hard
67
  description: "8 hard tasks with tight deadlines and hidden fatigue mechanics. Agent must manage stress and interruptions."
68
  max_steps: 50
69
+ grader:
70
+ fn: "models.grader"
71
+ description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
72
+
73
+ scoring:
74
+ reward_range: [0.01, 0.99]
75
+ success_threshold: 0.5
76
+ score_formula: deterministic_grader
77
+ notes: >
78
+ All task scores are strictly within (0.01, 0.99) — never exactly 0.0 or 1.0.
79
+ Grader evaluates completion rate, deadline adherence, and energy efficiency.
80
+
81
+ constraints:
82
+ max_runtime_seconds: 600
83
+ max_memory_gb: 4
84
+ max_vcpu: 2
tasks/__init__.py DELETED
@@ -1 +0,0 @@
1
- # tasks package
 
 
tasks/graders.py DELETED
@@ -1,158 +0,0 @@
1
- from typing import Dict, Callable
2
-
3
- # ─────────────────────────────────────────────────────────────────────────────
4
- # GRADER REGISTRY — required by the OpenEnv hackathon validator.
5
- #
6
- # Each grader takes (action: str, signals: dict) -> float and must return a
7
- # score STRICTLY between 0 and 1 (not 0.0, not 1.0).
8
- #
9
- # action: one of "work" | "break" | "switch" | "delay"
10
- # signals: dict with keys like energy, stress, fatigue, progress, deadline_gap
11
- # ─────────────────────────────────────────────────────────────────────────────
12
-
13
- GRADER_REGISTRY: Dict[str, Callable[[str, dict], float]] = {}
14
-
15
-
16
- def register_grader(task_id: str):
17
- def decorator(func: Callable[[str, dict], float]):
18
- GRADER_REGISTRY[task_id] = func
19
- return func
20
- return decorator
21
-
22
-
23
- def _clamp(value: float) -> float:
24
- """Clamp to strictly (0, 1) as required by the validator."""
25
- return round(min(max(float(value), 0.01), 0.99), 4)
26
-
27
-
28
- def grade_action(task_id: str, action: str, signals: dict) -> float:
29
- """
30
- Grade a single action for a given task.
31
- Falls back to 0.5 if the task has no registered grader.
32
- """
33
- action = action.lower().strip()
34
- if action not in ("work", "break", "switch", "delay"):
35
- for a in ("work", "break", "switch", "delay"):
36
- if a in action:
37
- action = a
38
- break
39
- else:
40
- return 0.05
41
-
42
- grader_func = GRADER_REGISTRY.get(task_id)
43
- if not grader_func:
44
- return 0.5
45
-
46
- return _clamp(grader_func(action, signals))
47
-
48
-
49
- # ── Task: easy ────────────────────────────────────────────────────────────────
50
- # 2 tasks, no deadlines. Agent should work efficiently without burning out.
51
- @register_grader("easy")
52
- def _grade_easy(action: str, signals: dict) -> float:
53
- energy = signals.get("energy", 0.7)
54
- progress = signals.get("progress", 0.0)
55
- stress = signals.get("stress", 0.0)
56
-
57
- if action == "work":
58
- # Working is good when energy is healthy
59
- if energy >= 0.4:
60
- return _clamp(0.55 + energy * 0.40 + progress * 0.10)
61
- else:
62
- # Low energy — working now is suboptimal
63
- return _clamp(0.20 + energy * 0.25)
64
-
65
- elif action == "break":
66
- # Breaks are valuable when energy is low, costly when energy is fine
67
- if energy < 0.4:
68
- return _clamp(0.70 + (0.4 - energy) * 0.70)
69
- else:
70
- return _clamp(0.20 + energy * 0.10)
71
-
72
- elif action == "switch":
73
- # Unnecessary context-switching in easy mode is mildly penalised
74
- return _clamp(0.25 + progress * 0.20)
75
-
76
- else: # delay
77
- return _clamp(0.15 + (1.0 - stress) * 0.15)
78
-
79
-
80
- # ── Task: medium ──────────────────────────────────────────────────────────────
81
- # 5 medium tasks with moderate deadlines. Agent must balance speed and energy.
82
- @register_grader("medium")
83
- def _grade_medium(action: str, signals: dict) -> float:
84
- energy = signals.get("energy", 0.7)
85
- stress = signals.get("stress", 0.2)
86
- deadline_gap = signals.get("deadline_gap", 10) # steps until nearest deadline
87
- progress = signals.get("progress", 0.0)
88
-
89
- urgency = max(0.0, 1.0 - deadline_gap / 20.0) # 0 = no urgency, 1 = critical
90
-
91
- if action == "work":
92
- if energy >= 0.3:
93
- return _clamp(0.50 + urgency * 0.35 + energy * 0.20)
94
- else:
95
- # Working on empty is risky but may be necessary near deadline
96
- return _clamp(0.25 + urgency * 0.35)
97
-
98
- elif action == "break":
99
- if energy < 0.35 and urgency < 0.6:
100
- return _clamp(0.65 + (0.35 - energy) * 0.80)
101
- elif urgency >= 0.6:
102
- # Break during urgency is a costly choice
103
- return _clamp(0.15 + energy * 0.10)
104
- else:
105
- return _clamp(0.30 + (1.0 - urgency) * 0.25)
106
-
107
- elif action == "switch":
108
- # Switching can be okay if current task is blocked / done
109
- return _clamp(0.30 + (1.0 - urgency) * 0.20 + progress * 0.15)
110
-
111
- else: # delay
112
- return _clamp(0.12 + (1.0 - urgency) * 0.20)
113
-
114
-
115
- # ── Task: hard ────────────────────────────────────────────────────────────────
116
- # 8 hard tasks with tight deadlines and hidden fatigue mechanics.
117
- # Agent must manage stress and avoid interruptions.
118
- @register_grader("hard")
119
- def _grade_hard(action: str, signals: dict) -> float:
120
- energy = signals.get("energy", 0.6)
121
- stress = signals.get("stress", 0.3)
122
- deadline_gap = signals.get("deadline_gap", 5)
123
- fatigue = signals.get("fatigue", 0.2)
124
-
125
- urgency = max(0.0, 1.0 - deadline_gap / 12.0)
126
- overloaded = stress > 0.7 or fatigue > 0.6
127
-
128
- if action == "work":
129
- if overloaded:
130
- # Grinding while overloaded leads to burnout — penalise
131
- return _clamp(0.20 + urgency * 0.25 - stress * 0.10)
132
- elif energy >= 0.25:
133
- return _clamp(0.52 + urgency * 0.38 + energy * 0.15 - fatigue * 0.10)
134
- else:
135
- return _clamp(0.18 + urgency * 0.30)
136
-
137
- elif action == "break":
138
- if overloaded:
139
- return _clamp(0.72 + stress * 0.25)
140
- elif energy < 0.3:
141
- return _clamp(0.65 + (0.3 - energy) * 0.90)
142
- elif urgency > 0.75:
143
- return _clamp(0.14 + energy * 0.08)
144
- else:
145
- return _clamp(0.35 + (1.0 - urgency) * 0.30)
146
-
147
- elif action == "switch":
148
- # Switching in hard mode is costly due to context cost
149
- if urgency < 0.4 and not overloaded:
150
- return _clamp(0.35 + energy * 0.15)
151
- else:
152
- return _clamp(0.12 + (1.0 - urgency) * 0.12)
153
-
154
- else: # delay
155
- if overloaded and urgency < 0.5:
156
- return _clamp(0.55 + stress * 0.25)
157
- else:
158
- return _clamp(0.10 + (1.0 - urgency) * 0.15)