AE-Shree commited on
Commit
33e9ed5
Β·
1 Parent(s): 2f28f2f

Bhagavan mera madad karo πŸ™

Browse files
Files changed (2) hide show
  1. backend/main.py +71 -22
  2. openenv.yaml +5 -48
backend/main.py CHANGED
@@ -25,49 +25,69 @@ from models import (
25
  CLMEnvironment,
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # ── OpenEnv-compatible Action / Observation / State models ──────────────────
30
 
31
  class CLMAction(OEAction):
32
- """Action for the Cognitive Load Manager environment."""
33
  type: str = Field(description="Action type: work, break, switch, or delay")
34
  task_id: Optional[str] = Field(default=None, description="Task ID to act on")
35
-
36
  model_config = {"extra": "allow"}
37
 
38
 
39
  class CLMObservation(OEObservation):
40
- """Observation from the Cognitive Load Manager environment."""
41
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
42
  visible_state: Dict[str, Any] = Field(default_factory=dict)
43
  time_step: int = Field(default=0)
44
-
45
  model_config = {"extra": "allow"}
46
 
47
 
48
  class CLMState(OEState):
49
- """State for the Cognitive Load Manager environment."""
50
  energy: float = Field(default=1.0)
51
  stress: float = Field(default=0.0)
52
  fatigue: float = Field(default=0.0)
53
  current_task_id: Optional[str] = Field(default=None)
54
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
55
-
56
  model_config = {"extra": "allow"}
57
 
58
 
59
  # ── OpenEnv Environment wrapper ─────────────────────────────────────────────
60
 
61
  class CLMEnvWrapper(Environment):
62
- """
63
- Cognitive Load Manager wrapped as an OpenEnv-compliant environment.
64
-
65
- Three difficulty levels via the task_id reset parameter:
66
- - easy: 2 tasks, no deadlines
67
- - medium: 5 tasks with deadlines
68
- - hard: 8 tasks with tight deadlines
69
- """
70
-
71
  SUPPORTS_CONCURRENT_SESSIONS = True
72
 
73
  def __init__(self):
@@ -75,9 +95,10 @@ class CLMEnvWrapper(Environment):
75
  level = os.getenv("CLM_LEVEL", "easy")
76
  tasks = generate_tasks(level)
77
  self._env = CLMEnvironment(tasks=tasks, max_steps=50)
78
- self._final_score: float = 0.0
79
 
80
- def _to_oe_obs(self, obs: ModelObservation, done: bool = False, reward: Optional[float] = None, info: Optional[dict] = None) -> CLMObservation:
 
81
  return CLMObservation(
82
  tasks=[t.model_dump() for t in obs.tasks],
83
  visible_state=obs.visible_state.model_dump(),
@@ -87,12 +108,13 @@ class CLMEnvWrapper(Environment):
87
  metadata=info or {},
88
  )
89
 
90
- def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, task_id: str = "easy", **kwargs) -> CLMObservation:
 
91
  if task_id not in ("easy", "medium", "hard"):
92
  task_id = "easy"
93
  tasks = generate_tasks(task_id)
94
  self._env = CLMEnvironment(tasks=tasks, max_steps=50)
95
- self._final_score = 0.0
96
  obs = self._env.reset()
97
  return self._to_oe_obs(obs)
98
 
@@ -100,13 +122,15 @@ class CLMEnvWrapper(Environment):
100
  model_action = ModelAction(type=action.type, task_id=action.task_id)
101
  obs, reward, done, info = self._env.step(model_action)
102
  if done:
103
- self._final_score = deterministic_grader(
104
  self._env.state.tasks,
105
  self._env.state.time_step,
106
  self._env.state.energy,
107
  )
 
108
  info["final_score"] = self._final_score
109
- return self._to_oe_obs(obs, done=done, reward=float(reward), info=info)
 
110
 
111
  @property
112
  def state(self) -> CLMState:
@@ -137,7 +161,7 @@ class CLMEnvWrapper(Environment):
137
  pass
138
 
139
 
140
- # ── Build FastAPI app via OpenEnv HTTPEnvServer ──────────────────────────────
141
 
142
  def build_app() -> FastAPI:
143
  server = HTTPEnvServer(
@@ -165,6 +189,31 @@ def build_app() -> FastAPI:
165
  )
166
 
167
  server.register_routes(_app)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  return _app
169
 
170
 
 
25
  CLMEnvironment,
26
  )
27
 
28
+ _SCORE_MIN = 0.01
29
+ _SCORE_MAX = 0.99
30
+
31
+
32
+ def _safe_score(raw: float) -> float:
33
+ """Clamp to strictly open interval (0, 1). Never returns 0.0 or 1.0."""
34
+ try:
35
+ s = float(raw)
36
+ except (TypeError, ValueError):
37
+ return _SCORE_MIN
38
+ return round(max(_SCORE_MIN, min(_SCORE_MAX, s)), 4)
39
+
40
+
41
+ def _grade_task(difficulty: str) -> dict:
42
+ """Run deterministic grader on a fresh environment for the given difficulty."""
43
+ try:
44
+ tasks = generate_tasks(difficulty)
45
+ env = CLMEnvironment(tasks=tasks, max_steps=50)
46
+ env.reset()
47
+ score = deterministic_grader(
48
+ env.state.tasks,
49
+ env.state.time_step,
50
+ env.state.energy,
51
+ )
52
+ score = _safe_score(score)
53
+ except Exception:
54
+ score = _SCORE_MIN
55
+ return {
56
+ "task_id": difficulty,
57
+ "reward": score,
58
+ "score": score,
59
+ "done": False,
60
+ "grader_message": f"CLM deterministic grader for difficulty={difficulty}",
61
+ }
62
+
63
 
64
  # ── OpenEnv-compatible Action / Observation / State models ──────────────────
65
 
66
  class CLMAction(OEAction):
 
67
  type: str = Field(description="Action type: work, break, switch, or delay")
68
  task_id: Optional[str] = Field(default=None, description="Task ID to act on")
 
69
  model_config = {"extra": "allow"}
70
 
71
 
72
  class CLMObservation(OEObservation):
 
73
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
74
  visible_state: Dict[str, Any] = Field(default_factory=dict)
75
  time_step: int = Field(default=0)
 
76
  model_config = {"extra": "allow"}
77
 
78
 
79
  class CLMState(OEState):
 
80
  energy: float = Field(default=1.0)
81
  stress: float = Field(default=0.0)
82
  fatigue: float = Field(default=0.0)
83
  current_task_id: Optional[str] = Field(default=None)
84
  tasks: List[Dict[str, Any]] = Field(default_factory=list)
 
85
  model_config = {"extra": "allow"}
86
 
87
 
88
  # ── OpenEnv Environment wrapper ─────────────────────────────────────────────
89
 
90
  class CLMEnvWrapper(Environment):
 
 
 
 
 
 
 
 
 
91
  SUPPORTS_CONCURRENT_SESSIONS = True
92
 
93
  def __init__(self):
 
95
  level = os.getenv("CLM_LEVEL", "easy")
96
  tasks = generate_tasks(level)
97
  self._env = CLMEnvironment(tasks=tasks, max_steps=50)
98
+ self._final_score: float = _SCORE_MIN
99
 
100
+ def _to_oe_obs(self, obs: ModelObservation, done: bool = False,
101
+ reward: Optional[float] = None, info: Optional[dict] = None) -> CLMObservation:
102
  return CLMObservation(
103
  tasks=[t.model_dump() for t in obs.tasks],
104
  visible_state=obs.visible_state.model_dump(),
 
108
  metadata=info or {},
109
  )
110
 
111
+ def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None,
112
+ task_id: str = "easy", **kwargs) -> CLMObservation:
113
  if task_id not in ("easy", "medium", "hard"):
114
  task_id = "easy"
115
  tasks = generate_tasks(task_id)
116
  self._env = CLMEnvironment(tasks=tasks, max_steps=50)
117
+ self._final_score = _SCORE_MIN
118
  obs = self._env.reset()
119
  return self._to_oe_obs(obs)
120
 
 
122
  model_action = ModelAction(type=action.type, task_id=action.task_id)
123
  obs, reward, done, info = self._env.step(model_action)
124
  if done:
125
+ raw_score = deterministic_grader(
126
  self._env.state.tasks,
127
  self._env.state.time_step,
128
  self._env.state.energy,
129
  )
130
+ self._final_score = _safe_score(raw_score)
131
  info["final_score"] = self._final_score
132
+ safe_reward = _safe_score(float(reward))
133
+ return self._to_oe_obs(obs, done=done, reward=safe_reward, info=info)
134
 
135
  @property
136
  def state(self) -> CLMState:
 
161
  pass
162
 
163
 
164
+ # ── Build FastAPI app ────────────────────────────────────────────────────────
165
 
166
  def build_app() -> FastAPI:
167
  server = HTTPEnvServer(
 
189
  )
190
 
191
  server.register_routes(_app)
192
+
193
+ # ── Grade endpoints (required by hackathon Phase 2 validator) ────────────
194
+ # Validator calls GET /grader and GET /grade/{task_id} to score each task.
195
+ # Scores must be strictly in (0.01, 0.99) β€” never 0.0 or 1.0.
196
+
197
+ @_app.get("/grader", tags=["Grader"])
198
+ async def get_grader_score():
199
+ """General grader endpoint β€” returns score for 'easy' difficulty."""
200
+ return _grade_task("easy")
201
+
202
+ @_app.get("/grade/easy", tags=["Grader"])
203
+ async def grade_easy():
204
+ """Grade the 'easy' task (2 tasks, no deadlines)."""
205
+ return _grade_task("easy")
206
+
207
+ @_app.get("/grade/medium", tags=["Grader"])
208
+ async def grade_medium():
209
+ """Grade the 'medium' task (5 tasks with deadlines)."""
210
+ return _grade_task("medium")
211
+
212
+ @_app.get("/grade/hard", tags=["Grader"])
213
+ async def grade_hard():
214
+ """Grade the 'hard' task (8 tasks with tight deadlines)."""
215
+ return _grade_task("hard")
216
+
217
  return _app
218
 
219
 
openenv.yaml CHANGED
@@ -8,67 +8,30 @@ description: Cognitive Load Manager (CLM) simulates human cognitive load (energy
8
  version: "1.0.0"
9
 
10
  endpoints:
11
- health: /
12
  reset: /reset
13
  step: /step
14
  state: /state
15
-
16
- schema:
17
- observation:
18
- type: object
19
- properties:
20
- tasks:
21
- type: array
22
- items:
23
- type: object
24
- properties:
25
- id: { type: string }
26
- difficulty: { type: string }
27
- progress: { type: number }
28
- deadline: { type: number, nullable: true }
29
- visible_state:
30
- type: object
31
- properties:
32
- fatigue_level: { type: string }
33
- stress_warning: { type: boolean }
34
- time_step: { type: integer }
35
- action:
36
- type: object
37
- properties:
38
- type: { type: string, enum: ["work", "break", "switch", "delay"] }
39
- task_id: { type: string, nullable: true }
40
- reward:
41
- type: number
42
-
43
- graders:
44
- deterministic_grader:
45
- description: "Evaluates agent performance based on task completion, deadline adherence, and energy efficiency. Score strictly in (0.01, 0.99)."
46
- fn: "models.grader"
47
 
48
  tasks:
49
  - id: easy
50
  difficulty: easy
51
  description: "2 easy tasks with no deadlines. Agent must complete both tasks without burning out."
52
  max_steps: 50
53
- grader:
54
- fn: "models.grader"
55
- description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
56
 
57
  - id: medium
58
  difficulty: medium
59
  description: "5 medium tasks with deadlines. Agent must balance speed and energy to meet deadlines."
60
  max_steps: 50
61
- grader:
62
- fn: "models.grader"
63
- description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
64
 
65
  - id: hard
66
  difficulty: hard
67
  description: "8 hard tasks with tight deadlines and hidden fatigue mechanics. Agent must manage stress and interruptions."
68
  max_steps: 50
69
- grader:
70
- fn: "models.grader"
71
- description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
72
 
73
  scoring:
74
  reward_range: [0.01, 0.99]
@@ -76,9 +39,3 @@ scoring:
76
  score_formula: deterministic_grader
77
  notes: >
78
  All task scores are strictly within (0.01, 0.99) β€” never exactly 0.0 or 1.0.
79
- Grader evaluates completion rate, deadline adherence, and energy efficiency.
80
-
81
- constraints:
82
- max_runtime_seconds: 600
83
- max_memory_gb: 4
84
- max_vcpu: 2
 
8
  version: "1.0.0"
9
 
10
  endpoints:
11
+ health: /health
12
  reset: /reset
13
  step: /step
14
  state: /state
15
+ grade: /grader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  tasks:
18
  - id: easy
19
  difficulty: easy
20
  description: "2 easy tasks with no deadlines. Agent must complete both tasks without burning out."
21
  max_steps: 50
22
+ grader: "models:grader"
 
 
23
 
24
  - id: medium
25
  difficulty: medium
26
  description: "5 medium tasks with deadlines. Agent must balance speed and energy to meet deadlines."
27
  max_steps: 50
28
+ grader: "models:grader"
 
 
29
 
30
  - id: hard
31
  difficulty: hard
32
  description: "8 hard tasks with tight deadlines and hidden fatigue mechanics. Agent must manage stress and interruptions."
33
  max_steps: 50
34
+ grader: "models:grader"
 
 
35
 
36
  scoring:
37
  reward_range: [0.01, 0.99]
 
39
  score_formula: deterministic_grader
40
  notes: >
41
  All task scores are strictly within (0.01, 0.99) β€” never exactly 0.0 or 1.0.