AE-Shree committed on
Commit ·
33e9ed5
1
Parent(s): 2f28f2f
Bhagavan mera madad karo 🙏
Browse files- backend/main.py +71 -22
- openenv.yaml +5 -48
backend/main.py
CHANGED
|
@@ -25,49 +25,69 @@ from models import (
|
|
| 25 |
CLMEnvironment,
|
| 26 |
)
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# ── OpenEnv-compatible Action / Observation / State models ──────────────────
|
| 30 |
|
| 31 |
class CLMAction(OEAction):
|
| 32 |
-
"""Action for the Cognitive Load Manager environment."""
|
| 33 |
type: str = Field(description="Action type: work, break, switch, or delay")
|
| 34 |
task_id: Optional[str] = Field(default=None, description="Task ID to act on")
|
| 35 |
-
|
| 36 |
model_config = {"extra": "allow"}
|
| 37 |
|
| 38 |
|
| 39 |
class CLMObservation(OEObservation):
|
| 40 |
-
"""Observation from the Cognitive Load Manager environment."""
|
| 41 |
tasks: List[Dict[str, Any]] = Field(default_factory=list)
|
| 42 |
visible_state: Dict[str, Any] = Field(default_factory=dict)
|
| 43 |
time_step: int = Field(default=0)
|
| 44 |
-
|
| 45 |
model_config = {"extra": "allow"}
|
| 46 |
|
| 47 |
|
| 48 |
class CLMState(OEState):
|
| 49 |
-
"""State for the Cognitive Load Manager environment."""
|
| 50 |
energy: float = Field(default=1.0)
|
| 51 |
stress: float = Field(default=0.0)
|
| 52 |
fatigue: float = Field(default=0.0)
|
| 53 |
current_task_id: Optional[str] = Field(default=None)
|
| 54 |
tasks: List[Dict[str, Any]] = Field(default_factory=list)
|
| 55 |
-
|
| 56 |
model_config = {"extra": "allow"}
|
| 57 |
|
| 58 |
|
| 59 |
# ── OpenEnv Environment wrapper ─────────────────────────────────────────────
|
| 60 |
|
| 61 |
class CLMEnvWrapper(Environment):
|
| 62 |
-
"""
|
| 63 |
-
Cognitive Load Manager wrapped as an OpenEnv-compliant environment.
|
| 64 |
-
|
| 65 |
-
Three difficulty levels via the task_id reset parameter:
|
| 66 |
-
- easy: 2 tasks, no deadlines
|
| 67 |
-
- medium: 5 tasks with deadlines
|
| 68 |
-
- hard: 8 tasks with tight deadlines
|
| 69 |
-
"""
|
| 70 |
-
|
| 71 |
SUPPORTS_CONCURRENT_SESSIONS = True
|
| 72 |
|
| 73 |
def __init__(self):
|
|
@@ -75,9 +95,10 @@ class CLMEnvWrapper(Environment):
|
|
| 75 |
level = os.getenv("CLM_LEVEL", "easy")
|
| 76 |
tasks = generate_tasks(level)
|
| 77 |
self._env = CLMEnvironment(tasks=tasks, max_steps=50)
|
| 78 |
-
self._final_score: float =
|
| 79 |
|
| 80 |
-
def _to_oe_obs(self, obs: ModelObservation, done: bool = False,
|
|
|
|
| 81 |
return CLMObservation(
|
| 82 |
tasks=[t.model_dump() for t in obs.tasks],
|
| 83 |
visible_state=obs.visible_state.model_dump(),
|
|
@@ -87,12 +108,13 @@ class CLMEnvWrapper(Environment):
|
|
| 87 |
metadata=info or {},
|
| 88 |
)
|
| 89 |
|
| 90 |
-
def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None,
|
|
|
|
| 91 |
if task_id not in ("easy", "medium", "hard"):
|
| 92 |
task_id = "easy"
|
| 93 |
tasks = generate_tasks(task_id)
|
| 94 |
self._env = CLMEnvironment(tasks=tasks, max_steps=50)
|
| 95 |
-
self._final_score =
|
| 96 |
obs = self._env.reset()
|
| 97 |
return self._to_oe_obs(obs)
|
| 98 |
|
|
@@ -100,13 +122,15 @@ class CLMEnvWrapper(Environment):
|
|
| 100 |
model_action = ModelAction(type=action.type, task_id=action.task_id)
|
| 101 |
obs, reward, done, info = self._env.step(model_action)
|
| 102 |
if done:
|
| 103 |
-
|
| 104 |
self._env.state.tasks,
|
| 105 |
self._env.state.time_step,
|
| 106 |
self._env.state.energy,
|
| 107 |
)
|
|
|
|
| 108 |
info["final_score"] = self._final_score
|
| 109 |
-
|
|
|
|
| 110 |
|
| 111 |
@property
|
| 112 |
def state(self) -> CLMState:
|
|
@@ -137,7 +161,7 @@ class CLMEnvWrapper(Environment):
|
|
| 137 |
pass
|
| 138 |
|
| 139 |
|
| 140 |
-
# ── Build FastAPI app
|
| 141 |
|
| 142 |
def build_app() -> FastAPI:
|
| 143 |
server = HTTPEnvServer(
|
|
@@ -165,6 +189,31 @@ def build_app() -> FastAPI:
|
|
| 165 |
)
|
| 166 |
|
| 167 |
server.register_routes(_app)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
return _app
|
| 169 |
|
| 170 |
|
|
|
|
| 25 |
CLMEnvironment,
|
| 26 |
)
|
| 27 |
|
| 28 |
+
_SCORE_MIN = 0.01
|
| 29 |
+
_SCORE_MAX = 0.99
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _safe_score(raw: float) -> float:
|
| 33 |
+
"""Clamp to strictly open interval (0, 1). Never returns 0.0 or 1.0."""
|
| 34 |
+
try:
|
| 35 |
+
s = float(raw)
|
| 36 |
+
except (TypeError, ValueError):
|
| 37 |
+
return _SCORE_MIN
|
| 38 |
+
return round(max(_SCORE_MIN, min(_SCORE_MAX, s)), 4)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _grade_task(difficulty: str) -> dict:
    """Run the deterministic grader on a fresh environment.

    A new ``CLMEnvironment`` is built from ``generate_tasks(difficulty)``
    and reset; its initial tasks, time step and energy are then passed to
    ``deterministic_grader``. No actions are stepped, so the score reflects
    an untouched episode rather than any agent's run.
    NOTE(review): confirm this is what the validator expects.

    Args:
        difficulty: Difficulty label forwarded to ``generate_tasks`` and
            echoed back as ``task_id``.

    Returns:
        A dict with ``task_id``, ``reward``, ``score`` (both the clamped
        score), ``done`` (always ``False``) and ``grader_message``.
        If anything raises, the score falls back to ``_SCORE_MIN``.
    """
    import logging

    try:
        env = CLMEnvironment(tasks=generate_tasks(difficulty), max_steps=50)
        env.reset()
        raw = deterministic_grader(
            env.state.tasks,
            env.state.time_step,
            env.state.energy,
        )
        score = _safe_score(raw)
    except Exception:
        # Best-effort endpoint: never fail the request, but record why the
        # fallback score was used instead of hiding the error.
        logging.getLogger(__name__).exception(
            "Grading failed for difficulty=%s", difficulty
        )
        score = _SCORE_MIN
    return {
        "task_id": difficulty,
        "reward": score,
        "score": score,
        "done": False,
        "grader_message": f"CLM deterministic grader for difficulty={difficulty}",
    }
|
| 62 |
+
|
| 63 |
|
| 64 |
# ── OpenEnv-compatible Action / Observation / State models ──────────────────
|
| 65 |
|
| 66 |
class CLMAction(OEAction):
    """Action for the Cognitive Load Manager environment."""

    # Kind of action requested; the description names work, break, switch
    # and delay, but the field itself accepts any string.
    type: str = Field(description="Action type: work, break, switch, or delay")
    # Task the action applies to; optional, defaults to None.
    task_id: Optional[str] = Field(default=None, description="Task ID to act on")
    # NOTE(review): presumably pydantic v2 config that keeps undeclared
    # fields instead of rejecting them - confirm against the base class.
    model_config = {"extra": "allow"}
|
| 70 |
|
| 71 |
|
| 72 |
class CLMObservation(OEObservation):
    """Observation from the Cognitive Load Manager environment."""

    # One dict per task; defaults to an empty list.
    tasks: List[Dict[str, Any]] = Field(default_factory=list)
    # Dict form of the visible state; defaults to an empty dict.
    visible_state: Dict[str, Any] = Field(default_factory=dict)
    # Step counter of the episode; defaults to 0.
    time_step: int = Field(default=0)
    # NOTE(review): presumably pydantic v2 config that keeps undeclared
    # fields instead of rejecting them - confirm against the base class.
    model_config = {"extra": "allow"}
|
| 77 |
|
| 78 |
|
| 79 |
class CLMState(OEState):
    """State for the Cognitive Load Manager environment."""

    # Defaults to 1.0.
    energy: float = Field(default=1.0)
    # Defaults to 0.0.
    stress: float = Field(default=0.0)
    # Defaults to 0.0.
    fatigue: float = Field(default=0.0)
    # ID of the task currently selected, or None when there is none.
    current_task_id: Optional[str] = Field(default=None)
    # One dict per task; defaults to an empty list.
    tasks: List[Dict[str, Any]] = Field(default_factory=list)
    # NOTE(review): presumably pydantic v2 config that keeps undeclared
    # fields instead of rejecting them - confirm against the base class.
    model_config = {"extra": "allow"}
|
| 86 |
|
| 87 |
|
| 88 |
# ── OpenEnv Environment wrapper ─────────────────────────────────────────────
|
| 89 |
|
| 90 |
class CLMEnvWrapper(Environment):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
SUPPORTS_CONCURRENT_SESSIONS = True
|
| 92 |
|
| 93 |
def __init__(self):
|
|
|
|
| 95 |
level = os.getenv("CLM_LEVEL", "easy")
|
| 96 |
tasks = generate_tasks(level)
|
| 97 |
self._env = CLMEnvironment(tasks=tasks, max_steps=50)
|
| 98 |
+
self._final_score: float = _SCORE_MIN
|
| 99 |
|
| 100 |
+
def _to_oe_obs(self, obs: ModelObservation, done: bool = False,
|
| 101 |
+
reward: Optional[float] = None, info: Optional[dict] = None) -> CLMObservation:
|
| 102 |
return CLMObservation(
|
| 103 |
tasks=[t.model_dump() for t in obs.tasks],
|
| 104 |
visible_state=obs.visible_state.model_dump(),
|
|
|
|
| 108 |
metadata=info or {},
|
| 109 |
)
|
| 110 |
|
| 111 |
+
def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None,
          task_id: str = "easy", **kwargs) -> CLMObservation:
    """Start a new episode at the requested difficulty.

    Args:
        seed: Accepted for interface compatibility; not used here.
        episode_id: Accepted for interface compatibility; not used here.
        task_id: Difficulty level, one of "easy", "medium" or "hard".
            Any other value falls back to "easy".
        **kwargs: Ignored.

    Returns:
        The initial observation of the newly created environment.
    """
    level = task_id if task_id in ("easy", "medium", "hard") else "easy"
    # Replace the wrapped environment and clear the previous episode's score.
    self._env = CLMEnvironment(tasks=generate_tasks(level), max_steps=50)
    self._final_score = _SCORE_MIN
    return self._to_oe_obs(self._env.reset())
|
| 120 |
|
|
|
|
| 122 |
model_action = ModelAction(type=action.type, task_id=action.task_id)
|
| 123 |
obs, reward, done, info = self._env.step(model_action)
|
| 124 |
if done:
|
| 125 |
+
raw_score = deterministic_grader(
|
| 126 |
self._env.state.tasks,
|
| 127 |
self._env.state.time_step,
|
| 128 |
self._env.state.energy,
|
| 129 |
)
|
| 130 |
+
self._final_score = _safe_score(raw_score)
|
| 131 |
info["final_score"] = self._final_score
|
| 132 |
+
safe_reward = _safe_score(float(reward))
|
| 133 |
+
return self._to_oe_obs(obs, done=done, reward=safe_reward, info=info)
|
| 134 |
|
| 135 |
@property
|
| 136 |
def state(self) -> CLMState:
|
|
|
|
| 161 |
pass
|
| 162 |
|
| 163 |
|
| 164 |
+
# ── Build FastAPI app ───────────────────────────────────────────────────────
|
| 165 |
|
| 166 |
def build_app() -> FastAPI:
|
| 167 |
server = HTTPEnvServer(
|
|
|
|
| 189 |
)
|
| 190 |
|
| 191 |
server.register_routes(_app)
|
| 192 |
+
|
| 193 |
+
# ── Grade endpoints (required by hackathon Phase 2 validator) ────────────
|
| 194 |
+
# Validator calls GET /grader and GET /grade/{task_id} to score each task.
|
| 195 |
+
# Scores must never be 0.0 or 1.0 — _safe_score clamps them to [0.01, 0.99].
|
| 196 |
+
|
| 197 |
+
@_app.get("/grader", tags=["Grader"])
|
| 198 |
+
async def get_grader_score():
|
| 199 |
+
"""General grader endpoint β returns score for 'easy' difficulty."""
|
| 200 |
+
return _grade_task("easy")
|
| 201 |
+
|
| 202 |
+
@_app.get("/grade/easy", tags=["Grader"])
|
| 203 |
+
async def grade_easy():
|
| 204 |
+
"""Grade the 'easy' task (2 tasks, no deadlines)."""
|
| 205 |
+
return _grade_task("easy")
|
| 206 |
+
|
| 207 |
+
@_app.get("/grade/medium", tags=["Grader"])
|
| 208 |
+
async def grade_medium():
|
| 209 |
+
"""Grade the 'medium' task (5 tasks with deadlines)."""
|
| 210 |
+
return _grade_task("medium")
|
| 211 |
+
|
| 212 |
+
@_app.get("/grade/hard", tags=["Grader"])
|
| 213 |
+
async def grade_hard():
|
| 214 |
+
"""Grade the 'hard' task (8 tasks with tight deadlines)."""
|
| 215 |
+
return _grade_task("hard")
|
| 216 |
+
|
| 217 |
return _app
|
| 218 |
|
| 219 |
|
openenv.yaml
CHANGED
|
@@ -8,67 +8,30 @@ description: Cognitive Load Manager (CLM) simulates human cognitive load (energy
|
|
| 8 |
version: "1.0.0"
|
| 9 |
|
| 10 |
endpoints:
|
| 11 |
-
health: /
|
| 12 |
reset: /reset
|
| 13 |
step: /step
|
| 14 |
state: /state
|
| 15 |
-
|
| 16 |
-
schema:
|
| 17 |
-
observation:
|
| 18 |
-
type: object
|
| 19 |
-
properties:
|
| 20 |
-
tasks:
|
| 21 |
-
type: array
|
| 22 |
-
items:
|
| 23 |
-
type: object
|
| 24 |
-
properties:
|
| 25 |
-
id: { type: string }
|
| 26 |
-
difficulty: { type: string }
|
| 27 |
-
progress: { type: number }
|
| 28 |
-
deadline: { type: number, nullable: true }
|
| 29 |
-
visible_state:
|
| 30 |
-
type: object
|
| 31 |
-
properties:
|
| 32 |
-
fatigue_level: { type: string }
|
| 33 |
-
stress_warning: { type: boolean }
|
| 34 |
-
time_step: { type: integer }
|
| 35 |
-
action:
|
| 36 |
-
type: object
|
| 37 |
-
properties:
|
| 38 |
-
type: { type: string, enum: ["work", "break", "switch", "delay"] }
|
| 39 |
-
task_id: { type: string, nullable: true }
|
| 40 |
-
reward:
|
| 41 |
-
type: number
|
| 42 |
-
|
| 43 |
-
graders:
|
| 44 |
-
deterministic_grader:
|
| 45 |
-
description: "Evaluates agent performance based on task completion, deadline adherence, and energy efficiency. Score strictly in (0.01, 0.99)."
|
| 46 |
-
fn: "models.grader"
|
| 47 |
|
| 48 |
tasks:
|
| 49 |
- id: easy
|
| 50 |
difficulty: easy
|
| 51 |
description: "2 easy tasks with no deadlines. Agent must complete both tasks without burning out."
|
| 52 |
max_steps: 50
|
| 53 |
-
grader:
|
| 54 |
-
fn: "models.grader"
|
| 55 |
-
description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
|
| 56 |
|
| 57 |
- id: medium
|
| 58 |
difficulty: medium
|
| 59 |
description: "5 medium tasks with deadlines. Agent must balance speed and energy to meet deadlines."
|
| 60 |
max_steps: 50
|
| 61 |
-
grader:
|
| 62 |
-
fn: "models.grader"
|
| 63 |
-
description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
|
| 64 |
|
| 65 |
- id: hard
|
| 66 |
difficulty: hard
|
| 67 |
description: "8 hard tasks with tight deadlines and hidden fatigue mechanics. Agent must manage stress and interruptions."
|
| 68 |
max_steps: 50
|
| 69 |
-
grader:
|
| 70 |
-
fn: "models.grader"
|
| 71 |
-
description: "Score strictly in (0.01, 0.99). Grades on completion rate, deadline adherence, and energy efficiency."
|
| 72 |
|
| 73 |
scoring:
|
| 74 |
reward_range: [0.01, 0.99]
|
|
@@ -76,9 +39,3 @@ scoring:
|
|
| 76 |
score_formula: deterministic_grader
|
| 77 |
notes: >
|
| 78 |
All task scores are clamped to [0.01, 0.99] — never exactly 0.0 or 1.0.
|
| 79 |
-
Grader evaluates completion rate, deadline adherence, and energy efficiency.
|
| 80 |
-
|
| 81 |
-
constraints:
|
| 82 |
-
max_runtime_seconds: 600
|
| 83 |
-
max_memory_gb: 4
|
| 84 |
-
max_vcpu: 2
|
|
|
|
| 8 |
version: "1.0.0"
|
| 9 |
|
| 10 |
endpoints:
|
| 11 |
+
health: /health
|
| 12 |
reset: /reset
|
| 13 |
step: /step
|
| 14 |
state: /state
|
| 15 |
+
grade: /grader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
tasks:
|
| 18 |
- id: easy
|
| 19 |
difficulty: easy
|
| 20 |
description: "2 easy tasks with no deadlines. Agent must complete both tasks without burning out."
|
| 21 |
max_steps: 50
|
| 22 |
+
grader: "models:grader"
|
|
|
|
|
|
|
| 23 |
|
| 24 |
- id: medium
|
| 25 |
difficulty: medium
|
| 26 |
description: "5 medium tasks with deadlines. Agent must balance speed and energy to meet deadlines."
|
| 27 |
max_steps: 50
|
| 28 |
+
grader: "models:grader"
|
|
|
|
|
|
|
| 29 |
|
| 30 |
- id: hard
|
| 31 |
difficulty: hard
|
| 32 |
description: "8 hard tasks with tight deadlines and hidden fatigue mechanics. Agent must manage stress and interruptions."
|
| 33 |
max_steps: 50
|
| 34 |
+
grader: "models:grader"
|
|
|
|
|
|
|
| 35 |
|
| 36 |
scoring:
|
| 37 |
reward_range: [0.01, 0.99]
|
|
|
|
| 39 |
score_formula: deterministic_grader
|
| 40 |
notes: >
|
| 41 |
All task scores are clamped to [0.01, 0.99] — never exactly 0.0 or 1.0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|