Spaces:

anonymousDevil
/

cognitive-load-manager

Sleeping

AE-Shree

Select ous To ROund 2 !!

f3f7834 about 1 month ago

4.13 kB

	# Manifest header: schema version, Space identity, and server launch settings.
	spec_version: 1
	name: "cognitive-load-manager"
	type: "space"
	runtime: "fastapi"
	# Presumably the "module:attribute" import path of the app object — confirm
	# against the server package.
	app: "server.app:app"
	port: 7860

	# Human-readable summary of the environment. Folded scalar (`>`): the body
	# must be indented deeper than the key, and its lines join into one paragraph.
	description: >
	  Cognitive Load Manager (CLM) — a real-world productivity simulation where an AI agent
	  acts as a human task scheduler, managing energy, stress, and fatigue while completing
	  heterogeneous work items (emails, meetings, code reviews, reports, calls) under deadlines.
	  Features task dependencies, mid-episode interruptions, focus mode, and priority weighting.
	# Release metadata. The version is quoted so it stays a string.
	version: "2.0.0"
	author: "CLM Team"
	# Discovery tags, one per line.
	tags:
	  - openenv
	  - scheduling
	  - productivity
	  - rl
	  - agent-eval

	# Route path for each environment operation. Entries are nested under
	# `endpoints` so they form one mapping instead of separate top-level keys.
	endpoints:
	  health: /health
	  reset: /reset
	  step: /step
	  state: /state
	  grade: /grader

	# Discrete set of actions the agent can choose from.
	action_space:
	  type: discrete
	  actions:
	    # `requires` names the argument an action needs; `break` and `delay`
	    # declare none.
	    - name: work
	      description: "Work on task_id at normal pace (energy cost varies by task type)"
	      requires: task_id
	    - name: focus
	      description: "Deep-work mode: 2× progress, 2× energy cost; exits on break"
	      requires: task_id
	    - name: break
	      description: "Rest: +0.22 energy, -0.18 stress"
	    - name: switch
	      description: "Change active task (small context-switch cost)"
	      requires: task_id
	    - name: delay
	      description: "Wait one step; slight stress reduction"

	# Fields of each observation. `tasks` here is nested under
	# `observation_space`, so it does not clash with the top-level scenario list.
	observation_space:
	  # Fields reported for every task. Values are type descriptions, not data.
	  tasks:
	    - id: string
	    - task_type: "email | meeting | code_review | report | call"
	    - priority: "critical | high | normal | low"
	    - progress: "float [0.0, 1.0]"
	    - deadline: "int (step number) or null"
	    - depends_on: "task_id or null"
	    - is_interrupted: bool
	  visible_state:
	    # Partial observability: energy/stress are categorical labels, not raw floats.
	    - fatigue_level: "low | medium | high"  # energy bands: >0.6 | 0.3-0.6 | <0.3
	    - stress_level: "calm | elevated | critical"  # stress bands: <0.45 | 0.45-0.75 | >0.75
	    - stress_warning: bool  # true when stress > 0.65
	    - focus_mode: bool
	    - upcoming_deadlines: "list[task_id]"
	    - blocked_tasks: "list[task_id]"
	  time_step: int

	tasks:
	- id: easy
	difficulty: easy
	description: >
	2 tasks (email + report), normal priority, no deadlines.
	Agent must complete both without burning out.
	Tests basic work/break balance.
	max_steps: 50
	grader: "grader.clm_graders:EasyGrader"
	baseline_score: 0.856

	- id: medium
	difficulty: medium
	description: >
	5 heterogeneous tasks (email/meeting/code_review/report/call) with mixed
	priorities (critical→low) and real deadlines. Agent must triage intelligently.
	Tests priority-aware scheduling and deadline management.
	max_steps: 50
	grader: "grader.clm_graders:MediumGrader"
	baseline_score: 0.523

	- id: hard
	difficulty: hard
	description: >
	8 tasks with explicit dependencies (task B cannot start until task A completes),
	tight deadlines, and 2 mid-episode urgent email interruptions.
	Tests dependency-aware scheduling under time pressure.
	max_steps: 50
	grader: "grader.clm_graders:HardGrader"
	baseline_score: 0.301

	- id: expert
	difficulty: expert
	description: >
	10 tasks in a deep dependency chain, 3 mid-episode interruptions,
	mixed critical/high/normal priorities, and very tight deadlines.
	Genuinely challenges frontier LLM agents.
	max_steps: 60
	grader: "grader.clm_graders:ExpertGrader"
	baseline_score: 0.221

	# Scoring configuration: per-step reward bounds, final grader bounds, and
	# the weighted components of the final score.
	scoring:
	  reward_range: [-1.0, 1.0]  # step rewards (negative preserved for burnout)
	  grader_range: [0.01, 0.99]  # final episode scores
	  success_threshold: 0.50
	  score_formula: deterministic_grader
	  # Component weights; the five values sum to 1.00.
	  components:
	    - weighted_completion: 0.60
	    - deadline_adherence: 0.22
	    - energy_efficiency: 0.10
	    - dependency_bonus: 0.05
	    - interruption_bonus: 0.03

	# Per-step shaping terms. Penalties are negative and lie within the
	# [-1.0, 1.0] step reward range.
	reward_shaping:
	  # NOTE(review): presumably task-progress fractions that trigger a
	  # milestone reward — confirm against the environment code.
	  milestone_rewards: [0.25, 0.50, 0.75, 1.00]
	  burnout_penalty: -1.0
	  context_switch_penalty: -0.07
	  blocked_task_penalty: -0.15
	  stress_penalty_threshold: 0.80

	# Resource limits for a run: wall-clock seconds, memory in GB, and vCPU count.
	constraints:
	  max_runtime_seconds: 1800
	  max_memory_gb: 8
	  max_vcpu: 2

	# Inference entry point and the environment variable names listed for it.
	# Only names appear here, never values.
	inference:
	  script: "inference.py"
	  env_vars:
	    - API_BASE_URL
	    - MODEL_NAME
	    - HF_TOKEN