---
# Manifest identity and server entry point.
spec_version: 1
name: cognitive-load-manager
type: space
runtime: fastapi
# Presumably a "module:attribute" import path for the application object;
# confirm against whatever launches the server.
app: server.app:app
port: 7860
# Human-readable summary. Folded style (>) joins the wrapped lines into a
# single paragraph and keeps one trailing newline.
description: >
  Cognitive Load Manager (CLM) — a real-world productivity simulation where an AI agent
  acts as a human task scheduler, managing energy, stress, and fatigue while completing
  heterogeneous work items (emails, meetings, code reviews, reports, calls) under deadlines.
  Features task dependencies, mid-episode interruptions, focus mode, and priority weighting.
# Quoted so the version stays a string rather than being type-inferred.
version: "2.0.0"
author: "CLM Team"
tags: [openenv, scheduling, productivity, rl, agent-eval]
# Route path for each environment operation.
endpoints:
  health: /health
  reset: /reset
  step: /step
  state: /state
  # The grade operation maps to /grader (not /grade).
  grade: /grader
# Actions the agent may take each step. Entries with `requires: task_id`
# need a target task; `break` and `delay` declare no requirement.
action_space:
  type: discrete
  actions:
    - name: work
      description: "Work on task_id at normal pace (energy cost varies by task type)"
      requires: task_id
    - name: focus
      description: "Deep-work mode: 2× progress, 2× energy cost; exits on break"
      requires: task_id
    - name: break
      description: "Rest: +0.22 energy, -0.18 stress"
    - name: switch
      description: "Change active task (small context-switch cost)"
      requires: task_id
    - name: delay
      description: "Wait one step; slight stress reduction"
# Shape of each observation. Values are informal type descriptors (strings),
# not data; descriptors containing brackets or parentheses are quoted so they
# can never be mistaken for YAML flow syntax.
# NOTE(review): nesting was reconstructed from a whitespace-stripped copy;
# confirm that time_step belongs under observation_space.
observation_space:
  # Per-task fields.
  tasks:
    - id: string
    - task_type: "email | meeting | code_review | report | call"
    - priority: "critical | high | normal | low"
    - progress: "float [0.0, 1.0]"
    - deadline: "int (step number) or null"
    - depends_on: "task_id or null"
    - is_interrupted: bool
  visible_state:
    # Partial observability: energy/stress are categorical labels, not raw floats.
    - fatigue_level: "low | medium | high"  # energy bands: >0.6 | 0.3-0.6 | <0.3
    - stress_level: "calm | elevated | critical"  # stress bands: <0.45 | 0.45-0.75 | >0.75
    - stress_warning: bool  # true when stress > 0.65
    - focus_mode: bool
    - upcoming_deadlines: "list[task_id]"
    - blocked_tasks: "list[task_id]"
  time_step: int
# Benchmark scenarios, ordered from easy to expert. Each entry names its
# grader as "module:Class" and records a baseline score; baselines fall as
# difficulty rises (0.856 → 0.221).
tasks:
  - id: easy
    difficulty: easy
    description: >
      2 tasks (email + report), normal priority, no deadlines.
      Agent must complete both without burning out.
      Tests basic work/break balance.
    max_steps: 50
    grader: "grader.clm_graders:EasyGrader"
    baseline_score: 0.856
  - id: medium
    difficulty: medium
    description: >
      5 heterogeneous tasks (email/meeting/code_review/report/call) with mixed
      priorities (critical→low) and real deadlines. Agent must triage intelligently.
      Tests priority-aware scheduling and deadline management.
    max_steps: 50
    grader: "grader.clm_graders:MediumGrader"
    baseline_score: 0.523
  - id: hard
    difficulty: hard
    description: >
      8 tasks with explicit dependencies (task B cannot start until task A completes),
      tight deadlines, and 2 mid-episode urgent email interruptions.
      Tests dependency-aware scheduling under time pressure.
    max_steps: 50
    grader: "grader.clm_graders:HardGrader"
    baseline_score: 0.301
  # Only scenario with a larger step budget (60 instead of 50).
  - id: expert
    difficulty: expert
    description: >
      10 tasks in a deep dependency chain, 3 mid-episode interruptions,
      mixed critical/high/normal priorities, and very tight deadlines.
      Genuinely challenges frontier LLM agents.
    max_steps: 60
    grader: "grader.clm_graders:ExpertGrader"
    baseline_score: 0.221
# Scoring configuration: per-step reward bounds, final grader bounds, and the
# weighted components of the episode score.
# NOTE(review): nesting was reconstructed from a whitespace-stripped copy;
# confirm that reward_shaping belongs under scoring rather than at top level.
scoring:
  reward_range: [-1.0, 1.0]  # step rewards (negative preserved for burnout)
  grader_range: [0.01, 0.99]  # final episode scores
  success_threshold: 0.50
  score_formula: deterministic_grader
  # Component weights; the five values sum to 1.00.
  components:
    - weighted_completion: 0.60
    - deadline_adherence: 0.22
    - energy_efficiency: 0.10
    - dependency_bonus: 0.05
    - interruption_bonus: 0.03
  reward_shaping:
    milestone_rewards: [0.25, 0.50, 0.75, 1.00]
    # Equals the lower bound of reward_range.
    burnout_penalty: -1.0
    context_switch_penalty: -0.07
    blocked_task_penalty: -0.15
    stress_penalty_threshold: 0.80
# Resource limits; units are given by each key's suffix (seconds, GB, vCPUs).
constraints:
  max_runtime_seconds: 1800
  max_memory_gb: 8
  max_vcpu: 2
# Inference entry script and the environment variable names listed for it.
# Only the names appear here; values are not stored in this file.
inference:
  script: "inference.py"
  env_vars:
    - API_BASE_URL
    - MODEL_NAME
    - HF_TOKEN