# Manifest for the cognitive-load-manager environment.
# NOTE(review): presumably an OpenEnv spec file (the tags list includes
# `openenv`) — confirm key names against the consuming tool's schema.
spec_version: 1
name: cognitive-load-manager
type: space
# Server framework and entry point. `app` looks like a `module:attribute`
# reference (module `server.app`, attribute `app`) — TODO confirm.
runtime: fastapi
app: server.app:app
# Port number for the server — TODO confirm which component binds it.
port: 7860

# Human-readable summary and catalogue metadata.
# The description is a folded scalar: line breaks below are joined with
# single spaces, so re-wrapping does not alter the resulting string.
description: >
  Cognitive Load Manager (CLM) — a real-world productivity simulation where
  an AI agent acts as a human task scheduler, managing energy, stress, and
  fatigue while completing heterogeneous work items (emails, meetings, code
  reviews, reports, calls) under deadlines. Features task dependencies,
  mid-episode interruptions, focus mode, and priority weighting.
# Quoted so the version stays a string rather than being type-inferred.
version: '2.0.0'
author: 'CLM Team'
tags: [openenv, scheduling, productivity, rl, agent-eval]

# Route path for each endpoint role.
# Note: the `grade` role maps to /grader, not /grade.
endpoints:
  health: /health
  reset: /reset
  step: /step
  state: /state
  grade: /grader

# Actions available to the agent. Entries carrying `requires: task_id`
# (work, focus, switch) name a target task; `break` and `delay` declare no
# `requires` key.
action_space:
  type: discrete
  actions:
    - name: work
      description: 'Work on task_id at normal pace (energy cost varies by task type)'
      requires: task_id
    - name: focus
      description: 'Deep-work mode: 2× progress, 2× energy cost; exits on break'
      requires: task_id
    - name: break
      description: 'Rest: +0.22 energy, -0.18 stress'
    - name: switch
      description: 'Change active task (small context-switch cost)'
      requires: task_id
    - name: delay
      description: 'Wait one step; slight stress reduction'

# Shape of an observation. Every value below is a type descriptor written
# as a string, not live data.
observation_space:
  tasks:
    - id: 'string'
    - task_type: 'email | meeting | code_review | report | call'
    - priority: 'critical | high | normal | low'
    - progress: 'float [0.0, 1.0]'
    - deadline: 'int (step number) or null'
    - depends_on: 'task_id or null'
    - is_interrupted: 'bool'
  visible_state:
    # Partial observability: energy/stress are categorical labels, not raw
    # floats.
    # Energy bands: >0.6 | 0.3-0.6 | <0.3
    - fatigue_level: 'low | medium | high'
    # Stress bands: <0.45 | 0.45-0.75 | >0.75
    - stress_level: 'calm | elevated | critical'
    # True when stress > 0.65
    - stress_warning: 'bool'
    - focus_mode: 'bool'
    - upcoming_deadlines: 'list[task_id]'
    - blocked_tasks: 'list[task_id]'
  time_step: 'int'

# Benchmark scenarios, listed from easy to expert. baseline_score falls as
# difficulty rises (0.856 → 0.221); only `expert` gets 60 steps, the rest 50.
# NOTE(review): `grader` values look like `module:Class` references —
# confirm how the loader resolves them.
# Descriptions are folded scalars, so their line breaks join with spaces.
tasks:
  - id: easy
    difficulty: easy
    description: >
      2 tasks (email + report), normal priority, no deadlines. Agent must
      complete both without burning out. Tests basic work/break balance.
    max_steps: 50
    grader: 'grader.clm_graders:EasyGrader'
    baseline_score: 0.856

  - id: medium
    difficulty: medium
    description: >
      5 heterogeneous tasks (email/meeting/code_review/report/call) with
      mixed priorities (critical→low) and real deadlines. Agent must triage
      intelligently. Tests priority-aware scheduling and deadline management.
    max_steps: 50
    grader: 'grader.clm_graders:MediumGrader'
    baseline_score: 0.523

  - id: hard
    difficulty: hard
    description: >
      8 tasks with explicit dependencies (task B cannot start until task A
      completes), tight deadlines, and 2 mid-episode urgent email
      interruptions. Tests dependency-aware scheduling under time pressure.
    max_steps: 50
    grader: 'grader.clm_graders:HardGrader'
    baseline_score: 0.301

  - id: expert
    difficulty: expert
    description: >
      10 tasks in a deep dependency chain, 3 mid-episode interruptions, mixed
      critical/high/normal priorities, and very tight deadlines. Genuinely
      challenges frontier LLM agents.
    max_steps: 60
    grader: 'grader.clm_graders:ExpertGrader'
    baseline_score: 0.221

# Ranges and composition of step rewards and final episode scores.
scoring:
  # Step rewards (negative preserved for burnout).
  reward_range: [-1.0, 1.0]
  # Final episode scores.
  grader_range: [0.01, 0.99]
  success_threshold: 0.50
  score_formula: deterministic_grader
  # The five component values sum to 1.00 — presumably weights of the final
  # score; confirm against the grader implementation.
  components:
    - weighted_completion: 0.60
    - deadline_adherence: 0.22
    - energy_efficiency: 0.10
    - dependency_bonus: 0.05
    - interruption_bonus: 0.03

# Reward-shaping constants.
# NOTE(review): how each value is applied is defined in the environment
# code, not in this file — confirm there before tuning.
reward_shaping:
  # Four evenly spaced values — presumably task-progress fractions that
  # trigger a milestone reward; TODO confirm.
  milestone_rewards: [0.25, 0.50, 0.75, 1.00]
  # Equal to the lower bound of scoring.reward_range (-1.0).
  burnout_penalty: -1.0
  context_switch_penalty: -0.07
  blocked_task_penalty: -0.15
  # Higher than the 0.65 stress_warning trigger noted under
  # observation_space.visible_state.
  stress_penalty_threshold: 0.80

# Resource limits; units are given by the key names (seconds, GB, vCPU).
constraints:
  # 1800 seconds = 30 minutes.
  max_runtime_seconds: 1800
  max_memory_gb: 8
  max_vcpu: 2

# Inference script and the environment variable names associated with it.
# NOTE(review): only names are listed here — values are presumably read from
# the process environment by the script; confirm. Keep the HF_TOKEN value
# out of version control.
inference:
  script: "inference.py"
  env_vars:
    - API_BASE_URL
    - MODEL_NAME
    - HF_TOKEN