# Manifest header for the "pytorch-training-debugger" space: identifies the
# spec revision, the space itself, and how its server is started.
# NOTE(review): field semantics are read off the key names only; confirm
# against the consuming loader's schema.
spec_version: 1
name: pytorch-training-debugger
type: space
runtime: fastapi
# Presumably a "module.path:attribute" reference to the application object
# served by the runtime above — TODO confirm.
app: server.app:app
# Unquoted, so it loads as an integer rather than a string.
port: 7860

# Quoted so the version stays a string instead of being type-inferred.
version: "1.1.0"
# Literal block scalar (`|`): line breaks inside the text are preserved and
# the value ends with a single trailing newline.
# The "7 tasks across 3 difficulty tiers" wording mirrors the `tasks` list in
# this file (task_001..task_007; easy/medium/hard) — keep the two in sync.
description: |
  PyTorch-native fault injection engine for training failure debugging.
  An AI agent investigates, diagnoses, fixes, and verifies broken
  training runs using real torch.nn.Module models (CNN + MLP), torch.autograd
  gradients, state_dict() weight inspection, and PyTorch code-level
  debugging. 7 tasks across 3 difficulty tiers with context-gated
  reward shaping, difficulty scaling (1-5), confusion matrices, and
  a live diagnostic dashboard.
framework: openenv
# Free-form labels; presumably used for search/discovery — TODO confirm.
tags:
  - ml-debugging
  - pytorch
  - reinforcement-learning
  - root-cause-analysis
  - fault-injection
  - code-debugging
  - openenv

# Declares what the agent observes. Per the description, details (gradients,
# weights, data stats, model modes, code snippets, confusion matrices) are
# revealed progressively as the agent inspects them.
# NOTE(review): `type` is a bare identifier; presumably the name of a class
# defined in the server code — verify against the implementation.
observation_space:
  type: MLTrainingObservation
  description: "Training run snapshot with progressive reveal — gradients, weights, data stats, model modes, code snippets, and confusion matrices revealed on inspection"

# Declares what the agent can do. Per the description, the set of available
# actions changes dynamically during an episode.
# NOTE(review): `type` presumably names a class in the server code — verify.
action_space:
  type: MLTrainingAction
  description: "Investigation, fix, code-fix, and diagnosis actions with dynamic availability"

# Task catalogue: one list entry per debugging scenario. Every entry carries
# an id, a difficulty tier, a step budget (max_steps), a has_grader flag, and
# param_ranges listing candidate values per scenario parameter.
# NOTE(review): whether param_ranges values are sampled as discrete choices
# or treated as bounds is not visible here — confirm with the task generator.
tasks:
  # Easy tier, 20-step budget. Single varied parameter: learning_rate, with
  # five candidate values from 0.05 up to 0.30.
  - id: task_001
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      learning_rate: [0.05, 0.08, 0.10, 0.15, 0.30]

  # Easy tier, 20-step budget. Varies a very small learning_rate together
  # with a depth_multiplier.
  - id: task_002
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      # Mantissas carry an explicit decimal point on purpose: YAML 1.1 loaders
      # (e.g. PyYAML) only resolve exponent notation as a float when a "." is
      # present, so a bare `1e-6` would load as the string "1e-6" instead of
      # a number. The dotted form is a float under both YAML 1.1 and 1.2.
      learning_rate: [1.0e-6, 5.0e-6, 1.0e-5]
      depth_multiplier: [1.0, 1.5, 2.0]

  # Medium tier, 25-step budget. Single varied parameter: leakage_pct, with
  # four candidate values between 0.12 and 0.28.
  # NOTE(review): values look like fractions (0.12 = 12%) rather than whole
  # percentages — confirm the unit with the task implementation.
  - id: task_003
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      leakage_pct: [0.12, 0.18, 0.22, 0.28]

  # Medium tier, 25-step budget. Varies weight_decay (including 0.0, i.e. no
  # decay) and divergence_epoch.
  # NOTE(review): divergence_epoch is presumably the epoch at which the
  # injected fault becomes visible — confirm with the task implementation.
  - id: task_004
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      weight_decay: [0.0, 0.0001, 0.001]
      divergence_epoch: [5, 8, 12]

  # Hard tier, 30-step budget. Single varied parameter: red_herring_intensity.
  # NOTE(review): this list has only two entries, unlike the 3-5 entry lists
  # on the other tasks; it may be (min, max) bounds rather than discrete
  # choices — confirm how the consumer interprets it.
  - id: task_005
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      red_herring_intensity: [0.8, 2.5]

  # Hard tier, 30-step budget. The varied parameter is categorical: bug_type
  # takes one of four plain-string identifiers rather than a numeric value.
  # NOTE(review): the identifiers presumably must match names known to the
  # task implementation exactly — verify before renaming any of them.
  - id: task_006
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      bug_type: [eval_mode, detach_loss, zero_grad_missing, inplace_relu]

  # Hard tier. Varies scheduler_gamma and scheduler_step_size.
  # NOTE(review): max_steps is 25 here while the other two hard tasks
  # (task_005, task_006) use 30 — confirm the smaller budget is intentional.
  - id: task_007
    difficulty: hard
    max_steps: 25
    has_grader: true
    param_ranges:
      scheduler_gamma: [0.01, 0.001, 0.0001]
      scheduler_step_size: [2, 3, 5]

# Reward-shaping constants.
# NOTE(review): the positive components can sum to
# 0.25 (max_investigation_bonus) + 0.50 (correct_diagnosis)
# + 0.40 (terminal_convergence) = 1.15, which exceeds the declared upper
# bound of 1.0 in `range` — presumably the total is clipped; confirm.
reward:
  range: [-1.0, 1.0]
  shaped: true
  # Negative value; presumably applied once per step — TODO confirm.
  step_penalty: -0.01
  investigation_bonus: 0.05
  # Equals five investigation bonuses (5 x 0.05).
  max_investigation_bonus: 0.25
  correct_diagnosis: 0.50
  terminal_convergence: 0.40

# Routes exposed by the server. All values are quoted strings.
# NOTE(review): `websocket` is a bare path, whereas every other entry uses
# the form "METHOD /path" — confirm the consumer expects both shapes.
endpoints:
  websocket: "/ws"
  tasks: "GET /tasks"
  grader: "POST /grader"
  baseline: "POST /baseline"
  health: "GET /health"
  dashboard: "GET /dashboard"
  validation_report: "GET /validation-report"
  curriculum: "GET /curriculum"
  leaderboard: "GET /leaderboard"
  # `{episode_id}` is a placeholder segment in the path; the quotes keep the
  # braces from being parsed as a YAML flow mapping.
  replay: "GET /replay/{episode_id}"