# Manifest for the pytorch-training-debugger environment (framework: openenv).
# Declares how the app is launched, the observation/action types, the task
# catalogue, the reward shaping constants, and the HTTP/WebSocket endpoints.
spec_version: 1
name: pytorch-training-debugger
type: space
runtime: fastapi
# ASGI import path in "module:attribute" form.
app: server.app:app
port: 7860

# Quoted so the version stays a string rather than being type-inferred.
version: "1.1.0"
description: |
  PyTorch-native fault injection engine for training failure debugging.
  An AI agent investigates, diagnoses, fixes, and verifies broken
  training runs using real torch.nn.Module models (CNN + MLP), torch.autograd
  gradients, state_dict() weight inspection, and PyTorch code-level
  debugging. 7 tasks across 3 difficulty tiers with context-gated
  reward shaping, difficulty scaling (1-5), confusion matrices, and
  a live diagnostic dashboard.
framework: openenv
tags:
  - ml-debugging
  - pytorch
  - reinforcement-learning
  - root-cause-analysis
  - fault-injection
  - code-debugging
  - openenv

observation_space:
  type: MLTrainingObservation
  description: "Training run snapshot with progressive reveal — gradients, weights, data stats, model modes, code snippets, and confusion matrices revealed on inspection"

action_space:
  type: MLTrainingAction
  description: "Investigation, fix, code-fix, and diagnosis actions with dynamic availability"

# Task catalogue: 7 tasks over three difficulty tiers (easy / medium / hard).
# NOTE(review): param_ranges presumably lists the candidate values for each
# injected-fault parameter — confirm against the task loader.
tasks:
  - id: task_001
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      learning_rate: [0.05, 0.08, 0.10, 0.15, 0.30]

  - id: task_002
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      # Written with an explicit mantissa dot (1.0e-6, not 1e-6): YAML 1.1
      # loaders such as PyYAML resolve the dotless form as a string, not a float.
      learning_rate: [1.0e-6, 5.0e-6, 1.0e-5]
      depth_multiplier: [1.0, 1.5, 2.0]

  - id: task_003
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      leakage_pct: [0.12, 0.18, 0.22, 0.28]

  - id: task_004
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      weight_decay: [0.0, 0.0001, 0.001]
      divergence_epoch: [5, 8, 12]

  - id: task_005
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      # NOTE(review): only two values here — unclear whether this is a discrete
      # choice list like the other tasks or a [min, max] interval; confirm.
      red_herring_intensity: [0.8, 2.5]

  - id: task_006
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      bug_type: [eval_mode, detach_loss, zero_grad_missing, inplace_relu]

  - id: task_007
    difficulty: hard
    max_steps: 25
    has_grader: true
    param_ranges:
      scheduler_gamma: [0.01, 0.001, 0.0001]
      scheduler_step_size: [2, 3, 5]

# Reward shaping constants.
# NOTE(review): max_investigation_bonus + correct_diagnosis +
# terminal_convergence = 1.15, which exceeds the declared range maximum of 1.0;
# presumably the total is clipped to `range` — confirm in the grader.
reward:
  range: [-1.0, 1.0]
  shaped: true
  step_penalty: -0.01
  investigation_bonus: 0.05
  max_investigation_bonus: 0.25
  correct_diagnosis: 0.50
  terminal_convergence: 0.40

# Routes exposed by the app; HTTP entries are "<METHOD> <path>".
endpoints:
  websocket: "/ws"
  tasks: "GET /tasks"
  grader: "POST /grader"
  baseline: "POST /baseline"
  health: "GET /health"
  dashboard: "GET /dashboard"
  validation_report: "GET /validation-report"
  curriculum: "GET /curriculum"
  leaderboard: "GET /leaderboard"
  replay: "GET /replay/{episode_id}"