---
# Top-level identity and launch settings for this environment manifest.
spec_version: 1
name: pytorch-training-debugger
type: space
runtime: fastapi
# Application entry point in module:attribute form.
app: server.app:app
port: 7860
# Quoted so the version is always read as a string, never as a number.
version: "1.1.0"
# Human-readable summary. Literal block scalar: line breaks are preserved and
# the body must be indented deeper than the `description` key.
description: |
  PyTorch-native fault injection engine for training failure debugging.
  An AI agent investigates, diagnoses, fixes, and verifies broken
  training runs using real torch.nn.Module models (CNN + MLP), torch.autograd
  gradients, state_dict() weight inspection, and PyTorch code-level
  debugging. 7 tasks across 3 difficulty tiers with context-gated
  reward shaping, difficulty scaling (1-5), confusion matrices, and
  a live diagnostic dashboard.
# Framework this manifest targets.
framework: openenv
# Free-form discovery labels; plain strings, order is not significant.
tags:
  - ml-debugging
  - pytorch
  - reinforcement-learning
  - root-cause-analysis
  - fault-injection
  - code-debugging
  - openenv
# Observation schema: `type` names the observation class and `description`
# summarises what it carries. Both keys are nested so they do not collide with
# the top-level `type` / `description` keys.
observation_space:
  type: MLTrainingObservation
  description: "Training run snapshot with progressive reveal — gradients, weights, data stats, model modes, code snippets, and confusion matrices revealed on inspection"
# Action schema: `type` names the action class and `description` summarises
# the kinds of actions available. Both keys are nested so they do not collide
# with the top-level `type` / `description` keys.
action_space:
  type: MLTrainingAction
  description: "Investigation, fix, code-fix, and diagnosis actions with dynamic availability"
# Task catalogue. Each entry declares its id, difficulty tier, step budget,
# whether a grader exists, and the candidate values for each tunable parameter
# (`param_ranges`: parameter name -> list of allowed values).
tasks:
  - id: task_001
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      learning_rate: [0.05, 0.08, 0.10, 0.15, 0.30]
  - id: task_002
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      # Exponent floats keep an explicit mantissa dot: YAML 1.1 loaders
      # (e.g. PyYAML) resolve a bare `1e-6` as a string, not a float.
      learning_rate: [1.0e-6, 5.0e-6, 1.0e-5]
      depth_multiplier: [1.0, 1.5, 2.0]
  - id: task_003
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      leakage_pct: [0.12, 0.18, 0.22, 0.28]
  - id: task_004
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      weight_decay: [0.0, 0.0001, 0.001]
      divergence_epoch: [5, 8, 12]
  - id: task_005
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      red_herring_intensity: [0.8, 2.5]
  - id: task_006
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      bug_type: [eval_mode, detach_loss, zero_grad_missing, inplace_relu]
  - id: task_007
    difficulty: hard
    max_steps: 25
    has_grader: true
    param_ranges:
      scheduler_gamma: [0.01, 0.001, 0.0001]
      scheduler_step_size: [2, 3, 5]
# Reward configuration. `range` is a [min, max] pair; the remaining keys are
# the individual reward components declared for this environment.
reward:
  range: [-1.0, 1.0]
  shaped: true
  step_penalty: -0.01
  investigation_bonus: 0.05
  max_investigation_bonus: 0.25
  correct_diagnosis: 0.50
  terminal_convergence: 0.40
# Service endpoints. HTTP routes are written as "<METHOD> <path>"; the
# websocket entry is a bare path.
endpoints:
  websocket: "/ws"
  tasks: "GET /tasks"
  grader: "POST /grader"
  baseline: "POST /baseline"
  health: "GET /health"
  dashboard: "GET /dashboard"
  validation_report: "GET /validation-report"
  curriculum: "GET /curriculum"
  leaderboard: "GET /leaderboard"
  # Quoted because the path template contains `{` / `}` flow indicators.
  replay: "GET /replay/{episode_id}"