# UjjwalPardeshi
# fix: add has_grader: true to all tasks for evaluator grader check
# fc3fbaf
# Manifest identity and server runtime settings.
spec_version: 1
name: "pytorch-training-debugger"
type: "space"
runtime: "fastapi"
# NOTE(review): looks like a "module:attribute" application path — confirm
# against whatever loads this manifest.
app: "server.app:app"
port: 7860
# Kept as a quoted string so it is never re-typed as a number.
version: "1.1.0"
# Human-readable summary of the environment. This is a literal block scalar
# (`|`): the body must be indented deeper than the key, and the line breaks
# below are preserved in the parsed value.
description: |
  PyTorch-native fault injection engine for training failure debugging.
  An AI agent investigates, diagnoses, fixes, and verifies broken
  training runs using real torch.nn.Module models (CNN + MLP), torch.autograd
  gradients, state_dict() weight inspection, and PyTorch code-level
  debugging. 7 tasks across 3 difficulty tiers with context-gated
  reward shaping, difficulty scaling (1-5), confusion matrices, and
  a live diagnostic dashboard.
framework: openenv
# Tag labels attached to this manifest (plain strings).
tags:
  - ml-debugging
  - pytorch
  - reinforcement-learning
  - root-cause-analysis
  - fault-injection
  - code-debugging
  - openenv
# Observation type declaration. `type` and `description` are nested under
# this key so they do not collide with the top-level keys of the same name.
observation_space:
  type: MLTrainingObservation
  description: "Training run snapshot with progressive reveal — gradients, weights, data stats, model modes, code snippets, and confusion matrices revealed on inspection"
# Action type declaration. `type` and `description` are nested under this
# key so they do not collide with the top-level keys of the same name.
action_space:
  type: MLTrainingAction
  description: "Investigation, fix, code-fix, and diagnosis actions with dynamic availability"
# Task catalogue: one list item per task. Each item carries an id, a
# difficulty tier, a `max_steps` value, a `has_grader` flag, and the
# per-parameter value lists under `param_ranges`.
# NOTE(review): whether a `param_ranges` list is a set of discrete choices or
# a [min, max] pair is not stated in this file — confirm with the consumer.
tasks:
  - id: task_001
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      learning_rate: [0.05, 0.08, 0.10, 0.15, 0.30]
  - id: task_002
    difficulty: easy
    max_steps: 20
    has_grader: true
    param_ranges:
      # Written with an explicit decimal point: YAML 1.1 loaders (e.g.
      # PyYAML) resolve `1e-6` without a dot as a string, not a float.
      learning_rate: [1.0e-6, 5.0e-6, 1.0e-5]
      depth_multiplier: [1.0, 1.5, 2.0]
  - id: task_003
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      leakage_pct: [0.12, 0.18, 0.22, 0.28]
  - id: task_004
    difficulty: medium
    max_steps: 25
    has_grader: true
    param_ranges:
      weight_decay: [0.0, 0.0001, 0.001]
      divergence_epoch: [5, 8, 12]
  - id: task_005
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      red_herring_intensity: [0.8, 2.5]
  - id: task_006
    difficulty: hard
    max_steps: 30
    has_grader: true
    param_ranges:
      bug_type: [eval_mode, detach_loss, zero_grad_missing, inplace_relu]
  - id: task_007
    difficulty: hard
    max_steps: 25
    has_grader: true
    param_ranges:
      scheduler_gamma: [0.01, 0.001, 0.0001]
      scheduler_step_size: [2, 3, 5]
# Reward configuration. All settings are nested under `reward` so they are
# read as one mapping instead of as unrelated top-level keys.
reward:
  range: [-1.0, 1.0]
  shaped: true
  step_penalty: -0.01
  investigation_bonus: 0.05
  max_investigation_bonus: 0.25
  correct_diagnosis: 0.50
  terminal_convergence: 0.40
# Routes keyed by purpose. HTTP entries are written as "METHOD /path"; the
# websocket entry is a bare path. Values stay quoted because they contain
# spaces, slashes, and (for replay) braces.
endpoints:
  websocket: "/ws"
  tasks: "GET /tasks"
  grader: "POST /grader"
  baseline: "POST /baseline"
  health: "GET /health"
  dashboard: "GET /dashboard"
  validation_report: "GET /validation-report"
  curriculum: "GET /curriculum"
  leaderboard: "GET /leaderboard"
  replay: "GET /replay/{episode_id}"