{ "methodology": "Real PyTorch 20-epoch mini-training with fault injection", "torch_version": "2.5.1+cpu", "models": [ "SimpleCNN (~67K params)", "SimpleMLP (~412K params)" ], "training_approach": "Real forward+backward passes on random CIFAR-10 style data, cached per (task_id, seed)", "results": [ { "task": "task_001", "fault": "exploding_gradients", "checks": { "gradient_instability_detected": true, "loss_shows_instability": true, "max_gradient_norm": 111.8, "max_loss": 43.27, "real_pytorch_training": true }, "pass": true }, { "task": "task_002", "fault": "vanishing_gradients", "checks": { "vanishing_detected": true, "min_gradient_norm": 0.0, "real_pytorch_gradients": true }, "pass": true }, { "task": "task_003", "fault": "data_leakage", "checks": { "class_overlap_above_0.5": true, "class_overlap_score": 0.83, "real_training_runs": true, "has_confusion_matrix": true }, "pass": true }, { "task": "task_004", "fault": "overfitting", "checks": { "real_training_runs": true, "clean_data": true, "final_train_loss": 0.1017, "final_val_loss": 2.6519 }, "pass": true }, { "task": "task_005", "fault": "batchnorm_eval_mode", "checks": { "all_layers_in_eval_mode": true, "no_layer_is_exploding": true, "real_training_runs": true, "real_model_eval_mode": true, "red_herring_spike_layer": "conv1" }, "pass": true }, { "task": "task_006", "fault": "code_bug", "checks": { "variants_tested": 4, "variant_results": { "eval_mode": { "correct_fix_accepted": true, "wrong_fix_rejected": true }, "detach_loss": { "correct_fix_accepted": true, "wrong_fix_rejected": true }, "zero_grad_missing": { "correct_fix_accepted": true, "wrong_fix_rejected": true }, "inplace_relu": { "correct_fix_accepted": true, "wrong_fix_rejected": true } }, "fix_validation_pipeline": "normalize -> tokenize -> semantic -> AST" }, "pass": true }, { "task": "task_007", "fault": "scheduler_misconfigured", "checks": { "real_training_runs": true, "scheduler_gamma": 0.0001, "scheduler_step_size": 2, "final_loss": 2.5911 }, 
"pass": true }, { "task": "architecture", "fault": "dual_model_support", "checks": { "cnn_output_shape": [ 4, 10 ], "mlp_output_shape": [ 4, 10 ], "cnn_params": 66890, "mlp_params": 411658, "both_produce_10_classes": true }, "pass": true } ], "summary": { "total": 8, "passed": 8, "failed": 0 } }