| { |
| "methodology": "Real PyTorch 20-epoch mini-training with fault injection", |
| "torch_version": "2.5.1+cpu", |
| "models": [ |
| "SimpleCNN (~67K params)", |
| "SimpleMLP (~412K params)" |
| ], |
| "training_approach": "Real forward+backward passes on random CIFAR-10 style data, cached per (task_id, seed)", |
| "results": [ |
| { |
| "task": "task_001", |
| "fault": "exploding_gradients", |
| "checks": { |
| "gradient_instability_detected": true, |
| "loss_shows_instability": true, |
| "max_gradient_norm": 111.8, |
| "max_loss": 43.27, |
| "real_pytorch_training": true |
| }, |
| "pass": true |
| }, |
| { |
| "task": "task_002", |
| "fault": "vanishing_gradients", |
| "checks": { |
| "vanishing_detected": true, |
| "min_gradient_norm": 0.0, |
| "real_pytorch_gradients": true |
| }, |
| "pass": true |
| }, |
| { |
| "task": "task_003", |
| "fault": "data_leakage", |
| "checks": { |
| "class_overlap_above_0.5": true, |
| "class_overlap_score": 0.83, |
| "real_training_runs": true, |
| "has_confusion_matrix": true |
| }, |
| "pass": true |
| }, |
| { |
| "task": "task_004", |
| "fault": "overfitting", |
| "checks": { |
| "real_training_runs": true, |
| "clean_data": true, |
| "final_train_loss": 0.1017, |
| "final_val_loss": 2.6519 |
| }, |
| "pass": true |
| }, |
| { |
| "task": "task_005", |
| "fault": "batchnorm_eval_mode", |
| "checks": { |
| "all_layers_in_eval_mode": true, |
| "no_layer_is_exploding": true, |
| "real_training_runs": true, |
| "real_model_eval_mode": true, |
| "red_herring_spike_layer": "conv1" |
| }, |
| "pass": true |
| }, |
| { |
| "task": "task_006", |
| "fault": "code_bug", |
| "checks": { |
| "variants_tested": 4, |
| "variant_results": { |
| "eval_mode": { |
| "correct_fix_accepted": true, |
| "wrong_fix_rejected": true |
| }, |
| "detach_loss": { |
| "correct_fix_accepted": true, |
| "wrong_fix_rejected": true |
| }, |
| "zero_grad_missing": { |
| "correct_fix_accepted": true, |
| "wrong_fix_rejected": true |
| }, |
| "inplace_relu": { |
| "correct_fix_accepted": true, |
| "wrong_fix_rejected": true |
| } |
| }, |
| "fix_validation_pipeline": "normalize -> tokenize -> semantic -> AST" |
| }, |
| "pass": true |
| }, |
| { |
| "task": "task_007", |
| "fault": "scheduler_misconfigured", |
| "checks": { |
| "real_training_runs": true, |
| "scheduler_gamma": 0.0001, |
| "scheduler_step_size": 2, |
| "final_loss": 2.5911 |
| }, |
| "pass": true |
| }, |
| { |
| "task": "architecture", |
| "fault": "dual_model_support", |
| "checks": { |
| "cnn_output_shape": [ |
| 4, |
| 10 |
| ], |
| "mlp_output_shape": [ |
| 4, |
| 10 |
| ], |
| "cnn_params": 66890, |
| "mlp_params": 411658, |
| "both_produce_10_classes": true |
| }, |
| "pass": true |
| } |
| ], |
| "summary": { |
| "total": 8, |
| "passed": 8, |
| "failed": 0 |
| } |
| } |