{
"methodology": "Real PyTorch 20-epoch mini-training with fault injection",
"torch_version": "2.5.1+cpu",
"models": [
"SimpleCNN (~67K params)",
"SimpleMLP (~412K params)"
],
"training_approach": "Real forward+backward passes on random CIFAR-10 style data, cached per (task_id, seed)",
"results": [
{
"task": "task_001",
"fault": "exploding_gradients",
"checks": {
"gradient_instability_detected": true,
"loss_shows_instability": true,
"max_gradient_norm": 111.8,
"max_loss": 43.27,
"real_pytorch_training": true
},
"pass": true
},
{
"task": "task_002",
"fault": "vanishing_gradients",
"checks": {
"vanishing_detected": true,
"min_gradient_norm": 0.0,
"real_pytorch_gradients": true
},
"pass": true
},
{
"task": "task_003",
"fault": "data_leakage",
"checks": {
"class_overlap_above_0.5": true,
"class_overlap_score": 0.83,
"real_training_runs": true,
"has_confusion_matrix": true
},
"pass": true
},
{
"task": "task_004",
"fault": "overfitting",
"checks": {
"real_training_runs": true,
"clean_data": true,
"final_train_loss": 0.1017,
"final_val_loss": 2.6519
},
"pass": true
},
{
"task": "task_005",
"fault": "batchnorm_eval_mode",
"checks": {
"all_layers_in_eval_mode": true,
"no_layer_is_exploding": true,
"real_training_runs": true,
"real_model_eval_mode": true,
"red_herring_spike_layer": "conv1"
},
"pass": true
},
{
"task": "task_006",
"fault": "code_bug",
"checks": {
"variants_tested": 4,
"variant_results": {
"eval_mode": {
"correct_fix_accepted": true,
"wrong_fix_rejected": true
},
"detach_loss": {
"correct_fix_accepted": true,
"wrong_fix_rejected": true
},
"zero_grad_missing": {
"correct_fix_accepted": true,
"wrong_fix_rejected": true
},
"inplace_relu": {
"correct_fix_accepted": true,
"wrong_fix_rejected": true
}
},
"fix_validation_pipeline": "normalize -> tokenize -> semantic -> AST"
},
"pass": true
},
{
"task": "task_007",
"fault": "scheduler_misconfigured",
"checks": {
"real_training_runs": true,
"scheduler_gamma": 0.0001,
"scheduler_step_size": 2,
"final_loss": 2.5911
},
"pass": true
},
{
"task": "architecture",
"fault": "dual_model_support",
"checks": {
"cnn_output_shape": [
4,
10
],
"mlp_output_shape": [
4,
10
],
"cnn_params": 66890,
"mlp_params": 411658,
"both_produce_10_classes": true
},
"pass": true
}
],
"summary": {
"total": 8,
"passed": 8,
"failed": 0
}
}