AbstractPhil commited on
Commit
d7c88e7
Β·
verified Β·
1 Parent(s): 478db65

Upload weights and configs - David-fully_shared-weighted_sum - Run 20251012_135249

Browse files
weights/David-fully_shared-weighted_sum/20251012_135249/MODEL_SUMMARY.txt CHANGED
@@ -3,24 +3,24 @@
3
  β•‘ DAVID MODEL SUMMARY β•‘
4
  ╠══════════════════════════════════════════════════════════════╣
5
  β•‘ β•‘
6
- β•‘ 🎯 VALIDATION ACCURACY: 71.78% β•‘
7
  β•‘ β•‘
8
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
9
 
10
  MODEL: David-fully_shared-weighted_sum
11
  RUN ID: 20251012_135249
12
- BEST EPOCH: 8/10
13
 
14
  ═══════════════════════════════════════════════════════════════
15
 
16
  πŸ“Š PERFORMANCE BREAKDOWN
17
 
18
- Final Training Accuracy: 72.75%
19
- Best Validation Accuracy: 71.78%
20
 
21
  Per-Scale Accuracies:
22
- β€’ Scale 256: 71.49%
23
- β€’ Scale 512: 71.69%
24
 
25
  ═══════════════════════════════════════════════════════════════
26
 
@@ -46,17 +46,18 @@ Epoch | Train Acc | Val Acc | Learning Rate
46
  5 | 71.36% | 70.97% | 5.01e-04
47
  6 | 71.84% | 71.46% | 3.46e-04
48
  7 | 72.29% | 71.77% | 2.07e-04
49
- 8 | 72.75% | 71.78% πŸ‘‘ | 9.64e-05
 
50
 
51
  ═══════════════════════════════════════════════════════════════
52
 
53
  πŸ“ FILES
54
 
55
- Best Model: best_model_acc71.78.safetensors
56
  Config: david_config.json
57
  Training Cfg: train_config.json
58
  History: training_history.json
59
 
60
  ═══════════════════════════════════════════════════════════════
61
 
62
- Generated: 2025-10-12 14:06:21
 
3
  β•‘ DAVID MODEL SUMMARY β•‘
4
  ╠══════════════════════════════════════════════════════════════╣
5
  β•‘ β•‘
6
+ β•‘ 🎯 VALIDATION ACCURACY: 71.91% β•‘
7
  β•‘ β•‘
8
  β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
9
 
10
  MODEL: David-fully_shared-weighted_sum
11
  RUN ID: 20251012_135249
12
+ BEST EPOCH: 9/10
13
 
14
  ═══════════════════════════════════════════════════════════════
15
 
16
  πŸ“Š PERFORMANCE BREAKDOWN
17
 
18
+ Final Training Accuracy: 73.02%
19
+ Best Validation Accuracy: 71.91%
20
 
21
  Per-Scale Accuracies:
22
+ β€’ Scale 256: 71.57%
23
+ β€’ Scale 512: 71.71%
24
 
25
  ═══════════════════════════════════════════════════════════════
26
 
 
46
  5 | 71.36% | 70.97% | 5.01e-04
47
  6 | 71.84% | 71.46% | 3.46e-04
48
  7 | 72.29% | 71.77% | 2.07e-04
49
+ 8 | 72.75% | 71.78% | 9.64e-05
50
+ 9 | 73.02% | 71.91% πŸ‘‘ | 2.54e-05
51
 
52
  ═══════════════════════════════════════════════════════════════
53
 
54
  πŸ“ FILES
55
 
56
+ Best Model: best_model_acc71.91.safetensors
57
  Config: david_config.json
58
  Training Cfg: train_config.json
59
  History: training_history.json
60
 
61
  ═══════════════════════════════════════════════════════════════
62
 
63
+ Generated: 2025-10-12 14:08:03
weights/David-fully_shared-weighted_sum/20251012_135249/best_model_acc71.91.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5cd333eab56f48ed4340554d416262c27ef15daf83daa939444f82a0a6b6b31
3
+ size 2628344
weights/David-fully_shared-weighted_sum/20251012_135249/best_model_acc71.91_metadata.json ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(11268.)",
7
+ "exp_avg": "tensor([[ 1.6730e-04, -3.5845e-03, -2.9463e-04, ..., 5.4469e-05,\n 1.8532e-04, 1.8314e-03],\n [ 1.9663e-04, -1.2293e-04, -5.9021e-04, ..., 1.4957e-03,\n 1.7826e-04, 6.0047e-04],\n [-2.0832e-03, 7.9586e-04, -4.6102e-04, ..., 1.6200e-03,\n -7.5037e-05, -7.4932e-04],\n ...,\n [-1.0317e-03, 1.3865e-04, 2.0382e-03, ..., -4.5965e-05,\n -7.8645e-04, 1.7134e-03],\n [-3.6024e-04, -2.3257e-03, 1.8227e-03, ..., 3.0624e-04,\n -2.0507e-06, 1.2756e-04],\n [ 3.4405e-04, -4.4155e-03, -9.5354e-04, ..., -1.4266e-03,\n -4.3076e-04, -1.9174e-04]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[6.0221e-06, 3.2733e-05, 1.6837e-05, ..., 6.1610e-06, 3.1043e-06,\n 8.1782e-06],\n [5.5283e-06, 2.4316e-05, 1.8268e-05, ..., 8.3223e-06, 2.2850e-06,\n 9.1605e-06],\n [1.1135e-05, 6.4293e-05, 2.1208e-05, ..., 6.1934e-06, 3.2528e-06,\n 4.0694e-06],\n ...,\n [6.3581e-06, 2.4659e-05, 1.4418e-05, ..., 3.4166e-06, 1.8933e-06,\n 2.8011e-06],\n [7.2142e-06, 4.4596e-05, 1.5713e-05, ..., 7.1391e-06, 3.7922e-06,\n 6.0627e-06],\n [3.9601e-06, 4.7474e-05, 1.1590e-05, ..., 3.0785e-06, 1.8115e-06,\n 2.1169e-06]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(11268.)",
12
+ "exp_avg": "tensor([ 2.3417e-02, 2.4568e-02, -2.4948e-02, -3.8981e-03, 1.4236e-02,\n -1.3251e-03, 2.5327e-02, -3.8716e-02, 4.5635e-02, 4.1084e-02,\n 6.2685e-03, 1.1092e-02, 1.4601e-02, 4.6408e-03, 2.3364e-02,\n -1.9685e-02, -1.0079e-03, 4.3966e-02, -4.2904e-02, -2.5925e-02,\n 2.3419e-02, 8.4815e-03, -1.7112e-02, 8.1780e-03, 8.9684e-03,\n -2.0205e-03, -3.1504e-02, -4.6504e-03, 8.0569e-03, -3.9166e-03,\n 2.4622e-02, -1.9405e-02, -8.1325e-03, -1.3363e-02, -8.4387e-04,\n 1.4350e-02, 1.1040e-02, -1.6993e-02, 1.6805e-02, -6.3212e-03,\n 4.9554e-02, 1.8274e-02, 7.2142e-03, -1.3180e-02, -1.9856e-02,\n -6.7373e-03, -1.3154e-02, 1.9755e-02, 6.3563e-03, -2.5880e-02,\n -2.6455e-02, 8.4570e-03, 1.2293e-02, -6.4048e-03, -5.3977e-03,\n 1.5325e-02, 1.1222e-03, -1.8461e-02, 6.1249e-03, -1.3892e-02,\n 2.1076e-02, 2.6032e-02, 1.8742e-02, 3.5452e-02, -5.4550e-03,\n -3.3830e-02, 1.6358e-02, -7.3318e-03, 7.1393e-03, 7.1724e-03,\n 2.2005e-03, 1.7771e-03, 1.5879e-02, 7.7530e-03, 2.9091e-02,\n 1.1877e-04, -6.9003e-03, -1.3296e-02, -4.6233e-03, 1.9011e-02,\n 1.0496e-03, 1.5354e-02, 1.7245e-02, 2.3618e-02, 2.6573e-02,\n -9.7724e-03, -7.0901e-04, 1.7073e-02, 1.6063e-02, 2.7890e-04,\n 5.9407e-03, 2.1000e-02, 1.7279e-02, 1.0560e-02, 5.1744e-03,\n 4.9364e-03, 2.7571e-03, 4.0429e-03, -7.3446e-04, -1.0896e-02,\n 1.4041e-02, -1.2464e-02, 4.3924e-04, 1.1170e-02, 2.3301e-02,\n 2.9854e-03, 2.0234e-02, 1.2097e-02, -4.2271e-03, -3.6744e-02,\n -1.1477e-02, -9.5774e-02, 2.4953e-02, 1.5827e-02, 1.2379e-02,\n 2.9007e-02, 5.0457e-02, -3.5164e-02, -7.8931e-04, -4.6327e-02,\n -9.0779e-03, 2.0608e-03, 1.3435e-02, 1.2624e-02, -4.2322e-02,\n 1.1696e-02, 7.3104e-03, -7.5815e-03, 1.7814e-02, 2.0646e-03,\n -9.3445e-03, 2.6580e-02, 1.8488e-02, 3.1770e-02, -9.2026e-04,\n 1.2714e-02, -3.1506e-03, 9.2296e-03, 9.4015e-03, -3.5044e-02,\n -3.0579e-02, 8.5404e-03, 1.6727e-02, -1.5936e-02, 2.4565e-02,\n 5.9315e-03, 8.9090e-03, 5.1917e-04, 1.4244e-02, -2.7509e-02,\n -8.0235e-04, 1.0796e-02, -5.9597e-03, 3.7340e-02, -2.6956e-03,\n 1.0407e-02, 1.3774e-02, 1.8758e-02, -1.0442e-02, 7.4692e-03,\n 1.5819e-04, -1.3350e-03, -1.5845e-03, -9.8105e-03, 6.5078e-04,\n -2.6122e-02, 5.4580e-03, -1.1448e-02, 4.4871e-02, 2.2683e-02,\n -1.0429e-02, 2.2779e-02, -2.9393e-02, 3.2069e-04, -3.4460e-02,\n -8.5228e-04, 1.4744e-03, 1.0110e-02, -3.2068e-02, -3.3942e-02,\n 1.0565e-02, 1.9476e-02, 2.5715e-03, 1.1491e-02, 3.0778e-03,\n -1.2419e-02, 9.9467e-03, 2.5693e-03, 4.3628e-03, 1.1604e-02,\n -1.4779e-02, 8.8604e-03, -5.5464e-03, 1.6012e-02, 9.4487e-03,\n 2.3047e-02, -1.9399e-02, -9.4726e-03, 2.8240e-03, 1.4161e-03,\n 2.8132e-02, -9.2503e-03, 3.6927e-04, -1.8092e-02, 2.0011e-02,\n 2.4054e-02, -1.0017e-03, 4.1355e-05, 3.0499e-03, 2.6769e-02,\n 2.0352e-02, 1.8197e-02, -5.2026e-03, -1.7787e-02, 1.0246e-02,\n -2.2973e-02, -1.0475e-02, -9.3069e-03, 3.9987e-04, 1.7289e-02,\n 4.5149e-03, -4.7103e-02, 2.8241e-02, -1.5305e-02, 8.8425e-03,\n -1.0419e-02, 2.6158e-03, 3.0699e-02, 8.3658e-03, 2.5466e-03,\n -1.3101e-02, 1.4214e-02, -3.7761e-04, 2.7105e-02, -3.5094e-02,\n -2.9760e-03, -2.2416e-02, 1.6855e-02, 3.0014e-02, -8.5159e-03,\n -1.4484e-02, -1.0927e-02, 3.7028e-03, 5.7203e-03, 8.7947e-03,\n -1.1759e-02, -1.2740e-02, -2.8305e-02, 7.0330e-03, -2.7081e-02,\n -8.6393e-02, -2.5057e-02, 8.0040e-03, -1.3091e-02, 1.3749e-02,\n 1.5344e-02, 1.4968e-02, 1.3072e-03, 1.2245e-02, -4.1840e-03,\n -3.6183e-03, -9.9598e-03, -1.7933e-02, -1.7962e-02, 1.1602e-02,\n -1.1091e-02, -3.6709e-02, -2.5325e-02, -1.3609e-02, 8.8257e-03,\n 9.8987e-03, -6.5019e-03, -2.3896e-02, 1.2553e-02, 7.6524e-03,\n -2.9426e-02, -2.3711e-02, -1.3833e-02, 7.3366e-03, -2.3608e-02,\n -2.4407e-02, -2.0078e-02, -1.5352e-02, 1.1377e-02, 1.5055e-02,\n -1.9883e-02, -2.1071e-02, -9.3366e-03, -3.4556e-02, 1.7653e-02,\n -8.6176e-03, -3.0723e-02, -4.1446e-02, -4.3035e-02, 4.9087e-03,\n -1.0463e-02, -8.5456e-03, -1.8274e-02, -2.0428e-02, 3.2974e-02,\n 4.2646e-03, 3.0269e-02, -3.3250e-02, 3.3279e-02, 3.1356e-02,\n 5.0759e-03, 4.2344e-03, 2.2810e-02, -3.0778e-03, 6.0539e-03,\n 3.0022e-02, 2.8016e-02, 2.3003e-02, 5.7731e-02, 1.9192e-02,\n -5.8274e-03, -1.4113e-02, 5.4328e-03, -7.0662e-03, -5.9251e-03,\n -2.0554e-02, 1.0130e-02, 9.7318e-03, -5.5134e-03, 1.3090e-02,\n 2.2694e-02, 1.5230e-02, -9.0117e-03, -4.6628e-03, 1.1780e-02,\n -4.4631e-02, -2.6486e-02, 6.7319e-03, 9.0405e-03, 1.4729e-02,\n 1.2131e-02, -6.5690e-03, -1.0547e-03, -1.0715e-02, 6.9890e-04,\n 1.6102e-02, -8.1128e-03, -1.6691e-02, 1.3611e-02, -3.8144e-02,\n -1.4594e-02, -1.1065e-02, 8.0046e-03, -7.6482e-03, -3.8973e-02,\n -9.7575e-03, 9.7779e-03, -5.7173e-03, 9.5195e-03, -3.2671e-03,\n 7.2250e-03, 1.7442e-02, -2.9992e-02, -1.0860e-03, -1.6720e-02,\n 1.7207e-02, 5.6576e-03, -4.3277e-03, -1.3699e-02, -1.5863e-02,\n 4.2251e-03, 2.8606e-02, -1.8297e-02, 5.9028e-03, 5.2723e-03,\n 7.0744e-03, 2.6067e-02, 2.8844e-04, 2.0400e-03, -1.0353e-02,\n 5.6203e-03, 1.6595e-02, -2.3032e-02, -3.6138e-03, 2.9388e-02,\n -1.1867e-04, 1.3212e-02, 4.1837e-03, 8.2909e-03, 3.7230e-02,\n 1.7210e-02, -3.3278e-02, 2.9877e-02, 1.1760e-02, -6.2272e-03,\n -5.2627e-02, 4.2746e-03, -9.4480e-03, -5.8680e-02, -2.3676e-02,\n 1.7897e-02, -5.3967e-03, 4.4075e-02, -6.4443e-03, 1.6696e-02,\n -2.2963e-03, 6.6957e-03, 9.1096e-03, -8.4349e-02, 1.7080e-02,\n 2.2908e-02, 1.9319e-02, 8.5321e-03, 2.0333e-03, 9.6435e-03,\n 1.9194e-03, 5.9562e-03, 2.8823e-02, 1.4122e-02, -7.3116e-03,\n 1.7836e-02, 4.7562e-02, -3.7067e-02, -1.7976e-02, -4.2345e-05,\n -9.0524e-04, 1.2399e-02, -8.2412e-03, -2.7185e-02, 1.4940e-03,\n -5.2218e-03, -2.5443e-02, -8.2259e-03, 8.3559e-04, 2.3527e-02,\n 7.1882e-04, -1.6138e-02, 6.0001e-03, 2.7678e-02, -1.3036e-03,\n -1.2300e-02, -6.0173e-03, 2.2566e-03, -1.6208e-02, 5.2931e-03,\n 1.5479e-02, -5.8386e-02, 2.4551e-03, 2.8743e-02, 2.9157e-03,\n 9.7359e-03, 1.4621e-03, -1.2678e-02, -1.5211e-02, -2.3293e-02,\n -2.7286e-05, 1.8583e-02, 1.7581e-02, -3.0441e-03, -3.9659e-02,\n -1.3483e-03, 1.5743e-03, -2.2535e-02, 5.8813e-03, -7.8293e-05,\n -3.1185e-03, -1.0948e-02, 1.8998e-02, 3.3024e-03, -2.0726e-02,\n -7.8829e-03, -9.6723e-03, -1.0459e-02, 2.1232e-02, 3.1223e-02,\n 1.2489e-02, -7.0425e-03, 1.9629e-03, 1.1051e-02, -4.6408e-02,\n 2.7816e-03, 7.1215e-03, -2.5369e-02, 1.9043e-02, -1.4511e-02,\n 1.7568e-02, 8.1064e-03, -1.0444e-02, -1.0373e-02, -4.4138e-02,\n -7.6848e-03, -1.6807e-02, -5.3552e-03, 3.5353e-03, 1.7877e-02,\n -1.3080e-03, 2.1461e-02, 1.9499e-03, -4.2106e-03, -3.6306e-02,\n -1.5678e-02, 8.5211e-03, 9.1395e-03, -1.0793e-02, -8.1219e-04,\n 8.2307e-04, 3.4270e-03, -2.7472e-02, 1.2197e-02, -1.1212e-02,\n 2.2751e-02, 1.8279e-03, -1.0927e-02, -2.3278e-03, -2.2223e-02,\n -3.2218e-02, -3.3266e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0048, 0.0050, 0.0053, 0.0029, 0.0065, 0.0031, 0.0034, 0.0032, 0.0059,\n 0.0040, 0.0027, 0.0039, 0.0046, 0.0032, 0.0040, 0.0036, 0.0012, 0.0021,\n 0.0027, 0.0029, 0.0033, 0.0037, 0.0042, 0.0045, 0.0052, 0.0038, 0.0041,\n 0.0035, 0.0051, 0.0016, 0.0034, 0.0047, 0.0048, 0.0031, 0.0037, 0.0032,\n 0.0024, 0.0039, 0.0033, 0.0031, 0.0060, 0.0034, 0.0036, 0.0047, 0.0037,\n 0.0051, 0.0079, 0.0045, 0.0034, 0.0048, 0.0029, 0.0038, 0.0044, 0.0021,\n 0.0065, 0.0045, 0.0025, 0.0049, 0.0031, 0.0034, 0.0034, 0.0067, 0.0053,\n 0.0027, 0.0024, 0.0025, 0.0033, 0.0049, 0.0086, 0.0060, 0.0051, 0.0032,\n 0.0068, 0.0024, 0.0040, 0.0070, 0.0040, 0.0039, 0.0040, 0.0083, 0.0062,\n 0.0031, 0.0055, 0.0046, 0.0032, 0.0035, 0.0013, 0.0080, 0.0046, 0.0042,\n 0.0025, 0.0034, 0.0026, 0.0043, 0.0059, 0.0038, 0.0051, 0.0050, 0.0040,\n 0.0046, 0.0030, 0.0032, 0.0047, 0.0018, 0.0031, 0.0031, 0.0063, 0.0028,\n 0.0046, 0.0035, 0.0017, 0.0058, 0.0036, 0.0023, 0.0025, 0.0025, 0.0042,\n 0.0056, 0.0024, 0.0038, 0.0027, 0.0026, 0.0038, 0.0049, 0.0054, 0.0044,\n 0.0049, 0.0034, 0.0050, 0.0029, 0.0045, 0.0034, 0.0035, 0.0032, 0.0045,\n 0.0026, 0.0029, 0.0018, 0.0047, 0.0074, 0.0051, 0.0027, 0.0032, 0.0051,\n 0.0050, 0.0031, 0.0039, 0.0042, 0.0046, 0.0049, 0.0031, 0.0042, 0.0028,\n 0.0026, 0.0019, 0.0024, 0.0045, 0.0042, 0.0025, 0.0038, 0.0031, 0.0035,\n 0.0034, 0.0035, 0.0046, 0.0044, 0.0018, 0.0028, 0.0043, 0.0037, 0.0044,\n 0.0033, 0.0022, 0.0035, 0.0046, 0.0026, 0.0035, 0.0036, 0.0021, 0.0037,\n 0.0051, 0.0080, 0.0020, 0.0043, 0.0065, 0.0042, 0.0016, 0.0022, 0.0039,\n 0.0034, 0.0022, 0.0024, 0.0061, 0.0039, 0.0042, 0.0037, 0.0028, 0.0031,\n 0.0034, 0.0092, 0.0022, 0.0038, 0.0025, 0.0051, 0.0033, 0.0031, 0.0022,\n 0.0026, 0.0033, 0.0049, 0.0023, 0.0047, 0.0051, 0.0077, 0.0041, 0.0024,\n 0.0038, 0.0024, 0.0034, 0.0047, 0.0023, 0.0043, 0.0060, 0.0052, 0.0018,\n 0.0036, 0.0039, 0.0046, 0.0038, 0.0062, 0.0039, 0.0030, 0.0047, 0.0025,\n 0.0037, 0.0045, 0.0044, 0.0041, 0.0036, 0.0083, 0.0056, 0.0031, 0.0037,\n 0.0030, 0.0027, 0.0050, 0.0032, 0.0039, 0.0024, 0.0063, 0.0028, 0.0032,\n 0.0017, 0.0087, 0.0043, 0.0043, 0.0077, 0.0030, 0.0041, 0.0039, 0.0041,\n 0.0070, 0.0029, 0.0028, 0.0018, 0.0042, 0.0029, 0.0050, 0.0019, 0.0031,\n 0.0024, 0.0024, 0.0049, 0.0037, 0.0031, 0.0065, 0.0016, 0.0029, 0.0079,\n 0.0037, 0.0033, 0.0046, 0.0038, 0.0027, 0.0063, 0.0047, 0.0036, 0.0042,\n 0.0057, 0.0030, 0.0035, 0.0057, 0.0039, 0.0042, 0.0061, 0.0049, 0.0051,\n 0.0044, 0.0063, 0.0061, 0.0045, 0.0028, 0.0035, 0.0026, 0.0030, 0.0033,\n 0.0042, 0.0028, 0.0055, 0.0039, 0.0034, 0.0045, 0.0027, 0.0038, 0.0028,\n 0.0050, 0.0021, 0.0052, 0.0036, 0.0028, 0.0048, 0.0073, 0.0035, 0.0027,\n 0.0042, 0.0029, 0.0034, 0.0036, 0.0023, 0.0039, 0.0057, 0.0035, 0.0034,\n 0.0029, 0.0027, 0.0029, 0.0036, 0.0040, 0.0051, 0.0049, 0.0035, 0.0035,\n 0.0031, 0.0034, 0.0045, 0.0047, 0.0028, 0.0032, 0.0031, 0.0036, 0.0042,\n 0.0067, 0.0031, 0.0046, 0.0023, 0.0039, 0.0051, 0.0041, 0.0025, 0.0038,\n 0.0038, 0.0042, 0.0039, 0.0022, 0.0031, 0.0031, 0.0027, 0.0073, 0.0027,\n 0.0026, 0.0028, 0.0079, 0.0049, 0.0043, 0.0037, 0.0026, 0.0039, 0.0030,\n 0.0042, 0.0031, 0.0024, 0.0041, 0.0034, 0.0043, 0.0052, 0.0031, 0.0025,\n 0.0041, 0.0051, 0.0017, 0.0042, 0.0073, 0.0033, 0.0038, 0.0027, 0.0053,\n 0.0024, 0.0031, 0.0074, 0.0020, 0.0021, 0.0033, 0.0037, 0.0050, 0.0047,\n 0.0067, 0.0035, 0.0030, 0.0046, 0.0024, 0.0049, 0.0049, 0.0055, 0.0028,\n 0.0061, 0.0029, 0.0037, 0.0028, 0.0054, 0.0028, 0.0042, 0.0035, 0.0046,\n 0.0056, 0.0062, 0.0036, 0.0072, 0.0056, 0.0028, 0.0024, 0.0043, 0.0027,\n 0.0030, 0.0105, 0.0024, 0.0044, 0.0044, 0.0050, 0.0052, 0.0048, 0.0052,\n 0.0068, 0.0037, 0.0033, 0.0026, 0.0032, 0.0044, 0.0027, 0.0030, 0.0030,\n 0.0017, 0.0065, 0.0037, 0.0044, 0.0032, 0.0057, 0.0039, 0.0027, 0.0041,\n 0.0043, 0.0045, 0.0055, 0.0032, 0.0052, 0.0013, 0.0041, 0.0028, 0.0038,\n 0.0037, 0.0052, 0.0046, 0.0031, 0.0055, 0.0035, 0.0035, 0.0026, 0.0032,\n 0.0040, 0.0035, 0.0030, 0.0031, 0.0037, 0.0028, 0.0034, 0.0032, 0.0039,\n 0.0027, 0.0030, 0.0073, 0.0046, 0.0006, 0.0043, 0.0042, 0.0032, 0.0050,\n 0.0045, 0.0045, 0.0046, 0.0050, 0.0041, 0.0027, 0.0035, 0.0045, 0.0030,\n 0.0046, 0.0054, 0.0039, 0.0029, 0.0041, 0.0038, 0.0054, 0.0028],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(11268.)",
17
+ "exp_avg": "tensor([ 4.6031e-03, 3.8414e-03, -3.2292e-03, -3.1939e-03, 9.1023e-04,\n -2.3513e-04, 4.5959e-03, -7.6311e-03, 4.4207e-03, 8.1163e-03,\n 3.5375e-04, 3.0676e-03, 1.8497e-03, 1.0620e-03, 2.1356e-03,\n -3.0428e-03, -1.8022e-03, 1.3218e-02, -9.5368e-03, -6.0689e-03,\n 6.2057e-03, 3.6467e-03, -3.6197e-03, 1.4236e-04, 2.5245e-03,\n 7.4866e-04, -4.5217e-03, -3.5070e-03, 5.5272e-04, -1.6563e-03,\n 3.6899e-03, -1.9559e-03, -1.5528e-03, -2.0160e-03, 6.1850e-05,\n 1.8337e-03, 3.0109e-03, -4.2826e-03, 1.9704e-03, -2.6499e-03,\n 7.2248e-03, 4.4385e-03, 4.9552e-04, -2.9510e-03, -5.2398e-03,\n -7.9122e-04, -1.9608e-03, 3.1324e-03, 1.4843e-03, -4.8122e-03,\n -9.7687e-03, 3.7534e-03, 1.6650e-03, -2.3824e-03, -1.1616e-03,\n 3.7344e-03, -1.9324e-03, -3.6173e-03, 1.0841e-03, -1.8348e-03,\n 5.7453e-03, 1.4949e-03, 5.0766e-03, 1.0625e-02, 3.0771e-04,\n -9.9718e-03, 4.4277e-03, -2.2248e-03, -4.5703e-05, 1.0593e-03,\n -1.2005e-03, 1.4690e-03, 4.8255e-04, 1.4839e-05, 5.1279e-03,\n 3.4938e-04, -1.6224e-03, -2.3893e-03, -1.3471e-03, 3.0477e-03,\n 1.8052e-05, 1.6305e-03, 1.9233e-03, 3.5647e-03, 5.4279e-03,\n -3.2336e-03, 7.8011e-06, 2.0587e-03, 1.1144e-03, 9.1978e-05,\n 6.7845e-04, 8.6417e-03, 6.0411e-03, 1.5114e-03, 5.0670e-05,\n 1.5084e-03, 4.2364e-04, 1.1850e-04, -2.2306e-04, -9.8223e-04,\n 2.2291e-03, -2.3884e-03, 4.8528e-04, 2.6830e-03, 6.6701e-03,\n 7.5323e-04, 1.6600e-03, 4.9449e-03, -1.0605e-03, -6.5662e-03,\n -5.4704e-03, -8.5780e-03, 4.0161e-03, 3.7944e-03, 4.3299e-03,\n 5.4635e-03, 5.8981e-03, -3.3703e-03, 8.9837e-04, -9.8344e-03,\n -4.0125e-03, 1.0754e-03, 1.4701e-03, 2.0397e-03, -6.1496e-03,\n 2.1729e-03, 8.1073e-04, -2.0290e-03, 8.7814e-04, 5.9113e-05,\n -1.9073e-03, 5.2275e-03, 5.3627e-03, 8.2674e-03, -8.2283e-04,\n 4.1513e-03, -8.4229e-04, 2.3005e-03, 7.0055e-04, -4.3464e-03,\n -9.7800e-03, 1.9055e-03, 2.4787e-03, -1.6393e-03, 2.3803e-03,\n 1.7909e-03, 2.9819e-03, -9.9314e-04, 1.6808e-03, -2.5562e-03,\n -1.0654e-03, 2.1034e-03, -2.9216e-03, 9.0053e-03, -8.4874e-04,\n 4.9724e-03, 2.5405e-03, 3.7508e-03, -4.6365e-03, 8.0582e-05,\n 1.1193e-03, -1.0205e-03, -4.9311e-04, -7.5387e-04, -7.4048e-04,\n -1.9900e-03, 1.6978e-04, -3.7926e-03, 7.7339e-03, 3.8856e-03,\n -2.5349e-03, 4.8172e-03, -1.2799e-02, 1.9656e-04, -4.9526e-03,\n 1.5026e-04, -9.5008e-04, 3.9295e-03, -6.6921e-03, -5.6970e-03,\n 2.5982e-04, 3.2726e-03, 3.0716e-03, 2.0146e-03, -5.1987e-05,\n -2.5175e-03, 4.3365e-03, -8.7169e-04, 1.5504e-04, 2.1605e-03,\n -7.1467e-03, 3.3468e-03, 4.1751e-04, 1.4666e-03, 1.4645e-03,\n 2.7804e-03, -4.2210e-03, -1.2208e-03, 1.0280e-03, 4.9141e-05,\n 1.1818e-02, -3.1794e-03, -1.8580e-04, -1.0133e-03, 5.1498e-03,\n 7.2491e-03, 9.0675e-04, -2.2548e-04, -4.4867e-04, 4.7646e-03,\n 4.1554e-03, 2.9549e-03, 9.9204e-04, -1.5838e-03, 2.7689e-03,\n -5.4287e-03, -1.7290e-03, -7.5518e-04, -1.6319e-04, 3.2908e-03,\n -1.0611e-04, -4.8991e-03, 2.4602e-03, -1.6210e-03, 2.3874e-03,\n -3.6197e-03, -1.3555e-03, 3.1904e-03, 1.9771e-03, -7.4417e-04,\n -1.1668e-03, 3.8476e-03, 3.4952e-04, 7.5131e-03, -9.5561e-03,\n -8.8550e-04, -2.6342e-03, 3.2067e-03, 7.3222e-03, 1.8044e-04,\n -1.9948e-03, -2.6955e-03, 1.3186e-03, -5.6526e-05, 3.1656e-03,\n -1.0785e-03, -1.3548e-03, -2.5453e-03, 7.8193e-04, -2.9689e-03,\n -1.9654e-02, -5.2267e-03, 6.0136e-03, -1.4276e-03, 1.5119e-03,\n 2.5791e-03, 3.8273e-03, -3.5307e-05, 1.8323e-03, -2.3449e-03,\n -1.7462e-03, -1.2740e-03, -3.3914e-03, -8.2116e-03, 5.4500e-03,\n -3.0439e-03, -6.7236e-03, -3.3678e-03, -6.2045e-03, 2.1275e-03,\n 4.6509e-03, -1.9296e-03, -2.3216e-03, 3.0205e-03, 2.2999e-03,\n -8.8400e-03, -1.3444e-02, -3.1557e-03, 1.0561e-03, -3.9836e-03,\n -5.4254e-03, -5.4340e-03, -1.6242e-03, 2.4975e-03, 2.7010e-03,\n -1.7493e-03, -1.3494e-02, -6.8247e-04, -4.1344e-03, 4.7789e-03,\n -1.3290e-03, -4.2390e-03, -8.3718e-03, -7.3298e-03, 2.0847e-03,\n -2.0121e-03, -1.9439e-03, -3.8365e-03, -2.0957e-03, 5.8303e-03,\n 1.1020e-03, 7.0487e-03, -6.2595e-03, 1.0694e-02, 5.7238e-03,\n 3.9116e-04, 1.4367e-03, 5.8885e-03, -6.0230e-04, 1.5902e-03,\n 4.9000e-03, 3.8069e-03, 6.0272e-03, 1.0566e-02, 7.0155e-03,\n 4.6006e-04, -2.7682e-03, -5.5391e-05, -6.6375e-04, -2.5775e-03,\n -2.7087e-03, 2.5814e-03, 1.6651e-03, -2.6612e-04, 2.6778e-03,\n 3.4593e-03, 2.8917e-03, -3.0320e-03, -1.7876e-03, 2.1569e-03,\n -6.2723e-03, -6.1374e-03, 3.9420e-04, 4.2423e-03, 8.1284e-03,\n 3.9472e-03, -1.3562e-03, 2.5492e-04, -1.5315e-03, 9.1608e-06,\n 4.4920e-03, -1.6295e-03, -3.2572e-03, 2.3427e-03, -5.6907e-03,\n -2.5304e-04, -2.0085e-03, 1.8788e-03, -1.3715e-03, -3.4304e-03,\n -2.3353e-03, 4.2155e-04, -1.3139e-03, 1.3409e-03, -8.9320e-04,\n 1.2753e-03, 5.4212e-04, -4.7313e-03, 8.5150e-04, -3.3219e-03,\n 2.9567e-03, 1.5426e-04, -1.4802e-03, -3.8006e-03, -5.4012e-04,\n 1.6278e-03, 7.9429e-03, -1.1580e-03, 7.8475e-04, 9.1835e-04,\n 3.3377e-03, 4.7380e-03, 7.4097e-04, 2.1094e-04, -2.3892e-03,\n 2.7122e-03, 2.8022e-03, -3.1145e-03, -6.5356e-04, 6.7523e-03,\n -1.3943e-04, 2.1075e-03, 3.1959e-04, 2.1102e-03, 5.0989e-03,\n 3.0644e-03, -1.0549e-02, 4.1461e-03, 1.8647e-03, -4.5991e-03,\n -8.2869e-03, -2.9202e-04, -1.6917e-03, -8.6949e-03, -8.3796e-03,\n 4.0026e-03, -4.0050e-04, 1.3245e-02, -9.2023e-04, 6.3701e-03,\n -2.1077e-03, 1.2066e-03, 1.7811e-03, -7.9896e-03, 3.1914e-03,\n 2.4350e-03, 3.7398e-03, 1.5302e-03, 3.8779e-04, 1.7003e-03,\n 1.0034e-03, 9.1192e-05, 5.4516e-03, 6.5580e-03, -9.6933e-04,\n 6.8581e-03, 9.7070e-03, -6.5832e-03, -2.9274e-03, 5.9390e-04,\n -5.4972e-04, 1.7227e-03, -9.3981e-04, -4.2824e-03, 2.1858e-04,\n -1.1587e-03, -3.1856e-03, -3.3232e-03, 1.4906e-03, 7.5799e-03,\n 1.4455e-04, -3.3660e-03, 1.5075e-03, 3.6470e-03, 1.8677e-04,\n -1.2670e-04, -7.3287e-04, -1.2467e-03, -3.1353e-03, 9.2901e-04,\n 3.1089e-03, -1.1255e-02, 9.3294e-04, 9.0421e-03, 8.4580e-04,\n 2.0753e-03, 4.0902e-04, -4.7472e-03, -2.5565e-03, -7.2146e-03,\n 1.0558e-03, 2.4445e-03, 2.6439e-03, -2.7508e-04, -6.1490e-03,\n -1.8190e-03, 7.7839e-04, -1.1356e-02, -2.3217e-04, 2.5387e-04,\n -3.7586e-03, -6.1103e-04, 4.6926e-03, 2.1626e-04, -2.0647e-02,\n -2.8967e-03, -2.1483e-03, -1.8751e-03, 5.7021e-03, 4.4498e-03,\n 2.0648e-03, -8.7055e-04, -5.4207e-05, 2.6909e-03, -1.1282e-02,\n 2.7386e-03, 1.8365e-03, -7.0980e-03, 2.4907e-03, -1.9638e-03,\n 3.0068e-03, 1.0957e-03, -1.3711e-03, -1.8079e-03, -6.4161e-03,\n -2.1170e-03, -2.8213e-03, -1.6690e-03, 4.9022e-04, 2.9877e-03,\n 9.0554e-02, 3.2485e-03, -3.7267e-04, -1.2323e-03, -5.9728e-03,\n -3.5801e-03, 1.4613e-03, 1.4700e-03, -2.9388e-03, 7.3458e-04,\n 2.0715e-04, 1.2102e-03, -3.3482e-03, 1.0972e-03, -1.9859e-03,\n 4.5918e-03, 3.0779e-04, -2.8896e-03, 2.3806e-03, -4.2459e-03,\n -6.8443e-03, 3.3495e-04], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([2.0510e-04, 8.3806e-05, 1.9209e-04, 2.9625e-04, 7.9022e-05, 1.4478e-04,\n 1.2275e-04, 1.6409e-04, 6.9286e-05, 1.7391e-04, 1.5150e-04, 2.1322e-04,\n 7.2641e-05, 1.4084e-04, 5.8408e-05, 8.2451e-05, 2.5407e-03, 2.0018e-04,\n 1.5281e-04, 2.1373e-04, 3.2352e-04, 3.0012e-04, 2.0695e-04, 1.1661e-04,\n 1.5065e-04, 3.5781e-04, 2.3552e-04, 1.6728e-04, 5.6722e-05, 4.1652e-04,\n 2.1092e-04, 9.4834e-05, 2.9447e-04, 7.2024e-05, 4.1247e-04, 3.3946e-04,\n 2.5006e-04, 2.9448e-04, 6.9191e-05, 1.2797e-04, 1.2808e-04, 1.3473e-04,\n 5.9471e-05, 6.2346e-05, 1.2532e-04, 8.0942e-05, 2.8777e-04, 2.1116e-04,\n 3.9965e-04, 1.3694e-04, 2.4569e-04, 3.1462e-04, 9.2454e-05, 2.6199e-04,\n 1.4459e-04, 2.1700e-04, 4.3294e-04, 1.7464e-04, 1.7391e-04, 1.9326e-04,\n 2.7840e-04, 5.8868e-05, 1.6706e-04, 3.2890e-04, 6.0247e-04, 1.7157e-04,\n 2.1882e-04, 1.6768e-04, 6.2217e-05, 2.0336e-04, 5.7408e-05, 9.9185e-05,\n 7.3481e-05, 1.0987e-04, 1.4915e-04, 1.0311e-04, 1.5816e-04, 1.3629e-04,\n 2.4542e-04, 1.7058e-04, 1.0789e-04, 7.2277e-05, 1.2309e-04, 1.5573e-04,\n 1.4836e-04, 2.5393e-04, 2.5395e-03, 1.0730e-04, 7.5708e-05, 1.9017e-04,\n 1.9706e-04, 2.8357e-04, 2.3382e-04, 9.3134e-05, 1.0376e-04, 1.0126e-04,\n 1.3854e-04, 1.1510e-04, 1.2616e-04, 7.8228e-05, 7.8773e-05, 7.7489e-05,\n 1.0683e-04, 2.8510e-04, 3.0522e-04, 1.9644e-04, 6.6259e-05, 2.8032e-04,\n 1.0643e-04, 7.1027e-05, 4.6332e-04, 6.9168e-05, 1.4722e-04, 7.3366e-05,\n 4.1206e-04, 9.3134e-05, 7.9513e-05, 1.1088e-04, 2.1105e-04, 1.3084e-04,\n 1.0466e-04, 2.0747e-04, 1.0347e-04, 1.1184e-04, 1.2189e-04, 1.0310e-04,\n 7.8701e-05, 1.9283e-04, 8.8784e-05, 3.3490e-04, 8.1582e-05, 1.4150e-04,\n 3.5524e-04, 2.8823e-04, 1.4981e-04, 2.2581e-04, 8.9535e-05, 2.4395e-04,\n 1.1347e-04, 1.0471e-04, 2.3610e-04, 2.0896e-04, 1.3018e-04, 1.4678e-04,\n 1.0351e-04, 2.0261e-04, 1.4695e-04, 1.8243e-04, 1.2525e-04, 1.2664e-04,\n 6.6111e-05, 2.0982e-04, 1.4609e-04, 2.6815e-04, 1.1765e-04, 2.4027e-04,\n 1.4334e-04, 1.3940e-04, 6.2795e-04, 5.1902e-05, 2.8186e-04, 1.3725e-04,\n 1.7962e-04, 1.2724e-04, 1.6694e-04, 9.9382e-05, 4.1418e-04, 2.0692e-04,\n 7.4849e-05, 1.1559e-04, 3.6700e-04, 1.3524e-04, 4.5050e-04, 1.1478e-04,\n 1.2134e-04, 1.5553e-04, 1.3849e-04, 1.9560e-04, 1.1046e-04, 7.5733e-05,\n 1.2253e-04, 1.3896e-04, 6.0924e-04, 1.6641e-04, 1.2331e-04, 1.7360e-04,\n 7.1555e-04, 4.6004e-04, 8.7272e-05, 2.4146e-04, 4.2352e-04, 4.3940e-04,\n 1.8470e-04, 1.2587e-04, 1.3193e-04, 1.3209e-04, 6.7299e-05, 1.3073e-04,\n 6.1991e-05, 1.1477e-04, 3.3397e-04, 2.2649e-04, 7.7903e-05, 8.7117e-05,\n 1.2864e-04, 2.8865e-04, 4.2018e-04, 2.0918e-04, 1.7651e-04, 1.1490e-04,\n 1.7025e-04, 1.3341e-04, 1.4298e-04, 1.9854e-04, 3.3073e-04, 1.5521e-04,\n 2.1915e-04, 8.1017e-05, 1.3977e-04, 1.4285e-04, 1.6545e-04, 8.3147e-05,\n 6.7920e-05, 1.2364e-04, 2.7555e-04, 1.2925e-04, 2.2466e-04, 7.6891e-05,\n 4.1015e-04, 2.0095e-04, 9.1202e-05, 1.4946e-04, 7.6665e-05, 1.4785e-04,\n 1.4158e-04, 1.2890e-04, 1.7002e-04, 1.6829e-04, 1.8605e-04, 1.3331e-04,\n 7.7996e-05, 1.6107e-04, 1.4757e-04, 1.3541e-04, 3.0117e-04, 8.8065e-05,\n 1.2691e-04, 6.0219e-05, 2.0511e-04, 1.6842e-04, 1.7660e-04, 1.8221e-04,\n 3.9772e-04, 1.2730e-04, 1.2165e-04, 8.0453e-05, 3.3735e-04, 1.2393e-04,\n 1.8029e-04, 1.6901e-04, 1.1408e-04, 1.0025e-04, 2.4972e-04, 3.6130e-04,\n 6.6500e-04, 2.5331e-04, 1.4062e-04, 1.2015e-04, 1.9486e-04, 1.0878e-04,\n 2.6774e-04, 1.6132e-04, 7.7448e-05, 6.4137e-05, 8.6694e-05, 3.5789e-04,\n 7.8061e-04, 2.1915e-04, 1.4181e-04, 9.3310e-05, 4.1607e-05, 1.6389e-04,\n 1.1918e-04, 2.0278e-04, 1.7800e-04, 9.1940e-05, 8.4070e-04, 8.9354e-05,\n 1.1730e-04, 2.5887e-04, 2.3837e-04, 1.2827e-04, 1.7326e-04, 1.1625e-04,\n 1.2378e-04, 1.3956e-04, 5.5600e-05, 1.7812e-04, 7.0813e-05, 1.1460e-04,\n 3.1849e-04, 1.2253e-04, 1.2244e-04, 3.2214e-04, 1.1476e-04, 1.6164e-04,\n 1.5737e-04, 1.3198e-04, 1.4015e-04, 1.1040e-04, 1.1715e-04, 7.9030e-05,\n 1.3946e-04, 9.7418e-05, 4.0768e-04, 2.0917e-04, 8.5965e-05, 2.3098e-04,\n 8.6741e-05, 3.2856e-04, 6.8589e-05, 2.9064e-04, 1.0368e-04, 1.3455e-04,\n 9.7613e-05, 8.8056e-05, 1.9199e-04, 1.2762e-04, 6.9496e-05, 1.4687e-04,\n 1.3379e-04, 2.2946e-04, 1.0574e-04, 1.4873e-04, 5.4321e-04, 3.0484e-04,\n 1.4350e-04, 3.6584e-04, 1.3862e-04, 9.3844e-05, 1.9260e-04, 2.5086e-04,\n 1.1899e-04, 1.3677e-04, 1.1516e-04, 9.8172e-05, 1.3084e-04, 8.7194e-05,\n 3.2213e-04, 6.1555e-05, 2.0787e-04, 9.5455e-05, 6.0082e-05, 1.0063e-04,\n 1.6786e-04, 1.0279e-04, 4.3376e-05, 8.4842e-05, 4.9939e-04, 9.6926e-05,\n 1.0319e-04, 5.5986e-05, 1.1522e-04, 2.0123e-04, 7.5428e-05, 1.1699e-04,\n 1.2990e-04, 1.1132e-04, 1.5856e-04, 2.0114e-04, 2.9910e-04, 1.1756e-04,\n 1.0341e-04, 1.4699e-04, 1.0161e-04, 2.0837e-04, 7.8338e-05, 1.4184e-04,\n 1.0956e-04, 2.1608e-04, 6.2273e-05, 1.1399e-04, 2.0266e-04, 1.7670e-04,\n 8.7753e-05, 9.2387e-05, 2.2438e-04, 6.5876e-05, 5.5013e-05, 8.0055e-04,\n 1.2898e-04, 8.3065e-05, 2.4892e-04, 1.3466e-04, 2.7972e-04, 2.2396e-04,\n 2.8546e-04, 2.7858e-04, 1.6484e-04, 2.7479e-04, 2.5179e-04, 1.0049e-04,\n 7.1356e-05, 6.6756e-05, 8.2939e-05, 8.7599e-05, 1.3917e-04, 1.5429e-04,\n 1.0062e-04, 4.6287e-04, 1.2996e-04, 6.1615e-05, 1.4490e-04, 4.6947e-04,\n 7.1587e-05, 2.7800e-04, 1.0798e-04, 1.3965e-04, 9.2970e-05, 3.1080e-04,\n 2.2700e-04, 1.2227e-04, 1.0361e-04, 1.4243e-04, 6.6576e-05, 1.1744e-04,\n 1.1754e-04, 1.0450e-04, 2.4434e-04, 2.3092e-04, 6.3726e-05, 4.4086e-04,\n 2.2675e-04, 9.1918e-05, 1.7193e-04, 1.5522e-04, 6.2440e-05, 1.5073e-04,\n 1.4978e-04, 8.4504e-05, 1.3663e-04, 1.6371e-04, 1.2061e-04, 3.3514e-04,\n 1.7109e-04, 7.1919e-05, 7.8839e-05, 2.6688e-04, 2.4804e-04, 2.6480e-04,\n 5.4605e-04, 1.2750e-04, 6.7659e-05, 2.6809e-04, 2.4718e-04, 1.0915e-04,\n 5.9791e-05, 6.9328e-04, 1.4067e-04, 1.7131e-04, 1.9421e-04, 9.9171e-05,\n 2.4729e-04, 6.6727e-05, 1.0412e-03, 2.2198e-04, 1.1562e-04, 6.9808e-05,\n 1.7009e-04, 8.0866e-05, 1.1611e-04, 2.8457e-04, 9.2861e-05, 1.6671e-04,\n 2.8571e-04, 2.2840e-04, 1.4715e-04, 2.3671e-04, 3.8296e-05, 1.2920e-04,\n 9.0639e-05, 4.5274e-05, 1.9487e-04, 1.6205e-04, 1.1172e-04, 2.9944e-04,\n 8.6509e-05, 1.7850e-04, 1.1396e-04, 1.2684e-04, 2.2122e-01, 8.6306e-05,\n 1.9528e-04, 1.2665e-04, 1.2955e-04, 2.6791e-04, 2.5287e-04, 1.3545e-04,\n 1.1063e-04, 2.0016e-04, 1.0104e-04, 1.2813e-04, 9.0627e-05, 1.2793e-04,\n 1.6235e-04, 1.8382e-04, 2.3865e-04, 1.9682e-04, 1.9711e-04, 2.1052e-04,\n 2.1417e-04, 1.4999e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(11268.)",
22
+ "exp_avg": "tensor([ 5.4963e-03, 4.7430e-03, -4.0248e-03, -1.4083e-03, 1.5550e-03,\n 3.3828e-05, 5.4506e-03, -7.7725e-03, 7.1548e-03, 9.5693e-03,\n 1.0954e-03, 2.6229e-03, 2.5646e-03, 6.4475e-04, 3.6525e-03,\n -3.7067e-03, -5.6845e-04, 1.0483e-02, -9.3560e-03, -4.7041e-03,\n 5.2376e-03, 2.0278e-03, -3.5902e-03, 1.2566e-03, 1.4355e-03,\n -4.7914e-04, -4.4044e-03, -8.3288e-04, 9.5268e-04, -7.7418e-04,\n 4.4206e-03, -3.2964e-03, -1.8292e-03, -2.2963e-03, 8.2590e-05,\n 2.2826e-03, 2.7676e-03, -3.0790e-03, 2.6787e-03, -1.8307e-03,\n 7.4212e-03, 3.5147e-03, 1.2242e-03, -1.8914e-03, -4.3575e-03,\n -6.7957e-04, -2.3989e-03, 4.1398e-03, 1.8731e-03, -4.4378e-03,\n -7.2047e-03, 2.2175e-03, 2.2836e-03, -2.1728e-03, -9.1427e-04,\n 3.5847e-03, -3.8941e-04, -3.9869e-03, 1.0360e-03, -1.8686e-03,\n 5.4004e-03, 2.8413e-03, 4.4163e-03, 8.9700e-03, -1.5906e-03,\n -7.0883e-03, 4.5340e-03, -1.2726e-03, 2.2709e-03, 1.4186e-03,\n 3.6788e-04, 6.8046e-04, 2.3485e-03, 1.1449e-03, 5.8374e-03,\n -2.8092e-05, -1.1297e-03, -3.5184e-03, -1.4009e-03, 3.5660e-03,\n 4.8421e-05, 2.7241e-03, 2.6430e-03, 4.8653e-03, 5.4275e-03,\n -1.8734e-03, -1.3438e-05, 2.4738e-03, 2.2906e-03, -3.6561e-04,\n 9.0090e-04, 6.0102e-03, 4.5734e-03, 2.0248e-03, 4.3238e-04,\n 1.0754e-03, 9.4883e-04, 5.3191e-04, -8.8331e-05, -1.3887e-03,\n 2.3399e-03, -2.5943e-03, -5.7812e-04, 3.0454e-03, 5.3244e-03,\n 1.3874e-03, 3.2678e-03, 3.1789e-03, -7.2287e-04, -6.4219e-03,\n -3.4301e-03, -1.4170e-02, 5.2553e-03, 3.6569e-03, 3.1406e-03,\n 6.1391e-03, 7.9604e-03, -6.0706e-03, -9.2640e-05, -8.0841e-03,\n -1.9609e-03, 1.2715e-04, 2.2329e-03, 2.2241e-03, -8.4700e-03,\n 1.8917e-03, 1.0053e-03, -1.2007e-03, 2.5422e-03, 5.5125e-04,\n -1.7878e-03, 5.0625e-03, 5.0555e-03, 6.8077e-03, -7.2427e-04,\n 3.0490e-03, -4.5750e-04, 2.5926e-03, 1.2036e-03, -5.4331e-03,\n -8.2960e-03, 2.0474e-03, 3.7691e-03, -2.9137e-03, 4.3434e-03,\n 5.7788e-04, 2.0554e-03, 2.5797e-04, 2.9001e-03, -4.8123e-03,\n 7.4253e-05, 1.8946e-03, -1.6701e-03, 8.3658e-03, -1.1522e-04,\n 3.5299e-03, 3.3998e-03, 3.9038e-03, -3.2444e-03, 1.2970e-03,\n 3.9325e-04, -1.2674e-03, -6.4865e-04, -1.9806e-03, -5.8817e-04,\n -3.7530e-03, 8.3653e-04, -2.9218e-03, 6.4118e-03, 4.9168e-03,\n -3.0033e-03, 4.9653e-03, -8.5044e-03, -2.4650e-04, -6.1642e-03,\n -2.6806e-04, 2.9728e-04, 2.4437e-03, -6.4031e-03, -5.6365e-03,\n 1.8616e-03, 3.7438e-03, 1.5763e-03, 1.9528e-03, 7.6535e-05,\n -2.6236e-03, 3.3895e-03, 5.1176e-04, 8.9902e-04, 2.9665e-03,\n -4.5563e-03, 2.8176e-03, -1.0759e-03, 2.3099e-03, 2.2629e-03,\n 4.1539e-03, -3.6871e-03, -1.0980e-03, 8.7271e-04, -1.8348e-04,\n 8.6846e-03, -1.9454e-03, -9.2581e-05, -2.7996e-03, 4.7113e-03,\n 5.1785e-03, -5.5440e-04, 1.3341e-04, 9.2039e-05, 5.0359e-03,\n 4.1494e-03, 3.1558e-03, 9.9468e-05, -2.4139e-03, 2.4921e-03,\n -5.0111e-03, -1.9165e-03, -1.3927e-03, 2.2155e-04, 2.9905e-03,\n 1.6916e-04, -8.7488e-03, 4.3917e-03, -2.5904e-03, 2.0570e-03,\n -9.6949e-04, 7.6255e-04, 4.7390e-03, 1.7449e-03, 7.4699e-04,\n -2.0459e-03, 3.3566e-03, 2.1540e-04, 5.6627e-03, -8.0548e-03,\n -2.0072e-04, -3.7390e-03, 4.0138e-03, 6.7495e-03, -1.3847e-03,\n -1.7496e-03, -2.3447e-03, 1.1748e-03, 5.6168e-04, 2.8790e-03,\n -2.1559e-03, -2.2870e-03, -3.5533e-03, 1.4087e-03, -4.2750e-03,\n -1.9081e-02, -4.9395e-03, 2.3475e-03, -2.0001e-03, 2.0495e-03,\n 2.6339e-03, 3.5503e-03, -2.7912e-04, 3.1838e-03, -1.3184e-03,\n -8.5373e-04, -1.6712e-03, -3.8906e-03, -4.4631e-03, 4.0177e-03,\n -2.6813e-03, -6.2399e-03, -4.1464e-03, -3.7556e-03, 1.5627e-03,\n 3.2060e-03, -1.1087e-03, -3.9357e-03, 2.8580e-03, 1.6715e-03,\n -6.9963e-03, -7.5373e-03, -2.9721e-03, 8.9668e-04, -4.5823e-03,\n -3.5330e-03, -4.6048e-03, -2.5657e-03, 2.3766e-03, 3.0524e-03,\n -3.2261e-03, -7.1544e-03, -1.8399e-03, -5.5553e-03, 4.1268e-03,\n -1.2135e-03, -5.1752e-03, -9.5901e-03, -8.6232e-03, 1.0752e-04,\n -1.6234e-03, -1.7640e-03, -3.9187e-03, -3.2315e-03, 6.0223e-03,\n 1.0689e-03, 6.5009e-03, -6.6701e-03, 8.9099e-03, 6.3557e-03,\n 7.2312e-04, 1.1623e-03, 5.0886e-03, -1.1016e-03, 1.2345e-03,\n 5.8901e-03, 4.6557e-03, 5.2362e-03, 1.0575e-02, 4.4074e-03,\n -9.2620e-04, -3.0635e-03, 1.3678e-03, -1.0071e-03, -1.4765e-03,\n -3.3488e-03, 2.6837e-03, 1.9822e-03, -1.2483e-03, 2.0844e-03,\n 4.6727e-03, 2.5145e-03, -2.5944e-03, -7.6292e-04, 2.1418e-03,\n -8.0467e-03, -5.7154e-03, 9.0032e-04, 1.7313e-03, 4.6672e-03,\n 3.4067e-03, -1.8186e-03, -8.9134e-04, -2.1030e-03, 4.6482e-04,\n 3.8873e-03, -1.6425e-03, -3.5835e-03, 2.5693e-03, -5.9273e-03,\n -2.1550e-03, -1.8777e-03, 1.8784e-03, -1.7701e-03, -5.7671e-03,\n -2.4007e-03, 1.7828e-03, -1.2788e-03, 2.1335e-03, -7.0581e-04,\n 1.3535e-03, 2.2416e-03, -5.6262e-03, 3.9882e-04, -3.0642e-03,\n 3.1964e-03, 6.8428e-04, -1.1841e-03, -3.2968e-03, -3.5521e-03,\n 5.8385e-04, 6.2244e-03, -3.0622e-03, 1.1526e-03, 3.8646e-05,\n 1.7998e-03, 3.8293e-03, 5.8043e-04, 6.8281e-04, -2.4062e-03,\n 2.1559e-03, 2.9930e-03, -5.1939e-03, -7.4762e-04, 6.1697e-03,\n 4.4007e-05, 2.7683e-03, 2.0966e-03, 1.8860e-03, 6.0972e-03,\n 3.0357e-03, -8.5546e-03, 5.1571e-03, 1.3850e-03, -3.1298e-03,\n -9.4185e-03, 5.4805e-04, -1.7740e-03, -1.0145e-02, -6.8361e-03,\n 4.1886e-03, -1.9090e-04, 1.0550e-02, -1.9185e-03, 4.7273e-03,\n -1.1728e-03, 1.3070e-03, 2.0972e-03, -1.4157e-02, 2.7003e-03,\n 2.3464e-03, 4.4675e-03, 1.5610e-03, -3.3328e-04, 8.6911e-04,\n 7.7954e-04, 8.4528e-04, 5.4616e-03, 4.6245e-03, -1.8661e-03,\n 4.4304e-03, 7.8746e-03, -7.2112e-03, -3.6785e-03, 4.9863e-04,\n 2.2939e-04, 2.8821e-03, -1.3124e-03, -4.8173e-03, -1.2533e-05,\n -5.7352e-04, -3.5639e-03, -1.9690e-03, 1.4324e-04, 6.3701e-03,\n 2.3421e-04, -3.4341e-03, 1.4396e-03, 5.0155e-03, -5.7275e-04,\n -1.5474e-03, -9.3677e-04, -7.4700e-04, -3.7526e-03, 1.1583e-03,\n 2.8651e-03, -1.1101e-02, 1.4088e-03, 6.9871e-03, 6.6163e-04,\n 1.8055e-03, 5.0432e-04, -2.8536e-03, -3.1850e-03, -6.1666e-03,\n 1.2365e-03, 3.3870e-03, 3.2168e-03, -6.4184e-04, -7.9035e-03,\n -6.5664e-04, 4.0548e-04, -7.4354e-03, 1.1238e-03, 3.8202e-05,\n -8.3827e-04, -1.6698e-03, 4.2179e-03, 8.6767e-04, -9.0839e-03,\n -1.3061e-03, -1.9247e-03, -2.0405e-03, 5.5290e-03, 5.2795e-03,\n 1.8482e-03, -1.6244e-03, 2.4344e-04, 2.2126e-03, -1.0107e-02,\n 1.1335e-03, 1.4815e-03, -4.4475e-03, 2.9604e-03, -2.3368e-03,\n 3.1147e-03, 1.5343e-03, -2.1815e-03, -2.1478e-03, -7.9553e-03,\n -2.8086e-03, -3.3085e-03, -1.1421e-03, 7.6244e-04, 3.7148e-03,\n 4.7414e-03, 3.5980e-03, 5.7304e-04, -4.4697e-04, -7.5879e-03,\n -4.0099e-03, 1.7911e-03, 1.5537e-03, -2.1261e-03, 6.9491e-04,\n -1.4616e-04, 6.5810e-04, -4.1163e-03, 2.2687e-03, -1.9653e-03,\n 5.1586e-03, -4.0530e-05, -2.5792e-03, 1.9678e-04, -3.6682e-03,\n -6.5094e-03, 8.3818e-04], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([2.1573e-04, 1.7040e-04, 1.7038e-04, 1.8062e-04, 1.4541e-04, 1.4436e-04,\n 1.3958e-04, 1.4443e-04, 1.4733e-04, 2.0846e-04, 1.2593e-04, 2.0160e-04,\n 1.1078e-04, 1.3399e-04, 1.0320e-04, 1.1724e-04, 3.0041e-04, 1.2290e-04,\n 1.3017e-04, 1.4862e-04, 1.8797e-04, 2.2170e-04, 1.8244e-04, 1.3957e-04,\n 2.2272e-04, 2.4817e-04, 1.9321e-04, 1.4417e-04, 1.0489e-04, 1.5551e-04,\n 1.6759e-04, 1.6332e-04, 2.7213e-04, 1.0508e-04, 2.2510e-04, 1.9691e-04,\n 1.5635e-04, 2.0118e-04, 9.8800e-05, 1.2709e-04, 1.7152e-04, 1.3141e-04,\n 1.0887e-04, 1.1577e-04, 1.4897e-04, 1.3871e-04, 3.4403e-04, 2.1911e-04,\n 2.1445e-04, 1.6440e-04, 1.4479e-04, 2.1446e-04, 1.3322e-04, 1.4894e-04,\n 2.3685e-04, 2.1748e-04, 2.0487e-04, 1.8971e-04, 1.5455e-04, 1.5976e-04,\n 2.2389e-04, 1.2586e-04, 2.2205e-04, 1.8355e-04, 2.0974e-04, 1.3205e-04,\n 1.7986e-04, 1.7977e-04, 1.7327e-04, 2.3673e-04, 1.3874e-04, 1.0916e-04,\n 1.7896e-04, 1.1262e-04, 1.6446e-04, 2.1029e-04, 1.6416e-04, 1.5940e-04,\n 2.0283e-04, 2.6305e-04, 2.0815e-04, 1.0759e-04, 1.9866e-04, 1.8840e-04,\n 1.4441e-04, 1.9160e-04, 2.6832e-04, 2.1348e-04, 1.0715e-04, 2.0264e-04,\n 1.6970e-04, 2.0024e-04, 1.6187e-04, 1.3800e-04, 1.7192e-04, 1.4157e-04,\n 2.0431e-04, 1.7878e-04, 1.5533e-04, 1.2926e-04, 9.8874e-05, 1.1021e-04,\n 1.5996e-04, 1.4446e-04, 2.0938e-04, 1.7107e-04, 1.5716e-04, 1.6736e-04,\n 1.6487e-04, 1.1305e-04, 1.6413e-04, 1.3798e-04, 1.6575e-04, 9.5472e-05,\n 2.0494e-04, 1.0427e-04, 1.0303e-04, 2.0808e-04, 1.4484e-04, 1.3261e-04,\n 9.8404e-05, 1.2040e-04, 1.3890e-04, 1.6777e-04, 1.7964e-04, 1.3609e-04,\n 1.3363e-04, 1.5826e-04, 1.3496e-04, 1.7776e-04, 1.2649e-04, 1.3881e-04,\n 2.1103e-04, 1.6071e-04, 1.6235e-04, 1.5001e-04, 1.1593e-04, 1.2461e-04,\n 1.6712e-04, 1.6665e-04, 2.4513e-04, 1.4922e-04, 1.1304e-04, 1.9934e-04,\n 1.4718e-04, 1.6173e-04, 1.6534e-04, 1.8727e-04, 1.7129e-04, 1.8882e-04,\n 9.4472e-05, 1.8466e-04, 1.2529e-04, 1.8097e-04, 9.7822e-05, 1.6977e-04,\n 1.7882e-04, 1.6294e-04, 2.4500e-04, 9.3801e-05, 1.7841e-04, 1.3550e-04,\n 1.8013e-04, 1.2506e-04, 1.9515e-04, 1.3114e-04, 1.6211e-04, 1.4212e-04,\n 1.2176e-04, 1.6249e-04, 2.8794e-04, 1.3669e-04, 1.8918e-04, 1.2198e-04,\n 1.5207e-04, 1.1973e-04, 1.4848e-04, 1.6911e-04, 9.1584e-05, 1.1319e-04,\n 1.6474e-04, 2.6720e-04, 1.9567e-04, 1.7426e-04, 2.0934e-04, 1.5328e-04,\n 1.8462e-04, 1.9925e-04, 1.2162e-04, 1.7499e-04, 1.7929e-04, 1.8170e-04,\n 2.5418e-04, 1.4203e-04, 1.4312e-04, 1.4255e-04, 9.5125e-05, 1.5373e-04,\n 1.0727e-04, 2.6027e-04, 1.7458e-04, 2.1639e-04, 9.1851e-05, 1.4744e-04,\n 1.4546e-04, 1.7229e-04, 1.8147e-04, 1.2985e-04, 1.7165e-04, 1.6421e-04,\n 1.1665e-04, 1.9152e-04, 2.0037e-04, 2.7069e-04, 2.4448e-04, 1.2516e-04,\n 1.8587e-04, 8.5424e-05, 1.3037e-04, 1.5884e-04, 1.4222e-04, 1.4875e-04,\n 1.4851e-04, 1.8273e-04, 1.3884e-04, 1.5180e-04, 1.5798e-04, 1.1732e-04,\n 2.5763e-04, 2.5122e-04, 1.4329e-04, 1.3350e-04, 1.3681e-04, 1.2451e-04,\n 1.5612e-04, 1.9751e-04, 1.8420e-04, 2.1707e-04, 1.7809e-04, 2.2998e-04,\n 1.4099e-04, 1.4122e-04, 1.7062e-04, 1.3771e-04, 1.7579e-04, 1.5282e-04,\n 1.2823e-04, 9.8942e-05, 1.4787e-04, 2.3973e-04, 1.4117e-04, 1.5558e-04,\n 1.4325e-04, 2.8170e-04, 1.5787e-04, 1.3163e-04, 3.8614e-04, 1.5408e-04,\n 1.8014e-04, 1.8856e-04, 1.4984e-04, 1.9578e-04, 1.5336e-04, 1.8215e-04,\n 1.9563e-04, 2.1130e-04, 1.1604e-04, 1.6738e-04, 1.1553e-04, 1.2225e-04,\n 1.5040e-04, 1.1279e-04, 1.3362e-04, 1.0925e-04, 1.2758e-04, 3.3495e-04,\n 1.7492e-04, 1.7874e-04, 2.5960e-04, 1.5510e-04, 8.3055e-05, 2.0366e-04,\n 1.5411e-04, 1.4091e-04, 2.4547e-04, 1.3766e-04, 2.8228e-04, 1.2292e-04,\n 1.6547e-04, 1.8751e-04, 2.1033e-04, 1.9490e-04, 2.0179e-04, 1.5419e-04,\n 2.1953e-04, 1.7697e-04, 1.2468e-04, 2.1175e-04, 1.5018e-04, 2.2734e-04,\n 2.5052e-04, 1.2400e-04, 1.4513e-04, 1.9984e-04, 1.2274e-04, 1.4489e-04,\n 1.6986e-04, 1.2330e-04, 1.8691e-04, 1.4393e-04, 1.3128e-04, 1.2291e-04,\n 1.1759e-04, 1.1409e-04, 1.9033e-04, 2.0421e-04, 8.9738e-05, 2.2919e-04,\n 1.3080e-04, 1.8510e-04, 1.2140e-04, 2.9983e-04, 1.5255e-04, 1.2502e-04,\n 1.2516e-04, 1.2181e-04, 1.5399e-04, 1.6615e-04, 7.4472e-05, 1.5488e-04,\n 1.8880e-04, 1.8616e-04, 1.2604e-04, 1.2078e-04, 2.1757e-04, 1.8014e-04,\n 1.3905e-04, 2.4975e-04, 1.8555e-04, 1.5663e-04, 1.6906e-04, 2.0859e-04,\n 1.2963e-04, 1.2522e-04, 1.4723e-04, 1.6353e-04, 1.1451e-04, 1.2347e-04,\n 1.9992e-04, 7.8648e-05, 1.9374e-04, 1.7710e-04, 8.7534e-05, 1.5056e-04,\n 1.3313e-04, 1.3718e-04, 1.0416e-04, 1.2937e-04, 2.3780e-04, 1.1484e-04,\n 1.2601e-04, 9.2594e-05, 1.3477e-04, 1.3041e-04, 1.0112e-04, 1.3617e-04,\n 1.1182e-04, 2.3110e-04, 1.2497e-04, 1.3350e-04, 1.7417e-04, 2.1045e-04,\n 1.6508e-04, 1.7232e-04, 1.2326e-04, 1.4890e-04, 1.2446e-04, 1.3582e-04,\n 1.4434e-04, 1.4024e-04, 8.4816e-05, 1.5148e-04, 1.8303e-04, 1.9110e-04,\n 1.4682e-04, 9.2828e-05, 1.4868e-04, 1.1589e-04, 1.3229e-04, 2.2120e-04,\n 1.5804e-04, 1.6090e-04, 1.8498e-04, 1.2864e-04, 1.7418e-04, 2.0941e-04,\n 1.6102e-04, 1.7146e-04, 2.5199e-04, 1.4477e-04, 1.2744e-04, 1.2812e-04,\n 1.2062e-04, 1.3431e-04, 1.3019e-04, 1.5634e-04, 1.4164e-04, 1.2436e-04,\n 1.5376e-04, 2.0460e-04, 1.5971e-04, 1.1762e-04, 1.9444e-04, 2.4276e-04,\n 1.4207e-04, 1.6886e-04, 1.2454e-04, 1.1528e-04, 1.7686e-04, 1.7112e-04,\n 1.8805e-04, 1.2942e-04, 1.6003e-04, 1.8195e-04, 1.5850e-04, 1.3352e-04,\n 2.0141e-04, 1.4256e-04, 1.5630e-04, 1.4623e-04, 1.2325e-04, 2.0054e-04,\n 1.7317e-04, 2.7049e-04, 1.2283e-04, 1.9596e-04, 1.0796e-04, 2.1750e-04,\n 1.9793e-04, 1.3829e-04, 1.7652e-04, 2.5102e-04, 1.5220e-04, 1.8967e-04,\n 1.3502e-04, 1.0457e-04, 1.3219e-04, 1.6260e-04, 1.7112e-04, 1.8851e-04,\n 1.9126e-04, 1.6657e-04, 1.0153e-04, 2.2633e-04, 1.9465e-04, 1.8915e-04,\n 1.1831e-04, 2.4155e-04, 1.5764e-04, 2.0595e-04, 1.9779e-04, 1.7722e-04,\n 1.7701e-04, 1.3326e-04, 1.8280e-04, 1.8936e-04, 1.2065e-04, 1.1169e-04,\n 1.4183e-04, 1.5032e-04, 1.4535e-04, 1.8723e-04, 1.6531e-04, 1.7936e-04,\n 1.7036e-04, 1.5361e-04, 1.4928e-04, 1.9462e-04, 7.8423e-05, 1.2686e-04,\n 1.1321e-04, 9.4586e-05, 1.4341e-04, 1.3840e-04, 1.2228e-04, 2.0378e-04,\n 9.4703e-05, 1.4659e-04, 1.9398e-04, 1.7071e-04, 5.8368e-04, 1.4062e-04,\n 1.9799e-04, 1.2984e-04, 1.9854e-04, 2.2106e-04, 2.3662e-04, 1.8237e-04,\n 1.5583e-04, 1.8674e-04, 1.2502e-04, 1.3130e-04, 1.4528e-04, 1.2179e-04,\n 1.9350e-04, 2.2034e-04, 1.8312e-04, 1.4296e-04, 1.6866e-04, 1.6247e-04,\n 2.2814e-04, 1.3198e-04], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(11268.)",
27
+ "exp_avg": "tensor([[-6.6093e-05, -9.0981e-05, 2.8383e-04, ..., 1.0390e-04,\n -4.2067e-05, 2.7262e-04],\n [-1.7330e-04, -1.8083e-04, -8.3192e-04, ..., 8.0123e-05,\n 1.4059e-04, 1.4396e-04],\n [ 2.8891e-04, -2.9809e-05, -4.9655e-04, ..., 9.5496e-05,\n -2.8312e-06, -5.1555e-04],\n ...,\n [-1.5571e-04, 3.6740e-04, 4.8044e-04, ..., -2.4438e-04,\n -1.2069e-04, -1.7865e-04],\n [ 1.4714e-04, 1.8102e-05, -1.0890e-04, ..., 6.8962e-05,\n -5.0974e-05, -3.0989e-04],\n [ 3.2825e-04, 1.7737e-04, 3.8763e-06, ..., 2.1211e-04,\n 9.8837e-05, 1.9245e-04]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[2.9485e-07, 4.8727e-07, 1.0663e-07, ..., 2.0237e-07, 2.3695e-07,\n 3.4306e-07],\n [6.0073e-07, 1.1080e-06, 3.9923e-07, ..., 7.0824e-07, 6.4851e-07,\n 8.9577e-07],\n [7.0705e-07, 3.8627e-07, 3.3574e-07, ..., 7.2503e-07, 3.8702e-07,\n 7.2226e-07],\n ...,\n [4.1669e-07, 1.0834e-06, 4.4769e-07, ..., 5.7935e-07, 8.3852e-07,\n 7.1874e-07],\n [4.0662e-07, 8.5070e-07, 2.4924e-07, ..., 8.1077e-07, 4.6570e-07,\n 5.6462e-07],\n [3.6075e-07, 5.4725e-07, 3.3877e-07, ..., 1.1538e-06, 4.3584e-07,\n 7.2429e-07]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(11268.)",
32
+ "exp_avg": "tensor([[-9.5391e-05, -6.8253e-05, -2.2773e-06, ..., 7.4789e-05,\n -1.2514e-04, 2.6546e-05],\n [-3.3656e-05, -8.8968e-05, -7.8372e-04, ..., 7.7203e-05,\n 2.7927e-06, 1.8347e-04],\n [ 2.2377e-04, -2.0569e-05, 1.1707e-04, ..., 2.6661e-05,\n 2.8227e-05, -2.3756e-04],\n ...,\n [ 6.4193e-06, -1.1272e-06, -2.3610e-04, ..., -1.9651e-04,\n -1.2410e-04, -2.3277e-05],\n [ 9.0196e-05, -9.5642e-05, 4.4119e-04, ..., -3.6351e-06,\n -9.4623e-05, 1.4011e-05],\n [-1.4057e-04, -5.8231e-05, 2.8134e-04, ..., -2.2363e-04,\n -1.8599e-05, 2.1270e-04]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[1.0448e-07, 8.3303e-08, 8.7546e-08, ..., 1.1312e-07, 8.1904e-08,\n 1.2972e-07],\n [1.9757e-07, 4.2987e-07, 2.3422e-07, ..., 1.6800e-07, 2.3281e-07,\n 2.6276e-07],\n [2.5370e-07, 1.5869e-07, 1.8138e-07, ..., 3.5818e-07, 2.9619e-07,\n 2.3982e-07],\n ...,\n [2.1249e-07, 5.5684e-07, 8.6314e-08, ..., 4.4893e-07, 2.8963e-07,\n 2.5907e-07],\n [2.2515e-07, 3.0465e-07, 9.0081e-08, ..., 3.8294e-07, 1.9086e-07,\n 2.3638e-07],\n [2.1392e-07, 1.5179e-07, 3.9882e-07, ..., 2.0801e-07, 1.5775e-07,\n 2.4518e-07]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(11268.)",
37
+ "exp_avg": "tensor([-0.0015, 0.0015], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([7.9509e-06, 7.9509e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 2.5447270110570814e-05,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.001,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 2.5447270110570814e-05,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.001,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 2.5447270110570814e-05,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.001,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 1.3211399184359193e-05,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.0005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 9,
137
+ "base_lrs": [
138
+ 0.001,
139
+ 0.001,
140
+ 0.001,
141
+ 0.0005
142
+ ],
143
+ "last_epoch": 9,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 2.5447270110570814e-05,
149
+ 2.5447270110570814e-05,
150
+ 2.5447270110570814e-05,
151
+ 1.3211399184359193e-05
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 71.914,
156
+ "best_epoch": 8,
157
+ "scale_accuracies": {
158
+ "256": 71.568,
159
+ "512": 71.708
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6,
169
+ 7,
170
+ 8,
171
+ 9
172
+ ],
173
+ "train_loss": [
174
+ 5.603825211905824,
175
+ 4.145695094292918,
176
+ 3.767625377201044,
177
+ 3.572572173021091,
178
+ 3.45402966711087,
179
+ 3.3684399169854844,
180
+ 3.3046513376906277,
181
+ 3.255301171407913,
182
+ 3.219453362611155
183
+ ],
184
+ "train_acc": [
185
+ 63.39829233815732,
186
+ 69.48282308239285,
187
+ 70.34266414917025,
188
+ 70.91120829681064,
189
+ 71.35642738222262,
190
+ 71.84090754757186,
191
+ 72.29198067074785,
192
+ 72.7457856782137,
193
+ 73.02162793765372
194
+ ],
195
+ "val_acc": [
196
+ 68.056,
197
+ 69.624,
198
+ 70.142,
199
+ 70.622,
200
+ 70.966,
201
+ 71.462,
202
+ 71.766,
203
+ 71.776,
204
+ 71.914
205
+ ],
206
+ "scale_accs": {
207
+ "256": [
208
+ 66.924,
209
+ 68.828,
210
+ 69.54,
211
+ 69.996,
212
+ 70.468,
213
+ 71.106,
214
+ 71.436,
215
+ 71.486,
216
+ 71.568
217
+ ],
218
+ "512": [
219
+ 67.882,
220
+ 69.572,
221
+ 70.028,
222
+ 70.482,
223
+ 70.854,
224
+ 71.28,
225
+ 71.578,
226
+ 71.694,
227
+ 71.708
228
+ ]
229
+ },
230
+ "lr": [
231
+ 0.0009755527298894294,
232
+ 0.0009046039886902864,
233
+ 0.0007940987335200904,
234
+ 0.0006548539886902864,
235
+ 0.0005005000000000001,
236
+ 0.0003461460113097139,
237
+ 0.00020690126647990973,
238
+ 9.639601130971382e-05,
239
+ 2.5447270110570814e-05
240
+ ]
241
+ }
242
+ },
243
+ "train_config": {
244
+ "name": "david_training",
245
+ "run_id": "20251012_135249",
246
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
247
+ "model_variant": "clip_vit_laion_b32",
248
+ "num_classes": 1000,
249
+ "preset": "small_fast",
250
+ "custom_config_path": null,
251
+ "num_classes_override": null,
252
+ "use_belly_override": null,
253
+ "belly_expand_override": null,
254
+ "progressive_training_override": false,
255
+ "scale_warmup_epochs_override": null,
256
+ "num_epochs": 10,
257
+ "batch_size": 1024,
258
+ "learning_rate": 0.001,
259
+ "weight_decay": 1e-05,
260
+ "warmup_epochs": 3,
261
+ "use_rose_loss": true,
262
+ "rose_initial_weight": 0.1,
263
+ "rose_max_weight": 0.5,
264
+ "rose_weight_schedule": "adaptive",
265
+ "use_cayley_loss": false,
266
+ "cayley_weight": 0.001,
267
+ "scale_loss_balance": null,
268
+ "use_mixed_precision": false,
269
+ "gradient_clip": 10.0,
270
+ "scheduler_type": "cosine_restarts",
271
+ "min_lr": 1e-06,
272
+ "freeze_strategy": "never",
273
+ "freeze_threshold": 90.0,
274
+ "unfreeze_on_plateau": true,
275
+ "patience": 10,
276
+ "track_gradients": true,
277
+ "gradient_scale_threshold": 1e-05,
278
+ "gradient_scale_multiplier": 10.0,
279
+ "log_interval": 50,
280
+ "val_interval": 1,
281
+ "save_interval": 5,
282
+ "log_fusion_weights": true,
283
+ "log_loss_components": true,
284
+ "save_format": "safetensors",
285
+ "hf_repo": "AbstractPhil/gated-david",
286
+ "upload_to_hub": true,
287
+ "base_dir": "./david_training",
288
+ "num_workers": 10,
289
+ "pin_memory": true,
290
+ "prefetch_factor": 4,
291
+ "persistent_workers": true
292
+ }
293
+ }
weights/David-fully_shared-weighted_sum/20251012_135249/training_history.json CHANGED
@@ -7,7 +7,8 @@
7
  5,
8
  6,
9
  7,
10
- 8
 
11
  ],
12
  "train_loss": [
13
  5.603825211905824,
@@ -17,7 +18,8 @@
17
  3.45402966711087,
18
  3.3684399169854844,
19
  3.3046513376906277,
20
- 3.255301171407913
 
21
  ],
22
  "train_acc": [
23
  63.39829233815732,
@@ -27,7 +29,8 @@
27
  71.35642738222262,
28
  71.84090754757186,
29
  72.29198067074785,
30
- 72.7457856782137
 
31
  ],
32
  "val_acc": [
33
  68.056,
@@ -37,7 +40,8 @@
37
  70.966,
38
  71.462,
39
  71.766,
40
- 71.776
 
41
  ],
42
  "scale_accs": {
43
  "256": [
@@ -48,7 +52,8 @@
48
  70.468,
49
  71.106,
50
  71.436,
51
- 71.486
 
52
  ],
53
  "512": [
54
  67.882,
@@ -58,7 +63,8 @@
58
  70.854,
59
  71.28,
60
  71.578,
61
- 71.694
 
62
  ]
63
  },
64
  "lr": [
@@ -69,6 +75,7 @@
69
  0.0005005000000000001,
70
  0.0003461460113097139,
71
  0.00020690126647990973,
72
- 9.639601130971382e-05
 
73
  ]
74
  }
 
7
  5,
8
  6,
9
  7,
10
+ 8,
11
+ 9
12
  ],
13
  "train_loss": [
14
  5.603825211905824,
 
18
  3.45402966711087,
19
  3.3684399169854844,
20
  3.3046513376906277,
21
+ 3.255301171407913,
22
+ 3.219453362611155
23
  ],
24
  "train_acc": [
25
  63.39829233815732,
 
29
  71.35642738222262,
30
  71.84090754757186,
31
  72.29198067074785,
32
+ 72.7457856782137,
33
+ 73.02162793765372
34
  ],
35
  "val_acc": [
36
  68.056,
 
40
  70.966,
41
  71.462,
42
  71.766,
43
+ 71.776,
44
+ 71.914
45
  ],
46
  "scale_accs": {
47
  "256": [
 
52
  70.468,
53
  71.106,
54
  71.436,
55
+ 71.486,
56
+ 71.568
57
  ],
58
  "512": [
59
  67.882,
 
63
  70.854,
64
  71.28,
65
  71.578,
66
+ 71.694,
67
+ 71.708
68
  ]
69
  },
70
  "lr": [
 
75
  0.0005005000000000001,
76
  0.0003461460113097139,
77
  0.00020690126647990973,
78
+ 9.639601130971382e-05,
79
+ 2.5447270110570814e-05
80
  ]
81
  }