Upload weights and configs - David-fully_shared-weighted_sum - Run 20251012_132646
Browse files
weights/David-fully_shared-weighted_sum/20251012_132646/best_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2628344
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f49ab08aa1141f7d9ff7772393084c6250114e0069dd401acf0fa0423324beee
|
| 3 |
size 2628344
|
weights/David-fully_shared-weighted_sum/20251012_132646/best_model_metadata.json
CHANGED
|
@@ -1,46 +1,46 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
"optimizer_state_dict": {
|
| 4 |
"state": {
|
| 5 |
"0": {
|
| 6 |
-
"step": "tensor(
|
| 7 |
-
"exp_avg": "tensor([[
|
| 8 |
-
"exp_avg_sq": "tensor([[4.
|
| 9 |
},
|
| 10 |
"1": {
|
| 11 |
-
"step": "tensor(
|
| 12 |
-
"exp_avg": "tensor([
|
| 13 |
-
"exp_avg_sq": "tensor([0.
|
| 14 |
},
|
| 15 |
"2": {
|
| 16 |
-
"step": "tensor(
|
| 17 |
-
"exp_avg": "tensor([
|
| 18 |
-
"exp_avg_sq": "tensor([
|
| 19 |
},
|
| 20 |
"3": {
|
| 21 |
-
"step": "tensor(
|
| 22 |
-
"exp_avg": "tensor([
|
| 23 |
-
"exp_avg_sq": "tensor([
|
| 24 |
},
|
| 25 |
"4": {
|
| 26 |
-
"step": "tensor(
|
| 27 |
-
"exp_avg": "tensor([[-
|
| 28 |
-
"exp_avg_sq": "tensor([[
|
| 29 |
},
|
| 30 |
"5": {
|
| 31 |
-
"step": "tensor(
|
| 32 |
-
"exp_avg": "tensor([[
|
| 33 |
-
"exp_avg_sq": "tensor([[
|
| 34 |
},
|
| 35 |
"6": {
|
| 36 |
-
"step": "tensor(
|
| 37 |
-
"exp_avg": "tensor([ 0.
|
| 38 |
-
"exp_avg_sq": "tensor([
|
| 39 |
}
|
| 40 |
},
|
| 41 |
"param_groups": [
|
| 42 |
{
|
| 43 |
-
"lr": 0.
|
| 44 |
"name": "shared",
|
| 45 |
"betas": [
|
| 46 |
0.9,
|
|
@@ -64,7 +64,7 @@
|
|
| 64 |
]
|
| 65 |
},
|
| 66 |
{
|
| 67 |
-
"lr": 0.
|
| 68 |
"name": "scale_256",
|
| 69 |
"betas": [
|
| 70 |
0.9,
|
|
@@ -85,7 +85,7 @@
|
|
| 85 |
]
|
| 86 |
},
|
| 87 |
{
|
| 88 |
-
"lr": 0.
|
| 89 |
"name": "scale_512",
|
| 90 |
"betas": [
|
| 91 |
0.9,
|
|
@@ -106,7 +106,7 @@
|
|
| 106 |
]
|
| 107 |
},
|
| 108 |
{
|
| 109 |
-
"lr": 0.
|
| 110 |
"name": "fusion",
|
| 111 |
"betas": [
|
| 112 |
0.9,
|
|
@@ -133,30 +133,30 @@
|
|
| 133 |
"T_i": 10,
|
| 134 |
"T_mult": 2,
|
| 135 |
"eta_min": 1e-06,
|
| 136 |
-
"T_cur":
|
| 137 |
"base_lrs": [
|
| 138 |
0.001,
|
| 139 |
0.001,
|
| 140 |
0.001,
|
| 141 |
0.0005
|
| 142 |
],
|
| 143 |
-
"last_epoch":
|
| 144 |
"_step_count": 0,
|
| 145 |
"_is_initial": false,
|
| 146 |
"_get_lr_called_within_step": false,
|
| 147 |
"_last_lr": [
|
| 148 |
-
0.
|
| 149 |
-
0.
|
| 150 |
-
0.
|
| 151 |
-
0.
|
| 152 |
]
|
| 153 |
},
|
| 154 |
"metrics": {
|
| 155 |
-
"best_val_acc":
|
| 156 |
-
"best_epoch":
|
| 157 |
"scale_accuracies": {
|
| 158 |
-
"256": 70.
|
| 159 |
-
"512": 70.
|
| 160 |
}
|
| 161 |
},
|
| 162 |
"train_config": {
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 4,
|
| 3 |
"optimizer_state_dict": {
|
| 4 |
"state": {
|
| 5 |
"0": {
|
| 6 |
+
"step": "tensor(6260.)",
|
| 7 |
+
"exp_avg": "tensor([[ 3.1722e-04, 3.1541e-03, 1.0743e-03, ..., 2.9322e-04,\n 5.6603e-04, 5.1939e-04],\n [-5.8773e-04, 2.7085e-03, -8.4152e-05, ..., 3.6830e-04,\n -2.1589e-04, -6.0146e-04],\n [-1.8794e-04, 3.5767e-03, -3.1200e-03, ..., 5.7624e-04,\n 2.0440e-04, -8.9246e-04],\n ...,\n [-1.0678e-03, -1.1239e-03, 7.4526e-05, ..., -2.6899e-04,\n -1.9413e-04, -7.7857e-05],\n [-7.1220e-04, -4.6767e-03, 3.2651e-03, ..., -1.8281e-05,\n 2.7817e-05, 9.0213e-04],\n [ 3.8004e-04, -1.6752e-03, 9.4115e-04, ..., 1.0887e-04,\n -4.9217e-05, -1.1337e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[4.0182e-06, 2.6264e-05, 1.5469e-05, ..., 3.1997e-06, 2.6607e-06,\n 6.2000e-06],\n [1.1437e-05, 2.9362e-05, 1.8885e-05, ..., 3.8683e-06, 2.4802e-06,\n 5.4786e-06],\n [7.1391e-06, 4.6127e-05, 2.1583e-05, ..., 5.3447e-06, 2.9660e-06,\n 6.4597e-06],\n ...,\n [5.4480e-06, 4.6049e-05, 1.7139e-05, ..., 4.3088e-06, 2.8427e-06,\n 3.2124e-06],\n [4.5123e-06, 3.4993e-05, 1.5051e-05, ..., 3.0318e-06, 2.1692e-06,\n 2.5714e-06],\n [2.8421e-06, 1.2176e-05, 1.0831e-05, ..., 2.6425e-06, 1.6447e-06,\n 2.0784e-06]], device='cuda:0')"
|
| 9 |
},
|
| 10 |
"1": {
|
| 11 |
+
"step": "tensor(6260.)",
|
| 12 |
+
"exp_avg": "tensor([ 3.8287e-02, 7.0744e-03, 4.6278e-03, -4.6743e-02, 7.3604e-04,\n -4.8485e-03, -2.4281e-02, -1.1458e-02, 1.1361e-02, 3.7939e-02,\n 1.0578e-02, -1.1545e-02, -4.1391e-03, -2.1389e-03, -3.7640e-02,\n -5.0695e-02, -4.8609e-02, -4.2825e-02, 9.5611e-03, -2.7086e-03,\n -8.1645e-02, 5.0465e-02, -4.7577e-03, -1.6391e-02, -2.6154e-02,\n 1.4030e-02, -7.6941e-04, 1.6214e-02, -1.1239e-02, -1.0351e-02,\n -1.9110e-02, 2.6022e-02, -4.2750e-03, 2.5497e-02, 3.0964e-02,\n 2.4860e-02, 1.2926e-02, -1.4974e-02, 1.3156e-02, -1.0836e-02,\n 9.6476e-03, 1.7289e-02, -1.8028e-02, -1.1272e-02, -2.0952e-02,\n 3.3801e-02, 5.0331e-03, 7.3461e-03, 8.5710e-04, 3.6008e-03,\n -1.6823e-02, -2.1902e-02, 8.0397e-04, -3.6462e-03, 4.6077e-03,\n 1.8185e-02, -3.4063e-02, -7.9193e-03, 4.4017e-03, -1.3735e-02,\n 6.9905e-03, -4.8321e-03, 4.2993e-03, 3.5981e-03, 1.6529e-02,\n 1.3397e-02, 3.5450e-02, 7.1683e-03, -3.1353e-03, -4.6232e-02,\n -1.1754e-02, 5.9485e-03, 4.7543e-03, 3.1489e-02, -2.9745e-02,\n -1.2485e-03, 2.0774e-02, 1.4253e-02, 6.3192e-03, -5.0772e-03,\n 5.5329e-03, 1.6628e-02, -1.6319e-02, 1.3700e-02, 5.5130e-03,\n -6.1528e-02, -3.9390e-02, 1.3808e-02, -4.0851e-03, 1.7086e-03,\n 2.3253e-03, -3.1091e-03, -1.0949e-02, -2.6634e-02, 6.6016e-03,\n 7.0280e-03, -4.3269e-03, -2.1167e-02, 1.0241e-02, 2.7441e-02,\n 2.5566e-02, -2.9024e-02, -1.9951e-02, -2.3090e-02, 1.8327e-02,\n 3.2079e-03, -8.4656e-03, 4.0309e-02, -1.6775e-02, 4.1465e-03,\n -6.1990e-03, -3.5925e-02, 6.5328e-03, -1.0797e-01, -2.6290e-02,\n -7.1130e-03, -7.1726e-03, 1.9910e-02, 1.2287e-03, 2.6613e-02,\n -1.6972e-02, -1.6264e-02, 1.0138e-02, 1.7948e-02, -1.7570e-02,\n -1.3114e-02, 7.1512e-03, -1.2897e-02, -2.3700e-03, 1.0894e-03,\n 1.1488e-02, -2.1036e-03, -1.0666e-02, -1.3311e-02, -2.5209e-02,\n -3.0498e-03, 3.3425e-02, -2.6824e-02, 2.9714e-02, 1.5678e-02,\n -1.1071e-02, 3.0084e-02, 1.7867e-02, 1.1807e-02, 1.6388e-03,\n -1.3518e-03, -2.6451e-02, -2.0514e-02, -2.0630e-03, 1.3967e-02,\n -5.0451e-03, 1.2406e-03, -1.8563e-02, 6.1663e-03, -2.5807e-02,\n 3.8573e-02, 1.8341e-02, 1.1298e-02, -4.6710e-03, -1.6627e-02,\n -5.9884e-03, 8.0051e-03, 6.1325e-03, 1.3148e-02, -2.0617e-02,\n 9.3196e-03, 4.4715e-03, 7.1958e-03, 7.7074e-03, -1.4054e-03,\n 8.3743e-03, 9.6104e-03, -1.4407e-02, 1.0630e-02, -2.4000e-02,\n 2.1516e-02, -7.1061e-04, 7.3933e-03, -6.3228e-02, 1.6711e-02,\n -1.5630e-02, 3.3160e-03, -1.4658e-02, 2.6277e-02, 1.0896e-02,\n -1.9270e-02, 1.8549e-02, -3.3540e-02, 1.2401e-02, 3.9251e-04,\n 1.4241e-03, -4.4019e-02, -1.0013e-03, 1.9363e-02, 1.6239e-02,\n 4.6009e-02, 3.2435e-02, -5.5079e-02, -5.0769e-03, 2.3695e-02,\n -3.8212e-03, 1.7573e-02, 1.3671e-02, -1.7148e-02, 5.7055e-03,\n 4.1058e-03, -1.2033e-03, 1.5867e-02, 9.6781e-03, -1.0136e-02,\n 9.7165e-03, -9.9817e-03, 7.1484e-04, -7.5623e-03, 8.7494e-03,\n 7.6246e-04, -4.4888e-06, 6.2455e-03, -1.4076e-03, -5.2628e-02,\n -4.8459e-02, -2.8403e-02, -2.3883e-02, -5.8512e-03, -3.5528e-03,\n -1.2671e-02, -1.0262e-02, -1.0329e-02, 1.3604e-02, 1.4568e-02,\n 5.7667e-03, 3.5003e-04, 9.4511e-03, 1.2720e-02, -5.0806e-04,\n 1.1562e-02, -2.2015e-02, 2.9138e-03, -7.2606e-03, -3.1598e-02,\n -1.0548e-03, 3.8577e-02, -8.2498e-03, 1.7948e-03, 2.2289e-02,\n -7.2965e-03, 4.1943e-03, 4.6692e-03, 1.9245e-02, -3.3418e-03,\n 1.4388e-02, 9.6351e-03, 9.2271e-03, 7.5374e-03, -3.5625e-02,\n -7.3475e-03, -8.2203e-03, -5.2415e-04, -6.6258e-03, -3.3096e-03,\n 1.5189e-02, 2.4717e-03, -1.5238e-02, 9.3064e-03, -3.7153e-02,\n -2.4327e-02, 7.9849e-03, -1.0039e-02, 1.6063e-02, 8.3004e-03,\n 2.1386e-02, 4.9097e-02, 2.2039e-02, -9.0948e-02, 4.6477e-03,\n 1.7955e-02, 3.1564e-03, -3.8319e-03, 2.0519e-02, 6.1444e-03,\n -1.7738e-02, -1.9348e-02, 9.6401e-03, 1.6220e-03, -6.4844e-03,\n -5.4217e-03, 4.4311e-03, 6.6548e-03, -2.7304e-03, 2.4047e-02,\n -1.8420e-02, -9.1805e-03, 1.9927e-02, 2.5734e-02, 2.1368e-02,\n 1.7123e-02, -4.4288e-02, -6.7710e-02, 1.1993e-02, -6.4020e-03,\n -3.8266e-03, 2.1033e-02, 1.2275e-02, 1.7655e-02, 3.9573e-03,\n 2.0131e-02, 2.6364e-02, -3.3657e-02, 1.1919e-02, 3.7818e-02,\n 2.1630e-02, -2.2235e-03, 5.9623e-03, 1.2263e-02, 1.8866e-02,\n -1.9704e-02, -1.9742e-02, -4.4306e-02, 9.1872e-03, 1.7408e-02,\n 1.0881e-02, -9.1749e-03, 2.9778e-03, 4.5772e-02, 8.6524e-03,\n -3.0541e-02, 6.9361e-03, 2.0173e-02, 5.7835e-03, 8.9142e-02,\n 8.5473e-03, -2.3475e-02, 2.1740e-02, 1.1176e-02, 6.8983e-03,\n 2.0390e-02, -2.4504e-02, -5.1879e-03, -5.2856e-02, 3.7868e-02,\n -6.6295e-03, -1.3594e-02, -1.5865e-02, 3.5660e-02, -9.8688e-03,\n -2.4674e-03, -6.8197e-02, 4.4329e-02, -9.8988e-03, 7.1389e-05,\n 2.9316e-02, 1.9902e-02, -1.5678e-02, -1.9607e-02, 2.1865e-02,\n -6.8571e-02, -1.0837e-02, 9.3229e-03, 5.9236e-03, 1.7944e-03,\n -7.8694e-03, -2.5341e-02, 1.2404e-03, 2.6575e-02, -7.9784e-03,\n 1.9583e-02, 1.3037e-02, 1.1921e-02, -3.1066e-02, -2.9630e-02,\n -6.1752e-02, -4.5265e-02, 3.2678e-02, -1.1861e-02, 2.7437e-03,\n 1.8793e-02, 7.4700e-03, -6.1178e-03, 3.8862e-02, 1.1419e-02,\n 1.9983e-02, 1.0733e-02, 4.6946e-02, 1.7694e-02, 6.3264e-03,\n -2.6940e-03, 2.2536e-04, -1.6457e-02, -3.1861e-02, 5.1680e-03,\n -1.4922e-03, 2.8381e-02, 1.7341e-02, 1.5049e-02, 6.9486e-03,\n -1.0157e-02, 3.6269e-03, -4.1289e-02, 1.3396e-02, 6.3983e-03,\n -7.7330e-03, 2.2355e-02, 3.6277e-02, 5.9523e-02, -1.6178e-03,\n -3.3730e-02, -1.8006e-02, -9.1209e-04, 3.7044e-02, -1.3934e-02,\n -3.1090e-02, 6.5350e-03, -4.0766e-03, -1.3798e-03, 1.2118e-02,\n -2.9756e-02, -6.7338e-03, 2.0605e-02, -3.5522e-03, 3.0580e-02,\n -1.3392e-03, 2.2960e-03, -4.2043e-02, 5.0153e-03, 1.7484e-02,\n -1.1225e-02, 2.7259e-02, 2.2224e-02, -1.5206e-02, -3.6641e-02,\n -4.8472e-03, -1.2165e-03, -1.9333e-02, 1.3597e-02, -1.0023e-02,\n -6.6633e-03, 7.1999e-03, -7.4278e-03, 2.7650e-03, -1.1078e-02,\n -1.7142e-02, -1.8912e-03, 2.3660e-02, -1.3783e-02, -4.1150e-02,\n 2.6868e-02, -3.2397e-04, -1.4040e-02, 1.3942e-02, 3.8696e-02,\n -1.4583e-02, 6.8208e-03, 2.0938e-02, -3.6576e-03, -1.6154e-02,\n 5.1289e-02, -2.2931e-02, -1.3640e-02, 3.1942e-03, 2.2815e-02,\n -5.1017e-02, -1.0102e-02, 7.1091e-02, 7.6073e-03, 3.3452e-02,\n 1.1638e-02, -5.8458e-02, -7.3556e-03, 3.0164e-02, -3.6774e-04,\n -1.1932e-02, 1.7343e-02, 1.2772e-02, 7.6630e-03, 1.3012e-02,\n -1.7053e-02, 1.4923e-02, 6.2616e-03, -4.3743e-03, 1.8084e-02,\n 8.2520e-03, 1.1635e-02, -1.3706e-02, 4.9977e-03, 1.2277e-03,\n 5.4867e-03, 9.3740e-03, -1.5906e-02, 1.1762e-02, -1.5932e-03,\n 1.7397e-02, 3.2279e-02, 8.4486e-03, -6.0659e-04, -4.2045e-03,\n 2.1770e-02, 3.8335e-02, -4.4980e-02, 1.0806e-02, 3.0911e-02,\n -2.0104e-02, 2.2958e-02, -1.4797e-02, 5.2966e-03, 4.5325e-03,\n 5.5271e-03, 2.7181e-02, -7.6531e-03, 3.3271e-03, -2.7743e-02,\n -3.4526e-02, -7.6148e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0041, 0.0044, 0.0059, 0.0047, 0.0032, 0.0024, 0.0055, 0.0039, 0.0035,\n 0.0034, 0.0047, 0.0034, 0.0030, 0.0044, 0.0047, 0.0044, 0.0045, 0.0038,\n 0.0031, 0.0040, 0.0065, 0.0037, 0.0030, 0.0032, 0.0051, 0.0035, 0.0035,\n 0.0028, 0.0046, 0.0053, 0.0042, 0.0050, 0.0048, 0.0044, 0.0037, 0.0085,\n 0.0036, 0.0029, 0.0026, 0.0062, 0.0065, 0.0037, 0.0039, 0.0055, 0.0017,\n 0.0052, 0.0018, 0.0036, 0.0033, 0.0019, 0.0031, 0.0023, 0.0042, 0.0039,\n 0.0037, 0.0025, 0.0028, 0.0024, 0.0028, 0.0042, 0.0018, 0.0040, 0.0043,\n 0.0048, 0.0039, 0.0056, 0.0034, 0.0026, 0.0065, 0.0056, 0.0031, 0.0086,\n 0.0026, 0.0032, 0.0034, 0.0041, 0.0031, 0.0017, 0.0026, 0.0050, 0.0032,\n 0.0034, 0.0033, 0.0032, 0.0006, 0.0048, 0.0046, 0.0057, 0.0027, 0.0034,\n 0.0062, 0.0030, 0.0059, 0.0030, 0.0048, 0.0037, 0.0042, 0.0037, 0.0035,\n 0.0050, 0.0032, 0.0033, 0.0048, 0.0043, 0.0025, 0.0050, 0.0025, 0.0044,\n 0.0046, 0.0034, 0.0044, 0.0042, 0.0027, 0.0043, 0.0025, 0.0028, 0.0058,\n 0.0023, 0.0020, 0.0047, 0.0033, 0.0033, 0.0017, 0.0028, 0.0038, 0.0040,\n 0.0061, 0.0035, 0.0040, 0.0037, 0.0050, 0.0015, 0.0024, 0.0035, 0.0023,\n 0.0048, 0.0048, 0.0017, 0.0064, 0.0037, 0.0021, 0.0046, 0.0034, 0.0027,\n 0.0014, 0.0058, 0.0118, 0.0074, 0.0039, 0.0059, 0.0045, 0.0041, 0.0041,\n 0.0035, 0.0042, 0.0054, 0.0028, 0.0035, 0.0017, 0.0031, 0.0045, 0.0062,\n 0.0029, 0.0044, 0.0057, 0.0092, 0.0036, 0.0028, 0.0035, 0.0053, 0.0020,\n 0.0030, 0.0036, 0.0026, 0.0044, 0.0044, 0.0048, 0.0033, 0.0064, 0.0043,\n 0.0059, 0.0038, 0.0070, 0.0035, 0.0046, 0.0054, 0.0042, 0.0036, 0.0031,\n 0.0043, 0.0049, 0.0036, 0.0031, 0.0035, 0.0045, 0.0062, 0.0056, 0.0051,\n 0.0025, 0.0044, 0.0041, 0.0034, 0.0043, 0.0054, 0.0032, 0.0042, 0.0025,\n 0.0043, 0.0032, 0.0085, 0.0036, 0.0031, 0.0061, 0.0037, 0.0072, 0.0036,\n 0.0025, 0.0031, 0.0036, 0.0063, 0.0040, 0.0046, 0.0035, 0.0024, 0.0044,\n 0.0038, 0.0036, 0.0051, 0.0048, 0.0035, 0.0024, 0.0046, 0.0042, 0.0061,\n 0.0040, 0.0034, 0.0031, 0.0057, 0.0045, 0.0056, 0.0023, 0.0042, 0.0040,\n 0.0048, 0.0079, 0.0078, 0.0016, 0.0035, 0.0061, 0.0023, 0.0048, 0.0036,\n 0.0035, 0.0007, 0.0017, 0.0038, 0.0038, 0.0038, 0.0044, 0.0034, 0.0030,\n 0.0036, 0.0054, 0.0043, 0.0034, 0.0047, 0.0049, 0.0038, 0.0021, 0.0036,\n 0.0071, 0.0045, 0.0036, 0.0065, 0.0052, 0.0038, 0.0033, 0.0048, 0.0040,\n 0.0042, 0.0035, 0.0057, 0.0046, 0.0041, 0.0040, 0.0021, 0.0062, 0.0028,\n 0.0034, 0.0056, 0.0051, 0.0040, 0.0021, 0.0051, 0.0038, 0.0072, 0.0047,\n 0.0040, 0.0038, 0.0041, 0.0042, 0.0034, 0.0055, 0.0048, 0.0035, 0.0036,\n 0.0040, 0.0048, 0.0039, 0.0052, 0.0057, 0.0050, 0.0028, 0.0043, 0.0059,\n 0.0026, 0.0036, 0.0039, 0.0033, 0.0022, 0.0044, 0.0021, 0.0032, 0.0053,\n 0.0041, 0.0057, 0.0045, 0.0049, 0.0042, 0.0050, 0.0032, 0.0022, 0.0020,\n 0.0045, 0.0048, 0.0030, 0.0052, 0.0052, 0.0053, 0.0039, 0.0056, 0.0028,\n 0.0038, 0.0032, 0.0024, 0.0041, 0.0051, 0.0040, 0.0027, 0.0040, 0.0022,\n 0.0045, 0.0034, 0.0046, 0.0044, 0.0078, 0.0039, 0.0039, 0.0029, 0.0037,\n 0.0036, 0.0053, 0.0038, 0.0023, 0.0055, 0.0059, 0.0044, 0.0030, 0.0028,\n 0.0055, 0.0055, 0.0050, 0.0023, 0.0028, 0.0040, 0.0039, 0.0033, 0.0043,\n 0.0048, 0.0047, 0.0061, 0.0035, 0.0071, 0.0039, 0.0030, 0.0052, 0.0047,\n 0.0027, 0.0063, 0.0058, 0.0046, 0.0042, 0.0026, 0.0058, 0.0042, 0.0057,\n 0.0041, 0.0060, 0.0038, 0.0036, 0.0020, 0.0030, 0.0033, 0.0059, 0.0021,\n 0.0060, 0.0059, 0.0037, 0.0040, 0.0027, 0.0032, 0.0023, 0.0016, 0.0045,\n 0.0046, 0.0043, 0.0025, 0.0046, 0.0031, 0.0089, 0.0026, 0.0061, 0.0089,\n 0.0021, 0.0050, 0.0048, 0.0041, 0.0050, 0.0039, 0.0033, 0.0034, 0.0043,\n 0.0055, 0.0034, 0.0029, 0.0041, 0.0029, 0.0034, 0.0024, 0.0035, 0.0022,\n 0.0069, 0.0053, 0.0035, 0.0022, 0.0044, 0.0044, 0.0035, 0.0069, 0.0060,\n 0.0030, 0.0034, 0.0033, 0.0043, 0.0039, 0.0043, 0.0036, 0.0018, 0.0020,\n 0.0094, 0.0043, 0.0032, 0.0061, 0.0044, 0.0047, 0.0055, 0.0064, 0.0044,\n 0.0032, 0.0040, 0.0067, 0.0034, 0.0041, 0.0028, 0.0047, 0.0025, 0.0025,\n 0.0056, 0.0034, 0.0045, 0.0066, 0.0036, 0.0048, 0.0020, 0.0033, 0.0032,\n 0.0031, 0.0063, 0.0033, 0.0050, 0.0030, 0.0032, 0.0046, 0.0030, 0.0096,\n 0.0054, 0.0038, 0.0050, 0.0030, 0.0065, 0.0037, 0.0047, 0.0030, 0.0024,\n 0.0045, 0.0024, 0.0055, 0.0032, 0.0096, 0.0043, 0.0036, 0.0028],\n device='cuda:0')"
|
| 14 |
},
|
| 15 |
"2": {
|
| 16 |
+
"step": "tensor(6260.)",
|
| 17 |
+
"exp_avg": "tensor([ 4.8867e-03, 5.0872e-04, -1.1179e-03, -6.3669e-03, 9.7841e-04,\n -1.2620e-03, -3.2520e-03, -1.2085e-03, 2.0005e-03, 7.2467e-03,\n 2.6847e-03, -5.1920e-03, -3.2815e-04, 1.1350e-03, -5.7392e-03,\n -1.1850e-02, -4.9809e-03, -1.4367e-02, 8.5089e-04, -2.7555e-04,\n -9.9056e-03, 1.3274e-02, -1.8857e-04, -3.3881e-03, -1.8454e-03,\n 2.0691e-03, -9.8235e-04, 2.7665e-03, -1.0355e-03, -1.1666e-03,\n -2.3049e-03, 2.0035e-03, 1.5425e-03, 4.4901e-03, 7.7570e-03,\n 3.9652e-03, 3.4477e-03, -5.4886e-03, 3.4431e-03, -2.7509e-03,\n 2.8473e-04, 3.2787e-03, -4.1174e-03, -2.0651e-03, -6.0493e-03,\n 4.7881e-03, 2.2355e-03, 4.3603e-04, -6.1181e-04, 2.7814e-03,\n -2.4060e-03, -4.4140e-03, 4.5694e-05, 2.6914e-05, 1.5530e-03,\n 3.7221e-03, -6.0255e-03, -1.6161e-03, 3.3332e-04, -3.9637e-03,\n 1.0653e-03, -1.6633e-03, 7.4323e-04, -1.7207e-04, 3.7102e-03,\n 1.6462e-03, 8.2696e-03, 4.0024e-03, -1.3782e-03, -8.3224e-03,\n -2.1287e-03, 1.0655e-03, 1.1073e-03, 5.8886e-03, -6.9749e-03,\n -6.9178e-04, 2.2777e-03, 5.3585e-03, 1.3174e-03, -7.5804e-04,\n 1.6006e-03, 3.1378e-03, -4.8349e-03, 2.9375e-03, 6.2926e-02,\n -4.9597e-03, -7.9912e-03, 1.1522e-03, -1.7965e-03, 9.0977e-04,\n 9.3490e-04, -1.8803e-04, -1.6911e-03, -9.3432e-03, 1.9296e-04,\n 1.7120e-04, -6.0348e-04, -3.6702e-03, 4.4878e-03, 4.8650e-03,\n 5.2098e-03, -5.3342e-03, -4.6895e-03, -2.5844e-03, 6.8568e-03,\n 9.2179e-04, -5.0121e-03, 6.6449e-03, 2.3190e-04, 1.7712e-04,\n -1.8432e-03, -4.7253e-03, 1.7649e-03, -1.5790e-02, -6.2896e-03,\n -2.5629e-03, -1.3048e-03, 7.2159e-03, -1.1316e-03, 4.7814e-03,\n -2.5444e-03, -4.2504e-04, 5.8342e-03, 4.1806e-03, -1.3246e-03,\n -2.0185e-03, 1.4937e-04, -2.3415e-03, -1.0825e-03, 2.0923e-04,\n 1.5273e-03, -2.0230e-03, -3.8851e-03, -1.3077e-03, -7.4009e-03,\n -2.7793e-03, 3.3375e-03, -1.7036e-02, 3.7110e-03, 5.4149e-03,\n -2.8434e-03, 3.0604e-03, 4.0370e-03, 2.6169e-03, -2.2187e-03,\n 9.4422e-04, -2.0355e-03, -2.6657e-03, -4.1957e-04, 3.0747e-03,\n -1.3988e-03, -8.0024e-04, -4.4096e-03, 1.7475e-03, -4.8512e-03,\n 5.2750e-03, 5.6642e-03, 2.7110e-03, -3.2228e-03, -1.6431e-03,\n -6.8941e-04, 6.7973e-04, 2.6017e-03, 2.4595e-03, -1.9450e-03,\n 1.5458e-03, 2.2050e-03, 2.8669e-03, 1.8986e-03, -1.7492e-04,\n 1.8372e-03, 1.7970e-03, -3.0099e-03, 3.5868e-03, -2.7756e-03,\n 3.0318e-03, -1.4662e-03, 3.2855e-04, -8.3113e-03, 2.4346e-03,\n -3.9485e-03, -6.1455e-04, -2.1049e-03, 5.2822e-03, 1.2449e-03,\n -3.5912e-03, 3.5329e-03, -6.7198e-03, 2.7082e-03, -8.1415e-04,\n 8.8920e-04, -6.0166e-03, -6.5106e-04, 2.8041e-03, 1.4105e-03,\n 5.0365e-03, 2.3220e-03, -6.4239e-03, -4.0117e-03, 4.3123e-03,\n -6.8566e-04, 4.3732e-03, 1.9032e-03, -2.4268e-03, -7.1627e-04,\n 1.1084e-04, -1.4613e-03, 2.1403e-03, 2.0373e-03, 1.8801e-03,\n 5.1170e-04, -3.3361e-03, -3.7526e-04, -8.6339e-04, 2.4656e-03,\n -9.6281e-04, -8.2122e-04, 4.2926e-04, -1.4004e-03, -5.5838e-03,\n -1.4807e-02, -4.8923e-03, -4.3213e-03, -2.1803e-03, -5.3398e-04,\n -1.1681e-03, -9.1127e-04, -7.4865e-04, 3.6239e-03, 1.0889e-03,\n 6.5428e-04, -1.0703e-05, 1.1373e-03, 2.1438e-03, -4.1434e-04,\n 4.0449e-03, -4.2720e-03, 1.3878e-04, -1.2676e-03, -3.5601e-03,\n -8.9969e-04, 1.0198e-02, -3.6208e-03, -3.7273e-04, 3.9022e-03,\n -7.5235e-04, 2.4885e-03, -4.0734e-04, 2.4597e-03, -1.4699e-03,\n 1.2869e-03, 2.8020e-03, 1.1916e-03, 2.5705e-02, -1.3206e-02,\n -1.0751e-03, -1.1769e-03, -3.9161e-05, -6.2010e-04, 7.6244e-04,\n 4.3166e-03, 4.7065e-04, -9.5820e-04, 6.7457e-04, -7.9814e-03,\n -2.7726e-03, 1.0704e-03, -1.3961e-03, 6.9104e-03, 1.3303e-03,\n 1.9127e-03, 1.6537e-02, 4.0638e-03, -9.1654e-03, 8.8158e-04,\n 2.9526e-03, 1.0356e-03, -9.7502e-04, 3.0738e-03, 6.4695e-04,\n -3.1488e-03, -1.1554e-03, 1.1232e-03, -2.5902e-04, -1.0267e-03,\n -6.4079e-05, 1.6548e-04, 1.4237e-03, 1.6725e-04, 2.1010e-03,\n -2.1379e-03, -2.0094e-03, 7.9690e-03, 3.4915e-03, 3.3876e-03,\n 2.7499e-03, -5.7693e-03, -9.4365e-03, 2.5063e-03, -1.8646e-03,\n -2.3048e-04, 2.7507e-03, 6.8362e-04, 1.8233e-03, 2.1790e-03,\n 2.2745e-03, 3.6279e-03, -5.3239e-03, 1.2008e-03, 6.9363e-03,\n 2.5056e-03, 4.4390e-04, 1.2272e-03, 1.4655e-03, 3.6269e-03,\n -5.4146e-03, -4.0763e-03, -1.1614e-02, 1.3809e-03, 2.8938e-03,\n 2.0831e-03, -4.8253e-03, 4.0983e-04, 6.7578e-03, 2.1646e-03,\n -3.3930e-03, 1.4760e-03, 4.3051e-03, 7.7662e-04, 1.8646e-02,\n 2.5540e-03, -6.7559e-03, 1.2408e-02, 6.3797e-04, 1.7023e-03,\n 5.9787e-03, -5.2527e-03, -2.3116e-03, -9.1865e-03, 8.9471e-03,\n -1.5752e-03, -4.1685e-03, -1.6283e-03, 7.2006e-03, -3.2105e-03,\n -2.5230e-05, -1.2684e-02, 8.4796e-03, -1.7509e-03, 2.7738e-04,\n 7.3405e-03, 3.4225e-03, -3.3818e-03, -2.9039e-03, 3.8495e-03,\n -8.4090e-03, -2.6074e-03, 1.4745e-03, 1.7347e-03, 3.0088e-04,\n -1.1235e-03, -3.7340e-03, 5.8534e-04, 6.6134e-03, -2.5978e-03,\n 2.8600e-03, 1.2610e-03, 4.3294e-03, -8.8621e-03, -4.9124e-03,\n -7.7673e-03, -8.3438e-03, 8.2868e-03, -2.6217e-03, 1.2684e-03,\n 3.7026e-03, 8.7610e-04, -1.1838e-03, 6.6596e-03, 1.8273e-03,\n 1.5483e-03, 3.0108e-03, 4.9411e-03, 4.1947e-03, -7.1165e-07,\n 2.1385e-03, 2.2143e-03, -2.7983e-03, -4.6172e-03, 1.8923e-03,\n -1.9843e-03, 4.7586e-03, 5.1999e-03, 2.8251e-03, 9.4055e-04,\n -5.4168e-03, 2.7846e-04, -8.8146e-03, 1.1342e-03, 1.0348e-03,\n -3.1423e-03, 3.5030e-03, 1.2380e-02, 7.7151e-03, 3.0760e-04,\n -4.4095e-03, -1.9055e-03, 3.2680e-04, 5.1586e-03, -8.6181e-04,\n -5.4959e-03, 1.7943e-03, -3.6391e-03, 4.1673e-04, 1.0527e-03,\n -5.1937e-03, -1.6752e-03, 3.2007e-03, -1.0697e-03, 3.3488e-03,\n -3.0408e-04, -1.2251e-03, -3.2794e-03, 2.7430e-03, 1.9908e-03,\n -2.0770e-03, 6.8514e-03, 2.3879e-03, -5.0781e-03, -8.4566e-03,\n -2.0373e-03, -1.2098e-03, -2.9571e-03, 2.1479e-03, -1.6534e-03,\n -2.3728e-03, 1.4389e-03, -1.0436e-03, 1.5819e-03, -2.5597e-03,\n -7.3902e-03, -1.3219e-03, 2.7013e-03, -2.2526e-03, -1.6069e-02,\n 3.7173e-03, 4.2608e-04, -1.4553e-03, -1.4548e-04, 4.7333e-03,\n -2.7314e-03, 5.2138e-04, 3.6016e-03, -9.5947e-04, -3.3567e-03,\n 1.2448e-02, -4.6116e-03, -1.0872e-02, 1.5687e-03, 1.5182e-03,\n -7.9019e-03, -2.3635e-03, 9.1619e-03, 2.2213e-03, 4.6617e-03,\n 1.4724e-03, -6.8670e-03, -2.3906e-03, 7.0449e-03, 5.4083e-04,\n -2.2398e-03, 2.9774e-03, 1.2333e-03, 1.0830e-03, 1.1886e-03,\n -4.8377e-03, 2.8849e-03, 3.6970e-04, -4.5830e-04, 2.1080e-03,\n 2.2285e-03, 1.5534e-03, -2.0126e-03, 2.5872e-03, -2.9912e-04,\n 1.3546e-03, 7.1378e-04, -9.3051e-04, 1.3557e-03, -8.4275e-04,\n 4.4472e-03, 7.5092e-03, 8.7805e-04, -1.0514e-03, -9.5651e-04,\n 3.0572e-03, 8.4259e-03, -5.1380e-03, 3.4458e-03, 5.0327e-03,\n -4.3082e-03, 5.9500e-03, -3.2243e-03, 1.8296e-03, 1.3737e-03,\n -3.4129e-05, 2.1228e-03, -1.7475e-03, 5.3503e-04, -4.3082e-03,\n -1.1050e-02, -2.4932e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([1.0792e-04, 9.0773e-05, 7.7478e-05, 7.0284e-05, 2.0606e-04, 1.7382e-04,\n 6.7362e-05, 6.2944e-05, 8.5263e-05, 1.1758e-04, 1.0870e-04, 3.3140e-04,\n 7.9464e-05, 1.1656e-04, 1.1921e-04, 1.0743e-04, 5.9468e-05, 3.4468e-04,\n 1.9544e-04, 1.3167e-04, 9.6660e-05, 2.0093e-04, 2.2937e-04, 1.5615e-04,\n 5.5964e-05, 9.0342e-05, 2.6454e-04, 8.3666e-05, 5.9381e-05, 1.0581e-04,\n 8.1942e-05, 4.5587e-05, 1.5460e-04, 5.1406e-05, 1.0676e-04, 1.0959e-04,\n 1.0808e-04, 1.6558e-04, 1.6960e-04, 1.3375e-04, 1.7639e-04, 1.3367e-04,\n 1.3215e-04, 9.2916e-05, 1.3905e-04, 1.1919e-04, 3.9185e-04, 1.3130e-04,\n 1.8960e-04, 3.6746e-04, 1.4445e-04, 1.1928e-04, 9.7207e-05, 8.1171e-05,\n 1.3278e-04, 9.8138e-05, 1.3436e-04, 2.1676e-04, 1.0071e-04, 1.6460e-04,\n 2.3541e-04, 1.8001e-04, 1.1689e-04, 7.1375e-05, 1.6981e-04, 6.3839e-05,\n 1.4972e-04, 2.0867e-04, 7.1932e-05, 1.1391e-04, 9.8710e-05, 1.2239e-04,\n 8.3071e-05, 7.8341e-05, 1.3802e-04, 1.2147e-04, 6.8727e-05, 5.0341e-04,\n 2.2837e-04, 5.9238e-05, 2.6835e-04, 9.8945e-05, 1.0126e-04, 1.5626e-04,\n 1.8265e-01, 8.5799e-05, 1.1625e-04, 5.2007e-05, 1.3491e-04, 1.0109e-04,\n 1.7189e-04, 1.2908e-04, 4.8529e-05, 2.6843e-04, 6.5262e-05, 9.0394e-05,\n 1.2401e-04, 8.7091e-05, 1.4041e-04, 1.5788e-04, 9.9385e-05, 7.8370e-05,\n 2.1022e-04, 8.3179e-05, 4.9730e-04, 6.7922e-05, 2.7224e-04, 1.0873e-04,\n 9.6273e-05, 1.1185e-04, 1.7513e-04, 9.9811e-05, 9.6998e-05, 1.2109e-04,\n 1.9311e-04, 1.4158e-04, 1.6021e-04, 3.6983e-04, 3.4689e-04, 1.3618e-04,\n 1.2525e-04, 5.4673e-05, 6.8177e-04, 1.0754e-04, 4.9694e-05, 6.9982e-05,\n 7.4220e-05, 1.0672e-04, 4.3816e-05, 8.4941e-05, 1.2389e-04, 6.8842e-04,\n 1.9690e-04, 8.5505e-05, 1.0317e-04, 6.8448e-05, 7.2588e-05, 5.1974e-04,\n 7.0582e-05, 1.9039e-04, 2.0312e-04, 5.6723e-05, 1.1405e-04, 2.2860e-04,\n 3.3016e-04, 9.3178e-05, 1.3381e-04, 1.1165e-04, 6.3504e-05, 6.4653e-05,\n 1.3613e-04, 1.0739e-04, 1.2789e-04, 1.0706e-04, 6.5977e-05, 9.2151e-05,\n 3.6195e-04, 1.5915e-04, 2.3896e-04, 1.0791e-04, 8.5331e-05, 1.2916e-04,\n 3.5238e-04, 9.6609e-05, 5.9611e-05, 1.2944e-04, 2.4423e-04, 2.4205e-04,\n 9.3840e-05, 9.7066e-05, 2.6742e-04, 1.2821e-04, 1.2967e-04, 2.3409e-04,\n 8.8927e-05, 8.8945e-05, 9.6447e-05, 8.9413e-05, 7.0898e-05, 1.0188e-04,\n 1.0641e-04, 1.6001e-04, 6.9191e-05, 5.6665e-05, 6.5863e-05, 9.8997e-05,\n 1.1538e-04, 1.2909e-04, 1.1369e-04, 1.3225e-04, 3.1254e-04, 2.4712e-04,\n 7.8887e-05, 1.2204e-04, 4.8840e-05, 7.4660e-05, 5.5167e-05, 8.8031e-05,\n 2.9919e-04, 1.4351e-04, 4.0146e-05, 1.1328e-04, 1.0609e-04, 7.3102e-05,\n 9.4959e-05, 7.3513e-05, 2.2534e-04, 8.9023e-05, 2.5034e-04, 2.1784e-04,\n 1.4688e-04, 1.6458e-04, 1.1312e-04, 5.7794e-05, 1.5459e-04, 1.7661e-04,\n 2.9837e-04, 7.8849e-05, 1.1800e-04, 1.0241e-04, 1.8850e-04, 8.4100e-05,\n 1.0510e-04, 1.9210e-04, 7.5606e-05, 1.2741e-04, 1.4106e-04, 6.0376e-05,\n 1.3675e-04, 1.2316e-04, 2.8231e-04, 1.2109e-04, 7.8818e-05, 8.5911e-05,\n 5.2847e-05, 2.3495e-04, 1.4444e-04, 8.4533e-05, 7.2124e-05, 6.2424e-05,\n 2.7410e-04, 1.6171e-04, 1.3607e-04, 6.0380e-05, 9.5745e-05, 1.0252e-04,\n 5.4768e-04, 9.2122e-05, 8.5727e-05, 2.5686e-04, 8.5908e-05, 1.3866e-04,\n 3.3664e-04, 9.0643e-03, 3.7432e-04, 1.0873e-04, 1.5788e-04, 1.5483e-04,\n 7.8344e-05, 1.7510e-04, 1.2820e-04, 1.6233e-04, 8.7034e-05, 9.2719e-05,\n 2.1298e-04, 8.9637e-05, 1.2885e-04, 6.3668e-05, 3.3003e-04, 7.5529e-05,\n 9.9450e-05, 2.9308e-04, 8.7183e-05, 9.3543e-05, 1.2998e-04, 7.0202e-05,\n 2.5591e-04, 6.1789e-05, 5.8334e-05, 8.0637e-05, 1.1825e-04, 5.0052e-05,\n 5.6604e-05, 1.5276e-04, 1.1111e-04, 2.5987e-04, 8.1534e-05, 1.3289e-04,\n 1.0164e-04, 1.5643e-04, 8.7080e-05, 1.1466e-04, 2.4778e-04, 1.3974e-04,\n 9.6759e-05, 7.5543e-05, 1.2742e-04, 8.6746e-05, 8.9915e-05, 6.0370e-05,\n 6.8627e-05, 8.5938e-05, 9.4896e-05, 9.2362e-05, 1.0326e-04, 6.8984e-05,\n 8.0581e-05, 1.5293e-04, 7.3110e-05, 1.0483e-04, 1.0117e-04, 7.6617e-05,\n 1.1731e-04, 6.8658e-05, 1.0037e-04, 1.7185e-04, 2.2787e-04, 1.4640e-04,\n 5.8551e-05, 2.0328e-04, 1.0779e-04, 3.7033e-04, 1.0311e-04, 1.3255e-04,\n 2.1325e-04, 8.0628e-05, 1.4747e-04, 8.4007e-05, 2.0089e-04, 1.7388e-04,\n 2.5681e-04, 1.7806e-04, 4.8905e-04, 6.8115e-05, 1.3929e-04, 1.8226e-04,\n 1.3247e-04, 2.0558e-04, 1.3466e-04, 1.3887e-04, 8.4411e-05, 2.1834e-04,\n 5.2822e-05, 1.0176e-04, 2.1825e-04, 8.3836e-05, 1.2652e-04, 1.1945e-04,\n 1.7376e-04, 1.1973e-04, 1.4859e-04, 1.3853e-04, 1.5667e-04, 8.0877e-05,\n 1.3025e-04, 1.2427e-04, 1.3072e-04, 7.1213e-05, 1.0712e-04, 6.1010e-05,\n 7.9518e-05, 8.2784e-05, 6.9502e-05, 2.1214e-04, 8.8811e-05, 2.2914e-04,\n 7.0151e-05, 3.3518e-04, 1.8356e-04, 7.0150e-05, 1.4000e-04, 1.7000e-04,\n 1.7629e-04, 1.1690e-04, 1.4700e-04, 1.5039e-04, 7.0644e-05, 5.3187e-05,\n 1.4254e-04, 8.3098e-05, 7.5045e-05, 2.0402e-04, 9.2433e-05, 1.7278e-04,\n 1.7165e-04, 2.9715e-04, 8.9655e-05, 1.2465e-04, 1.3469e-04, 1.8962e-04,\n 7.9172e-05, 1.4432e-04, 2.8665e-04, 1.3863e-04, 5.6675e-05, 2.0410e-04,\n 7.4273e-05, 2.3233e-04, 7.1642e-05, 2.3668e-04, 2.4007e-04, 1.1151e-04,\n 3.5575e-04, 1.0606e-04, 2.8336e-04, 8.7962e-05, 9.4872e-05, 6.6499e-05,\n 1.0199e-04, 1.8137e-04, 1.4876e-04, 1.9311e-04, 6.7667e-04, 7.7809e-05,\n 6.9968e-05, 6.8193e-05, 9.7806e-05, 1.1829e-04, 2.5237e-04, 7.0415e-05,\n 9.4459e-05, 1.0944e-04, 6.0876e-05, 4.0843e-04, 1.1683e-04, 1.2658e-04,\n 2.5623e-04, 7.9128e-05, 2.8916e-04, 1.5294e-04, 1.7881e-04, 8.6799e-05,\n 1.1889e-04, 1.0089e-04, 1.3520e-04, 1.0283e-04, 1.8143e-04, 4.6377e-05,\n 3.2253e-04, 8.1746e-05, 2.1466e-04, 6.3853e-05, 3.9842e-05, 5.6384e-05,\n 4.0739e-04, 1.0075e-04, 8.7633e-05, 4.4923e-05, 1.2219e-04, 7.2685e-05,\n 1.2746e-04, 1.1325e-04, 6.3969e-05, 1.2327e-04, 6.8187e-05, 2.0386e-04,\n 9.8931e-05, 8.4716e-04, 3.6577e-04, 9.6047e-05, 8.4695e-05, 8.7840e-05,\n 6.8798e-05, 9.6087e-05, 1.2179e-04, 3.9986e-05, 7.7385e-05, 2.3668e-04,\n 1.3526e-04, 6.4968e-05, 1.3383e-04, 6.6248e-05, 5.7501e-05, 2.1494e-04,\n 6.7186e-05, 1.5650e-04, 1.7472e-04, 8.8961e-05, 1.7171e-04, 7.7478e-05,\n 9.0730e-05, 1.3632e-04, 1.0665e-04, 5.3253e-04, 2.5189e-04, 2.0321e-04,\n 9.3765e-05, 1.1508e-04, 6.5757e-05, 7.6875e-05, 2.6611e-04, 1.7254e-04,\n 1.0786e-04, 2.1497e-04, 1.2025e-04, 9.7063e-05, 2.2226e-04, 6.5228e-05,\n 2.1468e-04, 1.1085e-04, 1.6930e-04, 7.4267e-05, 1.1345e-04, 2.0424e-04,\n 1.3223e-04, 2.2672e-04, 1.0070e-04, 9.4908e-05, 1.0856e-04, 1.1736e-04,\n 3.6340e-04, 2.4406e-04], device='cuda:0')"
|
| 19 |
},
|
| 20 |
"3": {
|
| 21 |
+
"step": "tensor(6260.)",
|
| 22 |
+
"exp_avg": "tensor([ 6.5936e-03, 1.1896e-03, -5.0563e-04, -7.4915e-03, 2.6546e-04,\n -9.9585e-04, -4.1941e-03, -2.5255e-03, 1.8572e-03, 6.9235e-03,\n 2.2968e-03, -3.4442e-03, -7.3726e-04, 1.5316e-04, -6.0679e-03,\n -1.0157e-02, -8.2887e-03, -9.3257e-03, 1.5103e-03, -3.0279e-04,\n -1.6971e-02, 1.1692e-02, -1.7167e-03, -2.8821e-03, -3.7607e-03,\n 2.4524e-03, 4.2655e-05, 3.0777e-03, -1.8064e-03, -1.3582e-03,\n -3.6452e-03, 4.1541e-03, 1.5816e-03, 3.5059e-03, 5.3233e-03,\n 4.6033e-03, 3.1643e-03, -4.6820e-03, 3.5589e-03, -2.0642e-03,\n 1.3118e-03, 3.3629e-03, -3.5940e-03, -2.6429e-03, -3.9873e-03,\n 5.9555e-03, 2.2178e-03, 1.7363e-03, 1.9350e-04, 9.2571e-04,\n -3.7084e-03, -4.6472e-03, 9.3669e-04, -4.6792e-04, 1.2628e-03,\n 3.5393e-03, -7.0226e-03, -1.1334e-03, 7.7335e-04, -2.9711e-03,\n 1.4594e-03, -1.3083e-03, 1.6890e-04, 6.4026e-04, 3.2287e-03,\n 2.3185e-03, 8.2296e-03, 2.6423e-03, -1.1963e-03, -9.0908e-03,\n -2.8086e-03, 1.2918e-03, 8.0634e-04, 6.5974e-03, -6.5277e-03,\n -6.5682e-04, 3.2977e-03, 4.3411e-03, 4.1987e-04, -9.1263e-04,\n 1.2841e-03, 3.8271e-03, -3.1331e-03, 2.6616e-03, 3.5247e-03,\n -7.5258e-03, -8.3000e-03, 2.0191e-03, -1.0541e-03, -1.1944e-04,\n 1.0101e-03, -1.1438e-03, -1.5252e-03, -7.4187e-03, 7.1159e-04,\n 1.9368e-03, -1.0312e-03, -4.2518e-03, 2.1283e-03, 6.2732e-03,\n 4.8872e-03, -5.8156e-03, -4.0569e-03, -3.3439e-03, 4.2690e-03,\n 7.4194e-04, -2.6519e-03, 7.9385e-03, -2.2503e-03, 6.3317e-04,\n -1.6069e-03, -7.8601e-03, 1.7050e-03, -1.7223e-02, -6.0011e-03,\n -1.2895e-03, -1.7636e-03, 5.2802e-03, -3.5089e-04, 5.3815e-03,\n -3.6036e-03, -3.2066e-03, 3.3231e-03, 3.7462e-03, -1.8971e-03,\n -2.1568e-03, 1.8754e-04, -2.3440e-03, -5.3024e-04, 5.5007e-04,\n 2.3241e-03, -1.1130e-03, -2.3732e-03, -1.7660e-03, -5.7150e-03,\n -1.1365e-03, 5.7380e-03, -8.8755e-03, 4.4198e-03, 4.4902e-03,\n -1.8295e-03, 4.9779e-03, 3.4482e-03, 3.2950e-03, -6.3343e-04,\n 2.1640e-04, -3.5598e-03, -3.1751e-03, -1.4713e-03, 2.5914e-03,\n -1.3604e-03, -7.4860e-05, -3.6553e-03, 1.8916e-03, -4.5745e-03,\n 6.3626e-03, 4.0188e-03, 2.3253e-03, -1.0803e-03, -4.0328e-03,\n -9.2515e-04, 1.6980e-03, 1.0648e-03, 2.0089e-03, -2.8730e-03,\n 1.4450e-03, 1.5035e-03, 1.2080e-03, 1.6250e-03, 3.7270e-04,\n 1.8052e-03, 1.9897e-03, -2.8347e-03, 3.1325e-03, -4.5082e-03,\n 2.8384e-03, 2.6309e-04, 1.5265e-03, -1.1645e-02, 2.5962e-03,\n -3.0479e-03, -3.0059e-04, -3.7233e-03, 5.2628e-03, 1.7481e-03,\n -3.6924e-03, 3.0029e-03, -7.1065e-03, 2.2279e-03, -3.9385e-04,\n 3.1901e-04, -7.1231e-03, -2.3241e-04, 3.8676e-03, 2.3142e-03,\n 7.1210e-03, 4.4992e-03, -1.0707e-02, -2.8359e-03, 4.9074e-03,\n -7.4047e-04, 3.3955e-03, 2.5787e-03, -3.0021e-03, 2.9790e-04,\n 7.0792e-04, 1.1648e-04, 3.0105e-03, 2.0562e-03, -1.3292e-03,\n 1.6110e-03, -2.9840e-03, -2.1809e-04, -5.0096e-04, 1.7456e-03,\n 7.5357e-05, -1.7320e-04, 8.6628e-04, -1.6480e-04, -9.9287e-03,\n -1.0883e-02, -5.9667e-03, -4.8220e-03, -1.7565e-03, -8.5419e-04,\n -2.6408e-03, -1.8273e-03, -1.3795e-03, 2.4250e-03, 2.6066e-03,\n 6.9231e-04, -1.2363e-04, 2.0000e-03, 2.1655e-03, -4.1665e-04,\n 2.5296e-03, -4.7914e-03, 5.2238e-04, -1.1875e-03, -4.2807e-03,\n -1.9132e-04, 8.8770e-03, -2.4273e-03, 3.4417e-04, 4.0713e-03,\n -1.3153e-03, 1.5050e-03, 4.3390e-04, 3.3259e-03, -1.5498e-03,\n 2.2988e-03, 2.1816e-03, 1.8305e-03, 5.2378e-03, -8.1518e-03,\n -1.6015e-03, -1.3515e-03, -4.0487e-04, -2.8048e-04, 1.7408e-04,\n 2.4742e-03, 6.0270e-04, -1.9395e-03, 7.5296e-04, -7.7572e-03,\n -4.2122e-03, 1.7807e-03, -1.4512e-03, 4.5213e-03, 1.5880e-03,\n 3.1325e-03, 1.1604e-02, 4.0154e-03, -1.6928e-02, 2.4390e-04,\n 3.3653e-03, 9.2839e-04, -1.1077e-03, 3.1341e-03, 7.6772e-04,\n -3.6586e-03, -1.9267e-03, 2.0644e-03, -9.7815e-05, -1.6944e-03,\n -6.5447e-04, 2.9358e-04, 1.5196e-03, 8.4775e-05, 4.1781e-03,\n -3.2747e-03, -1.6734e-03, 5.3132e-03, 4.4260e-03, 3.8258e-03,\n 2.1634e-03, -8.1195e-03, -1.2488e-02, 1.6600e-03, -9.2338e-04,\n -9.9431e-04, 3.3417e-03, 1.9330e-03, 3.2097e-03, 1.1007e-03,\n 3.0188e-03, 4.6290e-03, -7.0154e-03, 1.7630e-03, 6.5104e-03,\n 4.2256e-03, -6.5387e-04, 8.5264e-04, 2.1452e-03, 3.2120e-03,\n -5.0989e-03, -4.2399e-03, -9.5398e-03, 1.3750e-03, 3.6652e-03,\n 2.3065e-03, -2.8187e-03, 2.3368e-04, 7.3485e-03, 2.1539e-03,\n -4.9271e-03, 1.3303e-03, 2.9252e-03, 1.2075e-03, 1.7311e-02,\n 1.9609e-03, -6.0004e-03, 7.0697e-03, 1.7806e-03, 1.7983e-03,\n 4.4307e-03, -4.9672e-03, -1.7716e-03, -9.6870e-03, 7.7847e-03,\n -8.6779e-04, -3.8887e-03, -2.7203e-03, 7.0542e-03, -2.3426e-03,\n 1.8489e-04, -1.4084e-02, 8.6603e-03, -2.4365e-03, 9.1543e-05,\n 6.4823e-03, 3.6271e-03, -3.1617e-03, -3.8594e-03, 4.1755e-03,\n -1.2361e-02, -2.2489e-03, 1.9218e-03, 1.3042e-03, 6.0264e-04,\n -1.0475e-03, -5.4257e-03, 4.2230e-05, 5.5030e-03, -1.1079e-03,\n 3.7429e-03, 2.1193e-03, 2.8632e-03, -7.7220e-03, -4.5074e-03,\n -1.1174e-02, -7.5125e-03, 7.1657e-03, -3.3154e-03, 1.0750e-03,\n 3.7191e-03, 1.8097e-03, -8.5331e-04, 7.0096e-03, 2.1095e-03,\n 2.8208e-03, 3.1790e-03, 7.2482e-03, 3.4535e-03, 1.2731e-03,\n -4.9758e-04, 1.1436e-03, -3.1838e-03, -4.8770e-03, 7.7653e-04,\n -1.1670e-03, 5.8724e-03, 3.9518e-03, 2.4306e-03, 1.3322e-03,\n -2.3090e-03, 2.0827e-04, -7.9451e-03, 1.9031e-03, 1.9339e-03,\n -1.9706e-03, 4.2827e-03, 8.9863e-03, 1.0219e-02, 1.4770e-04,\n -5.5539e-03, -3.6721e-03, 6.9482e-05, 7.2983e-03, -2.3164e-03,\n -6.2482e-03, 1.4368e-03, -2.6136e-03, 2.2961e-04, 1.4488e-03,\n -4.7855e-03, -1.5540e-03, 3.5234e-03, -1.1314e-03, 4.3382e-03,\n -4.6037e-04, 1.5632e-04, -6.4678e-03, 9.5986e-04, 3.2047e-03,\n -1.7480e-03, 5.3464e-03, 4.1775e-03, -4.2921e-03, -7.6855e-03,\n -1.3624e-03, -2.3977e-06, -2.8608e-03, 2.1526e-03, -2.1872e-03,\n -1.6377e-03, 1.5019e-03, -1.6374e-03, 9.9057e-04, -2.2022e-03,\n -4.4503e-03, 8.5544e-05, 3.1861e-03, -2.4511e-03, -9.9849e-03,\n 4.1231e-03, 3.4452e-04, -2.2725e-03, 8.1517e-04, 6.3879e-03,\n -3.0817e-03, 9.1640e-04, 3.4646e-03, -6.0133e-04, -3.4296e-03,\n 1.2108e-02, -4.0666e-03, -5.8869e-03, 9.4028e-04, 3.4290e-03,\n -8.2380e-03, -1.6503e-03, 1.2035e-02, 2.3587e-03, 5.3181e-03,\n 1.4879e-03, -9.9646e-03, -1.6769e-03, 7.0585e-03, -7.3951e-05,\n -2.4927e-03, 2.9834e-03, 2.0717e-03, 2.1251e-03, 1.5643e-03,\n -4.0666e-03, 2.6792e-03, 4.8865e-04, -1.1527e-03, 2.8985e-03,\n 1.3729e-03, 1.7010e-03, -2.3779e-03, 1.5440e-03, -7.1371e-04,\n 1.4621e-03, 1.6267e-03, -1.9764e-03, 2.2300e-03, -2.2441e-04,\n 4.0509e-03, 7.5744e-03, 1.6251e-03, -3.0041e-04, -6.1343e-04,\n 3.4326e-03, 8.5691e-03, -7.8756e-03, 2.9864e-03, 4.9767e-03,\n -4.7135e-03, 4.6276e-03, -2.5782e-03, 1.5023e-03, 8.9126e-04,\n 1.5143e-03, 4.6476e-03, -1.8130e-03, 9.7806e-04, -4.8972e-03,\n -8.5198e-03, -1.5693e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([1.2575e-04, 1.1000e-04, 1.4853e-04, 1.0790e-04, 1.1689e-04, 1.1994e-04,\n 1.2042e-04, 8.3786e-05, 1.0066e-04, 1.1758e-04, 1.4321e-04, 2.0947e-04,\n 7.9407e-05, 1.4941e-04, 1.3370e-04, 1.3395e-04, 1.0358e-04, 1.7976e-04,\n 1.6539e-04, 1.3574e-04, 1.7957e-04, 1.6151e-04, 1.5214e-04, 1.0937e-04,\n 1.0507e-04, 1.0668e-04, 1.9472e-04, 8.6102e-05, 9.0176e-05, 1.3947e-04,\n 1.0810e-04, 9.5406e-05, 1.8360e-04, 7.9955e-05, 1.0622e-04, 2.2178e-04,\n 1.3546e-04, 1.2563e-04, 1.3335e-04, 1.7606e-04, 2.0830e-04, 1.2664e-04,\n 1.2977e-04, 1.7466e-04, 8.5656e-05, 1.6422e-04, 1.3602e-04, 1.4659e-04,\n 1.6684e-04, 1.5493e-04, 1.3218e-04, 1.0377e-04, 1.3956e-04, 1.1536e-04,\n 1.4880e-04, 1.0114e-04, 1.1284e-04, 1.4253e-04, 9.0337e-05, 1.6920e-04,\n 1.1017e-04, 1.5402e-04, 1.2568e-04, 1.3139e-04, 1.3772e-04, 1.2733e-04,\n 1.4557e-04, 1.2641e-04, 1.3337e-04, 1.6276e-04, 1.0907e-04, 2.2896e-04,\n 9.2585e-05, 1.1183e-04, 1.2796e-04, 1.1160e-04, 8.3736e-05, 1.5235e-04,\n 1.3562e-04, 9.1302e-05, 1.6880e-04, 1.0571e-04, 1.0654e-04, 1.2084e-04,\n 5.1514e-04, 1.1092e-04, 1.2624e-04, 1.1369e-04, 1.1871e-04, 1.1673e-04,\n 2.1200e-04, 1.1819e-04, 9.4825e-05, 1.7451e-04, 1.0308e-04, 1.2202e-04,\n 1.4866e-04, 1.0438e-04, 1.3927e-04, 1.9917e-04, 1.1133e-04, 8.5160e-05,\n 1.9570e-04, 1.0874e-04, 1.9018e-04, 1.0692e-04, 1.3976e-04, 1.3453e-04,\n 1.5635e-04, 1.1528e-04, 1.7300e-04, 1.2378e-04, 1.0746e-04, 1.3408e-04,\n 1.1785e-04, 1.1193e-04, 1.8323e-04, 1.5437e-04, 1.2366e-04, 1.6786e-04,\n 1.2922e-04, 8.3686e-05, 1.6885e-04, 1.0141e-04, 7.9075e-05, 1.0774e-04,\n 1.2854e-04, 1.2675e-04, 7.7291e-05, 1.1609e-04, 1.5346e-04, 1.5997e-04,\n 1.1358e-04, 9.8705e-05, 8.7336e-05, 1.1353e-04, 1.0783e-04, 1.5747e-04,\n 1.2447e-04, 1.6934e-04, 1.1865e-04, 9.8465e-05, 1.0633e-04, 1.7365e-04,\n 1.1530e-04, 1.4571e-04, 2.6459e-04, 1.9017e-04, 9.8277e-05, 1.1923e-04,\n 1.6173e-04, 1.2335e-04, 1.4261e-04, 1.2066e-04, 1.0987e-04, 1.3121e-04,\n 1.6915e-04, 1.2035e-04, 1.1793e-04, 1.1047e-04, 1.0563e-04, 1.6705e-04,\n 1.9592e-04, 1.0277e-04, 1.1855e-04, 1.7744e-04, 1.9155e-04, 1.3172e-04,\n 1.0166e-04, 1.5879e-04, 1.3906e-04, 1.1632e-04, 1.1341e-04, 1.4606e-04,\n 1.3269e-04, 8.8860e-05, 1.4078e-04, 1.0602e-04, 1.4402e-04, 1.4076e-04,\n 1.4517e-04, 1.2777e-04, 1.4080e-04, 9.1410e-05, 1.1748e-04, 1.3379e-04,\n 1.3527e-04, 1.1644e-04, 1.0026e-04, 1.3877e-04, 1.9976e-04, 1.3400e-04,\n 9.9860e-05, 1.1761e-04, 8.2055e-05, 1.4588e-04, 9.9417e-05, 1.5820e-04,\n 1.5452e-04, 1.6007e-04, 7.6271e-05, 1.0207e-04, 1.2688e-04, 1.1372e-04,\n 9.5097e-05, 1.0875e-04, 1.3613e-04, 1.1886e-04, 1.4595e-04, 2.7612e-04,\n 1.4144e-04, 1.2324e-04, 2.0046e-04, 8.4263e-05, 2.2611e-04, 1.2919e-04,\n 1.4159e-04, 8.1693e-05, 1.2549e-04, 1.6713e-04, 1.4994e-04, 1.2748e-04,\n 1.1321e-04, 1.2718e-04, 1.1452e-04, 1.2561e-04, 1.2411e-04, 1.1358e-04,\n 1.3451e-04, 1.0998e-04, 1.3017e-04, 1.4769e-04, 1.2380e-04, 1.4775e-04,\n 9.1067e-05, 1.7515e-04, 1.2009e-04, 1.6161e-04, 1.0826e-04, 1.0752e-04,\n 1.1786e-04, 1.6453e-04, 1.5385e-04, 1.1798e-04, 1.7141e-04, 1.8763e-04,\n 1.4071e-04, 1.1014e-04, 1.2684e-04, 1.3600e-04, 1.1745e-04, 1.3431e-04,\n 1.6915e-04, 3.5317e-04, 1.2631e-04, 1.2282e-04, 1.5549e-04, 1.5346e-04,\n 1.1445e-04, 1.4159e-04, 1.0035e-04, 1.3886e-04, 1.2890e-04, 1.3108e-04,\n 1.5599e-04, 1.2711e-04, 1.4553e-04, 9.0238e-05, 1.3555e-04, 1.0397e-04,\n 1.7808e-04, 2.0605e-04, 1.0666e-04, 1.7577e-04, 1.4785e-04, 1.0447e-04,\n 1.6971e-04, 1.1103e-04, 9.7360e-05, 1.2040e-04, 1.3565e-04, 9.5824e-05,\n 1.0174e-04, 1.3469e-04, 1.3914e-04, 1.2258e-04, 1.4951e-04, 1.2563e-04,\n 1.2839e-04, 1.9973e-04, 1.3034e-04, 1.2885e-04, 1.2525e-04, 1.6251e-04,\n 1.1228e-04, 1.3803e-04, 1.5723e-04, 1.1917e-04, 1.1137e-04, 9.5429e-05,\n 1.1811e-04, 9.9385e-05, 1.5471e-04, 1.2078e-04, 9.8845e-05, 9.7197e-05,\n 1.0709e-04, 1.7745e-04, 8.9234e-05, 1.3001e-04, 1.6969e-04, 1.0507e-04,\n 9.3495e-05, 1.0687e-04, 1.5229e-04, 1.2995e-04, 1.8916e-04, 1.3661e-04,\n 8.0220e-05, 1.0921e-04, 1.1968e-04, 1.5239e-04, 9.1484e-05, 1.3868e-04,\n 1.6829e-04, 1.2274e-04, 1.4523e-04, 1.1474e-04, 1.5032e-04, 1.6515e-04,\n 1.7835e-04, 1.0880e-04, 1.7595e-04, 1.1059e-04, 1.4168e-04, 1.1741e-04,\n 1.7436e-04, 2.2089e-04, 1.4951e-04, 1.3388e-04, 1.1310e-04, 1.3159e-04,\n 8.8906e-05, 1.0484e-04, 1.1186e-04, 1.2214e-04, 1.6313e-04, 1.2798e-04,\n 1.1798e-04, 1.5698e-04, 9.5005e-05, 1.4566e-04, 1.3568e-04, 1.2314e-04,\n 1.4403e-04, 2.3316e-04, 1.3406e-04, 1.0424e-04, 9.9949e-05, 9.2307e-05,\n 1.0515e-04, 1.5595e-04, 9.7020e-05, 1.3210e-04, 1.2769e-04, 2.4305e-04,\n 1.1027e-04, 1.7642e-04, 1.2912e-04, 1.0851e-04, 1.7292e-04, 1.8084e-04,\n 1.0380e-04, 1.2488e-04, 1.5836e-04, 1.1815e-04, 9.7291e-05, 1.0293e-04,\n 1.5490e-04, 1.2267e-04, 1.2926e-04, 1.7759e-04, 1.8104e-04, 1.3336e-04,\n 1.2224e-04, 2.4573e-04, 1.3035e-04, 1.0638e-04, 1.7288e-04, 2.1376e-04,\n 1.2714e-04, 1.5203e-04, 1.3508e-04, 2.0087e-04, 1.0833e-04, 2.2335e-04,\n 9.6232e-05, 2.0327e-04, 1.0331e-04, 1.9455e-04, 1.1878e-04, 1.1493e-04,\n 2.0433e-04, 1.7636e-04, 1.3180e-04, 1.3527e-04, 1.3676e-04, 1.0270e-04,\n 1.1883e-04, 1.1368e-04, 1.2805e-04, 1.1891e-04, 1.6220e-04, 1.2455e-04,\n 1.1407e-04, 9.5182e-05, 8.4393e-05, 1.2808e-04, 1.4889e-04, 1.6445e-04,\n 9.9863e-05, 1.8467e-04, 1.5592e-04, 1.7429e-04, 1.3614e-04, 1.5140e-04,\n 1.8024e-04, 1.1585e-04, 2.1083e-04, 1.2486e-04, 1.6011e-04, 1.1759e-04,\n 1.4532e-04, 1.0453e-04, 1.2250e-04, 1.2093e-04, 1.3837e-04, 8.4479e-05,\n 1.8112e-04, 1.0243e-04, 1.2245e-04, 1.2267e-04, 9.3212e-05, 7.8245e-05,\n 1.5809e-04, 1.1292e-04, 1.1268e-04, 7.9275e-05, 1.9621e-04, 1.5245e-04,\n 1.2435e-04, 1.1393e-04, 8.4688e-05, 1.4695e-04, 9.7378e-05, 2.0232e-04,\n 1.0383e-04, 1.7800e-04, 1.4327e-04, 2.1578e-04, 1.0945e-04, 9.2735e-05,\n 1.2897e-04, 1.2934e-04, 1.4422e-04, 9.2588e-05, 1.5003e-04, 1.8087e-04,\n 1.3970e-04, 1.0480e-04, 1.9953e-04, 8.8275e-05, 9.7254e-05, 1.3081e-04,\n 1.0098e-04, 1.0231e-04, 1.1759e-04, 1.4872e-04, 1.6157e-04, 1.0221e-04,\n 1.2619e-04, 1.4369e-04, 1.2637e-04, 1.5565e-04, 1.4342e-04, 1.6224e-04,\n 1.0167e-04, 1.4033e-04, 8.0997e-05, 1.1940e-04, 1.5674e-04, 1.5033e-04,\n 1.4426e-04, 1.4225e-04, 2.1080e-04, 1.3861e-04, 1.7826e-04, 1.2386e-04,\n 1.5484e-04, 1.9373e-04, 1.6261e-04, 1.3055e-04, 1.0951e-04, 1.2689e-04,\n 1.2895e-04, 1.1988e-04, 1.7875e-04, 1.0219e-04, 2.5556e-04, 1.2248e-04,\n 1.7413e-04, 1.4658e-04], device='cuda:0')"
|
| 24 |
},
|
| 25 |
"4": {
|
| 26 |
+
"step": "tensor(6260.)",
|
| 27 |
+
"exp_avg": "tensor([[-2.4307e-05, 9.4611e-05, 6.3634e-05, ..., 3.6809e-04,\n -7.1694e-04, -9.9097e-05],\n [-1.7877e-04, -7.8264e-05, -7.5887e-05, ..., 3.1284e-05,\n 1.0259e-03, -1.8599e-04],\n [ 2.0717e-04, 1.2593e-04, -2.2736e-04, ..., -3.5495e-04,\n 6.9710e-05, 3.2759e-04],\n ...,\n [ 2.0303e-04, 3.8214e-04, -3.1978e-04, ..., -1.8504e-04,\n 4.5403e-04, 5.4143e-04],\n [ 1.9841e-04, -1.7725e-04, 5.2030e-06, ..., -2.4801e-04,\n 6.9651e-04, 8.8638e-05],\n [ 1.2187e-04, 2.9725e-04, -1.7300e-05, ..., -1.1983e-04,\n 1.2005e-04, -4.0230e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[2.4906e-07, 7.0951e-07, 2.5918e-07, ..., 3.2336e-07, 5.3992e-07,\n 5.4305e-07],\n [6.7211e-07, 2.8950e-07, 1.1404e-06, ..., 1.0186e-06, 1.1489e-06,\n 6.5343e-07],\n [4.3169e-07, 3.1296e-07, 5.8489e-07, ..., 1.1070e-06, 8.3938e-07,\n 4.4997e-07],\n ...,\n [9.1299e-07, 3.5147e-07, 6.5943e-07, ..., 9.3078e-07, 1.3141e-06,\n 1.7089e-06],\n [5.3956e-07, 3.9846e-07, 7.0520e-07, ..., 7.1024e-07, 1.1042e-06,\n 1.1504e-06],\n [5.0558e-07, 5.8065e-07, 1.0803e-06, ..., 8.1991e-07, 1.4335e-06,\n 9.7408e-07]], device='cuda:0')"
|
| 29 |
},
|
| 30 |
"5": {
|
| 31 |
+
"step": "tensor(6260.)",
|
| 32 |
+
"exp_avg": "tensor([[-1.4470e-04, 3.1142e-05, 4.8491e-06, ..., 1.0279e-04,\n 4.4636e-05, 8.6155e-06],\n [-5.4609e-05, -6.5727e-05, -9.7730e-05, ..., -2.0548e-04,\n 4.1089e-04, -2.9224e-04],\n [ 9.0119e-05, 4.7032e-05, -1.0009e-04, ..., 6.4305e-05,\n -1.6036e-06, 2.2026e-04],\n ...,\n [-3.8610e-05, 9.9904e-05, -3.2030e-04, ..., -1.2563e-04,\n 1.8940e-04, 4.3564e-04],\n [-5.6906e-05, -3.2452e-06, -2.3419e-04, ..., 4.0425e-04,\n -1.0487e-04, -3.1641e-04],\n [-8.1440e-05, -2.7528e-04, -1.7591e-04, ..., 4.8396e-04,\n -1.0290e-04, 2.2857e-05]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.8768e-08, 4.9331e-08, 1.0050e-07, ..., 1.7118e-07, 1.7352e-07,\n 9.5275e-08],\n [2.1355e-07, 9.0563e-08, 1.9602e-07, ..., 3.1747e-07, 2.9845e-07,\n 2.9380e-07],\n [1.5014e-07, 1.8703e-07, 2.2293e-07, ..., 3.7066e-07, 3.1937e-07,\n 1.6941e-07],\n ...,\n [2.4526e-07, 2.0357e-07, 3.2612e-07, ..., 2.5150e-07, 4.7541e-07,\n 4.0504e-07],\n [1.5069e-07, 2.9833e-07, 2.9376e-07, ..., 2.6648e-07, 3.7469e-07,\n 4.1035e-07],\n [2.0446e-07, 2.1553e-07, 3.8131e-07, ..., 4.3858e-07, 4.2578e-07,\n 2.2549e-07]], device='cuda:0')"
|
| 34 |
},
|
| 35 |
"6": {
|
| 36 |
+
"step": "tensor(6260.)",
|
| 37 |
+
"exp_avg": "tensor([ 0.0016, -0.0016], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([9.3679e-06, 9.3679e-06], device='cuda:0')"
|
| 39 |
}
|
| 40 |
},
|
| 41 |
"param_groups": [
|
| 42 |
{
|
| 43 |
+
"lr": 0.0005005000000000001,
|
| 44 |
"name": "shared",
|
| 45 |
"betas": [
|
| 46 |
0.9,
|
|
|
|
| 64 |
]
|
| 65 |
},
|
| 66 |
{
|
| 67 |
+
"lr": 0.0005005000000000001,
|
| 68 |
"name": "scale_256",
|
| 69 |
"betas": [
|
| 70 |
0.9,
|
|
|
|
| 85 |
]
|
| 86 |
},
|
| 87 |
{
|
| 88 |
+
"lr": 0.0005005000000000001,
|
| 89 |
"name": "scale_512",
|
| 90 |
"betas": [
|
| 91 |
0.9,
|
|
|
|
| 106 |
]
|
| 107 |
},
|
| 108 |
{
|
| 109 |
+
"lr": 0.0002505,
|
| 110 |
"name": "fusion",
|
| 111 |
"betas": [
|
| 112 |
0.9,
|
|
|
|
| 133 |
"T_i": 10,
|
| 134 |
"T_mult": 2,
|
| 135 |
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 5,
|
| 137 |
"base_lrs": [
|
| 138 |
0.001,
|
| 139 |
0.001,
|
| 140 |
0.001,
|
| 141 |
0.0005
|
| 142 |
],
|
| 143 |
+
"last_epoch": 5,
|
| 144 |
"_step_count": 0,
|
| 145 |
"_is_initial": false,
|
| 146 |
"_get_lr_called_within_step": false,
|
| 147 |
"_last_lr": [
|
| 148 |
+
0.0005005000000000001,
|
| 149 |
+
0.0005005000000000001,
|
| 150 |
+
0.0005005000000000001,
|
| 151 |
+
0.0002505
|
| 152 |
]
|
| 153 |
},
|
| 154 |
"metrics": {
|
| 155 |
+
"best_val_acc": 71.096,
|
| 156 |
+
"best_epoch": 4,
|
| 157 |
"scale_accuracies": {
|
| 158 |
+
"256": 70.456,
|
| 159 |
+
"512": 70.964
|
| 160 |
}
|
| 161 |
},
|
| 162 |
"train_config": {
|