Upload weights and configs - David-fully_shared-weighted_sum - Run 20251012_135249
Browse files- weights/David-fully_shared-weighted_sum/20251012_135249/MODEL_SUMMARY.txt +1 -1
- weights/David-fully_shared-weighted_sum/20251012_135249/checkpoint_epoch_10_acc71.99.safetensors +3 -0
- weights/David-fully_shared-weighted_sum/20251012_135249/checkpoint_epoch_10_acc71.99_metadata.json +207 -0
- weights/David-fully_shared-weighted_sum/20251012_135249/final_model.safetensors +3 -0
- weights/David-fully_shared-weighted_sum/20251012_135249/final_model_metadata.json +207 -0
weights/David-fully_shared-weighted_sum/20251012_135249/MODEL_SUMMARY.txt
CHANGED
|
@@ -61,4 +61,4 @@ History: training_history.json
|
|
| 61 |
|
| 62 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
|
| 64 |
-
Generated: 2025-10-12 14:09:
|
|
|
|
| 61 |
|
| 62 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
|
| 64 |
+
Generated: 2025-10-12 14:09:56
|
weights/David-fully_shared-weighted_sum/20251012_135249/checkpoint_epoch_10_acc71.99.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80113e18544f43052b7bfd9b98cd716cbbfc60ee9c47b1546bc067d87db19427
|
| 3 |
+
size 2628344
|
weights/David-fully_shared-weighted_sum/20251012_135249/checkpoint_epoch_10_acc71.99_metadata.json
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 9,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(12520.)",
|
| 7 |
+
"exp_avg": "tensor([[-3.3046e-05, 1.3184e-03, -8.5554e-04, ..., 2.5362e-04,\n 7.8119e-04, -3.5119e-04],\n [ 1.7381e-03, 9.8076e-04, -1.0046e-03, ..., 2.8027e-03,\n 1.5395e-04, 2.3507e-03],\n [ 4.3466e-04, 3.5545e-04, 4.1837e-04, ..., -4.1433e-04,\n -1.8521e-04, -5.4486e-04],\n ...,\n [-2.4468e-04, 1.7150e-03, 7.1752e-04, ..., -8.9465e-04,\n 4.6466e-04, 1.6760e-04],\n [ 1.1223e-04, 1.9717e-03, 5.1268e-04, ..., 4.1239e-04,\n -6.7351e-05, 2.6199e-04],\n [-8.7841e-04, -3.9226e-04, 2.6469e-04, ..., -1.0100e-04,\n 7.0977e-04, 5.8649e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[6.0538e-06, 3.2370e-05, 1.5984e-05, ..., 6.3342e-06, 3.1433e-06,\n 8.3356e-06],\n [5.6413e-06, 2.3848e-05, 1.7510e-05, ..., 8.3732e-06, 2.2645e-06,\n 8.7786e-06],\n [9.4105e-06, 5.9058e-05, 2.1278e-05, ..., 6.1329e-06, 2.8654e-06,\n 3.6291e-06],\n ...,\n [5.8094e-06, 2.3533e-05, 1.4798e-05, ..., 3.4493e-06, 1.8469e-06,\n 2.7238e-06],\n [7.2410e-06, 4.6908e-05, 1.5391e-05, ..., 6.6735e-06, 3.7259e-06,\n 6.4761e-06],\n [3.8777e-06, 4.8038e-05, 1.1613e-05, ..., 3.2244e-06, 1.8040e-06,\n 2.1119e-06]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(12520.)",
|
| 12 |
+
"exp_avg": "tensor([-2.8793e-04, 4.8640e-02, 2.8733e-04, -1.9829e-03, -1.1588e-02,\n -1.3039e-03, 2.1500e-02, -5.7016e-02, -7.6601e-03, -1.4245e-02,\n 1.5816e-02, 3.6534e-03, 1.8377e-02, 1.6194e-03, -1.1017e-02,\n -2.4213e-02, -1.8920e-02, -3.5318e-02, 1.4785e-02, 2.9861e-02,\n -1.2753e-02, 1.9380e-02, -1.5568e-02, -3.5700e-04, -6.7014e-02,\n 1.2010e-02, 2.1371e-02, 8.3862e-03, -4.6259e-02, 2.6552e-03,\n -1.4616e-02, 2.1523e-02, 2.7220e-02, 3.5274e-02, -1.7959e-02,\n 1.8219e-02, 2.2884e-02, 8.2567e-03, 1.9731e-03, 2.3003e-02,\n -1.4999e-02, 2.6860e-02, -2.0848e-03, -3.0692e-03, -2.5609e-02,\n -5.5685e-03, 3.1910e-02, 1.1133e-02, -3.9296e-02, 4.8808e-03,\n 1.1479e-02, -1.7903e-02, 7.2915e-04, 6.7181e-03, 1.2693e-04,\n 2.8869e-02, 2.2346e-02, 9.8493e-03, 1.9897e-02, 6.9848e-03,\n -3.3656e-02, 5.6311e-03, 1.3728e-02, -1.6328e-02, 9.4522e-03,\n 1.4691e-02, 3.7510e-02, -2.1718e-02, -4.6480e-02, 5.0563e-03,\n -9.1711e-03, -3.6435e-02, 3.4365e-02, 2.0534e-02, 2.3343e-02,\n 3.5536e-04, -1.7489e-02, 2.3245e-02, 1.7542e-02, -8.4864e-03,\n 2.3270e-02, -1.2804e-02, 1.6030e-02, 6.8781e-03, -2.8725e-02,\n -1.3784e-02, 1.1327e-02, 1.3873e-02, -1.4216e-02, -2.7350e-03,\n 6.3497e-03, -2.2506e-02, -1.7909e-02, 1.9545e-02, 7.2716e-03,\n 1.2278e-02, -5.5581e-03, 2.7476e-02, -1.3676e-02, 4.0674e-03,\n -7.8614e-03, 4.2977e-03, 4.4696e-03, -1.1024e-03, -1.1395e-02,\n -1.8583e-02, -2.6280e-03, 2.2310e-02, -9.7073e-03, 7.3026e-03,\n -2.5477e-02, -3.5233e-02, 2.2879e-03, -6.4351e-03, -3.0403e-02,\n 1.2316e-03, -4.9787e-03, 1.7197e-02, 6.0468e-03, -6.7373e-03,\n -2.1082e-02, -1.0879e-02, 2.3980e-03, 2.5176e-02, -8.6008e-04,\n -1.3698e-02, 3.1740e-02, 2.3570e-02, -4.7870e-03, -6.6659e-03,\n 1.3564e-02, 1.3084e-02, -5.6567e-03, 2.4881e-03, 3.0487e-02,\n -7.5755e-03, 1.6282e-02, -3.5152e-03, -1.3207e-02, 8.1424e-03,\n -2.2068e-02, 6.1958e-03, 2.5182e-03, 3.5589e-03, -2.4931e-02,\n 1.1914e-02, -1.7428e-02, -1.3469e-02, 2.1464e-03, -5.5896e-05,\n -1.1783e-04, 1.5242e-02, 5.3966e-02, -1.0550e-02, -2.2377e-02,\n 5.6345e-03, -4.5011e-03, -1.2159e-02, 5.1875e-03, -9.7485e-04,\n -3.2005e-02, -1.8260e-02, 1.7123e-02, 2.7339e-03, 1.2096e-02,\n -5.3047e-03, 1.9343e-03, 5.2266e-03, 2.3647e-02, -2.0105e-02,\n -2.5488e-02, 2.0962e-02, -5.7194e-03, -2.3721e-02, 4.0386e-02,\n 2.4981e-03, 2.9079e-02, -2.5089e-02, 1.5006e-03, -3.4057e-03,\n 1.1228e-02, 2.3742e-02, -2.2092e-03, -2.1806e-02, 2.0597e-02,\n 1.9895e-02, -3.1090e-03, 2.5461e-03, -2.8593e-03, 1.2681e-02,\n -1.2726e-02, 1.0669e-02, 2.5700e-02, 3.0368e-02, 2.2922e-02,\n -2.8858e-02, -3.2277e-03, 1.9166e-02, -2.3755e-02, -2.9156e-02,\n 5.6695e-04, -1.5116e-02, 6.7286e-03, 7.7313e-04, 3.4038e-02,\n 1.0291e-02, -8.8464e-03, -1.4205e-03, -9.6294e-03, 1.8529e-02,\n -4.6589e-03, 2.5836e-03, 6.4105e-03, 4.8391e-03, -2.4215e-02,\n -1.1979e-02, -9.1469e-03, -4.5127e-03, 3.2258e-03, -2.3501e-03,\n -2.9681e-02, -3.1474e-02, 4.5832e-02, -5.0786e-02, -1.6554e-02,\n 5.8334e-03, 5.1814e-02, 1.8588e-03, -1.3108e-02, 1.5109e-02,\n -2.8642e-02, -1.3774e-02, 2.9255e-02, 2.9146e-03, -1.3881e-02,\n 1.6939e-02, -3.8032e-02, 2.4095e-03, -4.9286e-03, 2.5536e-02,\n -4.4408e-02, -8.6553e-03, 6.9803e-03, -2.4412e-02, 5.2963e-03,\n 3.8081e-02, -6.0606e-03, 3.0404e-02, 1.4011e-02, -1.2722e-02,\n 1.7489e-02, 5.4902e-03, -3.1039e-03, 1.3576e-02, 1.2996e-02,\n 2.0182e-02, 3.0371e-03, -2.6611e-03, -2.6177e-02, -5.2031e-03,\n -2.9615e-02, 2.7173e-02, 4.2806e-03, 1.0883e-02, 3.5543e-04,\n 3.5302e-02, -2.0747e-02, -2.8691e-03, 1.0282e-02, -3.4250e-02,\n 4.7217e-03, -7.2308e-03, -1.8177e-02, -8.3949e-03, -9.5230e-03,\n -1.5611e-02, -2.3456e-03, -4.3520e-04, -1.5216e-02, -2.4419e-02,\n -2.5750e-02, -6.4866e-02, 1.3522e-02, 1.7967e-02, -4.4644e-03,\n -3.5477e-02, 1.1020e-02, -1.5887e-02, 1.2085e-02, -1.7164e-02,\n 5.4228e-03, 4.4098e-03, -1.4811e-02, -1.5271e-02, 5.9134e-02,\n -4.0889e-02, 2.1173e-02, 4.6069e-02, 2.5419e-03, 2.2181e-02,\n -3.1539e-02, -2.0548e-02, 1.1235e-02, 6.5333e-03, 2.1797e-02,\n -6.1835e-02, 1.0255e-02, -4.0199e-03, -1.1448e-02, -6.2124e-02,\n -3.8762e-02, 2.4050e-02, -6.6370e-05, 2.5254e-02, 3.3064e-03,\n -7.1282e-03, 1.4965e-02, -2.1311e-02, 1.6650e-03, -1.5619e-02,\n -1.3186e-02, -9.8527e-03, 3.9895e-02, -1.1098e-02, -1.3239e-02,\n 1.0263e-02, 3.5431e-02, 7.3234e-03, -3.1150e-02, 5.9824e-03,\n 6.5632e-03, -1.4225e-02, -2.3449e-02, -1.1968e-02, -4.4559e-03,\n 1.9645e-02, 2.3660e-02, -1.0703e-02, -3.4435e-02, 2.2531e-02,\n 1.2187e-02, 1.1577e-02, -1.1584e-02, 1.1901e-02, -1.8690e-02,\n -6.2761e-03, -2.7386e-02, 9.2859e-04, 2.2606e-02, -3.6247e-03,\n 3.9740e-02, 2.2307e-02, 2.5435e-02, -2.7447e-03, 6.1269e-03,\n 3.1727e-02, -3.1187e-03, -3.6492e-02, 3.0464e-02, -4.0531e-03,\n 9.8909e-03, 6.5008e-03, 2.2741e-02, -1.6599e-02, 1.6784e-02,\n -8.8681e-03, 1.8242e-02, 3.9401e-03, -2.0300e-02, -2.1352e-02,\n 2.0905e-02, 1.8328e-04, 4.2949e-02, -5.4018e-03, 2.9712e-02,\n -1.7098e-02, -9.1278e-05, 3.1655e-02, -1.4564e-02, -8.4472e-03,\n -4.7460e-04, -2.2772e-03, 3.0636e-02, -9.5513e-03, 3.3363e-02,\n -5.0501e-03, -4.1604e-03, -2.3930e-02, -1.4385e-02, 1.0042e-02,\n 3.8460e-03, -6.3307e-03, 3.8870e-02, -2.4196e-02, -2.1113e-02,\n -1.2244e-02, -2.8602e-04, -1.3285e-02, 1.3032e-02, 7.7296e-03,\n -5.5512e-03, 6.5145e-03, -1.9832e-03, -1.4500e-02, 5.5845e-05,\n -2.9590e-04, 7.4529e-03, 9.9104e-03, -7.0313e-03, -1.8558e-03,\n 3.1571e-02, -1.6084e-03, -3.7218e-02, -1.5569e-02, -7.9845e-03,\n 9.8934e-03, -2.0579e-02, -5.0971e-02, -4.7316e-02, -2.3560e-03,\n 1.1576e-02, 3.1863e-02, -3.3888e-02, -7.9227e-03, 2.7374e-02,\n 9.0963e-03, 3.7167e-02, 1.9652e-02, -8.7403e-03, -1.2271e-03,\n -1.9661e-02, 3.8851e-03, -7.2920e-03, -3.5539e-03, 9.2494e-03,\n -4.8650e-02, 4.1687e-03, 1.0675e-02, -6.2658e-03, 1.0242e-02,\n 1.3878e-03, 3.0521e-02, 1.0698e-02, 1.7632e-02, 2.2394e-02,\n 2.5427e-03, 1.6602e-02, 1.6313e-02, -3.3836e-03, 2.2478e-04,\n -1.7979e-02, 3.2327e-02, 2.5872e-03, -1.4869e-02, 4.0177e-03,\n 2.5543e-02, -1.1046e-02, 1.8446e-04, -7.5710e-03, -1.3402e-02,\n -1.1399e-02, 3.3958e-02, 1.8227e-02, 4.8199e-03, -7.1095e-03,\n 2.1465e-02, 2.1446e-02, 9.4828e-03, 2.3874e-02, -4.4376e-02,\n -2.1337e-02, -2.9199e-04, -1.6897e-02, -2.0548e-02, 1.5580e-02,\n -3.1435e-02, -1.0484e-04, 6.5918e-03, -1.3160e-02, 2.3095e-02,\n -5.1776e-03, -7.4214e-03, -1.7959e-02, -1.3251e-02, 7.2772e-04,\n -5.3829e-03, -1.0224e-03, 1.0862e-02, -1.2710e-02, 1.1892e-02,\n -1.5025e-02, -1.6410e-02, -3.0264e-02, 3.6341e-02, 3.9090e-02,\n -1.2201e-02, -7.8130e-03, -7.7499e-03, -2.3496e-05, -6.2482e-02,\n -2.8714e-02, 1.1824e-02, 1.5038e-03, 2.7053e-02, 2.4697e-03,\n 6.2774e-04, 2.9676e-02, 2.4279e-03, 1.6174e-02, -1.5760e-02,\n 1.6092e-02, -1.9818e-02], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0049, 0.0049, 0.0048, 0.0028, 0.0067, 0.0030, 0.0034, 0.0035, 0.0062,\n 0.0040, 0.0026, 0.0041, 0.0047, 0.0032, 0.0041, 0.0036, 0.0012, 0.0021,\n 0.0029, 0.0030, 0.0032, 0.0037, 0.0040, 0.0045, 0.0055, 0.0038, 0.0040,\n 0.0035, 0.0051, 0.0017, 0.0033, 0.0046, 0.0050, 0.0032, 0.0036, 0.0032,\n 0.0024, 0.0039, 0.0033, 0.0030, 0.0057, 0.0034, 0.0035, 0.0047, 0.0040,\n 0.0051, 0.0078, 0.0046, 0.0034, 0.0046, 0.0032, 0.0040, 0.0042, 0.0021,\n 0.0064, 0.0045, 0.0026, 0.0048, 0.0031, 0.0033, 0.0035, 0.0067, 0.0050,\n 0.0026, 0.0025, 0.0026, 0.0034, 0.0052, 0.0080, 0.0059, 0.0051, 0.0032,\n 0.0068, 0.0025, 0.0041, 0.0076, 0.0043, 0.0039, 0.0039, 0.0076, 0.0057,\n 0.0030, 0.0055, 0.0047, 0.0032, 0.0031, 0.0013, 0.0078, 0.0046, 0.0040,\n 0.0024, 0.0033, 0.0025, 0.0045, 0.0057, 0.0036, 0.0052, 0.0051, 0.0039,\n 0.0044, 0.0030, 0.0034, 0.0042, 0.0018, 0.0029, 0.0034, 0.0060, 0.0029,\n 0.0046, 0.0036, 0.0019, 0.0054, 0.0037, 0.0024, 0.0027, 0.0025, 0.0044,\n 0.0054, 0.0025, 0.0037, 0.0028, 0.0027, 0.0042, 0.0049, 0.0051, 0.0044,\n 0.0050, 0.0034, 0.0050, 0.0030, 0.0043, 0.0034, 0.0034, 0.0031, 0.0045,\n 0.0027, 0.0029, 0.0018, 0.0046, 0.0072, 0.0051, 0.0027, 0.0032, 0.0050,\n 0.0048, 0.0031, 0.0040, 0.0039, 0.0041, 0.0045, 0.0031, 0.0043, 0.0029,\n 0.0026, 0.0020, 0.0025, 0.0044, 0.0039, 0.0027, 0.0040, 0.0032, 0.0036,\n 0.0036, 0.0035, 0.0047, 0.0044, 0.0019, 0.0027, 0.0041, 0.0040, 0.0045,\n 0.0033, 0.0024, 0.0033, 0.0046, 0.0026, 0.0033, 0.0034, 0.0022, 0.0035,\n 0.0047, 0.0076, 0.0020, 0.0046, 0.0067, 0.0046, 0.0015, 0.0023, 0.0044,\n 0.0035, 0.0021, 0.0022, 0.0058, 0.0036, 0.0046, 0.0035, 0.0028, 0.0033,\n 0.0034, 0.0085, 0.0021, 0.0037, 0.0026, 0.0050, 0.0034, 0.0032, 0.0022,\n 0.0028, 0.0033, 0.0050, 0.0024, 0.0049, 0.0053, 0.0076, 0.0040, 0.0025,\n 0.0037, 0.0024, 0.0033, 0.0043, 0.0024, 0.0042, 0.0064, 0.0057, 0.0017,\n 0.0036, 0.0040, 0.0048, 0.0039, 0.0060, 0.0040, 0.0029, 0.0048, 0.0025,\n 0.0038, 0.0045, 0.0046, 0.0045, 0.0035, 0.0081, 0.0056, 0.0033, 0.0039,\n 0.0029, 0.0027, 0.0049, 0.0035, 0.0040, 0.0024, 0.0069, 0.0026, 0.0030,\n 0.0016, 0.0090, 0.0044, 0.0047, 0.0075, 0.0032, 0.0042, 0.0041, 0.0037,\n 0.0067, 0.0028, 0.0028, 0.0018, 0.0041, 0.0031, 0.0047, 0.0018, 0.0032,\n 0.0023, 0.0024, 0.0046, 0.0039, 0.0031, 0.0063, 0.0016, 0.0030, 0.0081,\n 0.0034, 0.0033, 0.0049, 0.0035, 0.0026, 0.0065, 0.0044, 0.0036, 0.0040,\n 0.0055, 0.0031, 0.0036, 0.0056, 0.0040, 0.0042, 0.0066, 0.0051, 0.0053,\n 0.0045, 0.0067, 0.0061, 0.0048, 0.0031, 0.0036, 0.0026, 0.0031, 0.0038,\n 0.0042, 0.0029, 0.0055, 0.0044, 0.0032, 0.0045, 0.0027, 0.0039, 0.0027,\n 0.0051, 0.0020, 0.0051, 0.0034, 0.0027, 0.0047, 0.0066, 0.0035, 0.0026,\n 0.0041, 0.0029, 0.0035, 0.0036, 0.0026, 0.0037, 0.0054, 0.0036, 0.0034,\n 0.0029, 0.0026, 0.0028, 0.0035, 0.0041, 0.0052, 0.0051, 0.0034, 0.0036,\n 0.0032, 0.0036, 0.0043, 0.0047, 0.0028, 0.0031, 0.0030, 0.0036, 0.0043,\n 0.0065, 0.0032, 0.0053, 0.0024, 0.0039, 0.0050, 0.0040, 0.0027, 0.0034,\n 0.0039, 0.0038, 0.0037, 0.0023, 0.0031, 0.0030, 0.0027, 0.0074, 0.0027,\n 0.0026, 0.0028, 0.0079, 0.0050, 0.0039, 0.0039, 0.0024, 0.0038, 0.0030,\n 0.0044, 0.0029, 0.0025, 0.0041, 0.0034, 0.0043, 0.0051, 0.0030, 0.0024,\n 0.0038, 0.0049, 0.0017, 0.0041, 0.0075, 0.0033, 0.0036, 0.0028, 0.0049,\n 0.0024, 0.0032, 0.0074, 0.0019, 0.0022, 0.0033, 0.0038, 0.0049, 0.0044,\n 0.0067, 0.0036, 0.0030, 0.0047, 0.0023, 0.0051, 0.0050, 0.0056, 0.0027,\n 0.0060, 0.0029, 0.0037, 0.0027, 0.0052, 0.0028, 0.0042, 0.0036, 0.0045,\n 0.0055, 0.0063, 0.0034, 0.0068, 0.0058, 0.0027, 0.0024, 0.0043, 0.0026,\n 0.0028, 0.0108, 0.0023, 0.0042, 0.0043, 0.0053, 0.0053, 0.0049, 0.0054,\n 0.0067, 0.0037, 0.0032, 0.0028, 0.0032, 0.0045, 0.0027, 0.0030, 0.0029,\n 0.0017, 0.0063, 0.0037, 0.0045, 0.0033, 0.0057, 0.0038, 0.0029, 0.0041,\n 0.0044, 0.0047, 0.0057, 0.0033, 0.0050, 0.0014, 0.0038, 0.0029, 0.0039,\n 0.0039, 0.0053, 0.0045, 0.0031, 0.0055, 0.0037, 0.0034, 0.0027, 0.0033,\n 0.0039, 0.0033, 0.0029, 0.0031, 0.0038, 0.0029, 0.0035, 0.0033, 0.0037,\n 0.0027, 0.0032, 0.0073, 0.0047, 0.0007, 0.0044, 0.0039, 0.0037, 0.0052,\n 0.0047, 0.0044, 0.0048, 0.0045, 0.0043, 0.0026, 0.0037, 0.0048, 0.0030,\n 0.0046, 0.0051, 0.0040, 0.0028, 0.0043, 0.0034, 0.0055, 0.0029],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(12520.)",
|
| 17 |
+
"exp_avg": "tensor([-4.0443e-04, 6.0422e-03, -1.0697e-03, 8.0438e-04, -1.9288e-03,\n -4.1859e-04, 4.3914e-03, -1.0068e-02, 2.8355e-04, -3.2395e-03,\n 3.8719e-03, -9.0882e-04, 3.4476e-03, -2.9723e-04, -5.7432e-04,\n -3.0472e-03, -2.0661e-02, -9.9126e-03, 2.5149e-03, 9.2905e-03,\n -7.9131e-04, 7.0090e-03, -4.1218e-03, 1.0662e-04, -1.1244e-02,\n 4.4356e-03, 5.1012e-03, 3.3816e-03, -3.3507e-03, 3.1785e-03,\n -3.9020e-03, 4.1379e-03, 4.2129e-03, 4.8523e-03, -5.4532e-03,\n 5.4129e-03, 8.7950e-03, 2.0240e-03, 3.7411e-04, 5.9718e-03,\n -3.0263e-03, 4.9135e-03, -6.1001e-04, -3.3775e-04, -4.8629e-03,\n -5.7551e-04, 4.6407e-03, 1.8367e-03, -8.1390e-03, 1.7614e-03,\n 2.4498e-03, -3.9680e-03, -3.7540e-04, 2.4693e-03, 9.5717e-04,\n 5.3974e-03, 1.0037e-02, 1.8286e-03, 4.3574e-03, 3.1163e-03,\n -8.5755e-03, 6.9398e-04, 4.6927e-03, -5.1997e-03, 3.7424e-03,\n 3.3919e-03, 9.7112e-03, -4.6852e-03, -4.4092e-03, 1.9147e-04,\n -7.0065e-04, -5.4925e-03, 4.1000e-03, 5.7908e-03, 5.3176e-03,\n 2.6463e-04, -3.6224e-03, 3.7584e-03, 3.7748e-03, -1.4897e-03,\n 3.8406e-03, -1.7857e-03, 2.9279e-03, 1.0864e-03, -5.2336e-03,\n -3.2070e-03, 2.0097e-02, 2.4131e-03, -1.7695e-03, -3.3027e-04,\n 2.2131e-03, -6.5989e-03, -2.2444e-03, 2.7898e-03, 1.3190e-03,\n 1.1732e-03, -1.6730e-03, 4.2063e-03, -2.0643e-03, -1.8472e-04,\n 5.1920e-04, -4.7105e-05, 3.1108e-04, 1.6776e-03, -3.4007e-03,\n -5.7018e-03, 7.6289e-04, 8.8700e-03, -7.9353e-04, 6.0520e-04,\n -1.7436e-02, -3.6595e-03, 1.3040e-03, -1.2549e-03, -1.0891e-02,\n -2.0584e-04, 8.7123e-05, 1.9290e-03, 1.2452e-03, -6.5689e-04,\n -5.7043e-03, -1.9851e-03, 4.6441e-04, 4.2971e-03, -7.6141e-04,\n -1.9949e-03, 3.8387e-03, 7.1399e-03, -3.0409e-04, -1.8809e-03,\n 1.6387e-03, 2.4225e-03, -2.4879e-03, 6.1250e-06, 5.6080e-03,\n -1.8668e-03, 1.3348e-03, -1.1977e-03, -8.2272e-04, 9.7971e-04,\n -4.9072e-03, 2.4532e-03, 1.0039e-03, 3.4701e-04, -3.4253e-03,\n 4.1981e-03, -1.3990e-03, -2.4355e-03, 1.7110e-03, -1.2904e-04,\n 7.7228e-04, 4.5416e-03, 1.2397e-02, -4.6870e-03, -3.7674e-03,\n 3.1044e-03, 5.7595e-05, -2.2933e-03, 1.3068e-03, 1.1375e-03,\n -8.8992e-03, -3.2983e-03, 2.7630e-03, 1.3277e-03, 1.8909e-03,\n -1.6507e-03, 3.2950e-03, 6.9753e-04, 3.3066e-03, -1.3852e-03,\n -6.6903e-03, 4.5391e-03, -1.8571e-03, -4.8445e-03, 7.2653e-03,\n 3.5317e-04, 6.0150e-03, -6.4371e-03, -1.6845e-04, 3.5103e-04,\n 2.5399e-03, 2.8094e-03, 1.3265e-03, -4.0003e-03, 2.8890e-03,\n 3.9545e-03, -2.3200e-03, 1.2286e-03, -8.0280e-05, 1.8379e-03,\n -3.8289e-03, 5.1392e-03, 5.9042e-03, 6.1560e-03, 3.3621e-03,\n -5.4540e-03, -2.6336e-04, 4.8973e-03, -3.3222e-03, -3.5215e-03,\n 1.2792e-03, -2.6576e-03, 6.9043e-04, 5.1855e-04, 7.3760e-03,\n 3.4964e-03, -2.1949e-03, -3.1291e-04, -4.6262e-04, 8.8157e-04,\n -1.5521e-03, 9.9026e-04, 7.9276e-04, 1.0404e-04, -7.3241e-03,\n -2.7700e-03, -2.1894e-03, -7.4166e-05, 2.2007e-04, -8.4981e-04,\n -8.1053e-03, -5.1421e-03, 4.6480e-03, -8.0673e-03, -2.1410e-03,\n 1.1459e-03, 1.6332e-02, -7.9816e-04, -5.1047e-03, 1.6283e-03,\n -5.0701e-03, -1.6684e-03, 3.6661e-03, 1.0234e-03, -3.6174e-03,\n 2.2973e-03, -7.6214e-03, 1.3821e-03, -7.1229e-04, 5.1810e-03,\n -5.1264e-03, -8.1063e-04, 1.4788e-03, -4.1562e-03, 2.4708e-03,\n 3.9629e-03, -6.1682e-04, 2.8683e-03, 5.0204e-03, -2.4605e-03,\n 5.5259e-03, -1.7014e-03, -7.3790e-04, 1.7754e-03, 4.1502e-03,\n 3.1795e-03, -3.5535e-04, -3.2151e-04, -5.0465e-03, -1.9591e-04,\n -4.6600e-03, 4.2108e-03, 3.2326e-03, 2.1764e-03, 3.9454e-04,\n 8.5605e-03, -3.7902e-03, -4.7759e-04, 3.7662e-03, -9.5284e-03,\n 1.6743e-03, -2.0444e-03, -2.3348e-03, -4.0631e-04, -1.4696e-03,\n -3.5211e-03, -5.7381e-04, 3.0211e-04, -3.2457e-03, -2.2899e-03,\n -3.2930e-03, -1.1047e-02, 2.9233e-03, 6.8151e-03, -9.2710e-04,\n -5.3187e-03, 6.6661e-03, -1.5740e-03, 1.6932e-03, -4.5643e-03,\n 2.1565e-03, 2.7959e-04, -2.7138e-03, -2.2140e-03, 9.3918e-03,\n -7.2901e-03, 2.1505e-03, 9.6389e-03, 7.5530e-04, 1.9383e-03,\n -8.1739e-03, -3.3910e-03, 3.8047e-03, 1.7875e-03, 4.9742e-03,\n -1.3726e-02, 1.6187e-03, -4.6646e-04, -2.2488e-03, -7.4673e-03,\n -5.6278e-03, 3.6064e-03, 2.1207e-04, 4.3295e-03, 2.2379e-03,\n -1.6315e-03, 3.6481e-03, -4.5664e-03, 3.6713e-04, -3.2373e-03,\n -2.7618e-03, -1.8171e-03, 7.1304e-03, -2.6920e-03, -2.1052e-03,\n 1.0110e-03, 8.2065e-03, 2.3065e-03, -6.4971e-03, 1.0503e-03,\n 4.8803e-04, -3.0815e-03, -4.5731e-03, -3.0657e-03, -2.7672e-03,\n 6.9491e-03, 5.1902e-03, -2.3625e-03, -7.1509e-03, 1.8752e-03,\n 1.8829e-03, 2.8044e-03, -3.1782e-03, 3.9968e-03, -2.2083e-03,\n -5.9830e-05, -7.2259e-03, 3.9634e-05, 7.9005e-03, -1.0011e-03,\n 9.3722e-03, 3.2084e-03, 3.5647e-03, 1.5119e-03, 1.7865e-03,\n 4.6465e-03, -5.1290e-04, -4.5637e-03, 1.4207e-02, -3.6005e-04,\n 2.2965e-03, 7.8714e-04, 4.3153e-03, -3.1679e-03, 2.9335e-03,\n -1.1237e-03, 3.4691e-03, 6.8402e-04, -5.3066e-03, -2.1745e-03,\n 7.2737e-03, -3.8675e-04, 4.8839e-03, -2.5327e-04, 4.7472e-03,\n -4.6255e-03, 3.4210e-04, 6.9871e-03, -1.4237e-03, -9.1050e-04,\n -8.5971e-05, 1.4695e-03, 7.2004e-03, -1.7391e-03, 4.0913e-03,\n -9.6017e-04, -9.3104e-04, -3.4529e-03, -1.8414e-03, 6.5093e-03,\n -1.6358e-03, -4.7894e-05, 9.3478e-03, -4.7395e-03, -6.1491e-03,\n -3.5250e-03, 7.9346e-04, -2.6473e-03, -4.5430e-04, 3.3930e-03,\n -2.4975e-03, 1.7374e-03, -6.0813e-04, -1.7307e-03, -2.7901e-04,\n -9.0319e-05, 2.1065e-03, 4.6125e-03, -8.1726e-04, -2.3776e-03,\n 6.7349e-03, -8.7930e-04, -3.7060e-03, -3.7182e-03, -1.5447e-03,\n 2.3315e-03, -4.0747e-03, -1.2081e-02, -6.4542e-03, -4.7889e-04,\n 2.7743e-03, 7.3435e-03, -5.2960e-03, -3.1771e-04, 2.7398e-03,\n 1.1035e-03, 4.6605e-03, 2.3964e-03, -2.3482e-03, -3.9457e-04,\n -2.2406e-03, 3.0386e-03, -2.6736e-03, -2.8722e-04, 2.5746e-03,\n -8.5946e-03, 5.5391e-04, 1.9106e-03, -1.1873e-03, 1.5691e-03,\n -2.2016e-04, 4.6308e-03, 1.6799e-03, 6.7766e-03, 7.8078e-03,\n -5.9398e-04, 1.5913e-03, 7.7270e-03, -1.8637e-03, -3.2504e-04,\n -1.0197e-02, 3.9570e-03, 6.3552e-04, -4.3221e-03, 1.1024e-03,\n 5.4988e-03, -1.2093e-03, 1.0430e-03, -1.0145e-03, -2.6068e-03,\n -3.0442e-03, 4.9549e-03, 5.0148e-03, -6.6063e-04, -5.8936e-03,\n 4.0998e-03, 3.9117e-03, 1.9074e-03, 5.6305e-03, -5.6804e-03,\n -3.4897e-03, -2.5441e-04, -1.5990e-03, -4.6084e-03, 4.6338e-03,\n -1.3493e-02, -5.5425e-04, 3.1143e-03, -1.4614e-03, 4.6142e-03,\n -1.1849e-03, -9.3435e-04, -4.5253e-03, -1.9109e-03, 4.3930e-04,\n -1.3098e-03, 2.1257e-04, 4.0078e-03, -2.0735e-03, 2.5501e-03,\n -1.5265e-02, -2.5973e-03, -8.1922e-03, 1.0503e-02, 6.2958e-03,\n -2.7940e-03, -3.3228e-03, -1.8786e-03, -5.3515e-05, -1.0406e-02,\n -4.3877e-03, 2.1653e-03, -2.3072e-04, 5.8595e-03, -1.2362e-03,\n -3.8227e-04, 7.9261e-03, 1.0654e-03, 4.0550e-03, -5.4737e-03,\n 2.5813e-03, -6.2959e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([2.0997e-04, 8.2483e-05, 1.7301e-04, 2.8416e-04, 8.1032e-05, 1.4126e-04,\n 1.1953e-04, 1.7578e-04, 7.1747e-05, 1.7711e-04, 1.4664e-04, 2.2072e-04,\n 7.3998e-05, 1.4476e-04, 6.1569e-05, 8.3891e-05, 2.5732e-03, 2.0664e-04,\n 1.6063e-04, 2.2610e-04, 3.1132e-04, 3.0995e-04, 1.9617e-04, 1.1631e-04,\n 1.6285e-04, 3.6242e-04, 2.3394e-04, 1.6791e-04, 5.5251e-05, 4.5190e-04,\n 2.0644e-04, 9.4083e-05, 3.0434e-04, 7.2381e-05, 3.9890e-04, 3.4267e-04,\n 2.5786e-04, 2.8893e-04, 6.8932e-05, 1.2780e-04, 1.2150e-04, 1.3651e-04,\n 5.7791e-05, 6.2610e-05, 1.3494e-04, 8.0258e-05, 2.8151e-04, 2.1877e-04,\n 3.9255e-04, 1.2945e-04, 2.7116e-04, 3.2952e-04, 8.9190e-05, 2.5673e-04,\n 1.4231e-04, 2.2031e-04, 4.4104e-04, 1.7330e-04, 1.7627e-04, 1.8964e-04,\n 2.8526e-04, 5.8132e-05, 1.5953e-04, 3.2595e-04, 6.0756e-04, 1.7553e-04,\n 2.2218e-04, 1.7570e-04, 5.8679e-05, 1.9916e-04, 5.6853e-05, 1.0113e-04,\n 7.3383e-05, 1.1432e-04, 1.4997e-04, 1.0886e-04, 1.6769e-04, 1.3825e-04,\n 2.4266e-04, 1.5715e-04, 1.0042e-04, 6.9548e-05, 1.2088e-04, 1.5918e-04,\n 1.4824e-04, 2.2567e-04, 2.5666e-03, 1.0691e-04, 7.6480e-05, 1.8072e-04,\n 1.9041e-04, 2.7757e-04, 2.2892e-04, 9.6946e-05, 1.0011e-04, 9.7788e-05,\n 1.4215e-04, 1.1920e-04, 1.2282e-04, 7.4453e-05, 7.8555e-05, 8.3663e-05,\n 9.6478e-05, 2.7251e-04, 2.8895e-04, 2.1704e-04, 6.3406e-05, 2.8803e-04,\n 1.0866e-04, 7.1851e-05, 5.2611e-04, 6.3309e-05, 1.5364e-04, 7.5254e-05,\n 4.3652e-04, 9.3991e-05, 8.3836e-05, 1.1124e-04, 2.2226e-04, 1.2362e-04,\n 1.1099e-04, 2.1933e-04, 1.1418e-04, 1.1414e-04, 1.1775e-04, 9.9453e-05,\n 8.0666e-05, 1.9793e-04, 8.8462e-05, 3.4714e-04, 7.9220e-05, 1.3909e-04,\n 3.4330e-04, 2.8575e-04, 1.4824e-04, 2.3124e-04, 8.7997e-05, 2.4824e-04,\n 1.1001e-04, 1.0144e-04, 2.4084e-04, 2.1548e-04, 1.2938e-04, 1.4243e-04,\n 1.0150e-04, 2.0962e-04, 1.5429e-04, 1.6456e-04, 1.1180e-04, 1.2012e-04,\n 6.5027e-05, 2.1457e-04, 1.5291e-04, 2.7141e-04, 1.2528e-04, 2.5104e-04,\n 1.3902e-04, 1.2978e-04, 6.7177e-04, 5.3411e-05, 2.9634e-04, 1.3585e-04,\n 1.9378e-04, 1.2823e-04, 1.7251e-04, 9.7199e-05, 4.3250e-04, 1.9958e-04,\n 7.1082e-05, 1.1910e-04, 3.8021e-04, 1.3718e-04, 4.8847e-04, 1.1256e-04,\n 1.2300e-04, 1.5686e-04, 1.3317e-04, 1.9192e-04, 1.1347e-04, 7.3058e-05,\n 1.1559e-04, 1.3212e-04, 6.1194e-04, 1.7520e-04, 1.2348e-04, 1.8456e-04,\n 6.7624e-04, 5.0410e-04, 9.6045e-05, 2.4934e-04, 4.1384e-04, 4.1222e-04,\n 1.7680e-04, 1.2317e-04, 1.4860e-04, 1.2792e-04, 6.9535e-05, 1.3735e-04,\n 6.1335e-05, 1.0629e-04, 3.2522e-04, 2.1864e-04, 8.0335e-05, 8.5613e-05,\n 1.2873e-04, 2.9299e-04, 4.2503e-04, 2.2458e-04, 1.7823e-04, 1.2069e-04,\n 1.8047e-04, 1.3590e-04, 1.4531e-04, 1.9774e-04, 3.3070e-04, 1.5922e-04,\n 2.1722e-04, 8.0998e-05, 1.4071e-04, 1.3219e-04, 1.7123e-04, 8.2122e-05,\n 7.3004e-05, 1.3366e-04, 2.6893e-04, 1.3044e-04, 2.3290e-04, 7.8025e-05,\n 4.2295e-04, 1.9852e-04, 9.2965e-05, 1.4528e-04, 7.8575e-05, 1.4739e-04,\n 1.4903e-04, 1.2625e-04, 1.7640e-04, 1.8171e-04, 1.8717e-04, 1.2653e-04,\n 7.7900e-05, 1.6666e-04, 1.5883e-04, 1.2698e-04, 3.0981e-04, 8.9597e-05,\n 1.3958e-04, 6.1214e-05, 2.0702e-04, 1.8340e-04, 1.6965e-04, 1.7544e-04,\n 3.8971e-04, 1.2737e-04, 1.2306e-04, 8.5598e-05, 3.3532e-04, 1.3148e-04,\n 1.7803e-04, 1.7587e-04, 1.0231e-04, 9.4114e-05, 2.5421e-04, 3.5882e-04,\n 6.8267e-04, 2.4612e-04, 1.4733e-04, 1.1444e-04, 1.9865e-04, 1.1217e-04,\n 2.6316e-04, 1.5914e-04, 7.2086e-05, 6.7539e-05, 8.6714e-05, 3.3947e-04,\n 7.7021e-04, 2.3092e-04, 1.4322e-04, 8.6797e-05, 4.1730e-05, 1.6955e-04,\n 1.0980e-04, 2.0043e-04, 1.8008e-04, 8.7738e-05, 8.4624e-04, 8.6771e-05,\n 1.1149e-04, 2.6885e-04, 2.4554e-04, 1.2431e-04, 1.7425e-04, 1.1767e-04,\n 1.3812e-04, 1.4584e-04, 5.7068e-05, 1.8025e-04, 7.4501e-05, 1.1470e-04,\n 3.4210e-04, 1.3332e-04, 1.2645e-04, 3.1580e-04, 1.1735e-04, 1.8093e-04,\n 1.6192e-04, 1.3279e-04, 1.4186e-04, 1.1876e-04, 1.0871e-04, 8.0539e-05,\n 1.3991e-04, 9.8522e-05, 3.9282e-04, 2.1081e-04, 8.5676e-05, 2.2693e-04,\n 8.2849e-05, 3.3137e-04, 6.7773e-05, 2.6086e-04, 1.0237e-04, 1.3234e-04,\n 9.5248e-05, 9.0059e-05, 1.9636e-04, 1.2403e-04, 8.0983e-05, 1.3908e-04,\n 1.3257e-04, 2.4168e-04, 1.0504e-04, 1.5031e-04, 5.2240e-04, 2.9251e-04,\n 1.3927e-04, 3.8108e-04, 1.4417e-04, 9.4710e-05, 1.8999e-04, 2.6726e-04,\n 1.2219e-04, 1.4462e-04, 1.0964e-04, 1.0076e-04, 1.3624e-04, 8.4646e-05,\n 3.2998e-04, 6.2620e-05, 2.1201e-04, 9.3090e-05, 6.1732e-05, 1.1613e-04,\n 1.7950e-04, 1.0350e-04, 4.3049e-05, 8.1863e-05, 5.3784e-04, 9.0190e-05,\n 1.0709e-04, 5.4010e-05, 1.1213e-04, 2.0881e-04, 7.5875e-05, 1.1133e-04,\n 1.3057e-04, 1.1567e-04, 1.6208e-04, 1.9922e-04, 2.8821e-04, 1.1859e-04,\n 1.0539e-04, 1.3511e-04, 1.0630e-04, 1.9886e-04, 7.6406e-05, 1.4693e-04,\n 1.1686e-04, 2.0939e-04, 6.3162e-05, 1.1813e-04, 2.0487e-04, 1.7572e-04,\n 8.6034e-05, 8.9554e-05, 2.1624e-04, 6.1879e-05, 5.4676e-05, 8.0243e-04,\n 1.2944e-04, 8.7012e-05, 2.4418e-04, 1.2853e-04, 2.9277e-04, 2.0375e-04,\n 2.8905e-04, 2.8808e-04, 1.6455e-04, 2.5419e-04, 2.5719e-04, 9.6074e-05,\n 7.1133e-05, 6.5111e-05, 7.7605e-05, 8.7360e-05, 1.4613e-04, 1.5647e-04,\n 1.0351e-04, 4.5821e-04, 1.3735e-04, 6.2957e-05, 1.4961e-04, 4.5984e-04,\n 7.1745e-05, 2.8230e-04, 1.1129e-04, 1.3999e-04, 8.9150e-05, 3.0615e-04,\n 2.2862e-04, 1.2943e-04, 1.0235e-04, 1.3575e-04, 6.7199e-05, 1.1416e-04,\n 1.1377e-04, 1.0530e-04, 2.3559e-04, 2.3327e-04, 6.2561e-05, 4.2554e-04,\n 2.1019e-04, 9.3048e-05, 1.7070e-04, 1.4622e-04, 6.2668e-05, 1.6212e-04,\n 1.5125e-04, 8.6700e-05, 1.4367e-04, 1.6016e-04, 1.2208e-04, 3.2139e-04,\n 1.8434e-04, 7.1268e-05, 7.8644e-05, 2.6624e-04, 2.4748e-04, 2.5175e-04,\n 5.2765e-04, 1.2343e-04, 6.9042e-05, 2.7934e-04, 2.5617e-04, 1.1286e-04,\n 6.0445e-05, 7.5153e-04, 1.4197e-04, 1.7546e-04, 1.9948e-04, 1.0420e-04,\n 2.6334e-04, 6.4468e-05, 1.0815e-03, 2.0024e-04, 1.2040e-04, 7.0638e-05,\n 1.8322e-04, 8.3688e-05, 1.1316e-04, 2.8172e-04, 9.2685e-05, 1.7758e-04,\n 2.8210e-04, 2.5075e-04, 1.5365e-04, 2.3348e-04, 3.6211e-05, 1.2733e-04,\n 9.1082e-05, 4.6180e-05, 2.0252e-04, 1.6458e-04, 1.1020e-04, 2.7799e-04,\n 8.8200e-05, 1.9478e-04, 1.1463e-04, 1.2616e-04, 2.1527e-01, 9.0273e-05,\n 1.8108e-04, 1.5007e-04, 1.3512e-04, 2.8344e-04, 2.4514e-04, 1.3872e-04,\n 1.0268e-04, 2.0564e-04, 9.9959e-05, 1.3488e-04, 9.5448e-05, 1.2790e-04,\n 1.6294e-04, 1.7298e-04, 2.4838e-04, 1.8966e-04, 1.9819e-04, 1.8978e-04,\n 2.2191e-04, 1.6151e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(12520.)",
|
| 22 |
+
"exp_avg": "tensor([-8.5455e-05, 9.5517e-03, 2.6119e-04, -2.1294e-04, -1.9192e-03,\n -6.7118e-04, 4.4197e-03, -9.7111e-03, -1.1959e-03, -3.3790e-03,\n 3.9408e-03, 2.4850e-04, 3.0228e-03, 9.6268e-04, -1.5387e-03,\n -3.7670e-03, -7.1104e-03, -8.6330e-03, 2.5953e-03, 7.1418e-03,\n -1.8120e-03, 5.5528e-03, -2.8868e-03, -4.1836e-04, -1.5892e-02,\n 3.4232e-03, 4.7878e-03, 1.4841e-03, -5.7454e-03, 1.5857e-03,\n -3.4174e-03, 5.0108e-03, 5.2116e-03, 5.5747e-03, -3.6997e-03,\n 4.3475e-03, 6.7498e-03, 2.0646e-03, 7.6403e-04, 5.8196e-03,\n -3.0591e-03, 4.7130e-03, -4.0019e-05, -4.5723e-04, -5.1663e-03,\n -2.7020e-04, 6.8135e-03, 3.0298e-03, -8.1077e-03, 1.0648e-03,\n 3.0228e-03, -4.0511e-03, 4.1743e-04, 2.5604e-03, 5.0335e-04,\n 6.5335e-03, 6.2926e-03, 2.6609e-03, 4.9662e-03, 1.7092e-03,\n -8.8593e-03, 4.3508e-04, 3.0162e-03, -4.7308e-03, 2.6408e-03,\n 3.5252e-03, 9.5377e-03, -4.3364e-03, -6.2737e-03, 1.1812e-03,\n -1.2598e-03, -6.4145e-03, 6.0854e-03, 5.3422e-03, 4.5349e-03,\n 9.4150e-04, -3.3045e-03, 5.2843e-03, 3.4257e-03, -1.2264e-03,\n 4.0811e-03, -2.0759e-03, 3.0890e-03, 2.3535e-03, -5.0194e-03,\n -1.8064e-03, 6.5123e-03, 2.2079e-03, -1.8888e-03, -5.4523e-04,\n 1.8954e-03, -6.0054e-03, -2.2817e-03, 4.0318e-03, 1.7342e-03,\n 2.6158e-03, -1.9402e-03, 5.8063e-03, -2.2547e-03, 4.7542e-04,\n -6.0798e-04, 1.0284e-03, 7.5750e-04, 4.4014e-04, -2.4631e-03,\n -4.0086e-03, 2.4697e-04, 6.1088e-03, -2.1393e-03, 2.0237e-03,\n -8.2988e-03, -5.1312e-03, 4.5280e-04, -1.3590e-03, -9.2413e-03,\n 1.5377e-05, -7.2176e-04, 3.5174e-03, 1.1506e-03, -9.3898e-04,\n -3.7480e-03, -2.3099e-03, 5.4591e-04, 4.9359e-03, -4.0561e-04,\n -1.7840e-03, 5.1318e-03, 5.5108e-03, -6.5598e-04, -2.7580e-03,\n 2.1905e-03, 2.7082e-03, -1.8751e-03, 8.2094e-04, 4.9079e-03,\n -1.0748e-03, 3.0487e-03, -1.4230e-04, -1.8662e-03, 9.7071e-04,\n -5.1814e-03, 1.7428e-03, 8.6712e-04, 6.4624e-04, -3.9351e-03,\n 4.0979e-03, -2.3934e-03, -2.6109e-03, 1.0397e-04, 2.0489e-04,\n 1.1749e-04, 3.2631e-03, 1.1287e-02, -3.0843e-03, -3.5828e-03,\n 2.5781e-03, -7.4078e-05, -2.0462e-03, 2.3273e-03, 5.0128e-05,\n -7.4522e-03, -3.2022e-03, 3.3802e-03, 7.3060e-04, 2.1189e-03,\n -6.0732e-04, 1.4923e-03, 5.8319e-04, 4.3510e-03, -9.8179e-04,\n -6.7505e-03, 4.6437e-03, -1.4241e-03, -4.6519e-03, 8.9680e-03,\n 8.4043e-04, 6.2006e-03, -4.7045e-03, 2.2516e-04, 2.1148e-05,\n 2.0897e-03, 2.8902e-03, 2.5020e-05, -4.2813e-03, 4.4810e-03,\n 4.0434e-03, -6.4196e-04, 1.4134e-03, 2.9458e-05, 2.8913e-03,\n -2.9120e-03, 4.0388e-03, 5.7947e-03, 5.9760e-03, 4.0892e-03,\n -5.3882e-03, -3.3106e-04, 4.5055e-03, -4.4899e-03, -4.7816e-03,\n 7.8536e-04, -3.5805e-03, 1.2303e-03, 8.5114e-04, 7.6480e-03,\n 2.2882e-03, -2.0809e-03, -8.4538e-04, -1.5754e-03, 3.0530e-03,\n -8.5798e-04, 3.7402e-04, 1.5115e-03, 9.1120e-04, -6.7952e-03,\n -2.5772e-03, -1.8668e-03, -9.1244e-04, 5.6470e-04, 2.8039e-05,\n -6.8114e-03, -5.3486e-03, 8.2919e-03, -9.7120e-03, -3.4668e-03,\n 1.3318e-03, 1.0427e-02, 4.0898e-04, -3.5439e-03, 3.7255e-03,\n -5.7028e-03, -2.1785e-03, 5.0001e-03, 1.4601e-03, -3.7284e-03,\n 3.6999e-03, -7.7756e-03, 5.8687e-04, -5.9955e-04, 5.1457e-03,\n -7.2753e-03, -1.4621e-03, 6.5888e-04, -4.6369e-03, 1.2141e-03,\n 7.3460e-03, -3.0522e-04, 4.8710e-03, 3.8062e-03, -2.8808e-03,\n 4.5552e-03, 2.3945e-04, -8.8771e-04, 2.8341e-03, 3.2834e-03,\n 3.7259e-03, 6.3389e-04, -4.8671e-04, -4.4034e-03, -3.7888e-04,\n -5.3187e-03, 4.9879e-03, 1.3272e-03, 2.2522e-03, 7.7580e-04,\n 7.6465e-03, -3.6559e-03, -7.6639e-04, 3.4429e-03, -7.9501e-03,\n 1.5368e-03, -1.2096e-03, -2.5843e-03, -1.3032e-03, -1.7771e-03,\n -3.4374e-03, -1.9388e-04, 2.5820e-04, -2.6390e-03, -4.0259e-03,\n -3.9066e-03, -1.3797e-02, 2.6685e-03, 4.7722e-03, -9.8357e-04,\n -6.6242e-03, 3.8033e-03, -2.2080e-03, 2.3917e-03, -3.5452e-03,\n 1.8320e-03, 1.7670e-03, -2.8282e-03, -2.2917e-03, 1.2129e-02,\n -7.7354e-03, 3.4898e-03, 9.8263e-03, 4.4135e-04, 4.1348e-03,\n -6.3486e-03, -3.6903e-03, 2.5637e-03, 1.7471e-03, 5.1474e-03,\n -1.2082e-02, 1.5724e-03, -3.0313e-04, -2.3357e-03, -9.4334e-03,\n -7.1659e-03, 4.2150e-03, -1.2459e-04, 4.4262e-03, 1.5927e-03,\n -5.8324e-04, 3.7979e-03, -3.8816e-03, 4.7964e-04, -3.2079e-03,\n -2.6921e-03, -2.0155e-03, 7.8607e-03, -2.3222e-03, -2.0710e-03,\n 2.1014e-03, 8.7597e-03, 2.0522e-03, -5.5344e-03, 1.2642e-03,\n 1.0275e-03, -2.9578e-03, -4.7454e-03, -2.1811e-03, -3.9992e-04,\n 5.0690e-03, 5.1917e-03, -1.8962e-03, -6.7408e-03, 3.9411e-03,\n 2.7697e-03, 2.8470e-03, -2.4637e-03, 3.1361e-03, -3.1836e-03,\n -7.6308e-04, -4.9725e-03, 1.7250e-04, 5.6174e-03, -8.3261e-04,\n 8.8667e-03, 4.4792e-03, 4.3812e-03, 3.9971e-04, 1.9598e-03,\n 5.7834e-03, -3.7840e-04, -6.3817e-03, 9.6660e-03, -8.2472e-04,\n 1.9953e-03, 1.0955e-03, 4.5544e-03, -3.1179e-03, 3.4148e-03,\n -1.3256e-03, 4.0160e-03, 1.3885e-03, -4.7096e-03, -3.1973e-03,\n 6.3364e-03, 5.7953e-04, 7.6163e-03, 5.5759e-04, 5.5730e-03,\n -3.3762e-03, 4.5889e-04, 7.6727e-03, -2.5274e-03, -1.7257e-03,\n 1.4982e-04, 1.1879e-05, 7.6862e-03, -1.3926e-03, 6.0483e-03,\n -5.8498e-04, -5.9114e-04, -3.8349e-03, -2.0978e-03, 4.4445e-03,\n 1.3534e-03, -7.6484e-04, 8.3614e-03, -4.3448e-03, -5.9095e-03,\n -2.4753e-03, 7.7359e-04, -2.7221e-03, 2.1956e-03, 2.8489e-03,\n -9.4681e-04, 1.9080e-03, -1.8701e-04, -1.4861e-03, -2.9878e-05,\n 4.0948e-05, 1.1960e-03, 1.6543e-03, -1.2624e-03, -1.0418e-04,\n 5.4673e-03, -4.9027e-04, -6.1206e-03, -2.9876e-03, -1.1180e-03,\n 2.3715e-03, -4.0696e-03, -1.0495e-02, -8.1667e-03, -3.1640e-04,\n 2.6580e-03, 6.0212e-03, -6.0996e-03, -2.1213e-03, 4.6932e-03,\n 2.1639e-03, 7.3484e-03, 3.1646e-03, -1.8838e-03, -4.3358e-04,\n -3.1901e-03, 2.4158e-03, -1.7329e-03, -8.2955e-06, 2.3383e-03,\n -9.1723e-03, 1.0314e-03, 1.6117e-03, -1.3856e-03, 1.9647e-03,\n -3.8367e-04, 6.0030e-03, 3.2722e-03, 4.5819e-03, 5.7885e-03,\n 5.5090e-05, 3.4623e-03, 4.9934e-03, -9.6220e-04, -4.0049e-04,\n -5.5672e-03, 4.4548e-03, 8.0626e-04, -3.3233e-03, 1.3321e-03,\n 4.2899e-03, -1.4445e-03, 5.3165e-04, -1.8638e-03, -3.1171e-03,\n -1.4159e-03, 6.7943e-03, 4.2614e-03, 7.4131e-04, -2.7485e-03,\n 4.7291e-03, 4.5991e-03, 1.7278e-03, 5.0057e-03, -7.4147e-03,\n -3.8051e-03, 5.0321e-04, -2.7246e-03, -3.6259e-03, 3.5049e-03,\n -8.0794e-03, 2.8790e-04, 2.2421e-03, -1.3821e-03, 5.3270e-03,\n -3.7359e-04, -1.1563e-03, -3.8240e-03, -2.1423e-03, 6.8117e-05,\n -1.9911e-04, -3.2066e-04, 2.5710e-03, -3.2122e-03, 2.8756e-03,\n -5.2553e-04, -2.3597e-03, -6.8195e-03, 7.8487e-03, 8.6953e-03,\n -3.6046e-03, -2.0569e-03, -1.7050e-03, 2.3935e-04, -1.3532e-02,\n -6.5213e-03, 2.2754e-03, 7.1022e-04, 5.8983e-03, -8.1946e-04,\n 1.2713e-04, 6.1241e-03, 6.3115e-04, 3.7037e-03, -3.4526e-03,\n 3.5581e-03, -4.0646e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([2.2023e-04, 1.6988e-04, 1.5360e-04, 1.7176e-04, 1.5139e-04, 1.4280e-04,\n 1.3803e-04, 1.5251e-04, 1.5788e-04, 2.1012e-04, 1.2305e-04, 2.1133e-04,\n 1.1349e-04, 1.3621e-04, 1.0756e-04, 1.2019e-04, 3.0584e-04, 1.2413e-04,\n 1.3577e-04, 1.5429e-04, 1.8037e-04, 2.2373e-04, 1.7637e-04, 1.3643e-04,\n 2.4411e-04, 2.5170e-04, 1.8911e-04, 1.4644e-04, 1.0571e-04, 1.6833e-04,\n 1.6397e-04, 1.6113e-04, 2.8063e-04, 1.0437e-04, 2.1941e-04, 2.0012e-04,\n 1.6065e-04, 1.9726e-04, 9.9419e-05, 1.2678e-04, 1.6255e-04, 1.3277e-04,\n 1.0811e-04, 1.1904e-04, 1.6157e-04, 1.3753e-04, 3.4811e-04, 2.2914e-04,\n 2.1412e-04, 1.6072e-04, 1.5860e-04, 2.2499e-04, 1.2949e-04, 1.4668e-04,\n 2.3216e-04, 2.1800e-04, 2.1150e-04, 1.8991e-04, 1.5787e-04, 1.5446e-04,\n 2.2694e-04, 1.2467e-04, 2.0920e-04, 1.8221e-04, 2.1324e-04, 1.3766e-04,\n 1.8269e-04, 1.9319e-04, 1.6209e-04, 2.3645e-04, 1.3794e-04, 1.1082e-04,\n 1.7857e-04, 1.1756e-04, 1.6779e-04, 2.2804e-04, 1.7847e-04, 1.6097e-04,\n 2.0089e-04, 2.4321e-04, 1.9350e-04, 1.0362e-04, 1.9551e-04, 1.9396e-04,\n 1.4439e-04, 1.7202e-04, 2.7088e-04, 2.0980e-04, 1.0769e-04, 1.9585e-04,\n 1.6474e-04, 1.9860e-04, 1.6002e-04, 1.4459e-04, 1.6808e-04, 1.3512e-04,\n 2.1226e-04, 1.8359e-04, 1.5415e-04, 1.2343e-04, 1.0161e-04, 1.1733e-04,\n 1.4515e-04, 1.4013e-04, 1.9621e-04, 1.8995e-04, 1.5191e-04, 1.6979e-04,\n 1.6970e-04, 1.1567e-04, 1.8595e-04, 1.2828e-04, 1.7215e-04, 9.8801e-05,\n 2.2034e-04, 1.0467e-04, 1.0742e-04, 2.0344e-04, 1.4876e-04, 1.2946e-04,\n 1.0250e-04, 1.2535e-04, 1.5057e-04, 1.6575e-04, 1.6754e-04, 1.3376e-04,\n 1.3704e-04, 1.6266e-04, 1.3492e-04, 1.8581e-04, 1.2291e-04, 1.3744e-04,\n 2.0541e-04, 1.5945e-04, 1.6128e-04, 1.5564e-04, 1.1473e-04, 1.2536e-04,\n 1.6299e-04, 1.6026e-04, 2.4732e-04, 1.5330e-04, 1.1214e-04, 1.9419e-04,\n 1.4372e-04, 1.6884e-04, 1.7250e-04, 1.7194e-04, 1.5353e-04, 1.7681e-04,\n 9.2472e-05, 1.9052e-04, 1.2929e-04, 1.8490e-04, 1.0128e-04, 1.7673e-04,\n 1.7518e-04, 1.5222e-04, 2.6364e-04, 9.8101e-05, 1.8888e-04, 1.3459e-04,\n 1.9758e-04, 1.2723e-04, 2.0044e-04, 1.3042e-04, 1.6992e-04, 1.3646e-04,\n 1.2141e-04, 1.7028e-04, 2.9662e-04, 1.3705e-04, 2.0630e-04, 1.1841e-04,\n 1.5401e-04, 1.2122e-04, 1.4028e-04, 1.6159e-04, 9.4485e-05, 1.0916e-04,\n 1.5312e-04, 2.5697e-04, 1.9613e-04, 1.8795e-04, 2.1261e-04, 1.6826e-04,\n 1.7563e-04, 2.1515e-04, 1.3907e-04, 1.8035e-04, 1.7532e-04, 1.7164e-04,\n 2.4059e-04, 1.3729e-04, 1.5677e-04, 1.3442e-04, 9.7031e-05, 1.6076e-04,\n 1.0642e-04, 2.3677e-04, 1.7041e-04, 2.0932e-04, 9.5085e-05, 1.4844e-04,\n 1.4708e-04, 1.7619e-04, 1.8173e-04, 1.3994e-04, 1.7351e-04, 1.6873e-04,\n 1.2424e-04, 1.9695e-04, 2.0832e-04, 2.7174e-04, 2.4240e-04, 1.2730e-04,\n 1.8417e-04, 8.7342e-05, 1.2843e-04, 1.4613e-04, 1.4594e-04, 1.4392e-04,\n 1.6003e-04, 2.0066e-04, 1.3616e-04, 1.5310e-04, 1.5986e-04, 1.2104e-04,\n 2.6699e-04, 2.4425e-04, 1.4557e-04, 1.3342e-04, 1.3940e-04, 1.2479e-04,\n 1.6436e-04, 1.9377e-04, 1.9416e-04, 2.3948e-04, 1.7886e-04, 2.2409e-04,\n 1.4441e-04, 1.4830e-04, 1.8034e-04, 1.3224e-04, 1.8044e-04, 1.5171e-04,\n 1.4062e-04, 1.0157e-04, 1.4913e-04, 2.6176e-04, 1.3111e-04, 1.4957e-04,\n 1.4123e-04, 2.9247e-04, 1.5927e-04, 1.4132e-04, 3.8669e-04, 1.6698e-04,\n 1.8258e-04, 1.9545e-04, 1.3452e-04, 1.8854e-04, 1.5528e-04, 1.8155e-04,\n 1.9993e-04, 2.0679e-04, 1.2170e-04, 1.5636e-04, 1.1732e-04, 1.2744e-04,\n 1.4568e-04, 1.1196e-04, 1.2575e-04, 1.1632e-04, 1.2892e-04, 3.2225e-04,\n 1.7253e-04, 1.8667e-04, 2.6904e-04, 1.4132e-04, 8.3963e-05, 2.1888e-04,\n 1.4211e-04, 1.3943e-04, 2.5202e-04, 1.3023e-04, 2.8856e-04, 1.1857e-04,\n 1.5923e-04, 1.9658e-04, 2.1945e-04, 1.9249e-04, 2.0370e-04, 1.5381e-04,\n 2.4005e-04, 1.8498e-04, 1.3006e-04, 2.1534e-04, 1.5832e-04, 2.2873e-04,\n 2.7176e-04, 1.3705e-04, 1.5162e-04, 1.9744e-04, 1.2718e-04, 1.6328e-04,\n 1.7321e-04, 1.2447e-04, 1.8881e-04, 1.5742e-04, 1.2328e-04, 1.2306e-04,\n 1.1880e-04, 1.1661e-04, 1.8421e-04, 2.0924e-04, 8.9276e-05, 2.2579e-04,\n 1.2479e-04, 1.8474e-04, 1.2016e-04, 2.7043e-04, 1.5033e-04, 1.2269e-04,\n 1.2358e-04, 1.2400e-04, 1.6017e-04, 1.6355e-04, 8.4015e-05, 1.4770e-04,\n 1.8285e-04, 1.9726e-04, 1.2509e-04, 1.2307e-04, 2.1053e-04, 1.7237e-04,\n 1.3484e-04, 2.5856e-04, 1.9342e-04, 1.6011e-04, 1.6606e-04, 2.1806e-04,\n 1.3467e-04, 1.3308e-04, 1.4382e-04, 1.6378e-04, 1.1747e-04, 1.1956e-04,\n 2.0118e-04, 7.7195e-05, 2.0006e-04, 1.7472e-04, 9.0913e-05, 1.7229e-04,\n 1.4236e-04, 1.3873e-04, 1.0374e-04, 1.2743e-04, 2.5338e-04, 1.0589e-04,\n 1.3080e-04, 8.7027e-05, 1.2812e-04, 1.3613e-04, 9.9982e-05, 1.3117e-04,\n 1.1333e-04, 2.3736e-04, 1.2867e-04, 1.3331e-04, 1.7001e-04, 2.1401e-04,\n 1.6903e-04, 1.5788e-04, 1.3045e-04, 1.4159e-04, 1.2429e-04, 1.3986e-04,\n 1.5336e-04, 1.3512e-04, 8.7925e-05, 1.5376e-04, 1.8429e-04, 1.8842e-04,\n 1.4532e-04, 9.1444e-05, 1.4321e-04, 1.1051e-04, 1.2973e-04, 2.2222e-04,\n 1.5344e-04, 1.6699e-04, 1.8115e-04, 1.2515e-04, 1.8470e-04, 1.9276e-04,\n 1.6425e-04, 1.8024e-04, 2.5409e-04, 1.3561e-04, 1.2921e-04, 1.2483e-04,\n 1.2339e-04, 1.3263e-04, 1.2331e-04, 1.5609e-04, 1.4658e-04, 1.2422e-04,\n 1.5892e-04, 2.0071e-04, 1.6849e-04, 1.1997e-04, 1.9831e-04, 2.3564e-04,\n 1.3947e-04, 1.7006e-04, 1.2853e-04, 1.1302e-04, 1.7009e-04, 1.6996e-04,\n 1.8715e-04, 1.3104e-04, 1.5784e-04, 1.7633e-04, 1.6489e-04, 1.2846e-04,\n 1.9655e-04, 1.4613e-04, 1.5129e-04, 1.4968e-04, 1.2460e-04, 1.9171e-04,\n 1.6254e-04, 2.7918e-04, 1.2190e-04, 1.8746e-04, 1.0706e-04, 2.3583e-04,\n 2.0113e-04, 1.4367e-04, 1.8513e-04, 2.4978e-04, 1.5404e-04, 1.8255e-04,\n 1.4598e-04, 1.0343e-04, 1.3513e-04, 1.6254e-04, 1.7496e-04, 1.8334e-04,\n 1.8406e-04, 1.5995e-04, 1.0342e-04, 2.3501e-04, 2.0279e-04, 1.9051e-04,\n 1.1715e-04, 2.6306e-04, 1.5917e-04, 2.1098e-04, 2.0600e-04, 1.8779e-04,\n 1.8787e-04, 1.2703e-04, 1.9057e-04, 1.7798e-04, 1.2574e-04, 1.1376e-04,\n 1.5565e-04, 1.5455e-04, 1.4185e-04, 1.8740e-04, 1.6416e-04, 1.9092e-04,\n 1.6730e-04, 1.6168e-04, 1.5464e-04, 1.8981e-04, 7.4531e-05, 1.2403e-04,\n 1.1387e-04, 9.7162e-05, 1.5150e-04, 1.4077e-04, 1.2620e-04, 1.9078e-04,\n 9.7418e-05, 1.5726e-04, 1.9552e-04, 1.7204e-04, 5.6736e-04, 1.4494e-04,\n 1.8215e-04, 1.4966e-04, 2.0471e-04, 2.3391e-04, 2.3397e-04, 1.8939e-04,\n 1.4005e-04, 1.9653e-04, 1.2407e-04, 1.3891e-04, 1.5598e-04, 1.2292e-04,\n 1.9766e-04, 2.0910e-04, 1.9260e-04, 1.3784e-04, 1.7302e-04, 1.4754e-04,\n 2.3619e-04, 1.4012e-04], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(12520.)",
|
| 27 |
+
"exp_avg": "tensor([[-2.0246e-04, -1.4011e-04, -3.1570e-05, ..., -8.1749e-05,\n 4.6258e-05, -2.2533e-04],\n [-2.3327e-04, -1.7501e-05, 1.5190e-04, ..., -3.9176e-04,\n 4.2563e-05, 7.8972e-04],\n [ 1.5040e-04, -7.6225e-05, -6.0831e-06, ..., 6.6490e-05,\n -5.1094e-05, 4.5848e-04],\n ...,\n [ 6.2600e-05, 2.5542e-04, 6.4520e-05, ..., -7.8986e-05,\n -9.7499e-05, -1.9847e-04],\n [ 3.5809e-05, 4.1177e-04, -3.2527e-05, ..., -2.3733e-04,\n -9.3664e-04, 3.0401e-04],\n [-1.1982e-04, 1.4403e-04, 1.9024e-04, ..., 3.2121e-04,\n 3.7607e-04, -2.0438e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[2.8621e-07, 4.6067e-07, 9.5983e-08, ..., 2.0737e-07, 2.4261e-07,\n 3.5775e-07],\n [5.7186e-07, 1.0102e-06, 3.5232e-07, ..., 7.0228e-07, 6.8152e-07,\n 1.0264e-06],\n [6.9414e-07, 3.8528e-07, 2.9531e-07, ..., 7.1309e-07, 3.9268e-07,\n 6.9428e-07],\n ...,\n [3.9637e-07, 1.1327e-06, 4.3034e-07, ..., 5.8144e-07, 8.3804e-07,\n 7.4037e-07],\n [4.0325e-07, 8.9210e-07, 2.2686e-07, ..., 8.0919e-07, 5.1329e-07,\n 5.9158e-07],\n [3.7646e-07, 5.6332e-07, 3.2005e-07, ..., 1.0891e-06, 4.5526e-07,\n 7.1399e-07]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(12520.)",
|
| 32 |
+
"exp_avg": "tensor([[-9.8896e-05, -2.3269e-04, 7.0111e-05, ..., -5.6463e-05,\n 8.9844e-05, -1.4083e-04],\n [-1.6430e-04, -1.2316e-04, 4.2355e-05, ..., -4.0494e-05,\n 2.9090e-05, 1.0010e-04],\n [ 1.0755e-04, -1.5241e-04, -6.9675e-05, ..., -1.6107e-04,\n -2.2497e-04, -1.4267e-05],\n ...,\n [-1.3228e-06, -6.4983e-04, -1.8165e-06, ..., -1.8714e-04,\n -1.3323e-04, 8.8994e-06],\n [ 1.5436e-04, -1.7798e-05, 1.2432e-05, ..., -1.0484e-04,\n 2.6286e-05, 1.5954e-04],\n [ 7.5220e-05, -2.2870e-04, -3.0899e-05, ..., -2.3464e-04,\n -2.1540e-04, 2.4139e-04]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[1.0860e-07, 8.9260e-08, 8.2359e-08, ..., 1.1364e-07, 8.4695e-08,\n 1.2979e-07],\n [1.9905e-07, 4.0383e-07, 1.8234e-07, ..., 1.7030e-07, 2.4010e-07,\n 2.8055e-07],\n [2.5728e-07, 1.6123e-07, 1.6243e-07, ..., 3.4002e-07, 3.0811e-07,\n 2.3176e-07],\n ...,\n [1.9871e-07, 5.7393e-07, 7.8882e-08, ..., 4.4170e-07, 2.7646e-07,\n 2.7884e-07],\n [2.2534e-07, 2.8120e-07, 7.6269e-08, ..., 3.7380e-07, 1.9931e-07,\n 2.3376e-07],\n [2.2485e-07, 1.6238e-07, 3.8176e-07, ..., 1.9743e-07, 1.6398e-07,\n 2.2993e-07]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(12520.)",
|
| 37 |
+
"exp_avg": "tensor([ 0.0006, -0.0006], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.1744e-06, 7.1744e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.001,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.001,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.001,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.001,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.001,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.001,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.0005,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.0005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 20,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 0,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.001,
|
| 139 |
+
0.001,
|
| 140 |
+
0.001,
|
| 141 |
+
0.0005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 10,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.001,
|
| 149 |
+
0.001,
|
| 150 |
+
0.001,
|
| 151 |
+
0.0005
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"val_acc": 71.994
|
| 156 |
+
},
|
| 157 |
+
"train_config": {
|
| 158 |
+
"name": "david_training",
|
| 159 |
+
"run_id": "20251012_135249",
|
| 160 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 161 |
+
"model_variant": "clip_vit_laion_b32",
|
| 162 |
+
"num_classes": 1000,
|
| 163 |
+
"preset": "small_fast",
|
| 164 |
+
"custom_config_path": null,
|
| 165 |
+
"num_classes_override": null,
|
| 166 |
+
"use_belly_override": null,
|
| 167 |
+
"belly_expand_override": null,
|
| 168 |
+
"progressive_training_override": false,
|
| 169 |
+
"scale_warmup_epochs_override": null,
|
| 170 |
+
"num_epochs": 10,
|
| 171 |
+
"batch_size": 1024,
|
| 172 |
+
"learning_rate": 0.001,
|
| 173 |
+
"weight_decay": 1e-05,
|
| 174 |
+
"warmup_epochs": 3,
|
| 175 |
+
"use_rose_loss": true,
|
| 176 |
+
"rose_initial_weight": 0.1,
|
| 177 |
+
"rose_max_weight": 0.5,
|
| 178 |
+
"rose_weight_schedule": "adaptive",
|
| 179 |
+
"use_cayley_loss": false,
|
| 180 |
+
"cayley_weight": 0.001,
|
| 181 |
+
"scale_loss_balance": null,
|
| 182 |
+
"use_mixed_precision": false,
|
| 183 |
+
"gradient_clip": 10.0,
|
| 184 |
+
"scheduler_type": "cosine_restarts",
|
| 185 |
+
"min_lr": 1e-06,
|
| 186 |
+
"freeze_strategy": "never",
|
| 187 |
+
"freeze_threshold": 90.0,
|
| 188 |
+
"unfreeze_on_plateau": true,
|
| 189 |
+
"patience": 10,
|
| 190 |
+
"track_gradients": true,
|
| 191 |
+
"gradient_scale_threshold": 1e-05,
|
| 192 |
+
"gradient_scale_multiplier": 10.0,
|
| 193 |
+
"log_interval": 50,
|
| 194 |
+
"val_interval": 1,
|
| 195 |
+
"save_interval": 5,
|
| 196 |
+
"log_fusion_weights": true,
|
| 197 |
+
"log_loss_components": true,
|
| 198 |
+
"save_format": "safetensors",
|
| 199 |
+
"hf_repo": "AbstractPhil/gated-david",
|
| 200 |
+
"upload_to_hub": true,
|
| 201 |
+
"base_dir": "./david_training",
|
| 202 |
+
"num_workers": 10,
|
| 203 |
+
"pin_memory": true,
|
| 204 |
+
"prefetch_factor": 4,
|
| 205 |
+
"persistent_workers": true
|
| 206 |
+
}
|
| 207 |
+
}
|
weights/David-fully_shared-weighted_sum/20251012_135249/final_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80113e18544f43052b7bfd9b98cd716cbbfc60ee9c47b1546bc067d87db19427
|
| 3 |
+
size 2628344
|
weights/David-fully_shared-weighted_sum/20251012_135249/final_model_metadata.json
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 9,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(12520.)",
|
| 7 |
+
"exp_avg": "tensor([[-3.3046e-05, 1.3184e-03, -8.5554e-04, ..., 2.5362e-04,\n 7.8119e-04, -3.5119e-04],\n [ 1.7381e-03, 9.8076e-04, -1.0046e-03, ..., 2.8027e-03,\n 1.5395e-04, 2.3507e-03],\n [ 4.3466e-04, 3.5545e-04, 4.1837e-04, ..., -4.1433e-04,\n -1.8521e-04, -5.4486e-04],\n ...,\n [-2.4468e-04, 1.7150e-03, 7.1752e-04, ..., -8.9465e-04,\n 4.6466e-04, 1.6760e-04],\n [ 1.1223e-04, 1.9717e-03, 5.1268e-04, ..., 4.1239e-04,\n -6.7351e-05, 2.6199e-04],\n [-8.7841e-04, -3.9226e-04, 2.6469e-04, ..., -1.0100e-04,\n 7.0977e-04, 5.8649e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[6.0538e-06, 3.2370e-05, 1.5984e-05, ..., 6.3342e-06, 3.1433e-06,\n 8.3356e-06],\n [5.6413e-06, 2.3848e-05, 1.7510e-05, ..., 8.3732e-06, 2.2645e-06,\n 8.7786e-06],\n [9.4105e-06, 5.9058e-05, 2.1278e-05, ..., 6.1329e-06, 2.8654e-06,\n 3.6291e-06],\n ...,\n [5.8094e-06, 2.3533e-05, 1.4798e-05, ..., 3.4493e-06, 1.8469e-06,\n 2.7238e-06],\n [7.2410e-06, 4.6908e-05, 1.5391e-05, ..., 6.6735e-06, 3.7259e-06,\n 6.4761e-06],\n [3.8777e-06, 4.8038e-05, 1.1613e-05, ..., 3.2244e-06, 1.8040e-06,\n 2.1119e-06]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(12520.)",
|
| 12 |
+
"exp_avg": "tensor([-2.8793e-04, 4.8640e-02, 2.8733e-04, -1.9829e-03, -1.1588e-02,\n -1.3039e-03, 2.1500e-02, -5.7016e-02, -7.6601e-03, -1.4245e-02,\n 1.5816e-02, 3.6534e-03, 1.8377e-02, 1.6194e-03, -1.1017e-02,\n -2.4213e-02, -1.8920e-02, -3.5318e-02, 1.4785e-02, 2.9861e-02,\n -1.2753e-02, 1.9380e-02, -1.5568e-02, -3.5700e-04, -6.7014e-02,\n 1.2010e-02, 2.1371e-02, 8.3862e-03, -4.6259e-02, 2.6552e-03,\n -1.4616e-02, 2.1523e-02, 2.7220e-02, 3.5274e-02, -1.7959e-02,\n 1.8219e-02, 2.2884e-02, 8.2567e-03, 1.9731e-03, 2.3003e-02,\n -1.4999e-02, 2.6860e-02, -2.0848e-03, -3.0692e-03, -2.5609e-02,\n -5.5685e-03, 3.1910e-02, 1.1133e-02, -3.9296e-02, 4.8808e-03,\n 1.1479e-02, -1.7903e-02, 7.2915e-04, 6.7181e-03, 1.2693e-04,\n 2.8869e-02, 2.2346e-02, 9.8493e-03, 1.9897e-02, 6.9848e-03,\n -3.3656e-02, 5.6311e-03, 1.3728e-02, -1.6328e-02, 9.4522e-03,\n 1.4691e-02, 3.7510e-02, -2.1718e-02, -4.6480e-02, 5.0563e-03,\n -9.1711e-03, -3.6435e-02, 3.4365e-02, 2.0534e-02, 2.3343e-02,\n 3.5536e-04, -1.7489e-02, 2.3245e-02, 1.7542e-02, -8.4864e-03,\n 2.3270e-02, -1.2804e-02, 1.6030e-02, 6.8781e-03, -2.8725e-02,\n -1.3784e-02, 1.1327e-02, 1.3873e-02, -1.4216e-02, -2.7350e-03,\n 6.3497e-03, -2.2506e-02, -1.7909e-02, 1.9545e-02, 7.2716e-03,\n 1.2278e-02, -5.5581e-03, 2.7476e-02, -1.3676e-02, 4.0674e-03,\n -7.8614e-03, 4.2977e-03, 4.4696e-03, -1.1024e-03, -1.1395e-02,\n -1.8583e-02, -2.6280e-03, 2.2310e-02, -9.7073e-03, 7.3026e-03,\n -2.5477e-02, -3.5233e-02, 2.2879e-03, -6.4351e-03, -3.0403e-02,\n 1.2316e-03, -4.9787e-03, 1.7197e-02, 6.0468e-03, -6.7373e-03,\n -2.1082e-02, -1.0879e-02, 2.3980e-03, 2.5176e-02, -8.6008e-04,\n -1.3698e-02, 3.1740e-02, 2.3570e-02, -4.7870e-03, -6.6659e-03,\n 1.3564e-02, 1.3084e-02, -5.6567e-03, 2.4881e-03, 3.0487e-02,\n -7.5755e-03, 1.6282e-02, -3.5152e-03, -1.3207e-02, 8.1424e-03,\n -2.2068e-02, 6.1958e-03, 2.5182e-03, 3.5589e-03, -2.4931e-02,\n 1.1914e-02, -1.7428e-02, -1.3469e-02, 2.1464e-03, -5.5896e-05,\n -1.1783e-04, 1.5242e-02, 5.3966e-02, -1.0550e-02, -2.2377e-02,\n 5.6345e-03, -4.5011e-03, -1.2159e-02, 5.1875e-03, -9.7485e-04,\n -3.2005e-02, -1.8260e-02, 1.7123e-02, 2.7339e-03, 1.2096e-02,\n -5.3047e-03, 1.9343e-03, 5.2266e-03, 2.3647e-02, -2.0105e-02,\n -2.5488e-02, 2.0962e-02, -5.7194e-03, -2.3721e-02, 4.0386e-02,\n 2.4981e-03, 2.9079e-02, -2.5089e-02, 1.5006e-03, -3.4057e-03,\n 1.1228e-02, 2.3742e-02, -2.2092e-03, -2.1806e-02, 2.0597e-02,\n 1.9895e-02, -3.1090e-03, 2.5461e-03, -2.8593e-03, 1.2681e-02,\n -1.2726e-02, 1.0669e-02, 2.5700e-02, 3.0368e-02, 2.2922e-02,\n -2.8858e-02, -3.2277e-03, 1.9166e-02, -2.3755e-02, -2.9156e-02,\n 5.6695e-04, -1.5116e-02, 6.7286e-03, 7.7313e-04, 3.4038e-02,\n 1.0291e-02, -8.8464e-03, -1.4205e-03, -9.6294e-03, 1.8529e-02,\n -4.6589e-03, 2.5836e-03, 6.4105e-03, 4.8391e-03, -2.4215e-02,\n -1.1979e-02, -9.1469e-03, -4.5127e-03, 3.2258e-03, -2.3501e-03,\n -2.9681e-02, -3.1474e-02, 4.5832e-02, -5.0786e-02, -1.6554e-02,\n 5.8334e-03, 5.1814e-02, 1.8588e-03, -1.3108e-02, 1.5109e-02,\n -2.8642e-02, -1.3774e-02, 2.9255e-02, 2.9146e-03, -1.3881e-02,\n 1.6939e-02, -3.8032e-02, 2.4095e-03, -4.9286e-03, 2.5536e-02,\n -4.4408e-02, -8.6553e-03, 6.9803e-03, -2.4412e-02, 5.2963e-03,\n 3.8081e-02, -6.0606e-03, 3.0404e-02, 1.4011e-02, -1.2722e-02,\n 1.7489e-02, 5.4902e-03, -3.1039e-03, 1.3576e-02, 1.2996e-02,\n 2.0182e-02, 3.0371e-03, -2.6611e-03, -2.6177e-02, -5.2031e-03,\n -2.9615e-02, 2.7173e-02, 4.2806e-03, 1.0883e-02, 3.5543e-04,\n 3.5302e-02, -2.0747e-02, -2.8691e-03, 1.0282e-02, -3.4250e-02,\n 4.7217e-03, -7.2308e-03, -1.8177e-02, -8.3949e-03, -9.5230e-03,\n -1.5611e-02, -2.3456e-03, -4.3520e-04, -1.5216e-02, -2.4419e-02,\n -2.5750e-02, -6.4866e-02, 1.3522e-02, 1.7967e-02, -4.4644e-03,\n -3.5477e-02, 1.1020e-02, -1.5887e-02, 1.2085e-02, -1.7164e-02,\n 5.4228e-03, 4.4098e-03, -1.4811e-02, -1.5271e-02, 5.9134e-02,\n -4.0889e-02, 2.1173e-02, 4.6069e-02, 2.5419e-03, 2.2181e-02,\n -3.1539e-02, -2.0548e-02, 1.1235e-02, 6.5333e-03, 2.1797e-02,\n -6.1835e-02, 1.0255e-02, -4.0199e-03, -1.1448e-02, -6.2124e-02,\n -3.8762e-02, 2.4050e-02, -6.6370e-05, 2.5254e-02, 3.3064e-03,\n -7.1282e-03, 1.4965e-02, -2.1311e-02, 1.6650e-03, -1.5619e-02,\n -1.3186e-02, -9.8527e-03, 3.9895e-02, -1.1098e-02, -1.3239e-02,\n 1.0263e-02, 3.5431e-02, 7.3234e-03, -3.1150e-02, 5.9824e-03,\n 6.5632e-03, -1.4225e-02, -2.3449e-02, -1.1968e-02, -4.4559e-03,\n 1.9645e-02, 2.3660e-02, -1.0703e-02, -3.4435e-02, 2.2531e-02,\n 1.2187e-02, 1.1577e-02, -1.1584e-02, 1.1901e-02, -1.8690e-02,\n -6.2761e-03, -2.7386e-02, 9.2859e-04, 2.2606e-02, -3.6247e-03,\n 3.9740e-02, 2.2307e-02, 2.5435e-02, -2.7447e-03, 6.1269e-03,\n 3.1727e-02, -3.1187e-03, -3.6492e-02, 3.0464e-02, -4.0531e-03,\n 9.8909e-03, 6.5008e-03, 2.2741e-02, -1.6599e-02, 1.6784e-02,\n -8.8681e-03, 1.8242e-02, 3.9401e-03, -2.0300e-02, -2.1352e-02,\n 2.0905e-02, 1.8328e-04, 4.2949e-02, -5.4018e-03, 2.9712e-02,\n -1.7098e-02, -9.1278e-05, 3.1655e-02, -1.4564e-02, -8.4472e-03,\n -4.7460e-04, -2.2772e-03, 3.0636e-02, -9.5513e-03, 3.3363e-02,\n -5.0501e-03, -4.1604e-03, -2.3930e-02, -1.4385e-02, 1.0042e-02,\n 3.8460e-03, -6.3307e-03, 3.8870e-02, -2.4196e-02, -2.1113e-02,\n -1.2244e-02, -2.8602e-04, -1.3285e-02, 1.3032e-02, 7.7296e-03,\n -5.5512e-03, 6.5145e-03, -1.9832e-03, -1.4500e-02, 5.5845e-05,\n -2.9590e-04, 7.4529e-03, 9.9104e-03, -7.0313e-03, -1.8558e-03,\n 3.1571e-02, -1.6084e-03, -3.7218e-02, -1.5569e-02, -7.9845e-03,\n 9.8934e-03, -2.0579e-02, -5.0971e-02, -4.7316e-02, -2.3560e-03,\n 1.1576e-02, 3.1863e-02, -3.3888e-02, -7.9227e-03, 2.7374e-02,\n 9.0963e-03, 3.7167e-02, 1.9652e-02, -8.7403e-03, -1.2271e-03,\n -1.9661e-02, 3.8851e-03, -7.2920e-03, -3.5539e-03, 9.2494e-03,\n -4.8650e-02, 4.1687e-03, 1.0675e-02, -6.2658e-03, 1.0242e-02,\n 1.3878e-03, 3.0521e-02, 1.0698e-02, 1.7632e-02, 2.2394e-02,\n 2.5427e-03, 1.6602e-02, 1.6313e-02, -3.3836e-03, 2.2478e-04,\n -1.7979e-02, 3.2327e-02, 2.5872e-03, -1.4869e-02, 4.0177e-03,\n 2.5543e-02, -1.1046e-02, 1.8446e-04, -7.5710e-03, -1.3402e-02,\n -1.1399e-02, 3.3958e-02, 1.8227e-02, 4.8199e-03, -7.1095e-03,\n 2.1465e-02, 2.1446e-02, 9.4828e-03, 2.3874e-02, -4.4376e-02,\n -2.1337e-02, -2.9199e-04, -1.6897e-02, -2.0548e-02, 1.5580e-02,\n -3.1435e-02, -1.0484e-04, 6.5918e-03, -1.3160e-02, 2.3095e-02,\n -5.1776e-03, -7.4214e-03, -1.7959e-02, -1.3251e-02, 7.2772e-04,\n -5.3829e-03, -1.0224e-03, 1.0862e-02, -1.2710e-02, 1.1892e-02,\n -1.5025e-02, -1.6410e-02, -3.0264e-02, 3.6341e-02, 3.9090e-02,\n -1.2201e-02, -7.8130e-03, -7.7499e-03, -2.3496e-05, -6.2482e-02,\n -2.8714e-02, 1.1824e-02, 1.5038e-03, 2.7053e-02, 2.4697e-03,\n 6.2774e-04, 2.9676e-02, 2.4279e-03, 1.6174e-02, -1.5760e-02,\n 1.6092e-02, -1.9818e-02], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0049, 0.0049, 0.0048, 0.0028, 0.0067, 0.0030, 0.0034, 0.0035, 0.0062,\n 0.0040, 0.0026, 0.0041, 0.0047, 0.0032, 0.0041, 0.0036, 0.0012, 0.0021,\n 0.0029, 0.0030, 0.0032, 0.0037, 0.0040, 0.0045, 0.0055, 0.0038, 0.0040,\n 0.0035, 0.0051, 0.0017, 0.0033, 0.0046, 0.0050, 0.0032, 0.0036, 0.0032,\n 0.0024, 0.0039, 0.0033, 0.0030, 0.0057, 0.0034, 0.0035, 0.0047, 0.0040,\n 0.0051, 0.0078, 0.0046, 0.0034, 0.0046, 0.0032, 0.0040, 0.0042, 0.0021,\n 0.0064, 0.0045, 0.0026, 0.0048, 0.0031, 0.0033, 0.0035, 0.0067, 0.0050,\n 0.0026, 0.0025, 0.0026, 0.0034, 0.0052, 0.0080, 0.0059, 0.0051, 0.0032,\n 0.0068, 0.0025, 0.0041, 0.0076, 0.0043, 0.0039, 0.0039, 0.0076, 0.0057,\n 0.0030, 0.0055, 0.0047, 0.0032, 0.0031, 0.0013, 0.0078, 0.0046, 0.0040,\n 0.0024, 0.0033, 0.0025, 0.0045, 0.0057, 0.0036, 0.0052, 0.0051, 0.0039,\n 0.0044, 0.0030, 0.0034, 0.0042, 0.0018, 0.0029, 0.0034, 0.0060, 0.0029,\n 0.0046, 0.0036, 0.0019, 0.0054, 0.0037, 0.0024, 0.0027, 0.0025, 0.0044,\n 0.0054, 0.0025, 0.0037, 0.0028, 0.0027, 0.0042, 0.0049, 0.0051, 0.0044,\n 0.0050, 0.0034, 0.0050, 0.0030, 0.0043, 0.0034, 0.0034, 0.0031, 0.0045,\n 0.0027, 0.0029, 0.0018, 0.0046, 0.0072, 0.0051, 0.0027, 0.0032, 0.0050,\n 0.0048, 0.0031, 0.0040, 0.0039, 0.0041, 0.0045, 0.0031, 0.0043, 0.0029,\n 0.0026, 0.0020, 0.0025, 0.0044, 0.0039, 0.0027, 0.0040, 0.0032, 0.0036,\n 0.0036, 0.0035, 0.0047, 0.0044, 0.0019, 0.0027, 0.0041, 0.0040, 0.0045,\n 0.0033, 0.0024, 0.0033, 0.0046, 0.0026, 0.0033, 0.0034, 0.0022, 0.0035,\n 0.0047, 0.0076, 0.0020, 0.0046, 0.0067, 0.0046, 0.0015, 0.0023, 0.0044,\n 0.0035, 0.0021, 0.0022, 0.0058, 0.0036, 0.0046, 0.0035, 0.0028, 0.0033,\n 0.0034, 0.0085, 0.0021, 0.0037, 0.0026, 0.0050, 0.0034, 0.0032, 0.0022,\n 0.0028, 0.0033, 0.0050, 0.0024, 0.0049, 0.0053, 0.0076, 0.0040, 0.0025,\n 0.0037, 0.0024, 0.0033, 0.0043, 0.0024, 0.0042, 0.0064, 0.0057, 0.0017,\n 0.0036, 0.0040, 0.0048, 0.0039, 0.0060, 0.0040, 0.0029, 0.0048, 0.0025,\n 0.0038, 0.0045, 0.0046, 0.0045, 0.0035, 0.0081, 0.0056, 0.0033, 0.0039,\n 0.0029, 0.0027, 0.0049, 0.0035, 0.0040, 0.0024, 0.0069, 0.0026, 0.0030,\n 0.0016, 0.0090, 0.0044, 0.0047, 0.0075, 0.0032, 0.0042, 0.0041, 0.0037,\n 0.0067, 0.0028, 0.0028, 0.0018, 0.0041, 0.0031, 0.0047, 0.0018, 0.0032,\n 0.0023, 0.0024, 0.0046, 0.0039, 0.0031, 0.0063, 0.0016, 0.0030, 0.0081,\n 0.0034, 0.0033, 0.0049, 0.0035, 0.0026, 0.0065, 0.0044, 0.0036, 0.0040,\n 0.0055, 0.0031, 0.0036, 0.0056, 0.0040, 0.0042, 0.0066, 0.0051, 0.0053,\n 0.0045, 0.0067, 0.0061, 0.0048, 0.0031, 0.0036, 0.0026, 0.0031, 0.0038,\n 0.0042, 0.0029, 0.0055, 0.0044, 0.0032, 0.0045, 0.0027, 0.0039, 0.0027,\n 0.0051, 0.0020, 0.0051, 0.0034, 0.0027, 0.0047, 0.0066, 0.0035, 0.0026,\n 0.0041, 0.0029, 0.0035, 0.0036, 0.0026, 0.0037, 0.0054, 0.0036, 0.0034,\n 0.0029, 0.0026, 0.0028, 0.0035, 0.0041, 0.0052, 0.0051, 0.0034, 0.0036,\n 0.0032, 0.0036, 0.0043, 0.0047, 0.0028, 0.0031, 0.0030, 0.0036, 0.0043,\n 0.0065, 0.0032, 0.0053, 0.0024, 0.0039, 0.0050, 0.0040, 0.0027, 0.0034,\n 0.0039, 0.0038, 0.0037, 0.0023, 0.0031, 0.0030, 0.0027, 0.0074, 0.0027,\n 0.0026, 0.0028, 0.0079, 0.0050, 0.0039, 0.0039, 0.0024, 0.0038, 0.0030,\n 0.0044, 0.0029, 0.0025, 0.0041, 0.0034, 0.0043, 0.0051, 0.0030, 0.0024,\n 0.0038, 0.0049, 0.0017, 0.0041, 0.0075, 0.0033, 0.0036, 0.0028, 0.0049,\n 0.0024, 0.0032, 0.0074, 0.0019, 0.0022, 0.0033, 0.0038, 0.0049, 0.0044,\n 0.0067, 0.0036, 0.0030, 0.0047, 0.0023, 0.0051, 0.0050, 0.0056, 0.0027,\n 0.0060, 0.0029, 0.0037, 0.0027, 0.0052, 0.0028, 0.0042, 0.0036, 0.0045,\n 0.0055, 0.0063, 0.0034, 0.0068, 0.0058, 0.0027, 0.0024, 0.0043, 0.0026,\n 0.0028, 0.0108, 0.0023, 0.0042, 0.0043, 0.0053, 0.0053, 0.0049, 0.0054,\n 0.0067, 0.0037, 0.0032, 0.0028, 0.0032, 0.0045, 0.0027, 0.0030, 0.0029,\n 0.0017, 0.0063, 0.0037, 0.0045, 0.0033, 0.0057, 0.0038, 0.0029, 0.0041,\n 0.0044, 0.0047, 0.0057, 0.0033, 0.0050, 0.0014, 0.0038, 0.0029, 0.0039,\n 0.0039, 0.0053, 0.0045, 0.0031, 0.0055, 0.0037, 0.0034, 0.0027, 0.0033,\n 0.0039, 0.0033, 0.0029, 0.0031, 0.0038, 0.0029, 0.0035, 0.0033, 0.0037,\n 0.0027, 0.0032, 0.0073, 0.0047, 0.0007, 0.0044, 0.0039, 0.0037, 0.0052,\n 0.0047, 0.0044, 0.0048, 0.0045, 0.0043, 0.0026, 0.0037, 0.0048, 0.0030,\n 0.0046, 0.0051, 0.0040, 0.0028, 0.0043, 0.0034, 0.0055, 0.0029],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(12520.)",
|
| 17 |
+
"exp_avg": "tensor([-4.0443e-04, 6.0422e-03, -1.0697e-03, 8.0438e-04, -1.9288e-03,\n -4.1859e-04, 4.3914e-03, -1.0068e-02, 2.8355e-04, -3.2395e-03,\n 3.8719e-03, -9.0882e-04, 3.4476e-03, -2.9723e-04, -5.7432e-04,\n -3.0472e-03, -2.0661e-02, -9.9126e-03, 2.5149e-03, 9.2905e-03,\n -7.9131e-04, 7.0090e-03, -4.1218e-03, 1.0662e-04, -1.1244e-02,\n 4.4356e-03, 5.1012e-03, 3.3816e-03, -3.3507e-03, 3.1785e-03,\n -3.9020e-03, 4.1379e-03, 4.2129e-03, 4.8523e-03, -5.4532e-03,\n 5.4129e-03, 8.7950e-03, 2.0240e-03, 3.7411e-04, 5.9718e-03,\n -3.0263e-03, 4.9135e-03, -6.1001e-04, -3.3775e-04, -4.8629e-03,\n -5.7551e-04, 4.6407e-03, 1.8367e-03, -8.1390e-03, 1.7614e-03,\n 2.4498e-03, -3.9680e-03, -3.7540e-04, 2.4693e-03, 9.5717e-04,\n 5.3974e-03, 1.0037e-02, 1.8286e-03, 4.3574e-03, 3.1163e-03,\n -8.5755e-03, 6.9398e-04, 4.6927e-03, -5.1997e-03, 3.7424e-03,\n 3.3919e-03, 9.7112e-03, -4.6852e-03, -4.4092e-03, 1.9147e-04,\n -7.0065e-04, -5.4925e-03, 4.1000e-03, 5.7908e-03, 5.3176e-03,\n 2.6463e-04, -3.6224e-03, 3.7584e-03, 3.7748e-03, -1.4897e-03,\n 3.8406e-03, -1.7857e-03, 2.9279e-03, 1.0864e-03, -5.2336e-03,\n -3.2070e-03, 2.0097e-02, 2.4131e-03, -1.7695e-03, -3.3027e-04,\n 2.2131e-03, -6.5989e-03, -2.2444e-03, 2.7898e-03, 1.3190e-03,\n 1.1732e-03, -1.6730e-03, 4.2063e-03, -2.0643e-03, -1.8472e-04,\n 5.1920e-04, -4.7105e-05, 3.1108e-04, 1.6776e-03, -3.4007e-03,\n -5.7018e-03, 7.6289e-04, 8.8700e-03, -7.9353e-04, 6.0520e-04,\n -1.7436e-02, -3.6595e-03, 1.3040e-03, -1.2549e-03, -1.0891e-02,\n -2.0584e-04, 8.7123e-05, 1.9290e-03, 1.2452e-03, -6.5689e-04,\n -5.7043e-03, -1.9851e-03, 4.6441e-04, 4.2971e-03, -7.6141e-04,\n -1.9949e-03, 3.8387e-03, 7.1399e-03, -3.0409e-04, -1.8809e-03,\n 1.6387e-03, 2.4225e-03, -2.4879e-03, 6.1250e-06, 5.6080e-03,\n -1.8668e-03, 1.3348e-03, -1.1977e-03, -8.2272e-04, 9.7971e-04,\n -4.9072e-03, 2.4532e-03, 1.0039e-03, 3.4701e-04, -3.4253e-03,\n 4.1981e-03, -1.3990e-03, -2.4355e-03, 1.7110e-03, -1.2904e-04,\n 7.7228e-04, 4.5416e-03, 1.2397e-02, -4.6870e-03, -3.7674e-03,\n 3.1044e-03, 5.7595e-05, -2.2933e-03, 1.3068e-03, 1.1375e-03,\n -8.8992e-03, -3.2983e-03, 2.7630e-03, 1.3277e-03, 1.8909e-03,\n -1.6507e-03, 3.2950e-03, 6.9753e-04, 3.3066e-03, -1.3852e-03,\n -6.6903e-03, 4.5391e-03, -1.8571e-03, -4.8445e-03, 7.2653e-03,\n 3.5317e-04, 6.0150e-03, -6.4371e-03, -1.6845e-04, 3.5103e-04,\n 2.5399e-03, 2.8094e-03, 1.3265e-03, -4.0003e-03, 2.8890e-03,\n 3.9545e-03, -2.3200e-03, 1.2286e-03, -8.0280e-05, 1.8379e-03,\n -3.8289e-03, 5.1392e-03, 5.9042e-03, 6.1560e-03, 3.3621e-03,\n -5.4540e-03, -2.6336e-04, 4.8973e-03, -3.3222e-03, -3.5215e-03,\n 1.2792e-03, -2.6576e-03, 6.9043e-04, 5.1855e-04, 7.3760e-03,\n 3.4964e-03, -2.1949e-03, -3.1291e-04, -4.6262e-04, 8.8157e-04,\n -1.5521e-03, 9.9026e-04, 7.9276e-04, 1.0404e-04, -7.3241e-03,\n -2.7700e-03, -2.1894e-03, -7.4166e-05, 2.2007e-04, -8.4981e-04,\n -8.1053e-03, -5.1421e-03, 4.6480e-03, -8.0673e-03, -2.1410e-03,\n 1.1459e-03, 1.6332e-02, -7.9816e-04, -5.1047e-03, 1.6283e-03,\n -5.0701e-03, -1.6684e-03, 3.6661e-03, 1.0234e-03, -3.6174e-03,\n 2.2973e-03, -7.6214e-03, 1.3821e-03, -7.1229e-04, 5.1810e-03,\n -5.1264e-03, -8.1063e-04, 1.4788e-03, -4.1562e-03, 2.4708e-03,\n 3.9629e-03, -6.1682e-04, 2.8683e-03, 5.0204e-03, -2.4605e-03,\n 5.5259e-03, -1.7014e-03, -7.3790e-04, 1.7754e-03, 4.1502e-03,\n 3.1795e-03, -3.5535e-04, -3.2151e-04, -5.0465e-03, -1.9591e-04,\n -4.6600e-03, 4.2108e-03, 3.2326e-03, 2.1764e-03, 3.9454e-04,\n 8.5605e-03, -3.7902e-03, -4.7759e-04, 3.7662e-03, -9.5284e-03,\n 1.6743e-03, -2.0444e-03, -2.3348e-03, -4.0631e-04, -1.4696e-03,\n -3.5211e-03, -5.7381e-04, 3.0211e-04, -3.2457e-03, -2.2899e-03,\n -3.2930e-03, -1.1047e-02, 2.9233e-03, 6.8151e-03, -9.2710e-04,\n -5.3187e-03, 6.6661e-03, -1.5740e-03, 1.6932e-03, -4.5643e-03,\n 2.1565e-03, 2.7959e-04, -2.7138e-03, -2.2140e-03, 9.3918e-03,\n -7.2901e-03, 2.1505e-03, 9.6389e-03, 7.5530e-04, 1.9383e-03,\n -8.1739e-03, -3.3910e-03, 3.8047e-03, 1.7875e-03, 4.9742e-03,\n -1.3726e-02, 1.6187e-03, -4.6646e-04, -2.2488e-03, -7.4673e-03,\n -5.6278e-03, 3.6064e-03, 2.1207e-04, 4.3295e-03, 2.2379e-03,\n -1.6315e-03, 3.6481e-03, -4.5664e-03, 3.6713e-04, -3.2373e-03,\n -2.7618e-03, -1.8171e-03, 7.1304e-03, -2.6920e-03, -2.1052e-03,\n 1.0110e-03, 8.2065e-03, 2.3065e-03, -6.4971e-03, 1.0503e-03,\n 4.8803e-04, -3.0815e-03, -4.5731e-03, -3.0657e-03, -2.7672e-03,\n 6.9491e-03, 5.1902e-03, -2.3625e-03, -7.1509e-03, 1.8752e-03,\n 1.8829e-03, 2.8044e-03, -3.1782e-03, 3.9968e-03, -2.2083e-03,\n -5.9830e-05, -7.2259e-03, 3.9634e-05, 7.9005e-03, -1.0011e-03,\n 9.3722e-03, 3.2084e-03, 3.5647e-03, 1.5119e-03, 1.7865e-03,\n 4.6465e-03, -5.1290e-04, -4.5637e-03, 1.4207e-02, -3.6005e-04,\n 2.2965e-03, 7.8714e-04, 4.3153e-03, -3.1679e-03, 2.9335e-03,\n -1.1237e-03, 3.4691e-03, 6.8402e-04, -5.3066e-03, -2.1745e-03,\n 7.2737e-03, -3.8675e-04, 4.8839e-03, -2.5327e-04, 4.7472e-03,\n -4.6255e-03, 3.4210e-04, 6.9871e-03, -1.4237e-03, -9.1050e-04,\n -8.5971e-05, 1.4695e-03, 7.2004e-03, -1.7391e-03, 4.0913e-03,\n -9.6017e-04, -9.3104e-04, -3.4529e-03, -1.8414e-03, 6.5093e-03,\n -1.6358e-03, -4.7894e-05, 9.3478e-03, -4.7395e-03, -6.1491e-03,\n -3.5250e-03, 7.9346e-04, -2.6473e-03, -4.5430e-04, 3.3930e-03,\n -2.4975e-03, 1.7374e-03, -6.0813e-04, -1.7307e-03, -2.7901e-04,\n -9.0319e-05, 2.1065e-03, 4.6125e-03, -8.1726e-04, -2.3776e-03,\n 6.7349e-03, -8.7930e-04, -3.7060e-03, -3.7182e-03, -1.5447e-03,\n 2.3315e-03, -4.0747e-03, -1.2081e-02, -6.4542e-03, -4.7889e-04,\n 2.7743e-03, 7.3435e-03, -5.2960e-03, -3.1771e-04, 2.7398e-03,\n 1.1035e-03, 4.6605e-03, 2.3964e-03, -2.3482e-03, -3.9457e-04,\n -2.2406e-03, 3.0386e-03, -2.6736e-03, -2.8722e-04, 2.5746e-03,\n -8.5946e-03, 5.5391e-04, 1.9106e-03, -1.1873e-03, 1.5691e-03,\n -2.2016e-04, 4.6308e-03, 1.6799e-03, 6.7766e-03, 7.8078e-03,\n -5.9398e-04, 1.5913e-03, 7.7270e-03, -1.8637e-03, -3.2504e-04,\n -1.0197e-02, 3.9570e-03, 6.3552e-04, -4.3221e-03, 1.1024e-03,\n 5.4988e-03, -1.2093e-03, 1.0430e-03, -1.0145e-03, -2.6068e-03,\n -3.0442e-03, 4.9549e-03, 5.0148e-03, -6.6063e-04, -5.8936e-03,\n 4.0998e-03, 3.9117e-03, 1.9074e-03, 5.6305e-03, -5.6804e-03,\n -3.4897e-03, -2.5441e-04, -1.5990e-03, -4.6084e-03, 4.6338e-03,\n -1.3493e-02, -5.5425e-04, 3.1143e-03, -1.4614e-03, 4.6142e-03,\n -1.1849e-03, -9.3435e-04, -4.5253e-03, -1.9109e-03, 4.3930e-04,\n -1.3098e-03, 2.1257e-04, 4.0078e-03, -2.0735e-03, 2.5501e-03,\n -1.5265e-02, -2.5973e-03, -8.1922e-03, 1.0503e-02, 6.2958e-03,\n -2.7940e-03, -3.3228e-03, -1.8786e-03, -5.3515e-05, -1.0406e-02,\n -4.3877e-03, 2.1653e-03, -2.3072e-04, 5.8595e-03, -1.2362e-03,\n -3.8227e-04, 7.9261e-03, 1.0654e-03, 4.0550e-03, -5.4737e-03,\n 2.5813e-03, -6.2959e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([2.0997e-04, 8.2483e-05, 1.7301e-04, 2.8416e-04, 8.1032e-05, 1.4126e-04,\n 1.1953e-04, 1.7578e-04, 7.1747e-05, 1.7711e-04, 1.4664e-04, 2.2072e-04,\n 7.3998e-05, 1.4476e-04, 6.1569e-05, 8.3891e-05, 2.5732e-03, 2.0664e-04,\n 1.6063e-04, 2.2610e-04, 3.1132e-04, 3.0995e-04, 1.9617e-04, 1.1631e-04,\n 1.6285e-04, 3.6242e-04, 2.3394e-04, 1.6791e-04, 5.5251e-05, 4.5190e-04,\n 2.0644e-04, 9.4083e-05, 3.0434e-04, 7.2381e-05, 3.9890e-04, 3.4267e-04,\n 2.5786e-04, 2.8893e-04, 6.8932e-05, 1.2780e-04, 1.2150e-04, 1.3651e-04,\n 5.7791e-05, 6.2610e-05, 1.3494e-04, 8.0258e-05, 2.8151e-04, 2.1877e-04,\n 3.9255e-04, 1.2945e-04, 2.7116e-04, 3.2952e-04, 8.9190e-05, 2.5673e-04,\n 1.4231e-04, 2.2031e-04, 4.4104e-04, 1.7330e-04, 1.7627e-04, 1.8964e-04,\n 2.8526e-04, 5.8132e-05, 1.5953e-04, 3.2595e-04, 6.0756e-04, 1.7553e-04,\n 2.2218e-04, 1.7570e-04, 5.8679e-05, 1.9916e-04, 5.6853e-05, 1.0113e-04,\n 7.3383e-05, 1.1432e-04, 1.4997e-04, 1.0886e-04, 1.6769e-04, 1.3825e-04,\n 2.4266e-04, 1.5715e-04, 1.0042e-04, 6.9548e-05, 1.2088e-04, 1.5918e-04,\n 1.4824e-04, 2.2567e-04, 2.5666e-03, 1.0691e-04, 7.6480e-05, 1.8072e-04,\n 1.9041e-04, 2.7757e-04, 2.2892e-04, 9.6946e-05, 1.0011e-04, 9.7788e-05,\n 1.4215e-04, 1.1920e-04, 1.2282e-04, 7.4453e-05, 7.8555e-05, 8.3663e-05,\n 9.6478e-05, 2.7251e-04, 2.8895e-04, 2.1704e-04, 6.3406e-05, 2.8803e-04,\n 1.0866e-04, 7.1851e-05, 5.2611e-04, 6.3309e-05, 1.5364e-04, 7.5254e-05,\n 4.3652e-04, 9.3991e-05, 8.3836e-05, 1.1124e-04, 2.2226e-04, 1.2362e-04,\n 1.1099e-04, 2.1933e-04, 1.1418e-04, 1.1414e-04, 1.1775e-04, 9.9453e-05,\n 8.0666e-05, 1.9793e-04, 8.8462e-05, 3.4714e-04, 7.9220e-05, 1.3909e-04,\n 3.4330e-04, 2.8575e-04, 1.4824e-04, 2.3124e-04, 8.7997e-05, 2.4824e-04,\n 1.1001e-04, 1.0144e-04, 2.4084e-04, 2.1548e-04, 1.2938e-04, 1.4243e-04,\n 1.0150e-04, 2.0962e-04, 1.5429e-04, 1.6456e-04, 1.1180e-04, 1.2012e-04,\n 6.5027e-05, 2.1457e-04, 1.5291e-04, 2.7141e-04, 1.2528e-04, 2.5104e-04,\n 1.3902e-04, 1.2978e-04, 6.7177e-04, 5.3411e-05, 2.9634e-04, 1.3585e-04,\n 1.9378e-04, 1.2823e-04, 1.7251e-04, 9.7199e-05, 4.3250e-04, 1.9958e-04,\n 7.1082e-05, 1.1910e-04, 3.8021e-04, 1.3718e-04, 4.8847e-04, 1.1256e-04,\n 1.2300e-04, 1.5686e-04, 1.3317e-04, 1.9192e-04, 1.1347e-04, 7.3058e-05,\n 1.1559e-04, 1.3212e-04, 6.1194e-04, 1.7520e-04, 1.2348e-04, 1.8456e-04,\n 6.7624e-04, 5.0410e-04, 9.6045e-05, 2.4934e-04, 4.1384e-04, 4.1222e-04,\n 1.7680e-04, 1.2317e-04, 1.4860e-04, 1.2792e-04, 6.9535e-05, 1.3735e-04,\n 6.1335e-05, 1.0629e-04, 3.2522e-04, 2.1864e-04, 8.0335e-05, 8.5613e-05,\n 1.2873e-04, 2.9299e-04, 4.2503e-04, 2.2458e-04, 1.7823e-04, 1.2069e-04,\n 1.8047e-04, 1.3590e-04, 1.4531e-04, 1.9774e-04, 3.3070e-04, 1.5922e-04,\n 2.1722e-04, 8.0998e-05, 1.4071e-04, 1.3219e-04, 1.7123e-04, 8.2122e-05,\n 7.3004e-05, 1.3366e-04, 2.6893e-04, 1.3044e-04, 2.3290e-04, 7.8025e-05,\n 4.2295e-04, 1.9852e-04, 9.2965e-05, 1.4528e-04, 7.8575e-05, 1.4739e-04,\n 1.4903e-04, 1.2625e-04, 1.7640e-04, 1.8171e-04, 1.8717e-04, 1.2653e-04,\n 7.7900e-05, 1.6666e-04, 1.5883e-04, 1.2698e-04, 3.0981e-04, 8.9597e-05,\n 1.3958e-04, 6.1214e-05, 2.0702e-04, 1.8340e-04, 1.6965e-04, 1.7544e-04,\n 3.8971e-04, 1.2737e-04, 1.2306e-04, 8.5598e-05, 3.3532e-04, 1.3148e-04,\n 1.7803e-04, 1.7587e-04, 1.0231e-04, 9.4114e-05, 2.5421e-04, 3.5882e-04,\n 6.8267e-04, 2.4612e-04, 1.4733e-04, 1.1444e-04, 1.9865e-04, 1.1217e-04,\n 2.6316e-04, 1.5914e-04, 7.2086e-05, 6.7539e-05, 8.6714e-05, 3.3947e-04,\n 7.7021e-04, 2.3092e-04, 1.4322e-04, 8.6797e-05, 4.1730e-05, 1.6955e-04,\n 1.0980e-04, 2.0043e-04, 1.8008e-04, 8.7738e-05, 8.4624e-04, 8.6771e-05,\n 1.1149e-04, 2.6885e-04, 2.4554e-04, 1.2431e-04, 1.7425e-04, 1.1767e-04,\n 1.3812e-04, 1.4584e-04, 5.7068e-05, 1.8025e-04, 7.4501e-05, 1.1470e-04,\n 3.4210e-04, 1.3332e-04, 1.2645e-04, 3.1580e-04, 1.1735e-04, 1.8093e-04,\n 1.6192e-04, 1.3279e-04, 1.4186e-04, 1.1876e-04, 1.0871e-04, 8.0539e-05,\n 1.3991e-04, 9.8522e-05, 3.9282e-04, 2.1081e-04, 8.5676e-05, 2.2693e-04,\n 8.2849e-05, 3.3137e-04, 6.7773e-05, 2.6086e-04, 1.0237e-04, 1.3234e-04,\n 9.5248e-05, 9.0059e-05, 1.9636e-04, 1.2403e-04, 8.0983e-05, 1.3908e-04,\n 1.3257e-04, 2.4168e-04, 1.0504e-04, 1.5031e-04, 5.2240e-04, 2.9251e-04,\n 1.3927e-04, 3.8108e-04, 1.4417e-04, 9.4710e-05, 1.8999e-04, 2.6726e-04,\n 1.2219e-04, 1.4462e-04, 1.0964e-04, 1.0076e-04, 1.3624e-04, 8.4646e-05,\n 3.2998e-04, 6.2620e-05, 2.1201e-04, 9.3090e-05, 6.1732e-05, 1.1613e-04,\n 1.7950e-04, 1.0350e-04, 4.3049e-05, 8.1863e-05, 5.3784e-04, 9.0190e-05,\n 1.0709e-04, 5.4010e-05, 1.1213e-04, 2.0881e-04, 7.5875e-05, 1.1133e-04,\n 1.3057e-04, 1.1567e-04, 1.6208e-04, 1.9922e-04, 2.8821e-04, 1.1859e-04,\n 1.0539e-04, 1.3511e-04, 1.0630e-04, 1.9886e-04, 7.6406e-05, 1.4693e-04,\n 1.1686e-04, 2.0939e-04, 6.3162e-05, 1.1813e-04, 2.0487e-04, 1.7572e-04,\n 8.6034e-05, 8.9554e-05, 2.1624e-04, 6.1879e-05, 5.4676e-05, 8.0243e-04,\n 1.2944e-04, 8.7012e-05, 2.4418e-04, 1.2853e-04, 2.9277e-04, 2.0375e-04,\n 2.8905e-04, 2.8808e-04, 1.6455e-04, 2.5419e-04, 2.5719e-04, 9.6074e-05,\n 7.1133e-05, 6.5111e-05, 7.7605e-05, 8.7360e-05, 1.4613e-04, 1.5647e-04,\n 1.0351e-04, 4.5821e-04, 1.3735e-04, 6.2957e-05, 1.4961e-04, 4.5984e-04,\n 7.1745e-05, 2.8230e-04, 1.1129e-04, 1.3999e-04, 8.9150e-05, 3.0615e-04,\n 2.2862e-04, 1.2943e-04, 1.0235e-04, 1.3575e-04, 6.7199e-05, 1.1416e-04,\n 1.1377e-04, 1.0530e-04, 2.3559e-04, 2.3327e-04, 6.2561e-05, 4.2554e-04,\n 2.1019e-04, 9.3048e-05, 1.7070e-04, 1.4622e-04, 6.2668e-05, 1.6212e-04,\n 1.5125e-04, 8.6700e-05, 1.4367e-04, 1.6016e-04, 1.2208e-04, 3.2139e-04,\n 1.8434e-04, 7.1268e-05, 7.8644e-05, 2.6624e-04, 2.4748e-04, 2.5175e-04,\n 5.2765e-04, 1.2343e-04, 6.9042e-05, 2.7934e-04, 2.5617e-04, 1.1286e-04,\n 6.0445e-05, 7.5153e-04, 1.4197e-04, 1.7546e-04, 1.9948e-04, 1.0420e-04,\n 2.6334e-04, 6.4468e-05, 1.0815e-03, 2.0024e-04, 1.2040e-04, 7.0638e-05,\n 1.8322e-04, 8.3688e-05, 1.1316e-04, 2.8172e-04, 9.2685e-05, 1.7758e-04,\n 2.8210e-04, 2.5075e-04, 1.5365e-04, 2.3348e-04, 3.6211e-05, 1.2733e-04,\n 9.1082e-05, 4.6180e-05, 2.0252e-04, 1.6458e-04, 1.1020e-04, 2.7799e-04,\n 8.8200e-05, 1.9478e-04, 1.1463e-04, 1.2616e-04, 2.1527e-01, 9.0273e-05,\n 1.8108e-04, 1.5007e-04, 1.3512e-04, 2.8344e-04, 2.4514e-04, 1.3872e-04,\n 1.0268e-04, 2.0564e-04, 9.9959e-05, 1.3488e-04, 9.5448e-05, 1.2790e-04,\n 1.6294e-04, 1.7298e-04, 2.4838e-04, 1.8966e-04, 1.9819e-04, 1.8978e-04,\n 2.2191e-04, 1.6151e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(12520.)",
|
| 22 |
+
"exp_avg": "tensor([-8.5455e-05, 9.5517e-03, 2.6119e-04, -2.1294e-04, -1.9192e-03,\n -6.7118e-04, 4.4197e-03, -9.7111e-03, -1.1959e-03, -3.3790e-03,\n 3.9408e-03, 2.4850e-04, 3.0228e-03, 9.6268e-04, -1.5387e-03,\n -3.7670e-03, -7.1104e-03, -8.6330e-03, 2.5953e-03, 7.1418e-03,\n -1.8120e-03, 5.5528e-03, -2.8868e-03, -4.1836e-04, -1.5892e-02,\n 3.4232e-03, 4.7878e-03, 1.4841e-03, -5.7454e-03, 1.5857e-03,\n -3.4174e-03, 5.0108e-03, 5.2116e-03, 5.5747e-03, -3.6997e-03,\n 4.3475e-03, 6.7498e-03, 2.0646e-03, 7.6403e-04, 5.8196e-03,\n -3.0591e-03, 4.7130e-03, -4.0019e-05, -4.5723e-04, -5.1663e-03,\n -2.7020e-04, 6.8135e-03, 3.0298e-03, -8.1077e-03, 1.0648e-03,\n 3.0228e-03, -4.0511e-03, 4.1743e-04, 2.5604e-03, 5.0335e-04,\n 6.5335e-03, 6.2926e-03, 2.6609e-03, 4.9662e-03, 1.7092e-03,\n -8.8593e-03, 4.3508e-04, 3.0162e-03, -4.7308e-03, 2.6408e-03,\n 3.5252e-03, 9.5377e-03, -4.3364e-03, -6.2737e-03, 1.1812e-03,\n -1.2598e-03, -6.4145e-03, 6.0854e-03, 5.3422e-03, 4.5349e-03,\n 9.4150e-04, -3.3045e-03, 5.2843e-03, 3.4257e-03, -1.2264e-03,\n 4.0811e-03, -2.0759e-03, 3.0890e-03, 2.3535e-03, -5.0194e-03,\n -1.8064e-03, 6.5123e-03, 2.2079e-03, -1.8888e-03, -5.4523e-04,\n 1.8954e-03, -6.0054e-03, -2.2817e-03, 4.0318e-03, 1.7342e-03,\n 2.6158e-03, -1.9402e-03, 5.8063e-03, -2.2547e-03, 4.7542e-04,\n -6.0798e-04, 1.0284e-03, 7.5750e-04, 4.4014e-04, -2.4631e-03,\n -4.0086e-03, 2.4697e-04, 6.1088e-03, -2.1393e-03, 2.0237e-03,\n -8.2988e-03, -5.1312e-03, 4.5280e-04, -1.3590e-03, -9.2413e-03,\n 1.5377e-05, -7.2176e-04, 3.5174e-03, 1.1506e-03, -9.3898e-04,\n -3.7480e-03, -2.3099e-03, 5.4591e-04, 4.9359e-03, -4.0561e-04,\n -1.7840e-03, 5.1318e-03, 5.5108e-03, -6.5598e-04, -2.7580e-03,\n 2.1905e-03, 2.7082e-03, -1.8751e-03, 8.2094e-04, 4.9079e-03,\n -1.0748e-03, 3.0487e-03, -1.4230e-04, -1.8662e-03, 9.7071e-04,\n -5.1814e-03, 1.7428e-03, 8.6712e-04, 6.4624e-04, -3.9351e-03,\n 4.0979e-03, -2.3934e-03, -2.6109e-03, 1.0397e-04, 2.0489e-04,\n 1.1749e-04, 3.2631e-03, 1.1287e-02, -3.0843e-03, -3.5828e-03,\n 2.5781e-03, -7.4078e-05, -2.0462e-03, 2.3273e-03, 5.0128e-05,\n -7.4522e-03, -3.2022e-03, 3.3802e-03, 7.3060e-04, 2.1189e-03,\n -6.0732e-04, 1.4923e-03, 5.8319e-04, 4.3510e-03, -9.8179e-04,\n -6.7505e-03, 4.6437e-03, -1.4241e-03, -4.6519e-03, 8.9680e-03,\n 8.4043e-04, 6.2006e-03, -4.7045e-03, 2.2516e-04, 2.1148e-05,\n 2.0897e-03, 2.8902e-03, 2.5020e-05, -4.2813e-03, 4.4810e-03,\n 4.0434e-03, -6.4196e-04, 1.4134e-03, 2.9458e-05, 2.8913e-03,\n -2.9120e-03, 4.0388e-03, 5.7947e-03, 5.9760e-03, 4.0892e-03,\n -5.3882e-03, -3.3106e-04, 4.5055e-03, -4.4899e-03, -4.7816e-03,\n 7.8536e-04, -3.5805e-03, 1.2303e-03, 8.5114e-04, 7.6480e-03,\n 2.2882e-03, -2.0809e-03, -8.4538e-04, -1.5754e-03, 3.0530e-03,\n -8.5798e-04, 3.7402e-04, 1.5115e-03, 9.1120e-04, -6.7952e-03,\n -2.5772e-03, -1.8668e-03, -9.1244e-04, 5.6470e-04, 2.8039e-05,\n -6.8114e-03, -5.3486e-03, 8.2919e-03, -9.7120e-03, -3.4668e-03,\n 1.3318e-03, 1.0427e-02, 4.0898e-04, -3.5439e-03, 3.7255e-03,\n -5.7028e-03, -2.1785e-03, 5.0001e-03, 1.4601e-03, -3.7284e-03,\n 3.6999e-03, -7.7756e-03, 5.8687e-04, -5.9955e-04, 5.1457e-03,\n -7.2753e-03, -1.4621e-03, 6.5888e-04, -4.6369e-03, 1.2141e-03,\n 7.3460e-03, -3.0522e-04, 4.8710e-03, 3.8062e-03, -2.8808e-03,\n 4.5552e-03, 2.3945e-04, -8.8771e-04, 2.8341e-03, 3.2834e-03,\n 3.7259e-03, 6.3389e-04, -4.8671e-04, -4.4034e-03, -3.7888e-04,\n -5.3187e-03, 4.9879e-03, 1.3272e-03, 2.2522e-03, 7.7580e-04,\n 7.6465e-03, -3.6559e-03, -7.6639e-04, 3.4429e-03, -7.9501e-03,\n 1.5368e-03, -1.2096e-03, -2.5843e-03, -1.3032e-03, -1.7771e-03,\n -3.4374e-03, -1.9388e-04, 2.5820e-04, -2.6390e-03, -4.0259e-03,\n -3.9066e-03, -1.3797e-02, 2.6685e-03, 4.7722e-03, -9.8357e-04,\n -6.6242e-03, 3.8033e-03, -2.2080e-03, 2.3917e-03, -3.5452e-03,\n 1.8320e-03, 1.7670e-03, -2.8282e-03, -2.2917e-03, 1.2129e-02,\n -7.7354e-03, 3.4898e-03, 9.8263e-03, 4.4135e-04, 4.1348e-03,\n -6.3486e-03, -3.6903e-03, 2.5637e-03, 1.7471e-03, 5.1474e-03,\n -1.2082e-02, 1.5724e-03, -3.0313e-04, -2.3357e-03, -9.4334e-03,\n -7.1659e-03, 4.2150e-03, -1.2459e-04, 4.4262e-03, 1.5927e-03,\n -5.8324e-04, 3.7979e-03, -3.8816e-03, 4.7964e-04, -3.2079e-03,\n -2.6921e-03, -2.0155e-03, 7.8607e-03, -2.3222e-03, -2.0710e-03,\n 2.1014e-03, 8.7597e-03, 2.0522e-03, -5.5344e-03, 1.2642e-03,\n 1.0275e-03, -2.9578e-03, -4.7454e-03, -2.1811e-03, -3.9992e-04,\n 5.0690e-03, 5.1917e-03, -1.8962e-03, -6.7408e-03, 3.9411e-03,\n 2.7697e-03, 2.8470e-03, -2.4637e-03, 3.1361e-03, -3.1836e-03,\n -7.6308e-04, -4.9725e-03, 1.7250e-04, 5.6174e-03, -8.3261e-04,\n 8.8667e-03, 4.4792e-03, 4.3812e-03, 3.9971e-04, 1.9598e-03,\n 5.7834e-03, -3.7840e-04, -6.3817e-03, 9.6660e-03, -8.2472e-04,\n 1.9953e-03, 1.0955e-03, 4.5544e-03, -3.1179e-03, 3.4148e-03,\n -1.3256e-03, 4.0160e-03, 1.3885e-03, -4.7096e-03, -3.1973e-03,\n 6.3364e-03, 5.7953e-04, 7.6163e-03, 5.5759e-04, 5.5730e-03,\n -3.3762e-03, 4.5889e-04, 7.6727e-03, -2.5274e-03, -1.7257e-03,\n 1.4982e-04, 1.1879e-05, 7.6862e-03, -1.3926e-03, 6.0483e-03,\n -5.8498e-04, -5.9114e-04, -3.8349e-03, -2.0978e-03, 4.4445e-03,\n 1.3534e-03, -7.6484e-04, 8.3614e-03, -4.3448e-03, -5.9095e-03,\n -2.4753e-03, 7.7359e-04, -2.7221e-03, 2.1956e-03, 2.8489e-03,\n -9.4681e-04, 1.9080e-03, -1.8701e-04, -1.4861e-03, -2.9878e-05,\n 4.0948e-05, 1.1960e-03, 1.6543e-03, -1.2624e-03, -1.0418e-04,\n 5.4673e-03, -4.9027e-04, -6.1206e-03, -2.9876e-03, -1.1180e-03,\n 2.3715e-03, -4.0696e-03, -1.0495e-02, -8.1667e-03, -3.1640e-04,\n 2.6580e-03, 6.0212e-03, -6.0996e-03, -2.1213e-03, 4.6932e-03,\n 2.1639e-03, 7.3484e-03, 3.1646e-03, -1.8838e-03, -4.3358e-04,\n -3.1901e-03, 2.4158e-03, -1.7329e-03, -8.2955e-06, 2.3383e-03,\n -9.1723e-03, 1.0314e-03, 1.6117e-03, -1.3856e-03, 1.9647e-03,\n -3.8367e-04, 6.0030e-03, 3.2722e-03, 4.5819e-03, 5.7885e-03,\n 5.5090e-05, 3.4623e-03, 4.9934e-03, -9.6220e-04, -4.0049e-04,\n -5.5672e-03, 4.4548e-03, 8.0626e-04, -3.3233e-03, 1.3321e-03,\n 4.2899e-03, -1.4445e-03, 5.3165e-04, -1.8638e-03, -3.1171e-03,\n -1.4159e-03, 6.7943e-03, 4.2614e-03, 7.4131e-04, -2.7485e-03,\n 4.7291e-03, 4.5991e-03, 1.7278e-03, 5.0057e-03, -7.4147e-03,\n -3.8051e-03, 5.0321e-04, -2.7246e-03, -3.6259e-03, 3.5049e-03,\n -8.0794e-03, 2.8790e-04, 2.2421e-03, -1.3821e-03, 5.3270e-03,\n -3.7359e-04, -1.1563e-03, -3.8240e-03, -2.1423e-03, 6.8117e-05,\n -1.9911e-04, -3.2066e-04, 2.5710e-03, -3.2122e-03, 2.8756e-03,\n -5.2553e-04, -2.3597e-03, -6.8195e-03, 7.8487e-03, 8.6953e-03,\n -3.6046e-03, -2.0569e-03, -1.7050e-03, 2.3935e-04, -1.3532e-02,\n -6.5213e-03, 2.2754e-03, 7.1022e-04, 5.8983e-03, -8.1946e-04,\n 1.2713e-04, 6.1241e-03, 6.3115e-04, 3.7037e-03, -3.4526e-03,\n 3.5581e-03, -4.0646e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([2.2023e-04, 1.6988e-04, 1.5360e-04, 1.7176e-04, 1.5139e-04, 1.4280e-04,\n 1.3803e-04, 1.5251e-04, 1.5788e-04, 2.1012e-04, 1.2305e-04, 2.1133e-04,\n 1.1349e-04, 1.3621e-04, 1.0756e-04, 1.2019e-04, 3.0584e-04, 1.2413e-04,\n 1.3577e-04, 1.5429e-04, 1.8037e-04, 2.2373e-04, 1.7637e-04, 1.3643e-04,\n 2.4411e-04, 2.5170e-04, 1.8911e-04, 1.4644e-04, 1.0571e-04, 1.6833e-04,\n 1.6397e-04, 1.6113e-04, 2.8063e-04, 1.0437e-04, 2.1941e-04, 2.0012e-04,\n 1.6065e-04, 1.9726e-04, 9.9419e-05, 1.2678e-04, 1.6255e-04, 1.3277e-04,\n 1.0811e-04, 1.1904e-04, 1.6157e-04, 1.3753e-04, 3.4811e-04, 2.2914e-04,\n 2.1412e-04, 1.6072e-04, 1.5860e-04, 2.2499e-04, 1.2949e-04, 1.4668e-04,\n 2.3216e-04, 2.1800e-04, 2.1150e-04, 1.8991e-04, 1.5787e-04, 1.5446e-04,\n 2.2694e-04, 1.2467e-04, 2.0920e-04, 1.8221e-04, 2.1324e-04, 1.3766e-04,\n 1.8269e-04, 1.9319e-04, 1.6209e-04, 2.3645e-04, 1.3794e-04, 1.1082e-04,\n 1.7857e-04, 1.1756e-04, 1.6779e-04, 2.2804e-04, 1.7847e-04, 1.6097e-04,\n 2.0089e-04, 2.4321e-04, 1.9350e-04, 1.0362e-04, 1.9551e-04, 1.9396e-04,\n 1.4439e-04, 1.7202e-04, 2.7088e-04, 2.0980e-04, 1.0769e-04, 1.9585e-04,\n 1.6474e-04, 1.9860e-04, 1.6002e-04, 1.4459e-04, 1.6808e-04, 1.3512e-04,\n 2.1226e-04, 1.8359e-04, 1.5415e-04, 1.2343e-04, 1.0161e-04, 1.1733e-04,\n 1.4515e-04, 1.4013e-04, 1.9621e-04, 1.8995e-04, 1.5191e-04, 1.6979e-04,\n 1.6970e-04, 1.1567e-04, 1.8595e-04, 1.2828e-04, 1.7215e-04, 9.8801e-05,\n 2.2034e-04, 1.0467e-04, 1.0742e-04, 2.0344e-04, 1.4876e-04, 1.2946e-04,\n 1.0250e-04, 1.2535e-04, 1.5057e-04, 1.6575e-04, 1.6754e-04, 1.3376e-04,\n 1.3704e-04, 1.6266e-04, 1.3492e-04, 1.8581e-04, 1.2291e-04, 1.3744e-04,\n 2.0541e-04, 1.5945e-04, 1.6128e-04, 1.5564e-04, 1.1473e-04, 1.2536e-04,\n 1.6299e-04, 1.6026e-04, 2.4732e-04, 1.5330e-04, 1.1214e-04, 1.9419e-04,\n 1.4372e-04, 1.6884e-04, 1.7250e-04, 1.7194e-04, 1.5353e-04, 1.7681e-04,\n 9.2472e-05, 1.9052e-04, 1.2929e-04, 1.8490e-04, 1.0128e-04, 1.7673e-04,\n 1.7518e-04, 1.5222e-04, 2.6364e-04, 9.8101e-05, 1.8888e-04, 1.3459e-04,\n 1.9758e-04, 1.2723e-04, 2.0044e-04, 1.3042e-04, 1.6992e-04, 1.3646e-04,\n 1.2141e-04, 1.7028e-04, 2.9662e-04, 1.3705e-04, 2.0630e-04, 1.1841e-04,\n 1.5401e-04, 1.2122e-04, 1.4028e-04, 1.6159e-04, 9.4485e-05, 1.0916e-04,\n 1.5312e-04, 2.5697e-04, 1.9613e-04, 1.8795e-04, 2.1261e-04, 1.6826e-04,\n 1.7563e-04, 2.1515e-04, 1.3907e-04, 1.8035e-04, 1.7532e-04, 1.7164e-04,\n 2.4059e-04, 1.3729e-04, 1.5677e-04, 1.3442e-04, 9.7031e-05, 1.6076e-04,\n 1.0642e-04, 2.3677e-04, 1.7041e-04, 2.0932e-04, 9.5085e-05, 1.4844e-04,\n 1.4708e-04, 1.7619e-04, 1.8173e-04, 1.3994e-04, 1.7351e-04, 1.6873e-04,\n 1.2424e-04, 1.9695e-04, 2.0832e-04, 2.7174e-04, 2.4240e-04, 1.2730e-04,\n 1.8417e-04, 8.7342e-05, 1.2843e-04, 1.4613e-04, 1.4594e-04, 1.4392e-04,\n 1.6003e-04, 2.0066e-04, 1.3616e-04, 1.5310e-04, 1.5986e-04, 1.2104e-04,\n 2.6699e-04, 2.4425e-04, 1.4557e-04, 1.3342e-04, 1.3940e-04, 1.2479e-04,\n 1.6436e-04, 1.9377e-04, 1.9416e-04, 2.3948e-04, 1.7886e-04, 2.2409e-04,\n 1.4441e-04, 1.4830e-04, 1.8034e-04, 1.3224e-04, 1.8044e-04, 1.5171e-04,\n 1.4062e-04, 1.0157e-04, 1.4913e-04, 2.6176e-04, 1.3111e-04, 1.4957e-04,\n 1.4123e-04, 2.9247e-04, 1.5927e-04, 1.4132e-04, 3.8669e-04, 1.6698e-04,\n 1.8258e-04, 1.9545e-04, 1.3452e-04, 1.8854e-04, 1.5528e-04, 1.8155e-04,\n 1.9993e-04, 2.0679e-04, 1.2170e-04, 1.5636e-04, 1.1732e-04, 1.2744e-04,\n 1.4568e-04, 1.1196e-04, 1.2575e-04, 1.1632e-04, 1.2892e-04, 3.2225e-04,\n 1.7253e-04, 1.8667e-04, 2.6904e-04, 1.4132e-04, 8.3963e-05, 2.1888e-04,\n 1.4211e-04, 1.3943e-04, 2.5202e-04, 1.3023e-04, 2.8856e-04, 1.1857e-04,\n 1.5923e-04, 1.9658e-04, 2.1945e-04, 1.9249e-04, 2.0370e-04, 1.5381e-04,\n 2.4005e-04, 1.8498e-04, 1.3006e-04, 2.1534e-04, 1.5832e-04, 2.2873e-04,\n 2.7176e-04, 1.3705e-04, 1.5162e-04, 1.9744e-04, 1.2718e-04, 1.6328e-04,\n 1.7321e-04, 1.2447e-04, 1.8881e-04, 1.5742e-04, 1.2328e-04, 1.2306e-04,\n 1.1880e-04, 1.1661e-04, 1.8421e-04, 2.0924e-04, 8.9276e-05, 2.2579e-04,\n 1.2479e-04, 1.8474e-04, 1.2016e-04, 2.7043e-04, 1.5033e-04, 1.2269e-04,\n 1.2358e-04, 1.2400e-04, 1.6017e-04, 1.6355e-04, 8.4015e-05, 1.4770e-04,\n 1.8285e-04, 1.9726e-04, 1.2509e-04, 1.2307e-04, 2.1053e-04, 1.7237e-04,\n 1.3484e-04, 2.5856e-04, 1.9342e-04, 1.6011e-04, 1.6606e-04, 2.1806e-04,\n 1.3467e-04, 1.3308e-04, 1.4382e-04, 1.6378e-04, 1.1747e-04, 1.1956e-04,\n 2.0118e-04, 7.7195e-05, 2.0006e-04, 1.7472e-04, 9.0913e-05, 1.7229e-04,\n 1.4236e-04, 1.3873e-04, 1.0374e-04, 1.2743e-04, 2.5338e-04, 1.0589e-04,\n 1.3080e-04, 8.7027e-05, 1.2812e-04, 1.3613e-04, 9.9982e-05, 1.3117e-04,\n 1.1333e-04, 2.3736e-04, 1.2867e-04, 1.3331e-04, 1.7001e-04, 2.1401e-04,\n 1.6903e-04, 1.5788e-04, 1.3045e-04, 1.4159e-04, 1.2429e-04, 1.3986e-04,\n 1.5336e-04, 1.3512e-04, 8.7925e-05, 1.5376e-04, 1.8429e-04, 1.8842e-04,\n 1.4532e-04, 9.1444e-05, 1.4321e-04, 1.1051e-04, 1.2973e-04, 2.2222e-04,\n 1.5344e-04, 1.6699e-04, 1.8115e-04, 1.2515e-04, 1.8470e-04, 1.9276e-04,\n 1.6425e-04, 1.8024e-04, 2.5409e-04, 1.3561e-04, 1.2921e-04, 1.2483e-04,\n 1.2339e-04, 1.3263e-04, 1.2331e-04, 1.5609e-04, 1.4658e-04, 1.2422e-04,\n 1.5892e-04, 2.0071e-04, 1.6849e-04, 1.1997e-04, 1.9831e-04, 2.3564e-04,\n 1.3947e-04, 1.7006e-04, 1.2853e-04, 1.1302e-04, 1.7009e-04, 1.6996e-04,\n 1.8715e-04, 1.3104e-04, 1.5784e-04, 1.7633e-04, 1.6489e-04, 1.2846e-04,\n 1.9655e-04, 1.4613e-04, 1.5129e-04, 1.4968e-04, 1.2460e-04, 1.9171e-04,\n 1.6254e-04, 2.7918e-04, 1.2190e-04, 1.8746e-04, 1.0706e-04, 2.3583e-04,\n 2.0113e-04, 1.4367e-04, 1.8513e-04, 2.4978e-04, 1.5404e-04, 1.8255e-04,\n 1.4598e-04, 1.0343e-04, 1.3513e-04, 1.6254e-04, 1.7496e-04, 1.8334e-04,\n 1.8406e-04, 1.5995e-04, 1.0342e-04, 2.3501e-04, 2.0279e-04, 1.9051e-04,\n 1.1715e-04, 2.6306e-04, 1.5917e-04, 2.1098e-04, 2.0600e-04, 1.8779e-04,\n 1.8787e-04, 1.2703e-04, 1.9057e-04, 1.7798e-04, 1.2574e-04, 1.1376e-04,\n 1.5565e-04, 1.5455e-04, 1.4185e-04, 1.8740e-04, 1.6416e-04, 1.9092e-04,\n 1.6730e-04, 1.6168e-04, 1.5464e-04, 1.8981e-04, 7.4531e-05, 1.2403e-04,\n 1.1387e-04, 9.7162e-05, 1.5150e-04, 1.4077e-04, 1.2620e-04, 1.9078e-04,\n 9.7418e-05, 1.5726e-04, 1.9552e-04, 1.7204e-04, 5.6736e-04, 1.4494e-04,\n 1.8215e-04, 1.4966e-04, 2.0471e-04, 2.3391e-04, 2.3397e-04, 1.8939e-04,\n 1.4005e-04, 1.9653e-04, 1.2407e-04, 1.3891e-04, 1.5598e-04, 1.2292e-04,\n 1.9766e-04, 2.0910e-04, 1.9260e-04, 1.3784e-04, 1.7302e-04, 1.4754e-04,\n 2.3619e-04, 1.4012e-04], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(12520.)",
|
| 27 |
+
"exp_avg": "tensor([[-2.0246e-04, -1.4011e-04, -3.1570e-05, ..., -8.1749e-05,\n 4.6258e-05, -2.2533e-04],\n [-2.3327e-04, -1.7501e-05, 1.5190e-04, ..., -3.9176e-04,\n 4.2563e-05, 7.8972e-04],\n [ 1.5040e-04, -7.6225e-05, -6.0831e-06, ..., 6.6490e-05,\n -5.1094e-05, 4.5848e-04],\n ...,\n [ 6.2600e-05, 2.5542e-04, 6.4520e-05, ..., -7.8986e-05,\n -9.7499e-05, -1.9847e-04],\n [ 3.5809e-05, 4.1177e-04, -3.2527e-05, ..., -2.3733e-04,\n -9.3664e-04, 3.0401e-04],\n [-1.1982e-04, 1.4403e-04, 1.9024e-04, ..., 3.2121e-04,\n 3.7607e-04, -2.0438e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[2.8621e-07, 4.6067e-07, 9.5983e-08, ..., 2.0737e-07, 2.4261e-07,\n 3.5775e-07],\n [5.7186e-07, 1.0102e-06, 3.5232e-07, ..., 7.0228e-07, 6.8152e-07,\n 1.0264e-06],\n [6.9414e-07, 3.8528e-07, 2.9531e-07, ..., 7.1309e-07, 3.9268e-07,\n 6.9428e-07],\n ...,\n [3.9637e-07, 1.1327e-06, 4.3034e-07, ..., 5.8144e-07, 8.3804e-07,\n 7.4037e-07],\n [4.0325e-07, 8.9210e-07, 2.2686e-07, ..., 8.0919e-07, 5.1329e-07,\n 5.9158e-07],\n [3.7646e-07, 5.6332e-07, 3.2005e-07, ..., 1.0891e-06, 4.5526e-07,\n 7.1399e-07]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(12520.)",
|
| 32 |
+
"exp_avg": "tensor([[-9.8896e-05, -2.3269e-04, 7.0111e-05, ..., -5.6463e-05,\n 8.9844e-05, -1.4083e-04],\n [-1.6430e-04, -1.2316e-04, 4.2355e-05, ..., -4.0494e-05,\n 2.9090e-05, 1.0010e-04],\n [ 1.0755e-04, -1.5241e-04, -6.9675e-05, ..., -1.6107e-04,\n -2.2497e-04, -1.4267e-05],\n ...,\n [-1.3228e-06, -6.4983e-04, -1.8165e-06, ..., -1.8714e-04,\n -1.3323e-04, 8.8994e-06],\n [ 1.5436e-04, -1.7798e-05, 1.2432e-05, ..., -1.0484e-04,\n 2.6286e-05, 1.5954e-04],\n [ 7.5220e-05, -2.2870e-04, -3.0899e-05, ..., -2.3464e-04,\n -2.1540e-04, 2.4139e-04]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[1.0860e-07, 8.9260e-08, 8.2359e-08, ..., 1.1364e-07, 8.4695e-08,\n 1.2979e-07],\n [1.9905e-07, 4.0383e-07, 1.8234e-07, ..., 1.7030e-07, 2.4010e-07,\n 2.8055e-07],\n [2.5728e-07, 1.6123e-07, 1.6243e-07, ..., 3.4002e-07, 3.0811e-07,\n 2.3176e-07],\n ...,\n [1.9871e-07, 5.7393e-07, 7.8882e-08, ..., 4.4170e-07, 2.7646e-07,\n 2.7884e-07],\n [2.2534e-07, 2.8120e-07, 7.6269e-08, ..., 3.7380e-07, 1.9931e-07,\n 2.3376e-07],\n [2.2485e-07, 1.6238e-07, 3.8176e-07, ..., 1.9743e-07, 1.6398e-07,\n 2.2993e-07]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(12520.)",
|
| 37 |
+
"exp_avg": "tensor([ 0.0006, -0.0006], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.1744e-06, 7.1744e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.001,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.001,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.001,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.001,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.001,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.001,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.0005,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.0005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 20,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 0,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.001,
|
| 139 |
+
0.001,
|
| 140 |
+
0.001,
|
| 141 |
+
0.0005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 10,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.001,
|
| 149 |
+
0.001,
|
| 150 |
+
0.001,
|
| 151 |
+
0.0005
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"final_val_acc": 71.994
|
| 156 |
+
},
|
| 157 |
+
"train_config": {
|
| 158 |
+
"name": "david_training",
|
| 159 |
+
"run_id": "20251012_135249",
|
| 160 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 161 |
+
"model_variant": "clip_vit_laion_b32",
|
| 162 |
+
"num_classes": 1000,
|
| 163 |
+
"preset": "small_fast",
|
| 164 |
+
"custom_config_path": null,
|
| 165 |
+
"num_classes_override": null,
|
| 166 |
+
"use_belly_override": null,
|
| 167 |
+
"belly_expand_override": null,
|
| 168 |
+
"progressive_training_override": false,
|
| 169 |
+
"scale_warmup_epochs_override": null,
|
| 170 |
+
"num_epochs": 10,
|
| 171 |
+
"batch_size": 1024,
|
| 172 |
+
"learning_rate": 0.001,
|
| 173 |
+
"weight_decay": 1e-05,
|
| 174 |
+
"warmup_epochs": 3,
|
| 175 |
+
"use_rose_loss": true,
|
| 176 |
+
"rose_initial_weight": 0.1,
|
| 177 |
+
"rose_max_weight": 0.5,
|
| 178 |
+
"rose_weight_schedule": "adaptive",
|
| 179 |
+
"use_cayley_loss": false,
|
| 180 |
+
"cayley_weight": 0.001,
|
| 181 |
+
"scale_loss_balance": null,
|
| 182 |
+
"use_mixed_precision": false,
|
| 183 |
+
"gradient_clip": 10.0,
|
| 184 |
+
"scheduler_type": "cosine_restarts",
|
| 185 |
+
"min_lr": 1e-06,
|
| 186 |
+
"freeze_strategy": "never",
|
| 187 |
+
"freeze_threshold": 90.0,
|
| 188 |
+
"unfreeze_on_plateau": true,
|
| 189 |
+
"patience": 10,
|
| 190 |
+
"track_gradients": true,
|
| 191 |
+
"gradient_scale_threshold": 1e-05,
|
| 192 |
+
"gradient_scale_multiplier": 10.0,
|
| 193 |
+
"log_interval": 50,
|
| 194 |
+
"val_interval": 1,
|
| 195 |
+
"save_interval": 5,
|
| 196 |
+
"log_fusion_weights": true,
|
| 197 |
+
"log_loss_components": true,
|
| 198 |
+
"save_format": "safetensors",
|
| 199 |
+
"hf_repo": "AbstractPhil/gated-david",
|
| 200 |
+
"upload_to_hub": true,
|
| 201 |
+
"base_dir": "./david_training",
|
| 202 |
+
"num_workers": 10,
|
| 203 |
+
"pin_memory": true,
|
| 204 |
+
"prefetch_factor": 4,
|
| 205 |
+
"persistent_workers": true
|
| 206 |
+
}
|
| 207 |
+
}
|