Update best_model_acc64.73_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc64.73_metadata.json
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 3,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(15016.)",
|
| 7 |
+
"exp_avg": "tensor([[-2.6447e-05, 1.1745e-04, -3.4533e-05, ..., -5.4458e-05,\n -1.3337e-05, 2.0956e-05],\n [-6.6874e-06, 1.5314e-04, -1.6184e-04, ..., 8.2489e-05,\n -5.8039e-05, 6.9174e-05],\n [-2.5478e-05, 3.0295e-06, 6.5207e-05, ..., 1.8109e-05,\n -2.9141e-05, -2.2293e-05],\n ...,\n [-7.7047e-07, 1.0034e-04, -7.2194e-06, ..., 1.4797e-04,\n 3.5304e-05, -1.5765e-05],\n [ 1.1773e-05, -1.4446e-04, 1.0983e-05, ..., -6.7619e-07,\n -2.1675e-05, -6.5646e-06],\n [ 3.9531e-06, 1.3953e-05, 4.2784e-05, ..., 2.6157e-05,\n -6.2862e-06, -1.4762e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.5620e-07, 1.4090e-07, 6.2966e-08, ..., 6.7537e-08, 3.8491e-08,\n 3.4990e-08],\n [5.4325e-08, 2.1017e-07, 1.0459e-07, ..., 6.6974e-08, 2.7612e-08,\n 2.8991e-08],\n [2.5255e-08, 3.1136e-08, 2.4833e-08, ..., 7.0850e-08, 1.4428e-08,\n 1.7221e-08],\n ...,\n [4.3590e-08, 3.7059e-07, 5.6753e-08, ..., 8.8058e-08, 2.5963e-08,\n 4.1080e-08],\n [9.4490e-08, 1.5688e-07, 6.2404e-08, ..., 7.1195e-08, 2.9712e-08,\n 3.7677e-08],\n [7.2894e-09, 2.5451e-08, 1.1285e-08, ..., 7.5063e-09, 3.2098e-09,\n 4.5392e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(15016.)",
|
| 12 |
+
"exp_avg": "tensor([-2.0665e-03, 3.0468e-03, -1.8852e-04, 1.0745e-03, -2.6553e-04,\n -1.9093e-03, 7.1757e-04, 2.8728e-03, 1.1077e-03, -1.7252e-03,\n 7.6503e-04, -4.6548e-04, 2.3756e-03, 2.9955e-03, -1.4018e-03,\n -1.6714e-03, -4.5902e-03, -1.4347e-03, 1.1213e-03, -5.4055e-03,\n 9.4479e-04, 9.0543e-04, 2.4702e-03, 1.4494e-03, -3.5467e-04,\n 1.2197e-03, 2.5225e-03, -7.5519e-04, -4.4613e-03, -3.3529e-03,\n -7.4945e-04, -4.7693e-04, 4.6034e-03, -3.5836e-04, 1.0062e-03,\n -3.2534e-03, -1.8581e-03, -3.3163e-04, -3.1769e-04, 8.9701e-04,\n -2.8357e-03, 1.1165e-03, 2.8979e-04, 1.5136e-05, -5.0803e-04,\n -2.5535e-03, 4.6452e-04, 2.2825e-04, 1.2149e-03, 1.9129e-03,\n -2.3129e-03, 1.9109e-03, 4.0164e-04, 2.2032e-03, 2.8210e-03,\n -6.2396e-04, 1.1415e-03, 3.2885e-03, 2.0644e-03, -2.3675e-03,\n -2.9699e-03, -1.8699e-03, -6.9823e-04, 8.1698e-04, -1.1250e-03,\n -1.8575e-03, 2.3317e-03, 2.4321e-04, -1.8498e-03, -1.7028e-04,\n -1.6881e-03, 3.7140e-04, 9.3650e-04, -5.6041e-04, 2.3555e-04,\n -9.2844e-04, -7.5102e-04, -9.9713e-05, 2.6565e-03, 8.2891e-04,\n -6.6419e-04, 9.0002e-05, 1.8570e-03, 2.9477e-04, -1.0773e-04,\n -1.7773e-03, -1.6536e-03, -1.5562e-03, -6.2668e-04, 1.6907e-03,\n -1.2878e-03, -1.2347e-03, -1.5190e-03, -5.5513e-04, 6.1322e-04,\n -6.9344e-04, 1.1799e-03, -2.5288e-03, -3.6605e-03, 1.3856e-03,\n -8.5335e-04, -3.0761e-04, 4.1449e-04, -1.7231e-03, -3.7780e-04,\n 1.2443e-03, 2.2534e-04, -1.0876e-03, -7.6108e-04, -4.8415e-04,\n -7.6842e-05, -2.9790e-03, -9.4275e-04, 5.6916e-04, -1.3381e-03,\n -4.5943e-04, 4.8282e-04, -1.0299e-03, 1.4059e-03, -1.8829e-03,\n 4.7976e-04, 4.8128e-03, -1.6650e-04, 3.4391e-03, -2.5848e-03,\n 6.0018e-04, 1.9618e-04, 9.4685e-06, 1.1907e-03, -3.9234e-03,\n -8.7049e-04, -1.8597e-03, 1.0293e-03, -1.5745e-03, 1.6502e-03,\n 7.6186e-04, -6.3348e-04, -2.9707e-03, -6.4559e-04, 3.8430e-04,\n 2.1154e-03, -8.6437e-04, 1.4523e-03, -3.2496e-03, 9.9398e-04,\n 6.6313e-04, 7.1106e-04, 7.9850e-04, 3.6835e-04, -2.6761e-04,\n 8.5555e-04, 5.3927e-04, 1.0781e-03, -2.2119e-03, -8.7731e-04,\n -5.9451e-04, -4.3491e-04, -9.0246e-04, -2.6496e-04, 6.9845e-04,\n 1.9859e-03, 5.0228e-04, -8.2774e-04, -5.6735e-04, -1.0859e-03,\n 1.2724e-03, -4.8889e-03, -1.2548e-03, -7.8686e-04, 7.1692e-04,\n 5.1249e-04, -5.1285e-04, 2.7449e-03, -1.2042e-03, -3.5814e-03,\n 2.1531e-04, 4.1874e-04, 1.2665e-03, -4.5036e-04, -4.4204e-04,\n -1.5963e-03, -2.3526e-04, 1.1150e-03, 2.1218e-03, 1.1600e-03,\n -1.4772e-04, -8.8541e-04, -5.1418e-04, -2.2892e-04, -2.5742e-04,\n 1.0561e-03, 1.4925e-03, 1.1210e-03, -1.0584e-03, 3.4171e-04,\n 5.2287e-05, -1.0794e-04, -1.9146e-03, -6.1624e-04, -3.5510e-04,\n 1.3023e-03, 9.9702e-04, 1.2755e-03, -2.8430e-03, 4.3224e-04,\n -2.2501e-03, 7.6182e-05, 3.0471e-03, 2.5099e-04, -1.3853e-03,\n 1.5789e-03, -1.5216e-04, 3.1525e-04, -4.1877e-03, 3.7838e-03,\n 1.1227e-03, -4.2004e-04, 3.9906e-04, 7.0996e-04, 8.3158e-04,\n -2.9639e-04, -1.5026e-04, 9.5529e-04, -1.2137e-03, 3.4100e-04,\n -3.5012e-04, -1.1348e-03, -8.3909e-04, 7.4223e-04, 8.0864e-04,\n 8.0826e-04, 4.0653e-04, -1.1120e-03, -1.2951e-03, 1.9969e-03,\n -1.3180e-03, -1.1036e-03, 3.2228e-04, 1.0675e-03, -1.6832e-03,\n 8.5818e-04, 2.6376e-03, -1.7457e-03, 3.3723e-03, -1.6051e-03,\n 2.7520e-03, 2.4781e-04, -8.5544e-04, -3.4915e-03, 2.0164e-03,\n -7.7143e-04, -3.8465e-04, 3.2096e-04, 9.6934e-04, -4.6338e-04,\n 3.2290e-04, -1.6363e-03, -1.2233e-03, 1.9431e-03, -4.8781e-04,\n 1.3025e-03, 2.1201e-04, -1.9730e-03, -1.7799e-05, 4.0653e-04,\n -6.6413e-04, -3.8066e-04, 1.3043e-03, -5.6072e-04, 2.7643e-03,\n -1.0107e-03, 1.8627e-03, -4.8301e-03, 8.9789e-05, 1.5999e-03,\n 6.2439e-04, -6.1002e-04, -1.4641e-03, -1.4999e-03, 2.5383e-04,\n -2.3673e-03, 2.2428e-03, 3.1393e-04, -9.0421e-04, 6.4190e-04,\n -1.8595e-04, 1.1600e-03, 2.7690e-04, -2.9803e-03, -1.1882e-03,\n 6.9401e-05, 5.6618e-04, -9.3717e-04, 2.2894e-04, -4.7255e-04,\n -2.1355e-04, -6.3724e-04, -2.4394e-03, -5.7374e-04, 3.5335e-03,\n -1.4840e-04, -1.1395e-03, 8.5685e-04, 6.1742e-04, -5.0498e-04,\n -3.3611e-05, 4.1186e-03, -5.3349e-04, 2.2073e-03, -1.8890e-03,\n -1.6444e-03, -9.9211e-04, -3.6786e-04, 1.0474e-03, 1.4969e-03,\n -1.2368e-03, 1.9049e-05, -4.8997e-04, -9.1449e-04, 1.5349e-03,\n 1.1358e-03, -1.3314e-04, -5.7135e-04, 7.0854e-04, -2.8486e-04,\n 1.6655e-03, -1.5423e-03, -2.6816e-03, 1.6597e-03, -3.6872e-04,\n -4.5427e-04, 3.2869e-04, -2.6841e-05, 3.0441e-03, -7.9392e-04,\n 2.9000e-03, 8.0452e-04, 1.3945e-03, -1.7326e-03, 7.0634e-04,\n 9.1589e-05, 1.3419e-03, 6.2324e-04, 1.9015e-03, -4.0033e-03,\n -1.6372e-03, 2.0315e-03, -2.7054e-03, -9.4487e-04, 3.0750e-03,\n 1.0313e-04, -2.0529e-03, -2.0738e-03, -5.0132e-04, -3.6649e-04,\n 2.1863e-05, -3.0711e-04, 1.4679e-03, -2.7534e-04, 1.5991e-03,\n -1.2665e-03, -7.9286e-04, -1.5415e-03, 1.3453e-03, 1.3828e-03,\n 7.5770e-04, -7.7821e-04, 6.6419e-04, 1.1806e-03, 2.5884e-05,\n 3.5127e-05, 6.3652e-04, -2.3253e-04, 2.3544e-03, -3.7816e-03,\n -2.1732e-04, 8.4945e-05, 3.1851e-04, 1.1124e-04, -9.5483e-04,\n -2.1720e-03, 3.0747e-03, 4.1140e-04, 1.3233e-03, 2.4554e-04,\n 9.2644e-04, 1.5630e-03, -1.3012e-03, -1.8599e-03, -3.5387e-05,\n -2.2292e-03, -7.4724e-04, 2.2543e-03, -2.9395e-04, 1.9220e-04,\n -8.3881e-04, 3.2279e-04, 1.7969e-03, -1.6518e-03, 2.4342e-03,\n -2.8385e-03, 7.9799e-04, 1.5061e-03, 1.2289e-03, 1.4136e-03,\n -1.0175e-03, -1.7356e-03, 2.5604e-03, 6.8354e-04, -9.1361e-04,\n 1.9236e-04, 6.7991e-04, -1.7745e-03, 2.9019e-04, -7.1209e-04,\n 1.3069e-03, 5.1879e-04, 1.0970e-03, 1.8922e-04, -2.6985e-04,\n 2.6043e-04, 1.0784e-03, -2.0200e-04, 7.7059e-04, 8.2120e-04,\n -2.0180e-03, 1.8643e-03, -1.6483e-03, 7.8442e-04, -3.7535e-05,\n 1.8006e-04, -2.0576e-04, 2.0301e-03, 2.2358e-03, 1.2818e-04,\n -2.6877e-03, -2.3500e-03, 2.2143e-03, -8.5105e-05, 7.1598e-04,\n 8.6790e-04, -2.1403e-03, 3.6889e-04, -2.3064e-03, 1.4678e-03,\n 4.7602e-04, -8.8685e-04, 2.6693e-03, 1.4529e-04, -1.3662e-03,\n 2.7267e-04, 3.1690e-03, 1.5568e-04, 1.3793e-03, 1.8822e-03,\n -4.2774e-04, -4.5249e-03, 1.8493e-04, 1.6437e-04, -5.6900e-04,\n 4.2422e-04, 4.6799e-04, 1.9764e-03, -8.5863e-05, 1.2381e-03,\n -2.2723e-03, 1.0254e-04, 8.9128e-04, 1.5210e-03, 1.3690e-03,\n 1.5288e-03, -3.1392e-03, -5.4417e-04, -1.2684e-03, -2.3180e-03,\n 1.6698e-03, -2.6722e-03, 1.6005e-03, 9.8905e-04, 7.8116e-04,\n -7.3893e-04, -1.2147e-03, 3.1345e-03, 1.5120e-03, 4.8170e-04,\n -9.6890e-04, 6.3098e-04, -2.5225e-06, 5.0316e-03, 4.0197e-04,\n -1.4756e-03, 2.4293e-04, 1.1685e-03, 1.6873e-03, 1.2521e-03,\n -1.7834e-04, 3.3005e-03, 4.6471e-04, 4.0376e-03, -1.2864e-03,\n -2.0030e-03, 1.7021e-04, 5.8833e-04, -1.9079e-03, 1.0433e-03,\n -1.2289e-03, -4.0436e-03, 3.5836e-04, 6.7535e-04, 1.2560e-03,\n 5.8089e-04, 4.6445e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([5.1594e-05, 5.1425e-05, 3.1402e-05, 3.9864e-05, 4.2166e-05, 7.7668e-05,\n 3.4547e-05, 6.7502e-05, 4.9824e-05, 5.4787e-05, 6.9986e-05, 7.3076e-05,\n 1.1028e-04, 2.8363e-05, 6.8364e-05, 5.3847e-05, 4.5217e-05, 4.5762e-05,\n 5.3250e-05, 6.3948e-05, 4.7237e-05, 4.1152e-05, 3.3863e-05, 6.6232e-05,\n 1.1030e-04, 4.0802e-05, 3.6316e-05, 7.3170e-05, 6.6612e-05, 5.0794e-05,\n 2.1338e-05, 3.5678e-05, 5.9043e-05, 4.5717e-05, 2.7190e-05, 5.6947e-05,\n 3.9855e-05, 3.0990e-05, 3.3853e-05, 2.4379e-05, 3.7605e-05, 2.8048e-05,\n 6.7033e-06, 7.5336e-05, 1.2925e-04, 4.3700e-05, 3.4236e-05, 2.9197e-05,\n 5.4939e-05, 9.0504e-05, 9.3423e-05, 3.1067e-05, 3.7094e-05, 4.7294e-05,\n 6.3011e-05, 4.1373e-05, 4.3884e-05, 1.0866e-04, 3.6661e-05, 4.0206e-05,\n 1.2067e-04, 2.8139e-05, 3.6453e-05, 3.3586e-05, 4.3857e-05, 3.4122e-05,\n 4.0991e-05, 3.4098e-05, 6.7812e-05, 4.2911e-05, 4.5719e-05, 4.3993e-05,\n 3.4930e-05, 3.8351e-05, 3.6339e-05, 4.1223e-05, 5.5117e-05, 4.7335e-05,\n 7.0750e-05, 4.2063e-05, 6.9318e-05, 3.6437e-05, 3.8773e-05, 2.2625e-05,\n 4.4206e-05, 4.6428e-05, 6.1020e-05, 3.6141e-05, 3.1051e-05, 5.2022e-05,\n 3.4529e-05, 7.6531e-05, 4.3899e-05, 4.5407e-05, 4.4861e-05, 5.5540e-05,\n 6.6764e-05, 4.3094e-05, 4.3085e-05, 2.1509e-05, 5.5459e-05, 8.0406e-05,\n 6.7491e-05, 3.9142e-05, 8.1915e-05, 7.0307e-05, 4.7345e-05, 1.1158e-04,\n 3.6082e-05, 5.2154e-05, 6.0608e-05, 5.4327e-05, 3.9491e-05, 5.9293e-05,\n 4.7665e-05, 4.5525e-05, 6.6589e-05, 5.2400e-05, 4.0300e-05, 5.3816e-05,\n 1.7162e-05, 6.5768e-05, 5.3704e-05, 6.2285e-05, 3.6412e-05, 5.1522e-05,\n 3.9676e-05, 6.0175e-05, 4.9551e-05, 8.3722e-05, 5.3710e-05, 4.5180e-05,\n 4.9731e-05, 4.5448e-05, 1.1733e-04, 3.8973e-05, 3.0415e-05, 5.2242e-05,\n 3.6224e-05, 5.6067e-05, 1.8081e-05, 4.0016e-05, 5.0355e-05, 1.1410e-04,\n 5.4597e-05, 4.1818e-05, 2.9241e-05, 5.5858e-05, 5.7035e-05, 1.0153e-04,\n 2.6560e-05, 5.3972e-05, 2.7753e-05, 8.1007e-05, 3.0526e-05, 4.3248e-05,\n 5.9845e-05, 5.1309e-05, 3.8557e-05, 3.8217e-05, 5.4749e-05, 1.7122e-05,\n 4.6768e-05, 8.0082e-05, 1.8954e-05, 7.3691e-05, 4.4935e-05, 2.4919e-05,\n 1.6967e-05, 4.4276e-05, 3.4783e-05, 5.5548e-05, 4.8834e-05, 4.1070e-05,\n 4.4412e-05, 3.0210e-05, 9.5258e-05, 2.7575e-05, 2.2694e-05, 4.9426e-05,\n 3.6126e-05, 3.4749e-05, 8.7498e-05, 4.3313e-05, 3.9539e-05, 4.8935e-05,\n 1.1608e-04, 2.4220e-05, 2.4733e-05, 1.7384e-05, 4.2220e-05, 6.1993e-05,\n 6.3627e-05, 6.9815e-05, 2.7123e-05, 2.0522e-05, 4.6860e-05, 3.0003e-05,\n 2.3572e-05, 2.1999e-05, 2.6491e-05, 5.0392e-05, 3.5713e-05, 2.5185e-05,\n 4.7180e-05, 4.7060e-05, 3.7790e-05, 2.9353e-05, 6.4095e-05, 2.5618e-05,\n 3.9969e-05, 7.1106e-05, 4.7603e-05, 3.4077e-05, 6.8101e-05, 1.9937e-05,\n 3.1524e-05, 2.7222e-05, 6.4942e-05, 6.4889e-05, 2.8047e-05, 2.0820e-05,\n 4.6539e-05, 1.8717e-05, 3.1944e-05, 6.4389e-05, 3.3409e-05, 2.4378e-05,\n 2.5795e-05, 6.2426e-05, 4.9389e-05, 2.4953e-05, 6.0956e-05, 4.1647e-05,\n 3.8609e-05, 4.1685e-05, 3.9745e-05, 7.2345e-06, 4.7636e-05, 1.2982e-04,\n 3.6464e-05, 8.1339e-05, 3.3726e-05, 5.8675e-05, 9.6641e-05, 5.4819e-05,\n 5.1973e-05, 3.6528e-05, 6.0850e-05, 3.8596e-05, 6.4305e-05, 3.2399e-05,\n 4.6711e-05, 5.4301e-05, 4.3701e-05, 4.3198e-05, 3.3058e-05, 5.6744e-05,\n 6.6691e-05, 3.0805e-05, 3.3695e-05, 7.6084e-05, 3.4833e-05, 5.5597e-05,\n 4.4356e-05, 3.0843e-05, 3.1422e-05, 4.3164e-05, 3.1199e-05, 4.3982e-05,\n 4.0409e-05, 4.6293e-05, 3.9569e-05, 3.2085e-05, 2.8227e-05, 6.6536e-05,\n 2.3578e-05, 7.4453e-05, 4.3867e-05, 2.9703e-05, 3.3710e-05, 3.3407e-05,\n 4.2678e-05, 6.4082e-05, 2.4728e-05, 6.4572e-05, 4.2750e-05, 4.0137e-05,\n 8.2780e-05, 5.7789e-05, 3.0626e-05, 3.8305e-05, 7.4086e-05, 3.2237e-05,\n 6.0831e-05, 2.6245e-05, 3.2705e-05, 6.5683e-05, 1.2260e-04, 4.7317e-05,\n 3.2964e-05, 3.7545e-05, 4.6184e-05, 3.7676e-05, 7.6379e-05, 2.4925e-05,\n 4.6946e-05, 4.5540e-05, 8.4991e-05, 5.0730e-05, 5.4318e-05, 5.9648e-05,\n 2.8716e-05, 2.8711e-05, 5.8626e-05, 3.5381e-05, 4.0662e-05, 1.9498e-05,\n 4.5207e-05, 5.9119e-05, 1.0785e-04, 4.4661e-05, 5.2494e-05, 5.2276e-05,\n 7.0526e-05, 1.1036e-04, 2.8382e-05, 5.9767e-05, 4.2345e-05, 9.1027e-05,\n 4.6135e-05, 6.0254e-05, 1.8273e-05, 2.9352e-05, 3.3080e-05, 4.2478e-05,\n 7.0406e-05, 2.7079e-05, 7.2934e-05, 5.4909e-05, 4.0629e-05, 4.0807e-05,\n 4.9058e-05, 3.1382e-05, 6.4002e-05, 6.5296e-05, 5.0190e-05, 3.0265e-05,\n 4.9643e-05, 4.6381e-05, 3.7621e-05, 4.8647e-05, 4.3311e-05, 3.6287e-05,\n 4.4263e-05, 3.0128e-05, 3.3026e-05, 3.9291e-05, 2.8305e-05, 3.0209e-05,\n 2.5931e-05, 9.7704e-05, 1.3410e-04, 4.3212e-05, 4.3469e-05, 3.0703e-05,\n 4.2576e-05, 6.5132e-05, 2.9270e-05, 3.3875e-05, 2.4524e-05, 4.6129e-05,\n 2.4685e-05, 4.3712e-05, 8.4412e-05, 4.5400e-05, 2.3723e-05, 1.8654e-05,\n 3.2936e-05, 3.6126e-05, 4.0051e-05, 5.5070e-05, 5.2879e-05, 3.7361e-05,\n 4.4273e-05, 1.0974e-04, 3.9803e-05, 6.7653e-05, 4.7871e-05, 4.7401e-05,\n 5.7285e-05, 5.8287e-05, 3.8596e-05, 4.1562e-05, 4.6739e-05, 3.0612e-05,\n 2.2429e-05, 6.9150e-05, 5.1170e-05, 8.4894e-05, 5.2699e-05, 3.3934e-05,\n 5.5075e-05, 4.2101e-05, 5.2285e-05, 4.4512e-05, 8.4870e-05, 2.9014e-05,\n 3.0551e-05, 5.5624e-05, 6.2174e-05, 6.3671e-05, 4.8403e-05, 7.2674e-05,\n 3.4377e-05, 2.7277e-05, 7.0061e-05, 2.4355e-05, 6.2013e-05, 8.9879e-05,\n 5.3153e-05, 3.2053e-05, 5.9874e-05, 5.8511e-05, 4.4877e-05, 4.7843e-05,\n 8.3594e-05, 4.6797e-05, 3.5160e-05, 4.3309e-05, 3.1989e-05, 2.4622e-05,\n 6.6415e-05, 5.1954e-05, 3.6996e-05, 3.8485e-05, 7.4607e-05, 5.2448e-05,\n 3.9700e-05, 4.5486e-05, 5.4790e-05, 7.8761e-05, 3.8739e-05, 1.6668e-04,\n 5.1813e-05, 4.3741e-05, 2.8877e-05, 4.5514e-05, 4.2345e-05, 4.7689e-05,\n 4.3852e-05, 4.6893e-05, 4.8841e-05, 7.9204e-05, 3.9033e-05, 2.1300e-05,\n 9.9843e-05, 4.5450e-05, 2.3059e-05, 5.7111e-05, 4.6606e-05, 3.1738e-05,\n 4.4653e-05, 3.4289e-05, 9.0357e-05, 3.9748e-05, 3.5496e-05, 4.3180e-05,\n 7.0005e-05, 6.0128e-05, 5.3832e-05, 7.7277e-05, 6.0015e-05, 5.3597e-05,\n 4.9437e-05, 5.1320e-05, 4.4787e-05, 3.9648e-05, 4.3512e-05, 3.2069e-05,\n 2.4951e-05, 3.0857e-05, 6.3820e-05, 4.6323e-05, 3.7371e-05, 4.3994e-05,\n 6.7629e-05, 4.1225e-05, 8.8365e-05, 4.8328e-05, 4.1784e-05, 4.7551e-05,\n 6.4267e-05, 3.1901e-05, 2.6054e-05, 3.9985e-05, 5.2383e-05, 2.4869e-05,\n 4.2586e-05, 3.8691e-05, 5.5419e-05, 3.0845e-05, 5.2545e-05, 4.2866e-05,\n 4.0659e-05, 2.1842e-05, 4.1209e-05, 5.4572e-05, 4.1675e-05, 4.6203e-05,\n 5.2844e-05, 4.7356e-06], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(15016.)",
|
| 17 |
+
"exp_avg": "tensor([-4.3468e-03, 6.7473e-03, 1.5180e-04, 1.4117e-03, 2.0356e-04,\n -1.6270e-03, 2.6014e-03, 5.0951e-03, 3.4442e-03, -4.8075e-03,\n 1.0049e-03, -1.3875e-03, 4.0256e-03, 7.4495e-03, -2.4061e-03,\n -3.8107e-03, -6.9863e-03, -2.9697e-03, 1.3968e-03, -1.2958e-02,\n 2.5400e-03, 7.5469e-04, 5.4859e-03, 1.5521e-03, -1.2669e-03,\n 2.2612e-03, 5.7722e-03, -7.4617e-04, -9.5738e-03, -7.5796e-03,\n -2.2570e-03, -1.0834e-03, 1.0265e-02, -6.1307e-04, 2.7536e-03,\n -4.9161e-03, -3.6328e-03, 3.7270e-04, 5.4424e-05, 1.9132e-03,\n -4.9018e-03, 1.6675e-03, -5.6052e-45, -2.2976e-04, -1.6640e-03,\n -4.6228e-03, 1.4103e-03, 6.4768e-04, 4.1857e-03, 3.1800e-03,\n -3.9707e-03, 4.2583e-03, 1.8140e-04, 5.3355e-03, 4.7036e-03,\n -1.0676e-03, 3.7159e-03, 3.8817e-03, 4.5494e-03, -3.6002e-03,\n -4.8980e-03, -5.6804e-03, -4.8008e-04, 1.2239e-03, -2.3034e-03,\n -4.6917e-03, 6.7354e-03, 1.0362e-04, -3.1872e-03, 2.4732e-03,\n -4.2242e-03, 3.7213e-04, 2.7405e-03, -2.0501e-03, 1.1852e-03,\n -2.2273e-03, -1.9502e-03, -7.2956e-04, 6.1072e-03, 2.2603e-03,\n 6.3200e-04, -5.0046e-05, 3.4291e-03, -3.4396e-04, 2.3538e-04,\n -3.5018e-03, -3.7341e-03, -4.5419e-03, -1.7077e-03, 3.8958e-03,\n -2.4657e-03, -2.1750e-03, -5.3079e-03, -1.0678e-03, 1.6242e-03,\n -9.3821e-04, 2.6715e-03, -5.1257e-03, -8.2591e-03, 5.1791e-03,\n -2.9243e-04, 1.9815e-04, -1.3906e-07, -5.7192e-03, 4.0328e-04,\n 4.1501e-03, 7.1525e-04, 4.4178e-05, -2.1551e-03, -2.0815e-03,\n -2.3939e-04, -7.1718e-03, -3.2226e-03, 1.3327e-03, -2.7329e-03,\n -1.3885e-03, 4.8463e-04, -3.2313e-03, 2.9605e-03, -5.1363e-03,\n 1.7722e-03, 1.1096e-02, -2.1398e-03, 8.5177e-03, -8.0560e-03,\n 2.4004e-03, 1.7500e-03, -1.0515e-03, 3.3975e-03, -1.2359e-02,\n -1.7137e-03, -2.9438e-03, 3.2794e-03, -3.4254e-03, 1.4457e-03,\n 1.2958e-03, -9.9536e-04, -8.7337e-03, -2.7100e-03, 2.4913e-04,\n 6.0902e-03, -1.0569e-03, 3.2413e-03, -4.4786e-03, 1.5226e-03,\n 4.5712e-04, 1.4396e-03, 1.3589e-03, 2.4981e-03, -8.9082e-04,\n 1.2529e-03, 1.6711e-03, 2.6951e-03, -5.1137e-03, -1.8769e-03,\n -1.1994e-03, -1.6557e-03, -3.8492e-03, -1.3238e-03, 9.4925e-04,\n 4.8499e-03, 1.2226e-03, -1.4545e-03, -4.0477e-04, -2.9771e-03,\n 3.8867e-03, -1.1314e-02, -3.2524e-03, -1.8658e-03, 2.3117e-03,\n 4.2865e-04, -1.0687e-03, 6.2748e-03, -4.6361e-03, -7.5814e-03,\n 1.1643e-03, -2.1336e-04, 2.6682e-03, -1.3522e-03, 1.2995e-04,\n -4.8777e-03, -2.2102e-03, 2.3113e-03, 3.5359e-03, 4.3512e-03,\n -1.7159e-05, -3.6956e-03, -1.1302e-03, -1.2206e-03, -9.3981e-04,\n 1.2565e-03, 2.3714e-03, 1.1758e-03, -1.3578e-03, 7.1278e-04,\n -9.5605e-04, -3.4358e-04, -3.7023e-03, -7.2748e-04, -1.4359e-03,\n 2.5173e-03, 2.6544e-03, 2.1555e-03, -5.3891e-03, -7.1924e-05,\n -5.0122e-03, -1.3080e-04, 7.5015e-03, -1.9829e-04, -2.2901e-03,\n 3.4073e-03, -4.6145e-04, 1.0219e-03, -8.0823e-03, 1.0148e-02,\n 3.8373e-03, -9.4623e-04, 2.2338e-03, 2.3794e-03, 4.0650e-03,\n -1.2960e-03, -1.0945e-03, 2.5406e-03, -4.0335e-03, -6.8276e-04,\n -1.9602e-03, -1.0767e-03, -2.5693e-03, 1.6182e-03, 9.2761e-04,\n 2.1292e-03, 8.4725e-04, -4.0926e-03, -3.6747e-03, 3.4624e-03,\n -2.1604e-03, -2.9115e-03, 5.6052e-45, 1.8453e-03, -3.1482e-03,\n 1.2661e-03, 4.1696e-03, -7.6599e-03, 6.7281e-03, -2.4317e-03,\n 6.6295e-03, 8.3228e-04, -7.8810e-04, -9.1249e-03, 4.2367e-03,\n -1.4872e-03, -1.5103e-03, 7.8609e-04, 1.6917e-03, -1.1194e-03,\n 6.9057e-04, -4.3570e-03, -2.5255e-03, 2.7915e-03, -8.8969e-05,\n 5.6861e-03, -5.8223e-04, -4.0374e-03, -4.2984e-04, 4.9886e-04,\n -2.5638e-03, 7.5372e-04, 2.7603e-03, -1.3611e-03, 8.8450e-03,\n -2.1947e-03, 4.3049e-03, -7.3199e-03, -5.3116e-04, 2.8114e-03,\n 1.1492e-03, -2.9130e-03, -2.9045e-03, -3.4627e-03, 3.7279e-04,\n -5.3454e-03, 3.0964e-03, -7.8643e-05, -2.7358e-03, 2.2812e-03,\n -3.4601e-04, 3.4700e-03, 3.4707e-06, -7.5361e-03, -2.1654e-03,\n -8.8602e-04, 7.2888e-05, -3.4007e-04, 1.0759e-03, -7.0078e-04,\n -4.3179e-05, -1.3283e-03, -3.2959e-03, -1.3133e-03, 8.4499e-03,\n 7.8012e-05, -2.1887e-03, 3.1250e-03, 3.0210e-04, -4.4997e-04,\n 7.2261e-04, 7.1422e-03, -1.4186e-03, 4.4253e-03, -7.6165e-03,\n -4.5101e-03, -2.3868e-03, -5.8837e-04, 2.4661e-03, 2.0493e-03,\n -3.0512e-03, 1.6730e-04, -8.6795e-04, -3.8314e-03, 3.6822e-03,\n 4.0605e-03, 1.5652e-03, -8.9623e-04, 2.4828e-03, -1.5972e-03,\n 1.9111e-03, -3.9494e-03, -6.7541e-03, 2.1186e-03, 4.4975e-05,\n -2.8570e-03, 1.2571e-03, -7.1102e-04, 6.8869e-03, -2.5566e-03,\n 7.1146e-03, -5.6793e-05, 3.5375e-03, -2.7068e-03, 5.2470e-05,\n 8.5285e-04, 2.8844e-03, -2.5648e-04, 3.8481e-03, -7.2536e-03,\n -4.6133e-03, 5.4907e-03, -5.7005e-03, -1.8987e-03, 7.2421e-03,\n 3.3756e-04, -5.1143e-03, -5.6451e-03, -2.5054e-03, -2.0684e-03,\n -8.9679e-04, -1.2906e-03, 3.1144e-03, -1.4405e-03, 3.2743e-03,\n -3.7447e-03, -2.3650e-03, -3.2745e-03, 3.7561e-03, 2.5396e-03,\n 1.7478e-03, -1.1586e-03, 2.7342e-03, 2.1118e-03, -1.5200e-03,\n -1.0167e-03, 1.6578e-03, -4.5050e-03, 3.8766e-03, -4.9373e-03,\n 9.7830e-05, 2.6344e-05, 1.0548e-03, 1.2654e-03, -1.7219e-03,\n -7.1680e-03, 8.0625e-03, 1.2628e-03, 3.5433e-03, 8.3480e-04,\n 1.4798e-03, 3.3840e-03, -2.1277e-03, -3.1804e-03, -9.0791e-04,\n -2.2568e-03, 3.5180e-05, 4.9250e-03, -8.9390e-04, 2.7753e-04,\n -2.2146e-03, -5.4923e-05, 5.5359e-03, -3.4805e-03, 5.0803e-03,\n -6.1446e-03, 1.0103e-04, 2.0272e-03, 2.1902e-03, 2.8322e-03,\n -1.6058e-03, -4.0944e-03, 3.5028e-03, 1.1297e-03, -2.5022e-03,\n 5.5652e-04, 4.3136e-04, -5.5943e-03, 1.9341e-03, -2.5630e-03,\n 6.4388e-03, 1.9979e-03, 4.4310e-03, 7.2918e-04, -3.7591e-04,\n 7.5689e-04, 1.5917e-03, -1.5426e-03, 1.4511e-03, 3.0834e-03,\n -4.7360e-03, 2.2292e-03, -4.2702e-03, 1.3799e-03, -3.9544e-04,\n -1.2947e-04, -1.5078e-03, 4.0382e-03, 4.6936e-03, 1.1018e-03,\n -8.4522e-03, -3.7252e-03, 6.7570e-03, -3.5466e-04, 1.7110e-03,\n 1.6019e-03, -4.2606e-03, -2.2477e-04, -3.5731e-03, 2.4536e-03,\n 5.6830e-04, -2.0923e-03, 5.3561e-03, 3.5087e-04, -1.9343e-03,\n -3.9451e-04, 5.8832e-03, -8.9938e-05, 5.4799e-03, 5.1330e-03,\n -2.0174e-03, -8.6281e-03, 6.1698e-04, -7.1837e-04, -1.5884e-03,\n 2.0457e-03, 9.0484e-04, 5.1305e-03, -3.1701e-04, 3.1840e-04,\n -4.0360e-03, 1.3565e-03, 2.4139e-03, 3.1050e-03, 3.3866e-03,\n 3.3364e-03, -5.4507e-03, -1.9953e-03, -2.3204e-03, -4.6710e-03,\n 3.0310e-03, -5.8433e-03, 4.3037e-03, 2.2246e-03, 1.5140e-03,\n -1.5920e-03, -7.1252e-03, 6.0387e-03, 2.2891e-03, 1.4019e-04,\n -1.9192e-03, 5.7401e-04, 1.2456e-03, 9.7567e-03, 1.6292e-03,\n -3.6756e-03, 1.2227e-04, 2.0944e-03, 4.5631e-03, 4.0370e-03,\n -1.2045e-03, 6.6815e-03, 1.8535e-03, 7.7356e-03, -4.2065e-03,\n -2.4198e-03, -4.2987e-05, -4.8828e-05, -4.5115e-03, 1.9142e-03,\n -4.6868e-03, -5.9137e-03, 5.6089e-04, 6.0791e-04, 1.7884e-03,\n 1.6512e-03, 5.6052e-45], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([3.6312e-04, 2.2212e-04, 1.1584e-04, 1.6942e-04, 1.0865e-04, 2.7014e-04,\n 1.7529e-04, 2.5402e-04, 2.5538e-04, 3.3056e-04, 3.6633e-04, 3.4032e-04,\n 3.2022e-04, 2.0139e-04, 1.8105e-04, 2.0404e-04, 1.7309e-04, 2.0361e-04,\n 1.8608e-04, 2.9993e-04, 2.8306e-04, 1.2428e-04, 1.2473e-04, 2.4195e-04,\n 4.8835e-04, 2.1699e-04, 1.8345e-04, 3.4733e-04, 2.3778e-04, 2.3832e-04,\n 1.9610e-04, 2.6755e-04, 2.2705e-04, 1.6830e-04, 1.3908e-04, 1.2500e-04,\n 1.1734e-04, 1.4548e-04, 1.1754e-04, 1.5884e-04, 1.0372e-04, 8.2107e-05,\n 2.2593e-10, 2.0480e-04, 4.3858e-04, 1.6788e-04, 1.7691e-04, 5.6905e-05,\n 5.5282e-04, 2.4363e-04, 3.2886e-04, 1.1667e-04, 1.3933e-04, 2.5504e-04,\n 1.9687e-04, 1.7899e-04, 3.2567e-04, 1.1457e-04, 1.7045e-04, 9.8201e-05,\n 2.0053e-04, 1.8152e-04, 2.3924e-04, 1.3962e-04, 1.2695e-04, 1.5696e-04,\n 2.5992e-04, 1.3251e-04, 2.0699e-04, 1.2800e-04, 2.4888e-04, 1.3786e-04,\n 2.7094e-04, 2.7040e-04, 1.4505e-04, 2.2221e-04, 2.4662e-04, 1.6268e-04,\n 2.4592e-04, 2.7718e-04, 2.4020e-04, 1.4550e-04, 1.3981e-04, 9.5320e-05,\n 1.6324e-04, 3.8143e-04, 3.5734e-04, 2.1440e-04, 9.2289e-05, 6.1372e-04,\n 2.1457e-04, 1.8455e-04, 2.4888e-04, 2.6810e-04, 3.8442e-04, 2.3413e-04,\n 2.6067e-04, 2.2172e-04, 2.3898e-04, 4.5099e-04, 2.3146e-04, 2.7473e-04,\n 2.1109e-04, 2.1477e-04, 2.6944e-04, 3.9820e-04, 1.5343e-04, 4.4818e-04,\n 1.8502e-04, 2.2107e-04, 2.2129e-04, 3.2530e-04, 2.2187e-04, 1.5649e-04,\n 1.8035e-04, 1.1559e-04, 1.8505e-04, 2.5893e-04, 1.4283e-04, 3.5134e-04,\n 1.6573e-04, 2.8832e-04, 4.6051e-04, 3.6562e-04, 3.7670e-04, 5.0778e-04,\n 2.1131e-04, 2.6925e-04, 4.1702e-04, 4.7589e-04, 2.1776e-04, 2.3609e-04,\n 3.4175e-04, 1.9669e-04, 3.3058e-04, 1.4222e-04, 2.0927e-04, 3.1768e-04,\n 1.6273e-04, 1.0840e-04, 1.6161e-04, 2.2103e-04, 2.4471e-04, 2.1358e-04,\n 2.0798e-04, 1.6840e-04, 9.8385e-05, 3.3527e-04, 2.3886e-04, 3.1301e-04,\n 1.0704e-04, 2.7096e-04, 1.3891e-04, 2.5635e-04, 9.9660e-05, 9.5294e-05,\n 3.7148e-04, 3.3317e-04, 3.0181e-04, 1.9212e-04, 3.2447e-04, 1.3545e-04,\n 4.3869e-04, 1.5572e-04, 1.1438e-04, 4.2117e-04, 2.5886e-04, 1.1030e-04,\n 9.3535e-05, 2.0588e-04, 8.9457e-05, 2.2745e-04, 2.1348e-04, 4.4188e-04,\n 2.8165e-04, 1.3714e-04, 2.1480e-04, 1.1827e-04, 1.3887e-04, 2.2660e-04,\n 1.9564e-04, 2.1155e-04, 4.7949e-04, 2.2814e-04, 8.2333e-04, 1.4182e-04,\n 5.0413e-04, 1.6478e-04, 1.8832e-04, 8.7672e-05, 1.5100e-04, 2.7068e-04,\n 1.3031e-04, 1.1255e-04, 1.5164e-04, 9.6129e-05, 2.2397e-04, 1.2821e-04,\n 9.2289e-05, 1.0694e-04, 1.2418e-04, 1.8466e-04, 1.4171e-04, 1.0243e-04,\n 5.0619e-05, 1.9405e-04, 1.5796e-04, 1.3939e-04, 2.8517e-04, 9.4368e-05,\n 2.7555e-04, 2.5366e-04, 3.6621e-05, 1.3438e-04, 2.4737e-04, 2.1728e-04,\n 1.0376e-04, 6.5013e-04, 2.5281e-04, 4.6792e-04, 1.4836e-04, 1.4847e-04,\n 2.3727e-04, 1.6530e-04, 2.2093e-04, 3.0199e-04, 1.0996e-04, 1.2554e-04,\n 7.6197e-05, 1.6102e-04, 2.0052e-04, 1.3396e-04, 6.6454e-04, 3.1377e-04,\n 1.0081e-04, 1.6265e-04, 2.0048e-04, 3.2048e-11, 1.3179e-04, 3.3290e-04,\n 1.2766e-04, 3.0331e-04, 4.4841e-04, 2.3897e-04, 2.0021e-04, 3.0539e-04,\n 1.2562e-04, 1.3378e-04, 3.0037e-04, 1.5752e-04, 3.3420e-04, 1.3308e-04,\n 3.7881e-04, 2.4678e-04, 1.3836e-04, 2.3054e-04, 1.9158e-04, 1.5276e-04,\n 1.0843e-04, 1.9218e-04, 2.2435e-04, 2.3675e-04, 1.8673e-04, 1.5130e-04,\n 2.0021e-04, 2.8591e-04, 2.4038e-04, 2.1646e-04, 2.0213e-04, 3.5404e-04,\n 3.5891e-04, 3.0428e-04, 9.0909e-05, 2.2319e-04, 1.0382e-04, 1.5976e-04,\n 1.8360e-04, 2.2981e-04, 2.2906e-04, 2.3404e-04, 2.2885e-04, 1.0912e-04,\n 1.3323e-04, 1.7427e-04, 1.5580e-04, 2.3336e-04, 2.5801e-04, 1.1124e-04,\n 5.5518e-04, 2.1145e-04, 1.0475e-04, 7.5329e-05, 1.7290e-04, 8.5226e-05,\n 2.4425e-04, 1.4130e-04, 1.6768e-04, 1.4268e-04, 3.6893e-04, 2.5540e-04,\n 1.3201e-04, 1.3432e-04, 3.6413e-04, 1.5345e-04, 1.8519e-04, 1.9040e-04,\n 1.5658e-04, 1.9018e-04, 5.9695e-04, 4.8675e-04, 3.9915e-04, 2.3234e-04,\n 1.7149e-04, 2.1049e-04, 1.3834e-04, 3.1133e-04, 1.8733e-04, 1.2202e-04,\n 2.1556e-04, 2.3827e-04, 5.1518e-04, 3.6687e-04, 2.0332e-04, 2.8448e-04,\n 3.9478e-04, 3.2258e-04, 2.0190e-04, 3.3747e-04, 1.3547e-04, 4.8817e-04,\n 4.0230e-04, 2.8573e-04, 6.9516e-05, 1.5322e-04, 1.8374e-04, 1.8816e-04,\n 8.4244e-05, 1.4901e-04, 2.7661e-04, 2.3546e-04, 1.8477e-04, 1.3695e-04,\n 2.0509e-04, 3.4930e-04, 1.5501e-04, 2.5928e-04, 2.2874e-04, 1.2346e-04,\n 1.5850e-04, 2.2216e-04, 2.2930e-04, 2.2131e-04, 2.2130e-04, 2.0754e-04,\n 2.6446e-04, 1.5789e-04, 2.4411e-04, 1.3477e-04, 1.0070e-04, 1.4234e-04,\n 1.1564e-04, 4.9615e-04, 3.1736e-04, 4.7267e-04, 1.9014e-04, 2.0343e-04,\n 6.8647e-05, 3.8044e-04, 1.1199e-04, 1.4484e-04, 1.4401e-04, 2.1015e-04,\n 3.4311e-04, 1.6782e-04, 1.6219e-04, 2.1339e-04, 1.3030e-04, 1.3065e-04,\n 2.6489e-04, 1.0802e-04, 2.1934e-04, 1.7975e-04, 2.5698e-04, 2.1462e-04,\n 2.2926e-04, 3.2092e-04, 1.4395e-04, 1.6358e-04, 2.1899e-04, 1.4941e-04,\n 1.2955e-04, 1.2624e-04, 2.2768e-04, 5.0220e-04, 1.4303e-04, 8.7910e-05,\n 1.5594e-04, 4.3910e-04, 1.8147e-04, 3.4347e-04, 2.0106e-04, 1.2435e-04,\n 3.8714e-04, 1.6176e-04, 1.9125e-04, 1.3566e-04, 4.7114e-04, 5.4544e-05,\n 2.6478e-04, 2.6657e-04, 2.7403e-04, 5.8289e-05, 3.4184e-04, 2.2984e-04,\n 2.0452e-04, 7.1558e-04, 4.6086e-04, 3.2715e-04, 2.3345e-04, 1.4914e-04,\n 2.4232e-04, 1.6581e-04, 3.3839e-04, 2.8215e-04, 6.6430e-04, 2.4960e-04,\n 1.7759e-04, 1.8802e-04, 3.0002e-04, 1.6485e-04, 1.9074e-04, 1.3444e-04,\n 1.4994e-04, 2.3271e-04, 3.7425e-04, 2.4197e-04, 2.7208e-04, 4.5008e-04,\n 1.9906e-04, 9.6301e-05, 3.0340e-04, 2.0064e-04, 2.0199e-04, 6.3051e-04,\n 1.8014e-04, 2.7293e-04, 3.1895e-04, 1.5456e-04, 2.3271e-04, 2.4334e-04,\n 2.9947e-04, 1.2848e-04, 1.7348e-04, 4.0999e-04, 2.7097e-04, 1.2102e-04,\n 3.0127e-04, 1.6096e-04, 1.2705e-04, 1.5306e-04, 2.1939e-04, 1.4891e-04,\n 2.3515e-04, 1.5974e-04, 1.6633e-04, 1.5882e-04, 1.2844e-04, 2.3648e-04,\n 3.5494e-04, 2.9813e-04, 2.2767e-04, 3.1367e-04, 2.0925e-04, 1.4495e-04,\n 2.4468e-04, 2.3286e-04, 2.2082e-04, 2.2826e-04, 4.9995e-04, 1.4251e-04,\n 8.7079e-05, 6.0984e-04, 2.0547e-04, 1.3301e-04, 1.8499e-04, 1.1253e-04,\n 1.1166e-04, 2.0981e-04, 4.0934e-04, 1.4016e-04, 1.3493e-04, 1.1768e-04,\n 2.9510e-04, 1.7901e-04, 1.7215e-04, 2.9739e-04, 2.1035e-04, 1.9620e-04,\n 1.8706e-04, 2.2362e-04, 2.1424e-04, 1.5166e-04, 1.5938e-04, 1.8901e-04,\n 1.0030e-04, 2.5563e-04, 1.1370e-04, 1.7581e-04, 7.9439e-05, 1.0587e-04,\n 1.1844e-04, 8.3292e-09], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(15016.)",
|
| 22 |
+
"exp_avg": "tensor([-2.0698e-03, 2.9632e-03, -3.8541e-05, 1.0637e-03, -3.2439e-04,\n -8.4140e-04, 1.0203e-03, 3.1159e-03, 9.7895e-04, -1.8856e-03,\n 8.3208e-04, -4.8612e-04, 1.6989e-03, 3.9194e-03, -1.2032e-03,\n -1.4972e-03, -4.2205e-03, -1.5987e-03, 9.8194e-04, -6.2494e-03,\n 1.0973e-03, 7.9801e-04, 2.3829e-03, 1.1224e-03, -3.1001e-04,\n 1.3285e-03, 2.7362e-03, -8.9392e-04, -4.7228e-03, -3.8665e-03,\n -1.0193e-03, -7.3817e-04, 5.4778e-03, -2.4444e-04, 1.1633e-03,\n -2.7682e-03, -1.8078e-03, -3.1921e-05, -2.1344e-04, 9.4560e-04,\n -2.5866e-03, 8.8697e-04, 5.6052e-45, 2.4933e-05, -8.8972e-04,\n -2.3760e-03, 5.6747e-04, 2.4225e-04, 1.6716e-03, 2.1964e-03,\n -2.7783e-03, 2.1821e-03, 2.3138e-04, 2.2433e-03, 3.0717e-03,\n -5.5653e-04, 1.5559e-03, 2.5702e-03, 2.3117e-03, -2.0549e-03,\n -2.7753e-03, -2.5219e-03, -9.7980e-04, 8.6763e-04, -1.1095e-03,\n -1.7894e-03, 2.7371e-03, 2.1840e-04, -1.7131e-03, 5.2652e-04,\n -1.9625e-03, 4.1946e-04, 9.9923e-04, -7.9430e-04, 4.0027e-04,\n -8.5737e-04, -7.8120e-04, -3.9603e-04, 3.0117e-03, 8.8752e-04,\n -9.1996e-04, 2.1351e-04, 1.9321e-03, 2.8410e-04, -1.9683e-04,\n -1.8868e-03, -1.5493e-03, -2.2286e-03, -9.3125e-04, 1.8248e-03,\n -1.5176e-03, -1.0743e-03, -2.3497e-03, -5.6043e-04, 2.7266e-04,\n -7.5640e-04, 1.3806e-03, -2.7297e-03, -4.1297e-03, 2.0695e-03,\n -7.1099e-04, 7.2817e-05, 2.6633e-04, -2.1537e-03, 7.2657e-05,\n 1.5279e-03, 4.0240e-04, -3.6155e-04, -7.0659e-04, -5.4492e-04,\n -4.3317e-04, -3.1918e-03, -1.2100e-03, 6.0189e-04, -1.2133e-03,\n -5.5706e-04, 5.6436e-04, -1.6013e-03, 1.6129e-03, -2.0855e-03,\n 4.8163e-04, 5.3788e-03, -6.1728e-04, 4.6813e-03, -3.7189e-03,\n 8.5089e-04, 5.1322e-04, 2.6616e-04, 1.4415e-03, -5.7887e-03,\n -5.7221e-04, -1.4654e-03, 7.2235e-04, -1.4577e-03, 9.8850e-04,\n 6.4231e-04, -5.8401e-04, -3.8364e-03, -8.2267e-04, 3.3857e-04,\n 2.8839e-03, -8.2633e-04, 1.5293e-03, -3.1578e-03, 8.3685e-04,\n 2.6620e-04, 7.0487e-04, 7.4919e-04, 5.9221e-04, -2.9490e-04,\n 7.2542e-04, 8.8497e-04, 1.2994e-03, -1.8831e-03, -1.0734e-03,\n -9.5212e-04, -4.5419e-04, -1.2566e-03, -4.0092e-04, 7.9263e-04,\n 2.1846e-03, 3.4387e-04, -7.6400e-04, -2.7640e-04, -1.1733e-03,\n 1.8009e-03, -4.7337e-03, -1.5934e-03, -8.8684e-04, 1.0058e-03,\n 4.8996e-04, -7.5927e-04, 2.6547e-03, -2.7032e-03, -3.9375e-03,\n 4.5017e-04, 5.0078e-04, 1.4185e-03, -7.7423e-04, -5.7445e-04,\n -2.0406e-03, -3.3753e-04, 1.3387e-03, 2.1672e-03, 1.4495e-03,\n 1.3221e-04, -1.9876e-03, -3.7896e-04, -4.3414e-04, -4.8127e-04,\n 8.3337e-04, 1.3416e-03, 8.3943e-04, -7.6806e-04, 3.9254e-04,\n -2.7521e-05, -9.6314e-05, -2.1457e-03, -3.7129e-04, -5.7248e-04,\n 1.3937e-03, 1.1016e-03, 1.5827e-03, -2.5115e-03, 5.0894e-04,\n -1.8394e-03, 9.8924e-05, 3.2746e-03, 6.0522e-05, -1.2286e-03,\n 1.6873e-03, -3.9807e-04, 4.1465e-04, -4.5752e-03, 4.7381e-03,\n 1.4728e-03, -1.3039e-04, 6.2985e-04, 7.6929e-04, 1.6588e-03,\n -4.0620e-04, -3.1200e-04, 1.3504e-03, -2.0414e-03, -5.1215e-04,\n -3.4201e-04, -1.0308e-03, -7.4914e-04, 4.2067e-04, 5.0222e-04,\n 6.6531e-04, 6.6174e-04, -1.6327e-03, -1.4993e-03, 1.8261e-03,\n -1.2337e-03, -1.0732e-03, 5.6052e-45, 1.0460e-03, -1.3133e-03,\n 9.1228e-04, 2.2303e-03, -2.8226e-03, 3.6978e-03, -1.9400e-03,\n 3.0984e-03, 4.7141e-04, -6.2373e-04, -4.0217e-03, 2.1021e-03,\n -9.1957e-04, -5.1308e-04, 5.1605e-04, 1.0908e-03, -5.3354e-04,\n 6.0837e-04, -2.4793e-03, -1.0209e-03, 1.4029e-03, -1.2908e-04,\n 2.0753e-03, -1.3252e-04, -2.0174e-03, -1.8960e-04, 2.0553e-04,\n -1.2922e-03, -2.7625e-04, 1.6620e-03, -6.1064e-04, 3.8333e-03,\n -1.2546e-03, 2.2547e-03, -4.9707e-03, -7.9100e-05, 1.6888e-03,\n 7.9893e-04, -1.4186e-03, -1.8042e-03, -1.5693e-03, 7.7379e-05,\n -3.0140e-03, 2.3612e-03, 3.3914e-04, -1.0158e-03, 7.3366e-04,\n -1.5920e-04, 1.7048e-03, 2.5043e-04, -3.4614e-03, -1.2978e-03,\n -6.9258e-05, 4.7968e-04, -5.9078e-04, 3.1536e-04, -4.1729e-04,\n -3.4653e-04, -7.0051e-04, -2.0606e-03, -6.3733e-04, 4.1203e-03,\n 2.1635e-05, -9.6827e-04, 1.1850e-03, 5.9021e-04, -6.9351e-05,\n 3.4440e-04, 3.7206e-03, -8.6473e-04, 2.0855e-03, -2.5811e-03,\n -2.2295e-03, -8.3596e-04, -2.0495e-04, 1.1869e-03, 1.4556e-03,\n -1.3076e-03, 1.6696e-04, -4.3883e-04, -1.1126e-03, 2.2225e-03,\n 1.4010e-03, 1.1897e-04, -5.3892e-04, 1.0458e-03, -6.7365e-04,\n 1.1147e-03, -2.5300e-03, -3.2160e-03, 1.7311e-03, -2.8606e-04,\n -9.2429e-04, 5.8857e-04, -6.6576e-05, 3.3581e-03, -9.1135e-04,\n 3.2346e-03, 3.9202e-04, 1.4872e-03, -1.5558e-03, 5.1421e-04,\n 3.1482e-04, 1.2039e-03, 5.3724e-04, 2.0672e-03, -4.3509e-03,\n -2.2271e-03, 2.9220e-03, -2.5670e-03, -1.0393e-03, 3.8472e-03,\n 1.9627e-04, -2.0170e-03, -2.5340e-03, -9.7330e-04, -6.6122e-04,\n -2.5624e-04, -4.2938e-04, 1.5206e-03, -3.5351e-04, 1.4469e-03,\n -1.3794e-03, -9.0226e-04, -1.5984e-03, 1.5821e-03, 1.7093e-03,\n 1.0475e-03, -5.6390e-04, 9.6017e-04, 1.2666e-03, -3.3415e-04,\n -2.2750e-05, 8.1307e-04, -9.0422e-04, 2.9042e-03, -3.2418e-03,\n -2.2294e-04, 1.0344e-04, 4.0828e-04, 1.5230e-04, -8.4412e-04,\n -2.5643e-03, 3.3601e-03, 4.8341e-04, 1.4502e-03, 6.3792e-04,\n 1.2170e-03, 1.6173e-03, -1.4439e-03, -2.0111e-03, -1.7366e-04,\n -1.9485e-03, -7.8024e-04, 2.9328e-03, -1.6489e-04, 2.6366e-04,\n -9.7572e-04, 4.1601e-04, 2.1870e-03, -1.6171e-03, 2.7651e-03,\n -2.6048e-03, 6.1553e-04, 1.4432e-03, 1.3610e-03, 1.3505e-03,\n -9.6572e-04, -1.6452e-03, 2.0723e-03, 8.8164e-04, -1.5386e-03,\n 2.8307e-04, 6.0589e-04, -2.7497e-03, 8.3386e-04, -9.0276e-04,\n 1.8149e-03, 8.6498e-04, 1.9529e-03, 6.1177e-04, -2.7407e-04,\n 4.5803e-04, 1.0552e-03, -6.2377e-04, 5.5464e-04, 8.3487e-04,\n -2.0623e-03, 1.5789e-03, -1.9297e-03, 7.5778e-04, -1.1338e-04,\n 5.9373e-05, -5.0358e-04, 1.9229e-03, 2.8193e-03, 3.2328e-04,\n -3.5838e-03, -1.5041e-03, 3.2318e-03, -3.8816e-04, 7.2938e-04,\n 7.0554e-04, -2.1380e-03, -1.1950e-04, -2.5374e-03, 1.5605e-03,\n 3.5401e-04, -9.4692e-04, 2.8534e-03, 2.8918e-04, -1.5864e-03,\n 9.5094e-05, 3.3475e-03, -1.5670e-04, 2.4415e-03, 2.5545e-03,\n -5.4545e-04, -5.2617e-03, 1.2376e-04, -2.5024e-04, -6.6515e-04,\n 6.6383e-04, 4.4029e-04, 2.1504e-03, -3.8365e-04, 5.3654e-04,\n -2.3488e-03, -5.0459e-05, 1.2248e-03, 1.5910e-03, 1.6348e-03,\n 2.0154e-03, -4.0771e-03, -6.8432e-04, -1.7384e-03, -2.5964e-03,\n 1.6254e-03, -3.4793e-03, 2.1521e-03, 8.4308e-04, 6.6362e-04,\n -8.6735e-04, -2.3480e-03, 3.2126e-03, 1.2563e-03, 4.8143e-04,\n -9.5493e-04, 7.2565e-04, 3.1871e-04, 4.8902e-03, 7.2522e-04,\n -1.6270e-03, 5.1281e-04, 8.2568e-04, 2.7532e-03, 1.6309e-03,\n -2.8279e-04, 3.4892e-03, 6.9666e-04, 3.8746e-03, -1.8144e-03,\n -1.8364e-03, -9.1023e-05, 3.8837e-04, -1.9519e-03, 1.0690e-03,\n -1.8535e-03, -3.4869e-03, 1.7648e-04, 8.6594e-04, 1.1200e-03,\n 4.7510e-04, 5.6052e-45], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([7.5063e-05, 5.0118e-05, 3.7116e-05, 4.0730e-05, 2.9939e-05, 7.4788e-05,\n 4.2342e-05, 8.7616e-05, 6.2211e-05, 7.7381e-05, 8.7977e-05, 8.1905e-05,\n 1.1240e-04, 4.6987e-05, 6.4491e-05, 5.9717e-05, 4.4588e-05, 5.9191e-05,\n 4.8402e-05, 7.1607e-05, 6.9163e-05, 3.6617e-05, 3.2519e-05, 6.6067e-05,\n 1.5826e-04, 6.9758e-05, 4.4563e-05, 8.7067e-05, 6.5268e-05, 5.9634e-05,\n 3.6686e-05, 4.3727e-05, 7.2391e-05, 5.0064e-05, 3.5120e-05, 4.8974e-05,\n 3.6866e-05, 3.2123e-05, 3.5768e-05, 3.0802e-05, 3.0443e-05, 2.8785e-05,\n 4.0411e-12, 8.2453e-05, 1.3863e-04, 4.0161e-05, 5.7480e-05, 1.5943e-05,\n 8.5994e-05, 1.1334e-04, 1.2253e-04, 2.9863e-05, 3.6264e-05, 5.3559e-05,\n 7.9834e-05, 3.9719e-05, 5.9683e-05, 5.5094e-05, 4.7299e-05, 2.8947e-05,\n 8.8645e-05, 3.7811e-05, 5.7449e-05, 3.8916e-05, 3.5761e-05, 3.8607e-05,\n 5.4496e-05, 3.3187e-05, 5.7240e-05, 3.9947e-05, 5.8673e-05, 5.8965e-05,\n 4.2512e-05, 4.7837e-05, 4.3652e-05, 4.6553e-05, 4.4725e-05, 4.4520e-05,\n 6.9728e-05, 7.5693e-05, 6.3110e-05, 4.3444e-05, 4.0420e-05, 2.4252e-05,\n 4.6289e-05, 7.4145e-05, 7.5478e-05, 4.7701e-05, 2.6809e-05, 8.2851e-05,\n 4.4084e-05, 5.8768e-05, 7.7577e-05, 5.9214e-05, 7.2280e-05, 5.1530e-05,\n 7.9891e-05, 5.0276e-05, 6.2839e-05, 4.9635e-05, 4.6646e-05, 6.8613e-05,\n 6.8683e-05, 5.2295e-05, 7.8827e-05, 1.1299e-04, 4.6122e-05, 1.5575e-04,\n 4.3651e-05, 4.5907e-05, 7.1533e-05, 7.0953e-05, 5.2412e-05, 5.1293e-05,\n 5.9562e-05, 3.8549e-05, 6.1453e-05, 7.8999e-05, 3.4048e-05, 7.5879e-05,\n 2.8280e-05, 7.3041e-05, 7.4350e-05, 1.1400e-04, 6.6496e-05, 8.5429e-05,\n 4.7730e-05, 7.2226e-05, 8.8128e-05, 1.3913e-04, 6.6743e-05, 4.2839e-05,\n 7.4045e-05, 4.7676e-05, 1.5592e-04, 3.9264e-05, 6.6012e-05, 6.4786e-05,\n 4.0134e-05, 3.3699e-05, 3.2883e-05, 5.5779e-05, 5.0576e-05, 9.0290e-05,\n 5.2625e-05, 5.0423e-05, 2.8138e-05, 7.9277e-05, 5.7549e-05, 1.1509e-04,\n 2.8836e-05, 7.2511e-05, 3.4857e-05, 8.2304e-05, 3.1018e-05, 4.1797e-05,\n 8.7846e-05, 6.8434e-05, 4.9665e-05, 5.3479e-05, 7.0655e-05, 2.4221e-05,\n 8.9669e-05, 4.8202e-05, 2.1805e-05, 1.0738e-04, 5.3922e-05, 2.8486e-05,\n 1.8350e-05, 5.5512e-05, 3.2033e-05, 6.8572e-05, 4.9232e-05, 1.1052e-04,\n 6.1798e-05, 2.6545e-05, 1.0045e-04, 3.1705e-05, 2.8117e-05, 7.7675e-05,\n 4.9593e-05, 4.5674e-05, 1.2761e-04, 5.4827e-05, 1.1256e-04, 4.2123e-05,\n 1.5746e-04, 4.6915e-05, 4.9792e-05, 1.9247e-05, 4.3192e-05, 6.2088e-05,\n 4.8430e-05, 4.3248e-05, 3.2156e-05, 2.2281e-05, 7.4154e-05, 3.6954e-05,\n 2.0083e-05, 2.5954e-05, 3.5940e-05, 5.3539e-05, 4.4445e-05, 2.7355e-05,\n 2.4695e-05, 4.6953e-05, 3.5006e-05, 3.1311e-05, 7.1988e-05, 2.8489e-05,\n 5.8765e-05, 7.7898e-05, 2.6446e-05, 3.9245e-05, 6.2222e-05, 3.3933e-05,\n 3.0907e-05, 6.4031e-05, 5.4410e-05, 9.0196e-05, 3.3336e-05, 3.5551e-05,\n 5.1316e-05, 3.0009e-05, 4.7368e-05, 8.6437e-05, 3.1002e-05, 2.8865e-05,\n 2.3493e-05, 4.4992e-05, 4.2009e-05, 3.5059e-05, 1.1842e-04, 6.5512e-05,\n 3.3070e-05, 4.7629e-05, 4.5540e-05, 1.1222e-12, 4.0556e-05, 1.1757e-04,\n 3.8764e-05, 9.6322e-05, 6.5268e-05, 7.1363e-05, 9.9162e-05, 6.8901e-05,\n 4.9794e-05, 3.9280e-05, 7.2223e-05, 4.0804e-05, 1.0287e-04, 3.2542e-05,\n 7.7413e-05, 7.2885e-05, 3.5973e-05, 5.7374e-05, 6.3346e-05, 5.7053e-05,\n 4.4549e-05, 5.1252e-05, 4.6045e-05, 6.8863e-05, 4.7184e-05, 5.9963e-05,\n 5.0030e-05, 6.1043e-05, 3.9680e-05, 5.9801e-05, 4.2500e-05, 7.2130e-05,\n 7.1770e-05, 6.3406e-05, 3.2498e-05, 5.5373e-05, 3.0779e-05, 4.6314e-05,\n 5.3817e-05, 8.1381e-05, 4.9947e-05, 4.8441e-05, 5.9329e-05, 3.3553e-05,\n 3.4580e-05, 6.1237e-05, 3.6859e-05, 6.1834e-05, 5.6411e-05, 2.9889e-05,\n 1.1492e-04, 5.4260e-05, 2.8415e-05, 2.8381e-05, 5.7384e-05, 2.7463e-05,\n 5.0266e-05, 3.0035e-05, 4.6975e-05, 5.6400e-05, 1.6202e-04, 6.1731e-05,\n 3.5794e-05, 3.3317e-05, 6.2078e-05, 4.1177e-05, 7.0061e-05, 3.5253e-05,\n 3.9935e-05, 5.7119e-05, 1.2950e-04, 7.8509e-05, 7.9792e-05, 5.4692e-05,\n 3.4310e-05, 3.5511e-05, 4.8893e-05, 5.1593e-05, 4.9269e-05, 2.6300e-05,\n 4.4222e-05, 7.7345e-05, 1.5764e-04, 9.2705e-05, 5.9907e-05, 7.0868e-05,\n 9.4160e-05, 1.0703e-04, 5.9710e-05, 9.3497e-05, 4.5778e-05, 1.3718e-04,\n 1.1304e-04, 6.4054e-05, 1.6180e-05, 4.1545e-05, 4.1745e-05, 4.9890e-05,\n 3.5661e-05, 2.7400e-05, 8.2298e-05, 5.2561e-05, 5.1443e-05, 3.8009e-05,\n 4.8503e-05, 8.9324e-05, 6.4933e-05, 9.5627e-05, 7.9542e-05, 3.6398e-05,\n 5.3881e-05, 7.1737e-05, 5.3651e-05, 5.1417e-05, 6.5135e-05, 3.6452e-05,\n 5.8567e-05, 4.1068e-05, 5.3894e-05, 3.1316e-05, 3.2275e-05, 2.9633e-05,\n 2.5750e-05, 1.3444e-04, 1.0748e-04, 8.1882e-05, 5.0514e-05, 4.5723e-05,\n 2.3340e-05, 8.2819e-05, 2.8850e-05, 3.0701e-05, 3.3886e-05, 5.4958e-05,\n 6.2998e-05, 5.3031e-05, 6.2614e-05, 4.6225e-05, 3.4312e-05, 2.8701e-05,\n 4.9671e-05, 3.3279e-05, 4.9138e-05, 5.3014e-05, 6.9202e-05, 3.7531e-05,\n 5.8764e-05, 1.3034e-04, 4.1029e-05, 5.5770e-05, 5.0254e-05, 4.7487e-05,\n 4.7637e-05, 4.0011e-05, 6.3104e-05, 8.4194e-05, 4.4124e-05, 2.3763e-05,\n 2.8494e-05, 8.4725e-05, 5.9289e-05, 9.8478e-05, 4.8509e-05, 3.2164e-05,\n 8.3664e-05, 3.6280e-05, 5.0724e-05, 4.4049e-05, 1.0415e-04, 1.8487e-05,\n 6.2910e-05, 8.2645e-05, 6.5310e-05, 3.5773e-05, 1.0344e-04, 9.3767e-05,\n 4.2377e-05, 5.9356e-05, 1.0729e-04, 8.2754e-05, 6.3416e-05, 5.7448e-05,\n 6.5475e-05, 3.7429e-05, 7.3715e-05, 6.6487e-05, 9.4268e-05, 5.3491e-05,\n 6.9627e-05, 4.6719e-05, 5.8085e-05, 4.1634e-05, 4.2710e-05, 3.7757e-05,\n 5.2753e-05, 6.0489e-05, 6.0177e-05, 5.6579e-05, 6.6921e-05, 8.3564e-05,\n 4.7465e-05, 3.0861e-05, 7.2649e-05, 6.1883e-05, 5.0618e-05, 2.4526e-04,\n 6.5074e-05, 5.2563e-05, 6.9155e-05, 5.2239e-05, 7.0772e-05, 5.5335e-05,\n 5.8006e-05, 3.8923e-05, 5.0197e-05, 1.3035e-04, 6.6794e-05, 2.5591e-05,\n 1.1256e-04, 4.3462e-05, 2.8619e-05, 4.7781e-05, 5.8115e-05, 4.3858e-05,\n 4.7840e-05, 4.7897e-05, 5.4021e-05, 3.9497e-05, 3.4499e-05, 6.1777e-05,\n 6.9185e-05, 6.9981e-05, 5.4334e-05, 1.4137e-04, 6.4348e-05, 6.5196e-05,\n 7.6647e-05, 5.4671e-05, 6.2043e-05, 5.2826e-05, 7.8273e-05, 3.3240e-05,\n 2.9149e-05, 7.7048e-05, 5.9467e-05, 3.6690e-05, 4.3462e-05, 5.0825e-05,\n 4.7159e-05, 4.8570e-05, 1.0005e-04, 5.0125e-05, 4.0187e-05, 4.0040e-05,\n 6.1845e-05, 5.3359e-05, 3.6871e-05, 5.2010e-05, 5.6462e-05, 4.7500e-05,\n 6.1954e-05, 5.1761e-05, 5.3324e-05, 3.5606e-05, 4.3536e-05, 5.0068e-05,\n 3.1401e-05, 5.2671e-05, 3.6151e-05, 5.4025e-05, 4.0031e-05, 5.2217e-05,\n 3.8021e-05, 4.2126e-11], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(15016.)",
|
| 27 |
+
"exp_avg": "tensor([[ 3.1772e-06, 1.2786e-05, -3.2297e-06, ..., 1.9808e-05,\n 6.2462e-06, 5.6052e-45],\n [ 1.7134e-06, -1.4341e-06, 7.3615e-06, ..., -1.5843e-06,\n 1.7675e-06, 5.6052e-45],\n [ 6.4555e-07, 5.3137e-06, -9.6408e-06, ..., 8.2882e-06,\n -2.2125e-06, -5.6052e-45],\n ...,\n [-1.4182e-05, 2.7694e-05, -4.2474e-06, ..., 2.7486e-05,\n -1.7100e-05, 5.6052e-45],\n [ 4.5695e-06, -1.1768e-06, -3.8450e-06, ..., 6.3789e-06,\n 2.8424e-06, 5.6052e-45],\n [ 5.9524e-06, -4.4862e-06, -8.5787e-06, ..., 5.2917e-08,\n -8.2085e-06, -5.6052e-45]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[1.1665e-09, 2.3408e-09, 9.4141e-10, ..., 2.0722e-09, 7.4880e-10,\n 8.0059e-14],\n [2.0300e-09, 4.1552e-09, 2.3215e-09, ..., 6.0247e-09, 3.5091e-09,\n 3.5652e-13],\n [1.5510e-09, 2.4339e-09, 2.2780e-09, ..., 4.6661e-09, 1.1482e-09,\n 4.7879e-13],\n ...,\n [2.5512e-09, 5.4049e-09, 1.9224e-09, ..., 6.9844e-09, 1.5428e-09,\n 1.6013e-13],\n [2.9149e-09, 7.4968e-09, 2.0093e-09, ..., 4.3726e-09, 3.2103e-09,\n 8.6325e-13],\n [3.4116e-09, 3.0886e-09, 1.7748e-09, ..., 4.8097e-09, 2.5420e-09,\n 2.6320e-13]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(7508.)",
|
| 32 |
+
"exp_avg": "tensor([[-2.4757e-06, 1.0314e-05, -7.4739e-06, ..., 1.2806e-05,\n 8.3040e-06, 5.6052e-45],\n [-1.6064e-06, 1.2875e-07, 1.0187e-05, ..., -7.5642e-06,\n -4.2836e-06, 5.6052e-45],\n [-5.1586e-07, 1.3371e-05, -6.9807e-06, ..., 1.1380e-05,\n -7.6650e-06, -5.6052e-45],\n ...,\n [-4.4638e-06, -1.7454e-05, -1.5209e-05, ..., 2.8226e-05,\n 1.8409e-05, 5.6052e-45],\n [ 4.0809e-06, -4.6805e-05, -1.3270e-05, ..., 7.1289e-06,\n 3.8600e-06, -5.6052e-45],\n [-1.2300e-05, 1.1340e-05, 1.0589e-07, ..., -6.7282e-07,\n 2.2209e-06, -5.6052e-45]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[2.0019e-09, 2.4423e-09, 9.2098e-10, ..., 1.9795e-09, 1.4260e-09,\n 1.8237e-13],\n [1.7068e-09, 5.4847e-09, 2.8636e-09, ..., 9.5888e-09, 4.0001e-09,\n 2.7790e-12],\n [1.9990e-09, 3.8111e-09, 2.5465e-09, ..., 7.6051e-09, 2.0817e-09,\n 7.8447e-13],\n ...,\n [4.4811e-09, 3.5700e-09, 4.1166e-09, ..., 1.3493e-08, 2.5356e-09,\n 7.6686e-15],\n [2.2806e-09, 4.9229e-09, 2.1121e-09, ..., 4.4970e-09, 2.2614e-09,\n 4.3791e-12],\n [4.1609e-09, 3.2601e-09, 1.9756e-09, ..., 5.0153e-09, 3.7011e-09,\n 2.8604e-14]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(7508.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0004, 0.0004], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([1.1692e-05, 1.1692e-05], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.00654543046337755,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.00654543046337755,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.00654543046337755,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.0032728879774401812,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 4,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 4,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.00654543046337755,
|
| 149 |
+
0.00654543046337755,
|
| 150 |
+
0.00654543046337755,
|
| 151 |
+
0.0032728879774401812
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 64.73133333333334,
|
| 156 |
+
"best_epoch": 3,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 64.34866666666667,
|
| 159 |
+
"512": 64.522
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4
|
| 167 |
+
],
|
| 168 |
+
"train_loss": [
|
| 169 |
+
3.9435249049420933,
|
| 170 |
+
3.3040703793567867,
|
| 171 |
+
4.3101251841734625,
|
| 172 |
+
4.185147669827233
|
| 173 |
+
],
|
| 174 |
+
"train_acc": [
|
| 175 |
+
54.38726307083047,
|
| 176 |
+
59.31631083223343,
|
| 177 |
+
60.291879721118846,
|
| 178 |
+
61.30111583163371
|
| 179 |
+
],
|
| 180 |
+
"val_acc": [
|
| 181 |
+
61.635333333333335,
|
| 182 |
+
62.978,
|
| 183 |
+
64.12,
|
| 184 |
+
64.73133333333334
|
| 185 |
+
],
|
| 186 |
+
"scale_accs": {
|
| 187 |
+
"256": [
|
| 188 |
+
61.635333333333335,
|
| 189 |
+
62.978,
|
| 190 |
+
63.782,
|
| 191 |
+
64.34866666666667
|
| 192 |
+
],
|
| 193 |
+
"512": [
|
| 194 |
+
63.839333333333336,
|
| 195 |
+
64.522
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
"lr": [
|
| 199 |
+
0.00975530705321762,
|
| 200 |
+
0.00904518046337755,
|
| 201 |
+
0.00793913236883622,
|
| 202 |
+
0.00654543046337755
|
| 203 |
+
]
|
| 204 |
+
}
|
| 205 |
+
},
|
| 206 |
+
"train_config": {
|
| 207 |
+
"name": "david_training",
|
| 208 |
+
"run_id": "20251012_235237",
|
| 209 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 210 |
+
"model_variant": [
|
| 211 |
+
"clip_vit_b16",
|
| 212 |
+
"clip_vit_laion_b32",
|
| 213 |
+
"clip_vit_b32"
|
| 214 |
+
],
|
| 215 |
+
"num_classes": 1000,
|
| 216 |
+
"preset": "small_fast",
|
| 217 |
+
"custom_config_path": null,
|
| 218 |
+
"num_classes_override": null,
|
| 219 |
+
"use_belly_override": null,
|
| 220 |
+
"belly_expand_override": null,
|
| 221 |
+
"progressive_training_override": true,
|
| 222 |
+
"scale_warmup_epochs_override": {
|
| 223 |
+
"256": 0,
|
| 224 |
+
"512": 2
|
| 225 |
+
},
|
| 226 |
+
"num_epochs": 10,
|
| 227 |
+
"batch_size": 1024,
|
| 228 |
+
"learning_rate": 0.01,
|
| 229 |
+
"weight_decay": 1e-05,
|
| 230 |
+
"warmup_epochs": 3,
|
| 231 |
+
"use_rose_loss": true,
|
| 232 |
+
"rose_initial_weight": 0.1,
|
| 233 |
+
"rose_max_weight": 0.8,
|
| 234 |
+
"rose_weight_schedule": "adaptive",
|
| 235 |
+
"use_cayley_loss": false,
|
| 236 |
+
"cayley_weight": 0.01,
|
| 237 |
+
"scale_loss_balance": null,
|
| 238 |
+
"use_mixed_precision": false,
|
| 239 |
+
"gradient_clip": 15.0,
|
| 240 |
+
"scheduler_type": "cosine_restarts",
|
| 241 |
+
"min_lr": 1e-06,
|
| 242 |
+
"freeze_strategy": "never",
|
| 243 |
+
"freeze_threshold": 90.0,
|
| 244 |
+
"unfreeze_on_plateau": true,
|
| 245 |
+
"patience": 10,
|
| 246 |
+
"track_gradients": true,
|
| 247 |
+
"gradient_scale_threshold": 1e-05,
|
| 248 |
+
"gradient_scale_multiplier": 10.0,
|
| 249 |
+
"log_interval": 50,
|
| 250 |
+
"val_interval": 1,
|
| 251 |
+
"save_interval": 5,
|
| 252 |
+
"log_fusion_weights": true,
|
| 253 |
+
"log_loss_components": true,
|
| 254 |
+
"save_format": "safetensors",
|
| 255 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 256 |
+
"upload_to_hub": true,
|
| 257 |
+
"base_dir": "./david_training",
|
| 258 |
+
"num_workers": 10,
|
| 259 |
+
"pin_memory": true,
|
| 260 |
+
"prefetch_factor": 4,
|
| 261 |
+
"persistent_workers": true
|
| 262 |
+
}
|
| 263 |
+
}
|