AbstractPhil's picture
Update best_model_acc74.73_metadata.json - Run 20251012_194945
2f52dab verified
{
"epoch": 3,
"optimizer_state_dict": {
"state": {
"0": {
"step": "tensor(10012.)",
"exp_avg": "tensor([[-3.7641e-06, -5.1728e-05, 5.7122e-07, ..., 2.0874e-05,\n -1.2036e-05, 2.3336e-05],\n [-1.2392e-05, 4.6187e-05, -7.8925e-05, ..., 3.4238e-05,\n -7.9588e-06, 3.3513e-05],\n [ 2.9951e-05, 1.9398e-05, -3.4095e-06, ..., -1.1654e-05,\n 1.9361e-05, 1.3784e-05],\n ...,\n [ 5.0621e-05, -8.6231e-05, -3.3662e-05, ..., 5.7095e-05,\n 5.6111e-05, 2.9641e-05],\n [-3.1227e-05, 8.1676e-05, -3.6620e-05, ..., -6.2150e-06,\n -2.7740e-05, -2.0728e-05],\n [ 4.7761e-05, 6.3250e-06, 1.7746e-05, ..., -5.9511e-06,\n 2.9054e-05, -3.4172e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.2736e-08, 9.0446e-08, 3.6081e-08, ..., 1.0066e-08, 5.9217e-09,\n 9.7002e-09],\n [2.2826e-08, 4.8268e-08, 2.3595e-08, ..., 1.0866e-08, 9.2255e-09,\n 9.3819e-09],\n [1.0563e-08, 8.0615e-08, 2.9189e-08, ..., 6.5718e-09, 4.8137e-09,\n 5.8897e-09],\n ...,\n [5.9965e-08, 5.9644e-08, 8.5602e-09, ..., 7.6014e-09, 1.4492e-08,\n 8.1896e-09],\n [1.5989e-08, 4.0738e-08, 2.2158e-08, ..., 6.0039e-09, 5.7252e-09,\n 4.9129e-09],\n [6.3336e-08, 5.1285e-08, 1.0793e-08, ..., 1.1482e-08, 1.8137e-08,\n 1.0059e-08]], device='cuda:0')"
},
"1": {
"step": "tensor(10012.)",
"exp_avg": "tensor([-1.1451e-03, 6.9451e-04, 4.8177e-04, -3.7464e-04, 3.9402e-04,\n -2.6834e-04, 5.6052e-45, 6.7900e-04, -5.8752e-04, 4.0954e-04,\n -5.4298e-05, 2.0049e-03, -3.6309e-04, -5.7477e-04, 3.4176e-04,\n 8.9293e-04, -5.4441e-04, -4.5223e-04, 1.6841e-04, -2.6340e-04,\n -1.5536e-04, -1.2385e-03, -9.4912e-04, 1.1975e-03, -2.2359e-04,\n -8.8528e-04, -1.2915e-03, -1.2825e-05, 1.0977e-03, 1.5715e-03,\n -1.5443e-04, -1.1547e-04, -1.0588e-03, 7.0614e-04, -7.1672e-04,\n 1.7077e-04, -1.5715e-03, 2.1451e-04, -7.5934e-05, 7.0358e-04,\n -3.3957e-04, -4.0263e-04, -4.4656e-04, 2.4487e-05, 3.7829e-04,\n 6.0429e-04, -4.4417e-04, 6.7227e-04, 3.2396e-04, -9.4684e-04,\n 4.9633e-04, 5.1935e-04, -2.0585e-03, -2.4121e-04, 3.8798e-04,\n -5.9186e-04, 1.3759e-03, 7.2398e-05, 4.5881e-04, -9.6293e-04,\n 2.4296e-04, 8.2516e-04, 9.5735e-05, 5.5313e-04, -4.2228e-04,\n -9.2123e-04, -3.0434e-04, 4.8817e-04, 7.0368e-04, -1.6389e-03,\n -8.5298e-04, 9.0736e-04, -1.2085e-03, -4.8357e-04, -2.4845e-04,\n 1.0364e-03, 5.8717e-05, 2.6810e-04, -9.0021e-04, 1.2136e-04,\n -4.0103e-04, 1.7773e-03, -5.1270e-04, -5.5415e-04, -6.0617e-04,\n -5.8404e-05, 9.2119e-04, -5.1254e-04, -1.8617e-03, -5.6477e-05,\n 1.5242e-03, 2.4273e-03, 1.1593e-03, -6.1760e-04, -3.3929e-04,\n -5.1371e-04, -4.1287e-04, -1.7192e-04, -8.9166e-04, 8.9654e-04,\n 2.6662e-04, -2.8103e-04, 9.4090e-05, -7.4372e-04, -1.5560e-03,\n -6.6590e-04, 1.2832e-03, 4.0066e-18, 8.5097e-04, 5.6052e-45,\n -8.5773e-04, -1.1950e-03, 1.7571e-03, 1.2730e-03, 5.0667e-04,\n 5.3363e-05, 6.5947e-04, 8.5765e-04, 1.2211e-03, 3.6985e-04,\n 5.0328e-04, 1.7847e-04, -2.0027e-04, -7.6111e-04, 3.0230e-04,\n 2.0906e-03, 8.5261e-04, 8.1259e-04, -8.9142e-05, 1.1257e-03,\n -2.8858e-04, 7.4646e-04, 2.7096e-04, -1.0692e-03, -6.4136e-04,\n -1.7302e-03, -1.3879e-04, 1.4966e-03, -8.6441e-04, 1.3040e-04,\n -8.0956e-04, -8.0541e-05, -1.4600e-03, -8.8697e-04, 4.0350e-04,\n -5.4919e-04, 1.3881e-03, -4.4837e-04, 5.5685e-04, 5.2212e-04,\n -9.2181e-04, -3.7057e-04, -1.0138e-03, -2.4642e-04, 9.6518e-06,\n -8.6976e-04, 1.2066e-03, -4.3746e-04, -6.6206e-04, 5.4364e-04,\n 2.4945e-04, -4.9993e-04, 1.2455e-04, -5.3948e-04, 4.8769e-05,\n -6.5464e-04, -5.7580e-04, -1.1918e-03, -8.2890e-05, 2.1126e-04,\n 8.4140e-04, -2.7515e-04, 1.1742e-03, -4.9654e-04, -1.5066e-03,\n -2.4652e-04, -5.8141e-04, -1.9680e-03, -2.4486e-04, 3.4281e-04,\n 7.7364e-04, 3.8486e-04, 2.3448e-04, 6.5624e-04, -1.6770e-03,\n -9.4887e-04, -5.8213e-04, -1.9040e-03, 1.1485e-04, 3.7748e-04,\n 1.1556e-03, 1.7102e-03, 4.7753e-04, -1.0768e-04, 4.5646e-04,\n -1.3100e-04, 9.7533e-05, -8.0243e-04, -4.8223e-04, -1.0058e-03,\n -8.0044e-04, -1.0991e-03, 8.6852e-04, 7.6079e-04, -2.2804e-03,\n -7.7952e-04, 1.9034e-03, 1.5958e-03, -3.8376e-04, 8.4223e-04,\n 1.0976e-03, 5.9817e-04, 1.1589e-03, 7.2499e-04, -6.2796e-04,\n 1.1846e-03, 4.0175e-04, -1.9446e-03, -3.0935e-04, -2.5535e-04,\n 1.5394e-04, -7.3942e-04, 7.7543e-04, -3.0723e-03, 5.5453e-04,\n -1.9619e-04, -7.1793e-04, 4.4095e-04, -5.7579e-04, -1.0535e-03,\n 5.2357e-04, 5.1323e-06, -1.2357e-04, -2.7611e-04, -4.2245e-05,\n 1.1760e-03, -1.6450e-03, -4.0199e-04, -2.3877e-03, -1.2225e-03,\n -1.0995e-03, -4.4524e-05, 1.1555e-04, -5.0741e-04, 8.0639e-04,\n -1.5651e-03, 3.8853e-04, 7.6282e-04, 2.0855e-03, -3.9915e-04,\n -1.7110e-04, -6.7637e-04, 1.4803e-04, -2.0245e-03, 1.2788e-03,\n -1.8838e-03, 8.3051e-04, -1.9781e-03, -1.0442e-03, -2.3641e-04,\n -1.0236e-04, -6.6691e-04, -2.6343e-05, -2.0224e-03, 7.4739e-06,\n -1.0623e-03, 1.2971e-04, -7.5657e-05, 5.4341e-04, -2.3106e-04,\n 8.7056e-04, -1.4325e-03, -9.1358e-04, -2.4797e-04, 1.1548e-03,\n -3.4293e-04, 1.0828e-04, 2.8215e-04, 9.2130e-04, -6.5622e-04,\n 1.1430e-03, -5.8607e-04, -1.1116e-03, 6.3891e-04, -4.9340e-04,\n 5.9911e-04, -1.1397e-03, -4.6133e-04, -1.0545e-03, 5.6052e-45,\n 1.3967e-04, 4.0636e-04, 3.7505e-04, 5.6052e-45, -1.7266e-04,\n -5.1989e-05, -2.4445e-04, 1.6514e-05, -1.0083e-03, -5.8933e-04,\n -2.8645e-04, -1.6936e-03, -2.1965e-03, 2.4548e-04, -5.2671e-04,\n 1.6575e-03, -2.9393e-04, -6.4305e-04, -5.2278e-04, -1.0811e-03,\n 2.9536e-04, -3.0780e-04, 1.1805e-03, 4.2007e-04, -2.4898e-04,\n -4.0774e-04, 1.4744e-03, 3.5556e-04, 1.4929e-03, 1.8483e-04,\n -7.0091e-04, 2.0195e-04, 3.8676e-04, 4.1516e-04, -4.1760e-04,\n -9.8263e-04, 9.1381e-04, 1.0413e-03, 9.5262e-04, -3.8251e-04,\n -5.5106e-04, -3.5559e-04, 1.7234e-03, 1.1170e-03, -1.4355e-04,\n 1.1856e-03, 3.9160e-05, 1.8064e-04, -1.9498e-03, 6.8110e-05,\n -1.1316e-03, 2.5082e-03, -3.1252e-04, -6.5553e-04, -2.8636e-04,\n -3.6832e-04, 1.3550e-04, -1.0688e-03, -1.5249e-04, 7.1293e-04,\n -7.6410e-04, 4.2551e-04, 2.4044e-03, -8.4508e-04, -5.1122e-04,\n 1.1307e-03, 5.4497e-04, 1.7609e-04, 8.8661e-04, -3.5345e-04,\n -2.2017e-04, 5.0916e-04, 5.6052e-45, 1.0180e-03, -1.4543e-04,\n -1.8255e-03, -6.6085e-04, -4.7002e-04, 6.7655e-04, -4.4899e-04,\n 4.2777e-04, -1.7133e-03, -4.8237e-04, 5.9540e-06, -1.3374e-03,\n 8.5903e-04, 1.3710e-04, -1.8925e-03, -2.8509e-04, -4.7819e-04,\n 5.9416e-04, 2.5101e-04, -1.5893e-03, 7.3263e-04, -1.4270e-04,\n 3.9402e-04, 5.9010e-04, -5.5505e-04, 1.5732e-04, 2.1328e-04,\n 1.0441e-03, 3.9944e-04, 1.0086e-03, -7.3976e-04, -1.1049e-04,\n 3.3189e-04, -1.4761e-04, -4.7878e-04, -2.9203e-04, 1.0765e-03,\n 8.0452e-04, -2.5798e-04, 3.9636e-04, -3.8774e-04, -1.3796e-03,\n 1.8464e-04, -2.0523e-03, -7.2402e-04, -8.1835e-04, 5.1277e-04,\n -7.8095e-04, -9.4092e-05, 1.0400e-03, 2.2210e-04, -1.7427e-04,\n 5.2959e-04, -2.2106e-04, -1.4321e-03, -9.4472e-04, 5.6424e-04,\n 2.0249e-03, 1.3539e-04, 3.6248e-05, 6.3532e-04, -3.6015e-04,\n 6.1153e-04, 7.7999e-05, -1.1182e-04, 1.0827e-03, -1.2285e-03,\n -2.5012e-03, 5.2922e-04, 5.1844e-05, 2.3721e-03, -5.5702e-04,\n 7.8043e-04, 1.2215e-03, -2.4706e-04, 1.2857e-03, -1.0495e-04,\n 4.1419e-04, 2.0478e-03, -5.1336e-04, -8.8523e-04, 2.7020e-04,\n -2.6100e-04, 1.0522e-03, -1.3173e-03, 3.3327e-04, -4.3115e-04,\n -5.1379e-04, 7.6815e-04, -5.0125e-04, 8.5582e-04, 4.2555e-04,\n 8.4562e-04, 6.7441e-04, -3.9073e-04, 4.8939e-04, -9.2824e-04,\n -3.2658e-04, -3.7470e-04, 2.4213e-04, 6.9592e-05, -9.1162e-04,\n 3.8926e-04, -6.9231e-04, -4.8272e-05, 4.5083e-04, 4.2616e-04,\n -1.7760e-03, -8.9521e-04, -1.8811e-03, 5.6954e-04, -2.8623e-05,\n -8.2559e-05, 3.9858e-08, -1.0017e-03, 5.6052e-45, 9.4891e-04,\n 2.4803e-04, -1.5330e-03, -2.7238e-04, 2.2018e-04, -1.2628e-04,\n -1.0569e-03, 7.3167e-04, 1.3449e-03, -7.2017e-04, -1.3676e-03,\n -1.5266e-04, -5.6754e-04, 2.9074e-04, 5.1422e-04, 5.8516e-04,\n 1.4854e-04, -5.5509e-04, 1.1322e-03, -1.0262e-03, 3.6919e-04,\n -3.0072e-05, 4.9487e-04, 1.7812e-04, -2.9532e-04, 1.2800e-03,\n 2.3806e-04, 2.1661e-04, 2.2248e-04, -2.0835e-03, 2.8300e-04,\n -3.1597e-04, -5.6825e-04, -1.2886e-03, -2.4737e-16, 3.2350e-05,\n -8.3913e-05, -8.4856e-04, 1.0948e-03, 1.3273e-03, -1.1556e-03,\n 1.5671e-03, -8.0001e-04, 9.5780e-04, -1.0148e-03, 5.6052e-45,\n -6.4841e-04, -4.4275e-04, 4.0499e-04, 5.5307e-04, -8.6338e-04,\n 1.0832e-03, -3.7132e-04, -4.3471e-04, -1.1092e-03, -1.4444e-03,\n 3.1695e-04, 2.8684e-04, 1.2493e-03, 6.8065e-04, 2.5468e-04,\n -1.0085e-03, -1.7380e-04, 3.3336e-04, -8.1387e-04, 1.9292e-03,\n -1.4444e-04, 7.5023e-05, -4.9682e-04, -1.2546e-04, -7.1993e-04,\n -1.6258e-04, 5.8702e-04, 1.1239e-03, -8.5135e-04, 5.8085e-04,\n -5.0724e-04, 1.1700e-03, 1.6919e-04, -1.2145e-03, -1.1848e-03,\n -1.3654e-03, 6.6234e-04, -1.2335e-03, 6.1928e-04, 1.8527e-03,\n -7.2367e-04, 1.2847e-04, 1.9625e-04, 3.4789e-04, 1.6255e-03,\n 4.4811e-04, -3.8786e-04, -8.0311e-05, -1.5434e-03, -1.4276e-03,\n 3.7621e-04, -6.9256e-04, -2.6316e-04, -2.3967e-04, -1.7847e-03,\n 1.3584e-03, 5.7864e-04, -1.1328e-03, -1.2796e-03, 2.8642e-04,\n -1.3142e-04, -1.8895e-03, 7.8159e-04, 6.2208e-04, 1.2490e-03,\n -4.8898e-04, 1.2786e-03, -6.8000e-04, 1.0311e-04, 1.2959e-03,\n -8.7099e-05, -1.1465e-03, 9.4822e-04, 1.6630e-03, -5.0957e-04,\n -3.2815e-04, 7.9898e-04, 3.1932e-04, -3.3961e-04, 2.9366e-04,\n 5.4057e-04, 8.9427e-04, 4.1858e-04, -4.6261e-05, -4.1231e-06,\n 7.0907e-04, 2.1019e-06, 6.6769e-04, -1.0332e-03, 5.6540e-04,\n -1.0621e-03, -7.1512e-04, -1.7930e-03, -7.8780e-04, -2.5352e-04,\n -1.1613e-03, 2.1687e-04, -4.9228e-04, 4.6471e-04, 4.7909e-04,\n 5.3126e-04, 1.4839e-04, -2.8380e-04, 1.7751e-03, 2.0284e-03,\n 2.1766e-03, -4.4979e-04, 6.6693e-04, 4.6823e-06, -1.3950e-03,\n 1.2805e-03, -2.0743e-04, 1.5502e-04, 1.2689e-03, 8.8095e-04,\n 8.3023e-04, -9.3808e-05, -1.3650e-04, -2.4911e-04, -5.5303e-04,\n 8.1580e-04, 1.4227e-03, 1.2654e-03, 8.6656e-04, 4.2033e-04,\n 1.2030e-03, -1.7184e-04, -4.8889e-04, -1.1314e-04, 3.2705e-04,\n -5.3643e-04, -8.5297e-04, 3.6044e-04, -1.1522e-04, 2.2001e-04,\n -7.0108e-04, -1.0761e-04, 3.8404e-04, -7.2066e-08, -5.1524e-04,\n -1.6986e-03, 1.6089e-03, -1.2578e-03, -7.6102e-04, 3.6894e-04,\n -2.2246e-04, -5.1103e-04, -1.1439e-03, 3.7518e-04, -5.3329e-04,\n -4.2349e-04, -3.1031e-04, -5.4623e-04, -2.2273e-04, -5.4024e-04,\n 4.3852e-04, -2.2082e-04, 1.1241e-03, -1.2242e-03, 1.8634e-04,\n -1.9316e-03, 1.2651e-03, -1.5689e-03, -1.0148e-03, 8.6289e-04,\n 5.7083e-04, 1.3490e-04, -6.4741e-04, 1.2241e-05, 1.2448e-03,\n -6.5101e-04, 1.3416e-03, -1.2740e-03, -4.2766e-04, -8.0380e-05,\n 7.2062e-04, 3.1122e-04, -9.3246e-04, -1.2371e-04, 1.1307e-04,\n 6.2074e-04, 1.0877e-04, -4.4245e-04, 6.3629e-05, 3.6711e-04,\n 4.6915e-04, -5.8830e-04, -5.7406e-04, 1.9247e-03, -2.5367e-04,\n 6.6723e-04, -1.2351e-05, 1.2957e-03, -1.0624e-03, -8.6392e-04,\n 5.7204e-04, 1.6345e-03, 6.6043e-04, 2.2812e-04, 7.5267e-04,\n -6.5857e-05, 7.4192e-04, -5.3115e-05, -8.4686e-04, -2.0854e-04,\n 1.4542e-05, 7.9896e-04, -1.9063e-03, -2.3377e-03, 1.0952e-03,\n -1.2032e-03, -3.8912e-04, -4.5146e-04, 3.7937e-04, 4.8562e-05,\n -1.7851e-03, -2.8886e-04, 1.5728e-03, 1.4382e-03, 2.2903e-03,\n -2.1632e-03, -5.2947e-04, 2.0630e-03, 8.5572e-04, 3.8231e-04,\n -7.0540e-04, -4.4702e-04, 1.6281e-03, -2.1807e-04, -1.7892e-03,\n -1.1961e-04, 2.0565e-03, -8.9785e-05, -1.9253e-03, 6.1590e-06,\n -1.4804e-03, 1.7863e-03, -1.0535e-04, 6.4400e-04, 5.3828e-04,\n 7.7438e-04, -8.0664e-04, -3.2566e-05], device='cuda:0')",
"exp_avg_sq": "tensor([9.5303e-06, 1.1688e-05, 7.2917e-06, 1.6562e-05, 1.4972e-05, 7.7543e-06,\n 3.2157e-11, 3.4698e-06, 1.4015e-05, 9.2869e-06, 1.8876e-05, 1.3414e-05,\n 5.7661e-06, 1.2834e-05, 1.3524e-05, 5.3783e-06, 1.2585e-05, 2.0260e-05,\n 5.7985e-06, 1.3948e-05, 1.2802e-05, 1.6572e-05, 1.2211e-05, 1.2053e-05,\n 1.2283e-05, 1.9700e-05, 2.0910e-05, 1.6391e-05, 1.4291e-05, 1.2613e-05,\n 1.4010e-05, 2.0675e-05, 1.0563e-05, 2.0557e-05, 8.9857e-06, 7.3581e-06,\n 1.6245e-05, 9.3280e-06, 1.6338e-05, 7.5664e-06, 5.8591e-06, 7.2394e-06,\n 1.0554e-05, 1.0677e-05, 4.1151e-06, 1.8531e-05, 1.4094e-05, 7.2704e-06,\n 1.0810e-05, 1.2973e-05, 6.1270e-06, 1.3542e-05, 1.2605e-05, 2.0548e-05,\n 1.6738e-05, 1.6586e-05, 9.8687e-06, 7.6073e-06, 8.0870e-06, 1.4129e-05,\n 1.6882e-05, 1.5319e-05, 1.3164e-05, 1.4094e-05, 8.7625e-06, 1.0155e-05,\n 4.7369e-06, 1.3785e-05, 1.4588e-05, 1.9354e-05, 5.4642e-06, 2.8296e-05,\n 1.4563e-05, 1.2911e-05, 1.4985e-05, 5.3682e-06, 9.2992e-06, 1.7433e-05,\n 8.6002e-06, 4.9885e-06, 3.1732e-06, 1.4169e-05, 9.6241e-06, 1.5229e-05,\n 2.7174e-05, 1.1558e-05, 1.0495e-05, 5.3174e-06, 1.3458e-05, 6.1462e-06,\n 1.6029e-05, 1.4687e-05, 1.2336e-05, 1.8888e-05, 1.7670e-05, 8.9765e-06,\n 6.2614e-06, 1.4425e-05, 4.8298e-06, 6.8197e-06, 1.6738e-05, 5.1309e-06,\n 5.7104e-06, 1.4404e-05, 1.2941e-05, 1.4815e-05, 1.2281e-05, 1.0924e-08,\n 4.8987e-06, 1.4868e-10, 1.8908e-05, 7.5338e-06, 2.1959e-05, 9.3867e-06,\n 8.2253e-06, 1.2672e-05, 1.3839e-05, 1.2226e-05, 1.9424e-05, 1.0697e-05,\n 1.5559e-05, 7.9476e-06, 1.8334e-05, 8.3173e-06, 9.5634e-06, 1.1324e-05,\n 1.5987e-05, 2.8049e-06, 8.0189e-06, 1.5312e-05, 5.0828e-06, 3.4507e-06,\n 1.5147e-05, 1.7603e-05, 1.0855e-05, 1.1300e-05, 8.0714e-06, 1.2066e-05,\n 1.0569e-05, 9.0399e-06, 1.5400e-05, 1.0793e-05, 1.8732e-05, 1.5190e-05,\n 4.1800e-06, 1.8468e-05, 1.1299e-05, 1.2581e-05, 1.1421e-05, 1.3501e-05,\n 1.7161e-05, 1.9406e-05, 1.1684e-05, 2.0987e-05, 1.7952e-05, 2.2972e-05,\n 9.3649e-06, 1.7668e-05, 1.3707e-05, 9.7448e-06, 7.6537e-06, 9.1253e-06,\n 2.3494e-05, 5.6327e-06, 7.9700e-06, 1.5128e-05, 1.0904e-05, 1.5992e-05,\n 2.0071e-05, 6.3990e-06, 1.9694e-05, 1.7181e-05, 1.4122e-05, 1.0072e-05,\n 9.7812e-06, 1.1182e-05, 1.0686e-05, 1.7263e-05, 1.0148e-05, 7.5042e-06,\n 1.6736e-05, 1.2543e-05, 8.4199e-06, 1.1430e-05, 1.7540e-05, 1.5349e-05,\n 1.1912e-05, 1.5026e-05, 1.2447e-05, 7.9280e-06, 5.7384e-06, 1.1335e-05,\n 1.3251e-05, 7.3063e-06, 5.8317e-06, 1.6707e-05, 1.2239e-05, 1.1099e-05,\n 9.4699e-06, 7.2312e-06, 1.1423e-05, 1.3977e-05, 1.1411e-05, 2.0546e-05,\n 1.0156e-05, 1.4926e-05, 1.9148e-05, 1.0327e-05, 5.1247e-06, 7.2788e-06,\n 1.3233e-05, 1.4634e-05, 1.8249e-05, 2.3228e-05, 1.0815e-05, 9.5412e-06,\n 1.4652e-05, 1.5829e-05, 1.5047e-05, 9.7337e-06, 8.0784e-06, 1.3740e-05,\n 1.8252e-05, 2.5723e-05, 5.0952e-06, 9.1496e-06, 1.3825e-05, 1.1239e-05,\n 4.4540e-06, 5.7932e-06, 1.8083e-05, 9.2265e-10, 1.7574e-05, 1.1166e-05,\n 7.9021e-06, 1.3688e-05, 1.0063e-05, 1.3096e-05, 1.6189e-05, 8.3286e-06,\n 7.5211e-06, 6.9826e-06, 1.5647e-05, 1.0168e-05, 7.0944e-06, 1.0407e-05,\n 1.0727e-05, 1.5736e-05, 1.3651e-05, 2.9995e-06, 2.1263e-05, 1.5306e-05,\n 2.7650e-05, 7.4280e-06, 1.1784e-05, 2.0348e-05, 1.5717e-05, 1.5587e-05,\n 1.0754e-05, 4.2425e-06, 4.8953e-06, 6.8651e-06, 1.3890e-05, 2.1777e-05,\n 1.4261e-05, 1.0189e-05, 8.3472e-06, 9.4552e-06, 9.4809e-06, 1.7297e-05,\n 1.2682e-05, 1.4787e-05, 1.2810e-05, 1.9639e-05, 1.3846e-05, 1.0121e-05,\n 1.1041e-05, 7.0514e-06, 7.8929e-06, 6.2621e-06, 1.6040e-05, 1.0323e-05,\n 8.0223e-06, 1.9987e-05, 8.0755e-06, 5.6893e-06, 1.1718e-05, 8.6121e-06,\n 1.1662e-05, 4.3420e-12, 1.2513e-05, 1.0482e-05, 7.4096e-06, 7.9098e-11,\n 2.2511e-05, 1.4289e-05, 1.3479e-05, 7.8832e-06, 6.3546e-06, 4.8938e-06,\n 1.0611e-05, 9.8149e-06, 1.7161e-05, 1.0444e-05, 7.1905e-06, 2.3733e-05,\n 7.5246e-06, 1.4901e-05, 1.1132e-05, 1.7966e-05, 2.0387e-05, 1.2599e-05,\n 6.3242e-06, 7.6802e-06, 1.5802e-05, 1.4780e-06, 6.6571e-06, 9.8627e-06,\n 2.0643e-05, 2.4725e-05, 5.1014e-06, 2.0445e-05, 7.2011e-06, 7.2042e-06,\n 1.0537e-05, 1.1783e-05, 1.6560e-05, 2.4706e-05, 6.3325e-06, 1.6197e-05,\n 6.9269e-06, 8.2126e-06, 6.2894e-06, 2.1394e-05, 1.4723e-05, 1.0112e-05,\n 1.1619e-05, 1.2231e-05, 1.8785e-05, 3.3103e-06, 1.1892e-05, 1.2486e-05,\n 1.2199e-05, 6.8305e-06, 8.5388e-06, 1.2501e-05, 1.1398e-05, 1.5482e-05,\n 1.1288e-05, 7.7396e-06, 1.0579e-05, 1.3812e-05, 1.3097e-05, 2.0718e-05,\n 7.7261e-06, 7.4259e-06, 1.5673e-05, 7.3978e-06, 1.1114e-05, 1.0572e-05,\n 6.6707e-06, 1.3625e-05, 3.0429e-11, 1.5437e-05, 1.1555e-05, 8.6030e-06,\n 1.6041e-05, 1.4351e-05, 8.9653e-06, 1.6381e-05, 1.6100e-05, 1.1494e-05,\n 1.6681e-05, 1.3798e-05, 9.6591e-06, 1.1650e-05, 1.5771e-05, 1.5028e-05,\n 1.2905e-05, 1.0153e-05, 7.8785e-06, 3.5827e-06, 1.1679e-05, 1.0658e-05,\n 2.6630e-06, 1.0535e-05, 3.9436e-06, 4.2823e-06, 1.2752e-05, 1.4266e-05,\n 1.4210e-05, 1.2607e-05, 1.5861e-05, 1.9535e-05, 9.1851e-06, 9.6807e-06,\n 1.6247e-05, 1.3407e-05, 1.0902e-05, 1.3498e-05, 2.1359e-05, 8.1839e-06,\n 4.9923e-06, 2.0318e-05, 1.0391e-05, 1.9645e-05, 2.1572e-05, 4.1469e-06,\n 7.1170e-06, 1.6013e-05, 4.6509e-06, 7.1612e-06, 7.4488e-06, 1.1887e-05,\n 1.8416e-05, 5.0786e-06, 1.4900e-05, 2.3696e-05, 1.4393e-05, 4.7947e-06,\n 1.5737e-05, 1.8946e-05, 5.2945e-07, 1.1053e-05, 6.9682e-06, 9.6128e-06,\n 6.4327e-06, 1.1402e-05, 1.9102e-05, 8.8166e-06, 1.1846e-05, 1.3390e-05,\n 1.1060e-05, 1.4524e-05, 8.5525e-06, 2.1628e-05, 1.3212e-05, 5.8260e-06,\n 8.8141e-06, 1.1349e-05, 1.8188e-05, 1.4266e-05, 4.7113e-06, 1.1646e-05,\n 1.9327e-05, 1.6899e-05, 2.1234e-05, 2.0955e-05, 1.4819e-05, 2.0013e-05,\n 1.4570e-05, 2.1142e-05, 1.6906e-05, 1.2838e-05, 1.2731e-05, 8.4487e-06,\n 2.0386e-05, 6.2665e-06, 1.2234e-05, 7.8744e-06, 8.3362e-06, 1.2040e-05,\n 8.1106e-06, 9.6262e-06, 8.8375e-06, 1.4359e-05, 1.0858e-05, 3.8779e-06,\n 4.1187e-06, 8.2062e-06, 8.8804e-06, 9.6199e-06, 1.0613e-05, 1.2147e-05,\n 8.9569e-06, 1.7243e-05, 4.0093e-09, 7.6042e-06, 4.9915e-13, 6.4598e-06,\n 4.6774e-06, 1.2336e-05, 1.7331e-05, 7.2069e-06, 5.9699e-06, 1.4494e-05,\n 1.2268e-05, 9.1018e-06, 1.3675e-05, 1.4851e-05, 1.0808e-05, 5.7043e-06,\n 5.3844e-06, 1.9467e-05, 3.1478e-06, 7.9087e-06, 2.1157e-06, 2.5168e-05,\n 3.0410e-05, 1.9248e-05, 8.9028e-06, 1.4726e-05, 3.6855e-06, 1.9002e-05,\n 1.4169e-05, 7.4856e-06, 1.1170e-05, 1.2587e-05, 2.2285e-05, 5.4696e-06,\n 9.6882e-06, 1.3531e-05, 1.4631e-05, 8.4990e-09, 1.4464e-05, 1.5305e-05,\n 1.4734e-05, 7.7866e-06, 8.6757e-06, 1.9839e-05, 1.3895e-05, 9.1347e-06,\n 1.1654e-05, 2.4959e-05, 4.8165e-13, 1.4524e-05, 2.8697e-06, 1.1773e-05,\n 1.6286e-05, 1.9173e-05, 1.0164e-05, 1.4515e-05, 4.6740e-06, 1.6135e-05,\n 1.5198e-05, 2.0119e-05, 1.1142e-05, 8.4372e-06, 8.4284e-06, 4.1838e-06,\n 1.5154e-05, 7.8070e-06, 7.0159e-06, 1.0229e-05, 1.9394e-05, 1.2348e-05,\n 1.2438e-05, 2.1569e-05, 1.3322e-05, 1.3116e-05, 1.5999e-05, 1.3896e-05,\n 1.5511e-05, 1.2331e-05, 2.1044e-05, 7.2566e-06, 1.7465e-05, 1.0700e-05,\n 1.5722e-05, 1.6043e-05, 8.4148e-06, 1.0496e-05, 1.1043e-05, 1.3707e-05,\n 2.0589e-05, 1.3662e-05, 1.4906e-05, 8.5022e-06, 1.1155e-05, 1.9893e-05,\n 7.4693e-06, 1.2934e-05, 1.4327e-05, 1.6823e-05, 1.6644e-05, 2.0833e-05,\n 1.0124e-05, 1.0026e-05, 1.0997e-05, 1.7723e-05, 8.3643e-06, 8.6571e-06,\n 1.6923e-05, 1.6912e-05, 5.8310e-06, 1.0468e-05, 2.3667e-05, 1.7413e-05,\n 4.7312e-06, 1.1616e-05, 1.4697e-05, 1.1970e-05, 1.6128e-05, 1.8895e-05,\n 1.6095e-05, 1.3738e-05, 1.0618e-05, 1.8867e-05, 9.1550e-06, 1.1110e-05,\n 5.7641e-06, 1.5941e-05, 1.8712e-05, 1.0165e-05, 1.3843e-05, 9.5436e-06,\n 5.4740e-06, 8.2182e-06, 1.5495e-05, 1.8859e-08, 1.7666e-05, 1.2927e-05,\n 1.5306e-05, 9.3148e-06, 9.1180e-06, 1.3781e-05, 1.1824e-05, 8.5329e-06,\n 8.1035e-06, 4.7145e-06, 1.4307e-05, 1.5442e-05, 1.0577e-05, 1.2339e-05,\n 9.9984e-06, 1.2274e-05, 1.8818e-05, 7.7204e-06, 1.3112e-05, 1.5220e-05,\n 1.9819e-05, 1.3719e-05, 1.4243e-05, 7.5665e-06, 1.6495e-05, 1.3165e-05,\n 8.3040e-06, 2.0128e-05, 8.6027e-06, 2.3777e-05, 1.9614e-05, 8.9389e-06,\n 2.0340e-05, 5.0984e-06, 8.9009e-06, 8.7813e-06, 7.3742e-06, 1.1875e-05,\n 1.2482e-05, 1.3221e-05, 1.0452e-05, 1.1393e-05, 1.9093e-05, 9.0531e-06,\n 1.4816e-05, 7.7582e-06, 1.1539e-05, 9.7578e-06, 5.1027e-06, 7.5204e-06,\n 8.0451e-06, 1.2264e-05, 6.8654e-06, 1.0943e-08, 1.1290e-05, 1.8646e-05,\n 1.4284e-05, 9.4953e-06, 1.0476e-05, 5.4270e-06, 1.5430e-05, 1.0204e-05,\n 7.8607e-06, 6.0681e-06, 1.0086e-05, 1.8062e-05, 1.4162e-05, 8.2247e-06,\n 1.8144e-05, 1.2219e-05, 1.7108e-05, 1.0191e-05, 1.1953e-05, 1.6483e-05,\n 1.1651e-05, 2.0612e-05, 1.2725e-05, 1.2672e-05, 7.3682e-06, 7.4477e-06,\n 1.5651e-05, 7.2835e-06, 1.3589e-05, 9.2135e-06, 8.1031e-06, 9.5716e-06,\n 9.0644e-06, 1.6636e-05, 1.4540e-05, 1.4633e-05, 2.1256e-05, 6.1462e-06,\n 1.6118e-05, 1.4382e-05, 8.4717e-06, 1.1373e-05, 1.6553e-05, 1.3927e-05,\n 1.5988e-05, 1.1152e-05, 2.3491e-06, 1.9410e-05, 1.5731e-05, 1.5519e-05,\n 1.5230e-05, 1.2111e-05, 3.2337e-06, 8.1275e-06, 8.6459e-06, 2.3210e-05,\n 9.8610e-06, 1.6778e-05, 1.7152e-05, 1.4095e-05, 5.7988e-06, 1.6732e-05,\n 6.8690e-06, 1.2739e-05, 1.2563e-05, 5.2051e-06, 6.6720e-06, 1.7120e-05,\n 1.4340e-05, 1.7328e-05, 1.6508e-05, 7.3559e-06, 1.2980e-05, 1.1413e-05,\n 9.7393e-06, 2.3058e-05, 1.1535e-05, 2.6012e-05, 1.9615e-05, 1.5590e-05,\n 9.3565e-06, 9.0053e-06, 1.2799e-05, 1.4898e-05, 1.7967e-05, 3.8958e-06,\n 8.0698e-06, 8.6371e-06, 6.2909e-06, 1.5977e-05, 1.6124e-05, 1.5404e-05,\n 1.4204e-05, 1.2803e-05, 8.0096e-06, 1.8295e-05, 1.2773e-05, 1.7660e-05,\n 1.8414e-05, 1.0739e-05, 6.0110e-06, 1.3525e-05, 6.8135e-06, 1.7515e-05],\n device='cuda:0')"
},
"2": {
"step": "tensor(10012.)",
"exp_avg": "tensor([[-1.6045e-06, 9.6523e-06, -7.5892e-08, ..., 2.1236e-07,\n -5.4476e-06, -4.4641e-06],\n [ 1.2121e-07, 1.4509e-06, -3.4204e-06, ..., -1.1553e-06,\n 2.0525e-07, -2.4011e-06],\n [-3.9494e-06, 2.7145e-06, 3.3027e-06, ..., 9.9424e-07,\n 2.9960e-07, -1.0538e-07],\n ...,\n [ 7.5449e-06, -1.2953e-07, 2.0773e-06, ..., 5.1380e-06,\n 1.4903e-07, 7.8780e-06],\n [-9.1176e-07, 2.0919e-06, 1.1829e-05, ..., -4.0703e-07,\n -1.1322e-06, -3.6000e-07],\n [ 9.0723e-07, 1.2680e-06, -5.9392e-07, ..., 2.5916e-07,\n 2.0099e-06, -5.7187e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[7.0123e-10, 1.1949e-09, 1.3338e-10, ..., 1.4619e-10, 1.4745e-08,\n 7.8227e-10],\n [6.6141e-10, 6.0005e-10, 5.1392e-10, ..., 3.8852e-10, 5.1191e-11,\n 1.8762e-09],\n [2.7354e-09, 1.3780e-09, 8.0309e-10, ..., 2.5143e-10, 4.6088e-11,\n 3.6735e-10],\n ...,\n [1.8507e-09, 3.7474e-11, 1.3114e-10, ..., 1.0442e-09, 5.5941e-11,\n 2.5808e-09],\n [1.9530e-09, 1.2123e-09, 3.2009e-09, ..., 1.1629e-09, 3.1841e-10,\n 1.4175e-09],\n [2.4496e-10, 2.8373e-10, 3.9672e-10, ..., 1.0095e-09, 2.9250e-09,\n 7.0458e-10]], device='cuda:0')"
},
"3": {
"step": "tensor(10012.)",
"exp_avg": "tensor([ 1.5276e-05, 1.0258e-05, 1.1067e-05, -8.6696e-07, -1.9532e-04,\n 5.6052e-45, -5.5197e-05, 1.4822e-05, 5.6052e-45, 2.8765e-05,\n 2.0425e-05, -3.3431e-05, -1.0135e-04, -4.1064e-05, -1.0280e-04,\n -8.0975e-06, -1.2088e-05, -1.2971e-04, 1.8154e-05, 3.6637e-05,\n 1.1402e-05, -2.4497e-05, -6.4238e-05, -1.3481e-04, -5.5239e-06,\n 3.1967e-06, -4.5010e-05, 4.3215e-06, 4.2409e-05, 2.6685e-06,\n -2.6973e-05, 2.2250e-05, -2.2976e-05, 3.2516e-05, 5.6052e-45,\n 5.6052e-45, -6.4053e-06, 1.4633e-06, 9.6338e-05, 4.3709e-05,\n -8.8465e-05, 9.3732e-05, -1.3835e-05, 2.2293e-05, 5.6052e-45,\n -5.6028e-05, 3.0646e-05, -1.2674e-05, 3.5955e-05, 2.3826e-05,\n 5.0791e-06, 7.3621e-06, -8.3530e-06, 2.4602e-05, 7.1414e-06,\n 7.4882e-06, 5.6434e-05, 3.9528e-05, -5.6861e-05, 9.4838e-06,\n -3.9263e-05, -1.2793e-05, 2.9964e-05, 2.1294e-06, -1.5811e-05,\n 2.2195e-06, 5.6052e-45, -1.4620e-04, 1.9020e-05, 6.4903e-06,\n 5.4826e-05, -3.9466e-06, 3.5306e-05, 5.1184e-05, 7.0124e-05,\n 2.5484e-05, -1.6561e-04, -1.0081e-05, 5.8983e-05, 5.8716e-05,\n -4.4734e-05, 2.7633e-05, -2.9303e-05, -6.7613e-05, 8.9718e-05,\n -3.8698e-05, 4.2180e-06, -9.9214e-05, -1.2421e-04, 6.2266e-05,\n 6.9976e-06, 5.4587e-05, -4.4155e-05, -5.4377e-05, -5.5751e-06,\n -2.4111e-06, 2.9555e-05, -4.6239e-06, -5.6624e-05, -3.3399e-05,\n 9.3337e-05, 5.0396e-06, -1.2612e-05, 5.6052e-45, -1.5250e-05,\n 7.3538e-05, -2.4822e-05, 1.1928e-06, 2.3399e-05, -1.2058e-05,\n -8.8837e-06, 1.6782e-04, 4.9031e-05, 1.0469e-05, 5.6052e-45,\n 9.1131e-06, -9.9768e-06, 2.7065e-05, -6.4721e-05, 6.0888e-05,\n -5.6052e-45, -2.5915e-06, -6.1233e-05, 5.7921e-05, 4.8374e-05,\n -8.5313e-08, -6.0109e-05, -5.6052e-45, 4.0118e-05, 5.9754e-05,\n 5.8486e-05, 1.5946e-06, 8.3824e-06, -4.5313e-05, 1.0157e-05,\n 7.7468e-05, 1.0409e-05, 7.0575e-05, 4.4217e-05, -7.7613e-05,\n 3.2479e-05, 1.5976e-05, 1.7178e-05, 9.0666e-05, 2.1243e-05,\n -7.0833e-05, -2.5025e-05, 5.6052e-45, -7.0327e-05, -1.9921e-06,\n -2.9337e-05, 5.6052e-45, 1.7295e-05, -5.6052e-45, 1.6959e-05,\n 2.6185e-05, -3.3330e-06, 7.5774e-05, 5.6052e-45, -5.8581e-05,\n -1.6447e-05, 4.8121e-05, 8.4693e-05, -1.0392e-05, 5.6052e-45,\n 1.2311e-05, -1.5285e-04, 4.2694e-06, -5.4683e-05, -5.6052e-45,\n 5.6052e-45, 9.9757e-05, -4.2327e-05, -4.0946e-06, -3.8244e-05,\n 4.9483e-05, 2.6975e-05, -4.2585e-06, 1.2956e-05, 5.6052e-45,\n 2.6316e-05, -1.2823e-05, -2.1182e-05, 1.1635e-05, 5.6052e-45,\n 1.4639e-05, 5.6052e-45, 5.6052e-45, -5.7811e-05, 1.8062e-05,\n -7.2083e-05, 4.4074e-05, 8.5995e-06, 1.7726e-05, 1.9085e-06,\n -5.4419e-05, -5.6052e-45, 4.7963e-06, 5.6052e-45, 1.5655e-05,\n 1.0191e-04, -3.4316e-05, -3.3835e-05, 3.5889e-05, -1.1947e-05,\n -7.3998e-06, 3.2160e-05, -8.8281e-06, 3.4348e-05, 2.1400e-06,\n 5.4918e-05, -5.7798e-05, 1.0595e-05, -3.9984e-05, -3.5376e-05,\n -6.9512e-05, 3.9161e-05, -1.2892e-05, 5.6052e-45, -1.0049e-04,\n 1.2640e-05, 2.3388e-05, 5.6052e-45, -1.2619e-04, -1.2526e-05,\n 2.5348e-05, 5.6052e-45, -2.8062e-06, -7.9737e-07, 1.0908e-05,\n 2.2173e-05, 9.0159e-05, 1.4016e-04, 5.6052e-45, -5.1363e-05,\n -3.4216e-05, -1.7228e-05, 2.5691e-06, -1.3240e-05, -3.1236e-07,\n -3.9922e-05, -1.6164e-05, -2.1659e-05, 7.2739e-05, 3.1798e-05,\n 3.5994e-06, -1.0230e-05, 5.6052e-45, -2.5493e-05, 5.6052e-45,\n 3.1463e-05, 3.1629e-05, 6.5591e-05, 5.6052e-45, 2.6235e-05,\n -6.1052e-05, -5.9830e-05, 2.5568e-05, 5.6052e-45, -2.2694e-05,\n 3.7319e-05, -3.5230e-05, -7.9107e-06, 5.6052e-45, -8.2047e-05,\n 5.0192e-05, -1.5710e-05, 5.6052e-45, 2.2718e-05, -6.0811e-05,\n -2.4385e-06, -1.7986e-05, 5.6052e-45, 4.7626e-05, 6.9412e-05,\n 2.4488e-05, 1.1520e-06, -3.1183e-05, 5.6052e-45, -1.1837e-05,\n 1.3205e-05, 2.2909e-05, -4.7885e-06, -4.1149e-05, 5.6052e-45,\n -1.1609e-05, -1.2585e-05, 5.6052e-45, -5.4325e-05, 5.2429e-05,\n -1.5612e-05, 3.1249e-05, -7.0725e-05, 1.2804e-06, 5.6556e-05,\n 7.8347e-06, 1.7375e-05, -6.7052e-06, 5.6052e-45, 1.8626e-06,\n 2.5275e-05, 4.2567e-05, 1.9152e-05, -1.0695e-06, 5.6052e-45,\n 8.9304e-05, -3.0525e-05, 1.2620e-04, 3.1557e-05, 2.2132e-05,\n 1.1433e-05, -7.4400e-05, -4.8840e-05, 4.4258e-05, -6.3593e-05,\n -3.9969e-05, -3.3742e-05, 4.0115e-05, -1.2332e-04, 2.2298e-05,\n 1.7788e-05, 6.6770e-05, -1.5836e-04, 4.7846e-05, 5.3668e-05,\n -7.8923e-05, 5.6052e-45, 1.2765e-05, 2.0837e-05, -6.5613e-06,\n -6.4203e-06, 2.5547e-05, -2.0975e-05, 6.3407e-05, 7.8296e-06,\n -2.5594e-06, 9.9895e-05, 3.1161e-06, 9.3205e-05, 5.6052e-45,\n 5.6052e-45, 5.1026e-05, 3.1521e-05, -8.2363e-07, -2.4814e-05,\n 9.0201e-08, -1.8197e-05, -2.7830e-05, -3.3440e-05, 4.5511e-08,\n -4.2649e-05, 1.5841e-05, 1.8821e-05, -4.6713e-06, -2.0360e-05,\n 9.5749e-05, 5.6052e-45, 1.3305e-05, 5.6052e-45, 1.3911e-04,\n 5.8479e-05, 7.8608e-05, -3.0404e-06, 2.6681e-05, -9.0157e-06,\n -2.5521e-05, 4.8066e-05, -8.6350e-05, -1.9267e-06, -4.6316e-05,\n 3.6077e-05, -3.0606e-05, 1.1121e-04, -4.4452e-06, -1.1556e-04,\n -5.8235e-06, -5.0386e-06, 6.9263e-05, 8.8447e-06, 3.2701e-05,\n -2.3312e-05, 1.8085e-05, -5.1934e-05, 2.1744e-05, -5.6052e-45,\n -7.5242e-06, -6.9563e-05, -4.7078e-05, -7.0706e-05, -4.2932e-06,\n 2.7302e-05, -1.9158e-05, -6.1222e-05, 5.6052e-45, -1.0200e-05,\n 9.0668e-07, 5.6052e-45, 1.0145e-05, -4.8925e-05, 5.5769e-05,\n -2.0828e-05, 7.3147e-05, 5.2785e-05, -1.4463e-05, 5.6052e-45,\n 2.3083e-05, -1.1984e-04, 5.6789e-05, -1.1530e-07, 2.4758e-05,\n 5.6052e-45, 1.3195e-05, 5.6052e-45, 2.3149e-05, -2.1618e-05,\n 1.1338e-05, 8.7737e-05, 4.6111e-06, 7.8559e-05, -5.8790e-05,\n -5.0776e-05, 5.5197e-05, -3.4574e-05, 6.9471e-05, 5.6052e-45,\n 3.2820e-05, 2.4943e-05, -7.1176e-05, 5.6052e-45, -1.5267e-05,\n 4.7129e-05, -2.2682e-06, -2.4263e-05, 5.2895e-05, -6.1012e-06,\n 1.0590e-06, 3.2961e-05, -5.6277e-05, -5.0924e-05, -4.1682e-05,\n -5.4565e-06, 3.2084e-05, 3.4695e-05, -9.3319e-06, -5.1549e-05,\n -3.6251e-05, 5.6052e-45, 2.4971e-06, 1.3442e-05, -3.8377e-05,\n -4.1408e-05, 5.6052e-45, 5.6052e-45, -4.7327e-05, 3.3584e-05,\n 1.3734e-05, 5.5028e-05, 5.9178e-05, -4.8188e-06, 2.6056e-05,\n 3.8103e-05, -5.9290e-05, 7.1877e-05, 3.6770e-06, 6.6161e-06,\n 2.6131e-05, 1.5787e-05, 4.0913e-05, -1.6747e-05, 8.8130e-05,\n 7.4298e-05, 5.6052e-45, 5.6052e-45, 3.0099e-05, 5.6052e-45,\n -7.5396e-06, 3.7872e-05, 3.0593e-05, 8.2172e-05, -6.9642e-06,\n -6.0313e-06, -2.7419e-05, 1.7407e-05, 8.4621e-05, -1.1640e-05,\n -4.0080e-06, -2.8937e-05, 9.4715e-06, -1.6210e-04, 5.6052e-45,\n -1.0683e-05, 1.2202e-05, 1.5430e-05, -1.9903e-05, -8.5622e-05,\n 7.2690e-05, -1.4266e-05, 5.0827e-05, -9.3354e-05, -7.6952e-05,\n -3.6029e-05, 1.1831e-04, 6.2250e-05, -3.0301e-05, 4.9757e-05,\n -1.1587e-05, -7.7400e-05, 6.4011e-05, 1.8953e-05, 2.0873e-07,\n 1.0755e-05, -4.6833e-05], device='cuda:0')",
"exp_avg_sq": "tensor([2.4113e-08, 4.1731e-08, 3.5847e-08, 5.4154e-08, 7.2794e-08, 1.3197e-09,\n 8.0730e-08, 7.3170e-08, 6.1060e-16, 5.4326e-08, 5.6775e-08, 8.8597e-08,\n 1.0140e-07, 5.3634e-08, 6.0305e-08, 4.7852e-08, 8.1491e-08, 5.8590e-08,\n 4.4216e-08, 7.6142e-08, 5.6656e-08, 6.7793e-08, 4.3643e-08, 8.3086e-08,\n 5.9145e-08, 3.0265e-08, 1.5916e-08, 5.2503e-08, 3.9382e-08, 5.8621e-08,\n 5.6303e-08, 5.9855e-08, 3.1071e-08, 7.8787e-08, 1.1004e-17, 1.6545e-09,\n 5.9693e-08, 7.4292e-08, 5.2175e-08, 4.5016e-08, 6.0012e-08, 6.1896e-08,\n 6.3822e-08, 6.9327e-08, 5.3339e-09, 4.4806e-08, 6.9034e-08, 5.5524e-08,\n 5.3799e-08, 5.5482e-08, 5.3725e-08, 5.7532e-08, 8.4437e-08, 6.1655e-08,\n 6.4877e-08, 5.0245e-08, 7.6168e-08, 2.1664e-08, 5.7983e-08, 6.3110e-08,\n 4.6985e-08, 7.3406e-08, 4.9873e-08, 8.8404e-08, 9.8777e-08, 6.9599e-08,\n 2.9817e-09, 4.7742e-08, 4.8686e-08, 2.6813e-08, 4.4341e-08, 5.0490e-08,\n 5.6027e-08, 2.5553e-08, 8.6917e-08, 6.3309e-08, 8.0172e-08, 3.6777e-08,\n 6.2486e-08, 5.4433e-08, 4.1791e-08, 7.8370e-08, 5.4611e-08, 8.3984e-09,\n 8.8342e-08, 5.8304e-08, 4.5534e-08, 7.2310e-08, 7.6397e-08, 8.8431e-08,\n 5.7467e-08, 5.4937e-08, 7.4365e-08, 3.5639e-08, 6.5759e-08, 7.6250e-08,\n 6.5030e-08, 2.4682e-08, 5.1891e-08, 5.5270e-08, 7.1669e-08, 4.4528e-08,\n 2.7846e-08, 1.0117e-09, 3.2640e-08, 7.9502e-08, 6.2317e-08, 4.2636e-08,\n 7.0253e-08, 5.6605e-08, 6.8884e-08, 5.9552e-08, 8.1796e-08, 4.6232e-08,\n 2.4106e-10, 6.5392e-08, 7.2272e-08, 5.8402e-08, 3.9448e-08, 8.5029e-08,\n 6.0345e-09, 5.7294e-08, 5.1348e-08, 6.8961e-08, 5.2996e-08, 8.1207e-08,\n 6.6063e-08, 3.0610e-09, 4.8520e-08, 8.1181e-08, 6.0323e-08, 4.1004e-08,\n 7.9430e-08, 5.1271e-08, 5.2703e-08, 5.7149e-08, 3.4449e-08, 9.7927e-08,\n 2.9023e-08, 5.2807e-08, 4.5036e-08, 5.8055e-08, 6.4253e-08, 7.4514e-08,\n 4.2201e-08, 1.5773e-08, 1.1456e-07, 5.4435e-10, 6.3859e-08, 4.4783e-08,\n 5.4718e-08, 8.0841e-13, 5.3264e-08, 1.1095e-09, 4.9305e-08, 6.1498e-08,\n 4.6629e-08, 5.9287e-08, 2.4947e-08, 4.0618e-08, 4.9534e-08, 4.5806e-08,\n 9.0382e-08, 4.2576e-08, 9.1526e-15, 7.4358e-08, 4.0580e-08, 4.8236e-08,\n 9.6983e-08, 1.3629e-11, 4.8926e-09, 4.6385e-08, 4.6707e-08, 4.0533e-08,\n 3.9594e-08, 6.2404e-08, 3.7797e-08, 3.3464e-08, 3.9336e-08, 1.8310e-16,\n 5.8822e-08, 3.7860e-08, 3.9233e-08, 3.1106e-08, 8.5952e-09, 4.0379e-08,\n 3.5017e-09, 1.0080e-09, 7.7183e-08, 5.1708e-08, 6.3980e-08, 5.8806e-08,\n 2.8678e-08, 2.9113e-08, 5.5651e-08, 4.9706e-08, 7.7591e-11, 2.2626e-08,\n 1.2171e-08, 5.5532e-09, 3.9570e-08, 7.2775e-08, 2.7527e-08, 5.1356e-08,\n 4.4699e-08, 3.2412e-08, 8.1828e-08, 4.7395e-08, 3.3081e-08, 3.9566e-08,\n 4.6682e-08, 1.0445e-07, 5.1706e-08, 7.8194e-08, 7.7087e-08, 4.7145e-08,\n 7.0649e-08, 4.6466e-08, 1.5301e-09, 4.8518e-08, 5.7146e-08, 7.9284e-08,\n 6.9603e-09, 6.3599e-08, 8.6769e-08, 7.2013e-08, 6.6997e-13, 6.0181e-08,\n 8.4044e-08, 3.3997e-08, 5.6521e-08, 6.1994e-08, 8.1871e-08, 1.7142e-10,\n 6.9722e-08, 2.6933e-08, 8.7292e-08, 2.3548e-08, 6.7753e-08, 7.5965e-08,\n 3.3360e-08, 4.2462e-08, 7.6593e-08, 5.1919e-08, 1.4569e-08, 2.5447e-08,\n 6.7673e-08, 5.9892e-09, 5.7021e-08, 1.1368e-14, 4.6786e-08, 6.0066e-08,\n 2.5230e-08, 1.5548e-13, 7.8500e-08, 3.8354e-08, 5.8715e-08, 5.9085e-08,\n 1.5793e-13, 5.0934e-08, 5.0525e-08, 6.1263e-08, 5.5271e-08, 1.5832e-09,\n 1.8578e-08, 4.5282e-08, 4.1416e-08, 1.4876e-09, 6.3259e-08, 6.8028e-08,\n 7.6641e-08, 2.4175e-08, 4.1631e-10, 6.0388e-08, 2.8164e-08, 3.7103e-08,\n 4.9592e-08, 4.1585e-08, 2.4656e-09, 7.9026e-08, 8.0449e-08, 4.1898e-08,\n 9.9536e-08, 8.7768e-08, 8.2117e-09, 6.1743e-08, 3.7768e-08, 1.9331e-10,\n 7.3273e-08, 7.1925e-08, 2.4915e-08, 6.8073e-08, 7.6119e-08, 3.4902e-08,\n 7.2068e-08, 4.1870e-08, 9.6968e-08, 5.1223e-08, 1.9312e-09, 5.6683e-08,\n 3.9424e-08, 7.6747e-08, 4.5445e-08, 4.1428e-08, 1.0400e-10, 6.4942e-08,\n 2.3495e-08, 8.7070e-08, 5.5254e-08, 6.0729e-08, 8.8919e-08, 7.6502e-08,\n 4.3779e-08, 8.0097e-08, 4.6830e-08, 6.9212e-08, 2.8628e-08, 7.8463e-08,\n 5.1017e-08, 8.9991e-08, 5.3763e-08, 5.7670e-08, 6.3110e-08, 9.5632e-08,\n 4.2900e-08, 5.3091e-08, 2.7801e-09, 5.5819e-08, 4.8429e-08, 1.6451e-08,\n 4.9162e-08, 2.2427e-08, 5.5445e-08, 3.6380e-08, 4.6477e-08, 5.5833e-08,\n 9.2433e-08, 8.7451e-08, 4.3229e-08, 7.2016e-18, 1.8211e-09, 6.1288e-08,\n 5.0955e-08, 4.4467e-08, 3.6541e-08, 8.9635e-08, 4.1745e-08, 3.2729e-08,\n 5.2582e-08, 9.2804e-08, 4.2002e-08, 2.8904e-08, 5.5167e-08, 8.0781e-08,\n 1.4139e-08, 5.7835e-08, 2.7654e-11, 4.7214e-08, 1.6204e-10, 6.3989e-08,\n 6.3572e-08, 3.4944e-08, 4.3049e-08, 2.4932e-08, 6.4056e-08, 1.2392e-08,\n 2.9389e-08, 6.8370e-08, 5.2996e-08, 6.9422e-08, 2.0922e-08, 5.6336e-08,\n 9.4352e-08, 7.9748e-08, 8.6965e-08, 4.1696e-08, 4.8301e-08, 5.5361e-08,\n 6.3180e-08, 1.1327e-07, 4.4957e-08, 4.9024e-08, 7.6318e-08, 6.5085e-08,\n 3.7083e-10, 6.1075e-08, 1.7927e-08, 5.7190e-08, 4.2309e-08, 5.7347e-08,\n 5.7305e-08, 6.5200e-08, 7.5105e-08, 3.4699e-10, 7.5087e-08, 6.6107e-08,\n 4.6867e-15, 7.2369e-08, 6.5367e-08, 3.0968e-08, 2.5117e-08, 6.1386e-08,\n 5.1250e-08, 4.0803e-08, 1.5679e-09, 6.0553e-08, 4.8542e-08, 3.1379e-08,\n 5.1645e-08, 6.8155e-08, 3.1317e-09, 3.7928e-08, 3.8531e-10, 4.1359e-08,\n 5.5742e-08, 5.6861e-08, 5.3002e-08, 6.7694e-08, 3.3923e-08, 5.9316e-08,\n 4.5821e-08, 5.4256e-08, 7.3822e-08, 6.6263e-08, 1.1348e-10, 4.8611e-08,\n 4.9074e-08, 3.4722e-08, 8.4382e-13, 5.0740e-08, 4.5520e-08, 9.2413e-08,\n 5.1933e-08, 7.3582e-08, 5.9029e-08, 3.1768e-08, 5.8675e-08, 4.8558e-08,\n 7.7053e-08, 9.2321e-08, 4.8665e-08, 4.9978e-08, 5.4404e-08, 2.0576e-08,\n 5.7548e-08, 5.0278e-08, 1.2959e-10, 9.8803e-08, 2.6774e-08, 4.2154e-08,\n 4.6551e-08, 2.8741e-10, 3.1988e-11, 3.6786e-08, 7.9908e-08, 4.1925e-08,\n 9.8297e-08, 4.8705e-08, 3.2561e-08, 5.5025e-08, 6.2800e-08, 3.6031e-08,\n 3.8980e-08, 5.9881e-08, 3.7085e-08, 4.7282e-08, 4.1210e-08, 5.6990e-08,\n 2.8904e-08, 5.5247e-08, 6.7189e-08, 9.8510e-10, 6.6075e-12, 5.7441e-08,\n 4.2448e-10, 3.5674e-08, 5.6097e-08, 8.3856e-08, 5.7303e-08, 7.1816e-08,\n 5.8891e-08, 2.2037e-08, 4.2520e-08, 9.3695e-08, 1.0548e-07, 3.0798e-08,\n 5.5174e-08, 4.6713e-08, 5.7331e-08, 3.8464e-16, 6.4505e-08, 8.0789e-08,\n 6.7007e-08, 6.4246e-08, 6.8290e-08, 6.2090e-08, 4.9027e-08, 3.9212e-08,\n 3.3760e-08, 3.5867e-08, 5.1796e-08, 5.3722e-08, 7.9797e-08, 6.1159e-08,\n 8.4364e-08, 5.1378e-08, 3.8383e-08, 4.8533e-08, 4.0232e-08, 8.0854e-08,\n 4.0022e-08, 2.0542e-08], device='cuda:0')"
},
"4": {
"step": "tensor(10012.)",
"exp_avg": "tensor([[ 1.7900e-06, -1.0314e-06, -3.6447e-06, ..., -7.1665e-06,\n -1.9752e-06, 1.1531e-06],\n [-1.1330e-06, 4.9733e-06, -1.9675e-06, ..., -9.3226e-06,\n -1.3978e-07, -4.9035e-06],\n [ 2.1640e-06, 1.3491e-05, 2.8918e-06, ..., -8.5314e-06,\n -8.2248e-06, 3.4197e-07],\n ...,\n [ 2.0761e-06, -5.1566e-06, -1.7191e-06, ..., 2.0805e-05,\n -2.5680e-06, -1.0762e-06],\n [-1.5231e-06, 1.5463e-06, -4.7639e-06, ..., 3.4510e-05,\n -9.1299e-07, 2.6249e-06],\n [-5.0076e-07, 4.0075e-06, -1.0533e-05, ..., 1.6455e-05,\n -3.5544e-06, 5.6047e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.0452e-11, 1.5248e-09, 2.8052e-10, ..., 3.1296e-09, 3.8442e-10,\n 1.4472e-10],\n [1.5738e-10, 7.0214e-10, 7.7514e-10, ..., 5.7881e-09, 1.9885e-09,\n 3.8859e-10],\n [1.3251e-10, 6.2980e-10, 2.5440e-09, ..., 2.1564e-09, 2.1342e-09,\n 2.1230e-10],\n ...,\n [2.0346e-10, 6.2279e-10, 5.7852e-10, ..., 7.2002e-09, 1.1173e-09,\n 3.7779e-10],\n [1.2580e-10, 9.2935e-10, 7.9559e-10, ..., 4.9413e-09, 1.0013e-09,\n 7.6502e-10],\n [1.0040e-10, 4.4856e-09, 1.1881e-09, ..., 3.5083e-09, 9.5886e-10,\n 1.8706e-09]], device='cuda:0')"
},
"5": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 1.3957e-06, -9.8488e-06, -4.2651e-06, ..., -2.5496e-08,\n -9.4403e-06, 1.7876e-06],\n [ 1.2468e-05, -1.8169e-05, 1.1723e-05, ..., -5.0204e-06,\n 1.5662e-06, 9.1725e-06],\n [ 4.6827e-05, -1.9760e-06, -1.9879e-07, ..., -1.0881e-05,\n 3.7307e-06, -2.1125e-05],\n ...,\n [-7.7595e-06, 2.0305e-06, -3.3610e-05, ..., -2.0470e-06,\n 1.0315e-06, -3.2351e-06],\n [ 2.7247e-06, 7.5905e-06, -1.7333e-05, ..., -6.0528e-06,\n 9.6604e-07, -4.4591e-07],\n [ 4.1351e-06, -6.7916e-06, -1.7063e-06, ..., 2.0330e-06,\n 1.2000e-06, -2.6034e-05]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.3436e-08, 8.2126e-09, 1.3916e-09, ..., 1.0857e-09, 1.0876e-08,\n 1.0624e-09],\n [6.0730e-09, 2.7598e-09, 3.2541e-09, ..., 1.5204e-09, 1.5632e-09,\n 5.1214e-09],\n [2.8397e-08, 4.3955e-09, 7.1250e-10, ..., 1.2959e-09, 6.3249e-10,\n 3.7649e-09],\n ...,\n [2.6813e-09, 1.7656e-09, 9.7237e-09, ..., 3.8346e-09, 1.6329e-09,\n 9.8725e-10],\n [1.0062e-09, 1.3336e-09, 1.5390e-09, ..., 1.6659e-08, 1.2641e-09,\n 1.8250e-09],\n [3.9988e-09, 3.0146e-09, 3.7997e-09, ..., 2.0085e-09, 1.2672e-09,\n 3.6154e-08]], device='cuda:0')"
},
"6": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.0963e-04, 2.1216e-05, -1.2054e-04, ..., -5.1170e-05,\n 1.3469e-04, -1.1580e-04], device='cuda:0')",
"exp_avg_sq": "tensor([2.0061e-07, 1.7258e-07, 2.5714e-07, ..., 2.6132e-07, 1.5135e-07,\n 2.2594e-07], device='cuda:0')"
},
"7": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 1.6714e-06, 1.1516e-05, -4.1415e-07, ..., 1.9588e-06,\n 7.9324e-09, -1.7983e-06],\n [ 9.6577e-06, -2.4541e-06, 1.1640e-05, ..., 1.2740e-06,\n -2.8864e-06, 3.7550e-06],\n [-3.2062e-05, 2.3607e-06, -5.1668e-06, ..., 3.3001e-06,\n 1.3241e-06, 8.3874e-06],\n ...,\n [-2.8333e-05, -1.0854e-05, 9.9818e-06, ..., 3.8512e-06,\n -4.4186e-07, -9.0960e-06],\n [-2.2457e-05, -1.2640e-06, 1.0581e-05, ..., 9.4327e-06,\n 1.3150e-05, -2.7273e-06],\n [ 8.8183e-07, -6.2085e-06, -3.5083e-07, ..., -3.2856e-06,\n 3.6583e-06, -4.3428e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.4578e-10, 8.0239e-10, 3.9673e-10, ..., 5.1929e-10, 3.3057e-10,\n 2.9410e-10],\n [1.2690e-09, 6.6811e-10, 1.3252e-09, ..., 1.3175e-09, 8.3079e-10,\n 1.6379e-09],\n [2.3519e-09, 6.6678e-10, 2.9042e-09, ..., 2.9897e-09, 7.8924e-10,\n 1.6544e-09],\n ...,\n [2.2413e-09, 1.0234e-09, 1.8572e-09, ..., 9.6366e-10, 1.8743e-09,\n 2.6162e-09],\n [1.7890e-09, 8.4585e-10, 1.3123e-09, ..., 1.4179e-09, 2.3857e-09,\n 5.3894e-10],\n [1.4442e-09, 6.9420e-10, 1.1985e-09, ..., 1.0064e-09, 9.1804e-10,\n 4.6651e-10]], device='cuda:0')"
},
"14": {
"step": "tensor(5006.)",
"exp_avg": "tensor(-0.0016, device='cuda:0')",
"exp_avg_sq": "tensor(0.0002, device='cuda:0')"
},
"15": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-1.3015e-11, 7.3880e-12, -9.6587e-11, ..., 1.0481e-11,\n -7.3462e-13, 1.7540e-11],\n [-1.2943e-11, 9.2591e-12, 9.8614e-11, ..., 1.7914e-11,\n -9.2236e-12, 3.0437e-11],\n [-2.6757e-13, 3.7123e-13, -6.3616e-12, ..., -9.0666e-14,\n -4.3058e-13, 1.2047e-12],\n ...,\n [ 1.7961e-11, -9.3997e-12, 1.8041e-10, ..., -3.1297e-11,\n 1.3129e-12, -2.2425e-11],\n [-1.1870e-12, 1.8359e-13, 8.6724e-12, ..., -3.9059e-13,\n -1.2398e-13, -7.4524e-13],\n [-1.2111e-12, 1.6075e-13, 9.1538e-12, ..., -1.5647e-11,\n 2.9289e-12, -8.1898e-12]], device='cuda:0')",
"exp_avg_sq": "tensor([[8.3656e-14, 9.0579e-14, 2.5837e-13, ..., 3.1332e-14, 3.4250e-13,\n 1.9138e-13],\n [4.7889e-14, 3.1786e-14, 5.8213e-14, ..., 1.1357e-14, 4.3337e-14,\n 8.9354e-14],\n [1.0201e-14, 3.8561e-14, 1.4663e-14, ..., 3.2177e-15, 5.7293e-14,\n 6.5381e-15],\n ...,\n [2.6235e-13, 7.9215e-14, 3.0471e-13, ..., 2.2130e-14, 3.0890e-13,\n 9.4364e-15],\n [1.7111e-15, 3.5463e-15, 9.3295e-15, ..., 3.7334e-16, 1.0539e-14,\n 2.4281e-15],\n [6.3012e-14, 4.0755e-14, 6.0686e-14, ..., 1.5178e-14, 9.1410e-14,\n 1.5697e-13]], device='cuda:0')"
},
"16": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-1.7840e-10, 2.0906e-10, -6.0739e-12, 5.4428e-12, 5.5066e-11,\n -1.9396e-11, 1.5779e-11, -7.3389e-12, 1.2820e-11, -1.9285e-10,\n -3.8367e-12, 1.7359e-10, 2.0207e-10, 2.0593e-12, 1.0550e-12,\n -5.2117e-12, -8.0074e-12, -3.3948e-11, -8.7640e-12, 2.8634e-12,\n -9.1639e-12, 1.6886e-11, 1.2204e-11, 1.7621e-11, -2.4857e-10,\n 1.0675e-10, 5.1752e-12, -1.2270e-11, 1.0096e-10, 1.2335e-10,\n 6.2786e-11, -2.1163e-11, 1.2854e-11, 4.2930e-11, -2.1252e-11,\n -2.8812e-11, -1.0570e-10, -3.9923e-12, -2.2059e-11, 3.2135e-11,\n -1.8812e-11, 1.5223e-11, 2.8024e-11, -1.2042e-10, -2.0719e-10,\n 1.0486e-11, 8.5928e-12, -3.3800e-12, -1.7722e-10, 9.7119e-13,\n -1.1552e-12, -1.1884e-11, 7.9608e-12, -4.1708e-12, -1.5213e-11,\n 1.2343e-11, -2.2402e-11, -5.0099e-13, 1.2871e-11, 3.1771e-12,\n -1.4426e-11, 2.1037e-10, 1.2604e-11, -2.4968e-12], device='cuda:0')",
"exp_avg_sq": "tensor([1.1606e-10, 4.1386e-11, 1.5352e-11, 8.9856e-11, 6.8799e-12, 4.8101e-12,\n 3.2873e-10, 4.3838e-11, 5.4368e-11, 1.5145e-10, 4.1139e-13, 3.9407e-10,\n 2.5012e-11, 6.9938e-11, 3.7118e-10, 2.7578e-12, 2.5155e-10, 8.4395e-11,\n 4.8450e-11, 1.3959e-11, 9.0687e-12, 2.3024e-13, 1.0063e-11, 5.1936e-12,\n 5.5491e-11, 4.8310e-10, 1.2835e-11, 8.5174e-11, 1.5501e-11, 5.5845e-10,\n 1.9013e-11, 6.0837e-12, 4.3437e-12, 6.8669e-12, 5.2200e-11, 1.0904e-10,\n 9.4516e-10, 2.2872e-11, 1.4629e-11, 1.2989e-10, 9.1498e-11, 2.9458e-11,\n 7.2874e-11, 2.1702e-11, 3.7293e-10, 3.6922e-10, 5.9850e-12, 9.7246e-11,\n 1.3604e-10, 1.3260e-10, 4.1623e-12, 2.3391e-13, 5.1030e-10, 2.1271e-10,\n 1.0877e-11, 5.2628e-11, 6.3215e-11, 1.9992e-12, 4.4744e-12, 2.8275e-11,\n 3.8302e-11, 1.5777e-10, 3.4867e-12, 5.9893e-11], device='cuda:0')"
},
"17": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-3.6663e-11, 3.1694e-10, -2.9383e-12, -2.3204e-12, 8.2120e-11,\n 1.5868e-14, -1.3550e-11, -3.4264e-13, -1.0666e-11, -4.9501e-11,\n -1.8927e-13, 4.7801e-10, 3.6702e-10, -6.4797e-12, -7.4384e-12,\n 1.4828e-11, -4.2287e-12, 6.3288e-13, 1.5454e-10, -8.8197e-12,\n -3.8521e-13, -3.1128e-14, 4.9418e-11, 6.3192e-11, -1.2266e-10,\n 3.6587e-10, -6.4057e-12, 2.1698e-10, 1.1457e-10, 3.7511e-10,\n 1.6734e-10, -7.0832e-15, 2.4937e-11, 1.3056e-10, 9.5319e-13,\n 1.2999e-12, 1.0294e-10, -1.1065e-12, -2.9334e-13, 2.2942e-10,\n -2.9386e-14, 2.1375e-10, 2.5628e-10, 8.0657e-12, -6.7093e-11,\n -9.1375e-12, 2.9377e-12, -1.1553e-12, 2.1653e-11, -5.6422e-12,\n 1.8408e-11, 4.9638e-13, -5.5036e-12, -3.6359e-12, 2.6765e-13,\n -4.0475e-12, 9.1333e-13, -1.1936e-11, 9.9768e-12, -3.9995e-12,\n -3.6476e-16, 4.8126e-10, 2.7772e-12, 1.4312e-10], device='cuda:0')",
"exp_avg_sq": "tensor([7.3130e-13, 1.9819e-13, 1.1441e-13, 8.6939e-13, 2.6261e-14, 9.8734e-14,\n 1.9765e-12, 5.9280e-13, 3.2543e-13, 9.1244e-13, 3.1780e-14, 2.1744e-12,\n 1.5970e-13, 3.9940e-13, 1.9535e-12, 5.5212e-15, 1.6209e-12, 1.1350e-12,\n 2.9385e-13, 1.9736e-13, 7.7342e-14, 5.2774e-15, 3.9173e-14, 1.1325e-14,\n 5.4984e-13, 5.6569e-12, 9.7691e-14, 5.3590e-13, 8.6493e-14, 5.1119e-12,\n 7.5794e-14, 7.0434e-14, 8.9070e-15, 2.7339e-14, 6.0360e-13, 1.0051e-12,\n 8.2690e-12, 2.3353e-13, 3.4084e-13, 9.1646e-13, 9.2978e-13, 1.5145e-13,\n 4.2035e-13, 1.9062e-13, 2.3881e-12, 2.2655e-12, 1.4022e-13, 1.6891e-12,\n 8.8486e-13, 7.9149e-13, 4.3032e-15, 4.0656e-15, 5.9599e-12, 1.1462e-12,\n 2.3508e-13, 2.9053e-13, 3.6161e-13, 1.2582e-15, 6.1969e-15, 2.1528e-13,\n 3.6542e-13, 1.2805e-12, 8.2495e-14, 4.0297e-13], device='cuda:0')"
},
"18": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.2058e-11, 2.9535e-10, 3.3362e-12, 2.3758e-12, 1.4327e-10,\n 4.2449e-14, 1.3731e-11, 3.8710e-13, 1.0290e-11, -2.9923e-11,\n 1.5881e-13, 3.1192e-10, 2.9962e-10, 7.7704e-12, 8.2307e-12,\n 7.1674e-11, 4.4608e-12, -7.1515e-13, 1.3690e-10, 1.2907e-11,\n 4.2073e-13, 1.0388e-13, 1.0580e-10, 1.1423e-10, -6.4399e-11,\n 2.6328e-10, 6.9729e-12, 1.4611e-10, 1.7088e-10, 2.7394e-10,\n 1.7082e-10, -1.2474e-14, 9.5137e-11, 1.5262e-10, -9.8137e-13,\n -1.6633e-12, 7.6496e-11, 1.1618e-12, 6.4230e-13, 1.6908e-10,\n -1.4180e-13, 1.6449e-10, 1.8107e-10, 2.3378e-11, -3.8489e-11,\n 9.5083e-12, -2.7112e-12, 1.2892e-12, 9.8456e-12, 6.4280e-12,\n 8.0273e-11, -6.2446e-13, 6.2987e-12, 4.3024e-12, -2.7018e-13,\n 3.3881e-12, -9.0197e-13, 4.4703e-11, 8.2896e-11, 4.3214e-12,\n 9.0058e-14, 3.2064e-10, -2.3930e-12, 1.2967e-10], device='cuda:0')",
"exp_avg_sq": "tensor([1.0807e-12, 4.0139e-13, 2.4813e-13, 1.1377e-12, 4.5338e-14, 1.2717e-13,\n 3.6762e-12, 6.5544e-13, 6.9683e-13, 1.4072e-12, 3.8420e-14, 3.9780e-12,\n 2.6113e-13, 8.2937e-13, 4.2269e-12, 1.2202e-14, 2.8927e-12, 1.1474e-12,\n 3.4056e-13, 1.9469e-13, 1.7156e-13, 1.1245e-14, 4.9332e-14, 2.1720e-14,\n 7.2608e-13, 4.3475e-12, 1.8384e-13, 6.9539e-13, 1.3709e-13, 5.0640e-12,\n 1.3562e-13, 1.2695e-13, 1.4682e-14, 4.8359e-14, 7.3756e-13, 1.3850e-12,\n 9.3640e-12, 3.6818e-13, 2.9677e-13, 1.0883e-12, 1.2248e-12, 2.1808e-13,\n 6.6069e-13, 2.1853e-13, 3.6718e-12, 4.4308e-12, 1.4779e-13, 1.3405e-12,\n 1.2825e-12, 1.5536e-12, 8.0807e-15, 6.5408e-15, 6.0898e-12, 2.5965e-12,\n 2.3493e-13, 6.9657e-13, 8.4256e-13, 3.8151e-15, 1.0883e-14, 3.8136e-13,\n 5.3369e-13, 1.7600e-12, 9.8180e-14, 4.7056e-13], device='cuda:0')"
},
"19": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-1.5413e-10, 1.2463e-09, -1.8553e-10, -1.0029e-10, 7.4452e-10,\n -1.5992e-10, -1.7266e-10, -1.8151e-10, -1.6237e-10, -2.1984e-10,\n -1.9966e-11, 1.6236e-09, 1.4072e-09, -1.4119e-10, -1.5250e-10,\n 2.5307e-10, -1.6193e-10, -1.7467e-10, 1.1960e-09, -1.7501e-10,\n -1.8029e-10, 1.1799e-11, 5.4234e-10, 6.8369e-10, -2.7862e-10,\n 1.6895e-09, -1.7055e-10, 1.2925e-09, 6.6420e-10, 1.5146e-09,\n 1.1881e-09, -1.5348e-10, 4.2368e-10, 1.0201e-09, -1.4546e-10,\n -1.7714e-10, 3.4677e-10, -1.6466e-10, -1.7004e-10, 1.2408e-09,\n -1.7245e-10, 1.4214e-09, 1.4495e-09, 3.7628e-11, -2.3862e-10,\n -1.6111e-10, -1.6748e-10, -1.6426e-10, 1.1814e-10, -1.8746e-10,\n 3.7350e-10, -1.3909e-10, -1.2682e-10, -1.5820e-10, 8.7810e-12,\n -1.1103e-10, -1.7253e-10, -2.6874e-11, 2.7372e-10, -1.8412e-10,\n -1.3105e-10, 1.2076e-09, -1.6714e-10, 9.6631e-10],\n [ 2.0288e-10, -1.1879e-09, 1.8107e-10, 9.5907e-11, -7.0961e-10,\n 1.5456e-10, 1.6787e-10, 1.7793e-10, 1.5678e-10, 2.7701e-10,\n 1.5743e-11, -1.5938e-09, -1.3413e-09, 1.3534e-10, 1.4796e-10,\n -2.4968e-10, 1.5731e-10, 1.6863e-10, -1.1540e-09, 1.7183e-10,\n 1.7495e-10, -1.7514e-11, -5.2582e-10, -6.7490e-10, 3.3723e-10,\n -1.6448e-09, 1.6375e-10, -1.2627e-09, -6.3327e-10, -1.4593e-09,\n -1.1621e-09, 1.4840e-10, -4.1347e-10, -1.0005e-09, 1.4076e-10,\n 1.7175e-10, -3.0443e-10, 1.5969e-10, 1.6333e-10, -1.2022e-09,\n 1.6616e-10, -1.3823e-09, -1.3905e-09, -7.1859e-12, 3.0755e-10,\n 1.5477e-10, 1.6137e-10, 1.5756e-10, -8.9133e-11, 1.8185e-10,\n -3.6308e-10, 1.3285e-10, 1.2007e-10, 1.5060e-10, -1.4128e-11,\n 1.0714e-10, 1.6672e-10, 2.9879e-11, -2.6167e-10, 1.7726e-10,\n 1.2484e-10, -1.1454e-09, 1.6161e-10, -9.4067e-10]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.6424e-11, 5.2650e-12, 1.7811e-12, 5.7235e-11, 2.1768e-11, 7.5530e-11,\n 1.0923e-11, 1.0558e-10, 1.1271e-12, 5.0939e-11, 1.0681e-10, 3.2498e-11,\n 3.6462e-12, 1.1411e-12, 1.4506e-11, 2.0977e-11, 2.0543e-11, 1.7936e-10,\n 1.3831e-10, 1.5740e-13, 1.0146e-11, 8.7194e-12, 1.0077e-10, 3.5866e-11,\n 1.3241e-12, 2.7665e-10, 8.1923e-13, 6.9334e-11, 1.2793e-11, 1.6087e-10,\n 3.3660e-11, 1.2898e-11, 6.8467e-11, 1.5066e-11, 3.0277e-11, 5.7977e-11,\n 1.2483e-10, 2.5732e-11, 2.2585e-10, 7.3314e-11, 8.0551e-11, 3.6942e-11,\n 3.9392e-11, 2.8882e-11, 6.6062e-11, 3.1138e-11, 1.1669e-10, 2.3871e-10,\n 4.0130e-11, 6.1104e-12, 7.7450e-11, 4.2123e-12, 1.4964e-10, 1.8126e-11,\n 1.6608e-10, 1.3515e-11, 1.4641e-11, 1.9942e-11, 8.6461e-11, 5.6258e-12,\n 4.3117e-11, 3.8503e-12, 6.2132e-11, 1.1272e-10],\n [2.6424e-11, 5.2648e-12, 1.7811e-12, 5.7235e-11, 2.1768e-11, 7.5530e-11,\n 1.0923e-11, 1.0558e-10, 1.1271e-12, 5.0939e-11, 1.0681e-10, 3.2498e-11,\n 3.6460e-12, 1.1411e-12, 1.4506e-11, 2.0977e-11, 2.0543e-11, 1.7936e-10,\n 1.3831e-10, 1.5740e-13, 1.0146e-11, 8.7194e-12, 1.0077e-10, 3.5866e-11,\n 1.3239e-12, 2.7665e-10, 8.1923e-13, 6.9333e-11, 1.2793e-11, 1.6087e-10,\n 3.3660e-11, 1.2898e-11, 6.8467e-11, 1.5066e-11, 3.0277e-11, 5.7977e-11,\n 1.2483e-10, 2.5732e-11, 2.2585e-10, 7.3314e-11, 8.0551e-11, 3.6942e-11,\n 3.9391e-11, 2.8882e-11, 6.6062e-11, 3.1138e-11, 1.1669e-10, 2.3871e-10,\n 4.0130e-11, 6.1104e-12, 7.7450e-11, 4.2123e-12, 1.4964e-10, 1.8126e-11,\n 1.6608e-10, 1.3515e-11, 1.4641e-11, 1.9942e-11, 8.6461e-11, 5.6258e-12,\n 4.3117e-11, 3.8501e-12, 6.2132e-11, 1.1272e-10]], device='cuda:0')"
},
"20": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 8.8584e-10, -8.5368e-10], device='cuda:0')",
"exp_avg_sq": "tensor([4.8836e-10, 4.8836e-10], device='cuda:0')"
},
"21": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 3.7274e-14, 6.5196e-15, -9.4480e-14, ..., -1.2960e-14,\n -9.9918e-14, -6.4988e-14],\n [-7.5086e-14, -3.0603e-13, -3.0487e-13, ..., -2.3948e-13,\n 1.0456e-12, -6.4386e-13],\n [ 4.8375e-14, 1.0308e-13, -4.6882e-14, ..., -1.1986e-13,\n 1.3247e-13, -1.7845e-13],\n ...,\n [ 3.0635e-13, 2.3642e-14, -6.2734e-13, ..., 2.1177e-13,\n 3.1111e-12, -4.2317e-13],\n [-7.2158e-14, 9.6638e-14, -1.6123e-13, ..., -4.8174e-14,\n -1.8182e-13, 7.8090e-14],\n [ 6.5790e-13, 7.7567e-14, -2.0091e-12, ..., 6.4702e-13,\n -1.7622e-12, 1.4287e-12]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.2074e-16, 2.6479e-15, 2.5935e-15, ..., 5.1460e-16, 2.7778e-15,\n 4.8500e-15],\n [1.0079e-17, 4.2887e-16, 4.2718e-16, ..., 4.2950e-16, 6.1095e-17,\n 6.0833e-15],\n [7.3094e-16, 5.1192e-15, 4.2988e-15, ..., 1.8080e-15, 3.7231e-15,\n 1.4173e-14],\n ...,\n [1.7273e-15, 1.9509e-15, 6.1097e-16, ..., 4.3643e-16, 8.2723e-15,\n 5.5174e-15],\n [4.2506e-16, 5.0347e-16, 2.7218e-15, ..., 5.4115e-16, 1.2879e-15,\n 4.2561e-15],\n [1.3428e-14, 2.4691e-14, 4.8367e-14, ..., 1.8942e-14, 3.8170e-14,\n 3.7558e-14]], device='cuda:0')"
},
"22": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-9.0415e-13, -3.7276e-12, -1.0533e-12, 1.2496e-11, 6.8932e-13,\n 1.3830e-12, 4.3866e-12, -2.8368e-12, -2.2677e-12, -5.8835e-12,\n 3.6151e-12, -1.8986e-12, 5.3169e-13, -1.0459e-11, 1.0073e-12,\n 2.7111e-13, -7.7340e-12, -5.2955e-12, 5.9832e-13, 3.8376e-12,\n 3.7075e-13, -4.6565e-11, 4.5245e-12, 4.3206e-13, -5.4150e-12,\n -1.1352e-11, 1.0004e-12, 7.0019e-14, 2.0047e-12, 5.2299e-12,\n 1.3253e-12, -3.2326e-12, -1.2173e-12, 3.8387e-12, -1.7869e-13,\n 4.9283e-12, 1.8726e-12, -1.1519e-11, 9.8872e-12, -1.5930e-13,\n -5.8808e-12, -3.2495e-13, 1.5524e-12, -3.4351e-13, 8.7898e-12,\n 2.7495e-12, -1.4321e-12, -1.8130e-12, 8.0237e-12, 4.2367e-11,\n 4.3621e-13, 2.0237e-11, -1.0619e-12, 4.0967e-12, 1.9426e-13,\n 1.6815e-11, 1.7169e-12, -1.3208e-12, 1.0643e-12, -6.1120e-11,\n 1.8123e-11, -8.9027e-12, -9.7623e-13, 1.4409e-11], device='cuda:0')",
"exp_avg_sq": "tensor([2.0659e-12, 7.6957e-13, 6.0913e-12, 3.1088e-11, 7.8211e-12, 7.2414e-11,\n 3.6822e-11, 1.8632e-11, 1.4027e-12, 1.0877e-11, 2.4637e-11, 5.9402e-15,\n 3.9354e-11, 6.3869e-13, 5.5258e-11, 1.1213e-12, 6.3082e-12, 1.0290e-11,\n 4.0864e-14, 3.7620e-11, 2.8489e-12, 9.8037e-13, 9.6372e-12, 1.0597e-11,\n 9.9857e-12, 2.4709e-12, 5.9716e-12, 8.1404e-12, 2.3627e-11, 4.9560e-12,\n 2.5405e-11, 1.1638e-12, 4.2345e-12, 7.2662e-13, 3.0157e-12, 5.4660e-12,\n 5.1145e-13, 3.6448e-12, 1.5312e-11, 9.0105e-11, 8.2696e-13, 1.3760e-12,\n 5.2135e-11, 2.9673e-11, 5.8871e-11, 3.2957e-12, 1.8278e-11, 1.4600e-11,\n 7.8513e-12, 3.5724e-12, 2.7594e-12, 9.5647e-11, 1.3078e-13, 3.0916e-12,\n 5.2131e-12, 5.9340e-12, 2.8653e-12, 5.5703e-11, 8.5919e-11, 5.0126e-12,\n 4.9632e-11, 3.0809e-12, 1.6234e-12, 4.0073e-11], device='cuda:0')"
},
"23": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 6.6532e-15, -1.7399e-12, 1.5698e-13, 2.6700e-11, -1.6804e-13,\n -6.6042e-13, -1.2267e-12, 3.0119e-13, 7.4829e-12, 2.2049e-12,\n 1.9178e-11, -1.2172e-13, 1.9290e-11, 1.2276e-12, -7.3174e-13,\n 1.0260e-13, 3.5127e-12, 5.2023e-12, 2.5045e-13, 1.7901e-11,\n 1.6605e-13, -4.3218e-11, 1.0013e-11, -1.6808e-13, 1.1010e-11,\n 5.1708e-12, -6.8712e-15, -7.7393e-14, -3.4824e-13, 9.9289e-12,\n -8.7398e-13, 2.1827e-13, -9.3861e-14, 6.6585e-12, -1.0926e-13,\n 1.3796e-11, 3.3418e-13, -2.6629e-12, 2.3983e-11, -3.9928e-13,\n 3.9147e-12, 6.1447e-14, -6.3247e-13, -1.6383e-13, 3.2335e-11,\n -1.2118e-13, 2.9226e-13, -7.0516e-14, 2.5653e-11, 5.4623e-11,\n -1.3610e-13, 3.2065e-11, 4.2335e-13, 1.2175e-11, -6.6027e-14,\n 4.5916e-11, -1.0091e-13, 2.1557e-11, -9.8202e-13, -4.9703e-11,\n 3.2763e-11, 1.0193e-11, 1.6628e-13, 4.1282e-11], device='cuda:0')",
"exp_avg_sq": "tensor([9.8363e-15, 8.8949e-15, 7.8228e-14, 3.0497e-13, 4.8192e-14, 9.4516e-13,\n 1.7057e-13, 1.0987e-13, 1.6174e-14, 1.0147e-13, 1.8126e-13, 1.5978e-16,\n 2.3925e-13, 6.5061e-15, 4.3894e-13, 5.3159e-15, 4.5754e-14, 8.1076e-14,\n 1.3925e-17, 3.0757e-13, 1.2118e-14, 2.4440e-14, 7.7062e-14, 9.8431e-14,\n 9.9795e-14, 2.8350e-14, 4.8957e-14, 3.1128e-14, 1.2908e-13, 4.7505e-14,\n 1.0676e-13, 1.1548e-14, 1.6800e-14, 7.3081e-15, 1.3231e-14, 5.2758e-14,\n 1.5091e-15, 2.6941e-14, 1.3398e-13, 1.2568e-12, 8.4688e-15, 5.9191e-15,\n 4.1881e-13, 1.5500e-13, 4.9900e-13, 1.1914e-14, 9.5376e-14, 8.7062e-14,\n 6.2848e-14, 5.0964e-14, 1.1322e-14, 9.0226e-13, 1.2394e-15, 2.9288e-14,\n 2.1645e-14, 4.3696e-14, 1.3968e-14, 5.8586e-13, 7.2160e-13, 5.0966e-14,\n 4.4875e-13, 2.5239e-14, 1.2830e-14, 2.3264e-13], device='cuda:0')"
},
"24": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.4461e-14, 3.0825e-12, -1.6231e-13, 2.4724e-11, 1.7051e-13,\n 6.2332e-13, 1.0564e-12, -2.7201e-13, 1.0487e-11, 5.4138e-12,\n 1.7700e-11, 6.1295e-15, 1.4628e-11, 7.5164e-13, 7.7879e-13,\n -9.9103e-14, 3.6422e-12, 6.3073e-12, -2.5021e-13, 1.6488e-11,\n -1.6268e-13, -3.3783e-11, 1.2727e-11, 1.7490e-13, 9.5994e-12,\n 5.2264e-12, 8.0707e-15, 8.7465e-14, 2.6010e-13, 1.4085e-11,\n 8.1313e-13, 5.2873e-12, 1.0107e-13, 1.1171e-11, 1.2675e-13,\n 1.5639e-11, -3.0903e-13, -1.1879e-12, 2.2832e-11, 4.0028e-13,\n 5.6403e-12, -5.4925e-14, 6.1287e-13, 2.6996e-13, 2.2752e-11,\n 1.0330e-13, -2.5973e-13, 1.4029e-13, 1.5644e-11, 4.6215e-11,\n 1.2767e-13, 3.0090e-11, 1.5114e-12, 1.5670e-11, 6.9512e-14,\n 3.2012e-11, 9.1982e-14, 1.5142e-11, 9.6072e-13, -3.8478e-11,\n 3.0617e-11, 8.2408e-12, -1.7330e-13, 3.0740e-11], device='cuda:0')",
"exp_avg_sq": "tensor([1.3057e-14, 1.3283e-14, 4.5705e-14, 3.3400e-13, 6.6769e-14, 6.8599e-13,\n 3.2023e-13, 1.6539e-13, 2.1294e-14, 1.2783e-13, 2.8072e-13, 3.3031e-16,\n 4.4690e-13, 1.0925e-14, 5.4788e-13, 7.7738e-15, 7.7066e-14, 1.2254e-13,\n 3.2801e-17, 4.1359e-13, 2.0933e-14, 2.3298e-14, 1.1038e-13, 9.9458e-14,\n 1.1397e-13, 3.6972e-14, 5.1875e-14, 7.2591e-14, 2.1161e-13, 6.2742e-14,\n 2.4890e-13, 1.7618e-14, 3.5863e-14, 1.3694e-14, 2.4810e-14, 6.8480e-14,\n 3.1170e-15, 4.7604e-14, 1.7038e-13, 8.5875e-13, 1.3272e-14, 9.6760e-15,\n 4.6278e-13, 2.7103e-13, 6.1910e-13, 2.5628e-14, 1.8477e-13, 1.4539e-13,\n 1.0330e-13, 5.5985e-14, 2.1734e-14, 9.9493e-13, 3.0941e-15, 3.9980e-14,\n 4.4588e-14, 7.9302e-14, 1.9287e-14, 5.9148e-13, 8.3999e-13, 7.5907e-14,\n 5.3067e-13, 4.2750e-14, 1.2831e-14, 4.3395e-13], device='cuda:0')"
},
"25": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 1.8222e-11, 1.1954e-12, 1.2324e-11, -1.0575e-10, 1.3905e-11,\n 9.1382e-12, 8.6090e-12, 1.3500e-11, -6.2931e-11, -1.6913e-11,\n -8.3776e-11, 1.3697e-11, -6.4374e-11, -7.2104e-12, 1.2047e-11,\n 2.1007e-11, -1.8648e-11, -3.0187e-11, 9.0484e-12, -7.2059e-11,\n 1.5740e-11, 6.3454e-11, -5.6375e-11, 1.2025e-11, -7.2075e-11,\n -2.9581e-11, 1.1405e-11, 2.7846e-12, 3.2532e-12, -7.3990e-11,\n 1.3724e-11, -7.7780e-12, 1.7469e-11, -4.0627e-11, 9.5409e-12,\n -8.4790e-11, 1.5967e-11, 1.1626e-11, -1.3634e-10, 1.1875e-11,\n -3.7239e-11, 2.3816e-12, 1.5162e-11, 1.0252e-11, -1.1609e-10,\n 5.3061e-12, 1.1356e-12, 1.5799e-11, -5.4228e-11, -1.5412e-10,\n 6.7257e-12, -1.0936e-10, -3.5844e-12, -9.2197e-11, 1.5151e-11,\n -1.2500e-10, 1.5249e-11, -9.2065e-11, 1.1978e-11, 8.9162e-11,\n -1.2604e-10, -7.9588e-11, 1.1263e-11, -1.5439e-10],\n [-1.5597e-11, -8.9259e-12, -1.0334e-11, 8.0189e-11, -1.1983e-11,\n -6.0078e-12, -6.0541e-12, -1.1634e-11, 5.4135e-11, 6.0274e-12,\n 7.4643e-11, -1.1689e-11, 4.0787e-11, 1.4703e-12, -9.0629e-12,\n -1.9253e-11, 7.6162e-12, 1.2248e-11, -6.7445e-12, 5.3708e-11,\n -1.3564e-11, -8.4757e-11, 4.9049e-11, -9.3575e-12, 5.8436e-11,\n 1.8707e-11, -9.4612e-12, -1.3531e-12, -1.4980e-12, 6.0015e-11,\n -1.1670e-11, 1.1020e-12, -1.5189e-11, 3.5047e-11, -6.4067e-12,\n 7.2791e-11, -1.4166e-11, -2.1106e-11, 1.2291e-10, -9.9120e-12,\n 3.4277e-11, -1.4689e-12, -1.2600e-11, -8.7290e-12, 1.1041e-10,\n -3.8141e-12, 1.6651e-12, -1.4007e-11, 4.7791e-11, 1.3815e-10,\n -4.5792e-12, 9.7356e-11, 6.4033e-12, 7.6102e-11, -1.3450e-11,\n 1.1531e-10, -1.2848e-11, 8.2692e-11, -1.0816e-11, -1.0591e-10,\n 1.1125e-10, 6.5366e-11, -8.9087e-12, 1.3387e-10]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.4827e-12, 4.7309e-12, 4.3347e-11, 4.6659e-12, 5.1987e-12, 3.2718e-11,\n 3.4770e-12, 3.6325e-12, 6.9801e-12, 3.4437e-12, 4.2768e-12, 6.3458e-14,\n 9.1195e-13, 3.2107e-12, 6.1902e-12, 1.0808e-12, 1.9982e-12, 2.9355e-12,\n 9.7621e-14, 5.2821e-12, 4.3524e-12, 1.2243e-14, 4.8115e-12, 6.9997e-12,\n 1.1708e-11, 6.0581e-13, 1.2418e-11, 6.1127e-13, 3.4051e-12, 7.9724e-12,\n 1.2462e-12, 3.6296e-12, 1.5184e-12, 2.6538e-13, 7.4980e-13, 5.6842e-12,\n 5.0096e-13, 1.9282e-12, 8.6794e-12, 3.3154e-11, 2.7188e-12, 4.1194e-13,\n 1.4277e-11, 2.7064e-12, 7.5082e-12, 1.1748e-12, 1.8505e-12, 3.3045e-12,\n 3.3942e-14, 6.7574e-14, 1.3491e-12, 1.3466e-11, 3.9927e-13, 3.4508e-12,\n 3.1432e-12, 7.8557e-14, 2.8846e-12, 1.5195e-11, 1.1916e-11, 3.3984e-14,\n 7.7450e-12, 2.4887e-12, 1.6013e-11, 3.1546e-12],\n [5.4827e-12, 4.7309e-12, 4.3347e-11, 4.6659e-12, 5.1987e-12, 3.2718e-11,\n 3.4771e-12, 3.6325e-12, 6.9801e-12, 3.4437e-12, 4.2768e-12, 6.3458e-14,\n 9.1196e-13, 3.2107e-12, 6.1902e-12, 1.0808e-12, 1.9982e-12, 2.9355e-12,\n 9.7622e-14, 5.2821e-12, 4.3524e-12, 1.2250e-14, 4.8115e-12, 6.9998e-12,\n 1.1708e-11, 6.0581e-13, 1.2418e-11, 6.1127e-13, 3.4051e-12, 7.9724e-12,\n 1.2462e-12, 3.6297e-12, 1.5184e-12, 2.6538e-13, 7.4980e-13, 5.6842e-12,\n 5.0096e-13, 1.9282e-12, 8.6795e-12, 3.3154e-11, 2.7188e-12, 4.1194e-13,\n 1.4277e-11, 2.7065e-12, 7.5082e-12, 1.1748e-12, 1.8505e-12, 3.3045e-12,\n 3.3946e-14, 6.7577e-14, 1.3491e-12, 1.3466e-11, 3.9927e-13, 3.4508e-12,\n 3.1432e-12, 7.8562e-14, 2.8846e-12, 1.5195e-11, 1.1916e-11, 3.3990e-14,\n 7.7450e-12, 2.4887e-12, 1.6013e-11, 3.1546e-12]], device='cuda:0')"
},
"26": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-6.7622e-11, 5.3632e-11], device='cuda:0')",
"exp_avg_sq": "tensor([4.8029e-11, 4.8029e-11], device='cuda:0')"
},
"27": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-3.8616e-18, 2.6188e-18, 1.5485e-18, ..., 4.2419e-19,\n 3.4496e-18, -3.5950e-18],\n [ 1.1407e-17, 1.9128e-18, -1.5219e-17, ..., 4.9930e-19,\n 1.8511e-18, 3.3675e-19],\n [ 4.8348e-17, 7.8012e-19, -1.7505e-16, ..., -6.9549e-18,\n 1.4019e-17, -1.7158e-17],\n ...,\n [ 1.0150e-17, -2.7405e-18, 1.2133e-17, ..., 4.2858e-19,\n -2.2331e-19, -1.6842e-18],\n [-2.7171e-17, -4.6849e-18, 1.1248e-16, ..., 3.6599e-17,\n -1.0963e-18, -2.1785e-17],\n [ 1.5975e-17, -3.6062e-20, -4.5452e-18, ..., 2.3187e-18,\n 9.3335e-19, -1.6690e-18]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.9035e-14, 1.0087e-14, 4.2211e-15, ..., 9.4783e-15, 7.6353e-14,\n 3.7309e-14],\n [2.0598e-15, 8.2832e-16, 9.1614e-16, ..., 9.0815e-17, 6.6385e-15,\n 1.5664e-15],\n [5.8866e-15, 4.8601e-15, 5.9261e-15, ..., 2.7636e-16, 3.9758e-14,\n 9.7689e-16],\n ...,\n [3.4281e-14, 4.6347e-14, 4.4575e-14, ..., 3.5738e-14, 3.8651e-14,\n 5.4378e-14],\n [1.7831e-14, 2.3302e-14, 2.9472e-14, ..., 1.6968e-14, 1.8691e-14,\n 2.4119e-14],\n [7.1130e-16, 1.7711e-16, 3.2707e-16, ..., 1.3091e-16, 2.2118e-16,\n 6.6471e-16]], device='cuda:0')"
},
"28": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-1.1420e-17, -8.1861e-18, -1.9975e-16, -1.4445e-17, -5.2661e-16,\n 8.9005e-17, -9.8651e-17, 8.2827e-17, -1.9327e-17, -1.9034e-16,\n 4.1470e-17, -7.5563e-17, 3.1859e-16, -1.4230e-17, 1.6872e-17,\n -2.2431e-17, -1.0851e-16, -9.1533e-19, 2.4422e-17, 9.9130e-17,\n 6.3592e-17, -7.8756e-17, 4.6460e-17, 1.1202e-16, 2.3176e-16,\n -7.8115e-17, 7.6557e-17, -2.8735e-18, -3.1569e-16, 3.3051e-16,\n -2.1110e-17, -9.5170e-18, 4.7750e-17, 2.0624e-17, 3.7388e-17,\n 1.0786e-19, 4.2345e-16, -1.8663e-16, -1.8245e-17, -8.7701e-18,\n -1.5692e-17, 1.0405e-18, 1.4327e-16, 9.3006e-18, -2.2521e-16,\n 3.1927e-17, -5.1494e-16, -3.9848e-17, 4.1335e-16, -7.6389e-17,\n -8.9737e-17, 2.2055e-17, -3.1424e-17, -7.9313e-18, 2.0721e-16,\n -1.4066e-17, -1.2001e-17, -1.2073e-16, 6.8694e-17, -5.8522e-17,\n 5.2410e-17, 8.3540e-18, 1.8452e-16, 1.1893e-17], device='cuda:0')",
"exp_avg_sq": "tensor([1.3520e-11, 5.8235e-13, 2.5176e-12, 1.0604e-12, 3.4127e-11, 3.7936e-11,\n 3.1992e-11, 1.9056e-11, 6.9962e-13, 1.2780e-13, 1.3173e-11, 1.7665e-12,\n 9.3953e-11, 2.2863e-13, 4.0547e-11, 1.3424e-11, 9.0368e-13, 1.6199e-12,\n 6.4698e-12, 3.1291e-12, 7.3149e-12, 1.8871e-12, 3.3657e-12, 1.0180e-11,\n 7.1546e-11, 6.9232e-16, 1.5949e-11, 3.2792e-11, 4.8969e-11, 5.5461e-11,\n 3.6213e-12, 4.1833e-11, 1.3339e-11, 2.4714e-11, 6.7133e-11, 1.4933e-12,\n 6.6282e-11, 1.6794e-11, 7.0148e-12, 2.6002e-13, 1.0509e-10, 2.6962e-11,\n 5.4001e-12, 2.5187e-11, 1.5678e-12, 4.8095e-11, 2.4584e-12, 1.3475e-12,\n 8.3616e-11, 1.6627e-11, 9.9057e-13, 6.0126e-12, 1.6902e-11, 1.8159e-13,\n 1.1635e-11, 3.3575e-12, 2.9665e-12, 1.3182e-12, 5.5935e-11, 2.9042e-11,\n 7.1927e-12, 3.3681e-11, 2.3525e-11, 3.2721e-13], device='cuda:0')"
},
"29": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-1.2593e-18, 8.2118e-18, 2.3646e-16, 4.0667e-18, -1.2163e-16,\n 3.7673e-16, 4.0448e-18, 4.5679e-16, -4.3876e-18, 1.7748e-16,\n -8.0397e-18, 3.1231e-16, 7.3388e-16, 3.0991e-18, -1.3385e-17,\n -1.1017e-18, 3.9496e-16, 3.7849e-18, 5.8451e-16, 4.0583e-16,\n 1.7731e-16, -1.1416e-18, 1.4393e-17, -3.8325e-17, 7.6552e-16,\n -1.4551e-17, 4.7017e-16, 3.3113e-18, -2.5486e-17, 5.1491e-16,\n 1.0982e-18, -4.7376e-18, 2.3643e-16, -1.5825e-17, -2.9468e-17,\n 9.3389e-18, 7.0367e-16, 2.8448e-16, 2.1745e-18, 1.1132e-17,\n -5.6704e-18, 3.7273e-16, 4.6245e-16, -5.9441e-18, 1.3221e-16,\n -1.8542e-17, -9.3987e-17, 2.8033e-16, 1.1310e-15, 5.6218e-16,\n 2.4706e-16, 5.3390e-16, -2.8903e-19, 8.4882e-18, 6.0437e-16,\n 2.0602e-18, 5.0895e-18, 3.3430e-16, -2.6549e-17, 1.9645e-18,\n -3.5127e-20, -8.6153e-18, 4.9378e-16, 1.1170e-17], device='cuda:0')",
"exp_avg_sq": "tensor([5.6240e-14, 1.6612e-16, 2.2312e-14, 8.9927e-16, 3.6231e-13, 4.4784e-13,\n 2.0730e-13, 3.0539e-13, 1.6496e-15, 5.5099e-15, 5.4342e-14, 1.8960e-14,\n 1.1034e-12, 3.4862e-17, 2.0069e-13, 4.5464e-14, 2.5671e-14, 3.6066e-15,\n 7.7901e-14, 2.8962e-14, 5.0304e-14, 6.3254e-15, 6.3522e-15, 5.0039e-14,\n 5.4203e-13, 1.0426e-15, 1.2856e-13, 1.8475e-13, 3.8895e-13, 6.7936e-13,\n 8.6758e-15, 2.5561e-13, 1.1908e-13, 7.8765e-14, 3.7279e-13, 1.2828e-15,\n 9.4737e-13, 1.3485e-13, 2.8805e-14, 1.1795e-16, 1.2176e-12, 3.7927e-13,\n 5.7956e-14, 1.2721e-13, 1.6502e-14, 2.6321e-13, 2.0558e-14, 2.2882e-14,\n 7.1599e-13, 2.0903e-13, 1.0938e-14, 1.0262e-13, 8.1902e-14, 3.4066e-17,\n 8.3942e-14, 9.2296e-15, 5.4081e-15, 1.6081e-14, 3.6072e-13, 1.6865e-13,\n 2.5827e-14, 1.8978e-13, 2.0903e-13, 1.4027e-16], device='cuda:0')"
},
"30": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 1.3758e-18, -8.9220e-18, 2.1282e-16, -4.6121e-18, -1.2458e-16,\n 4.0718e-16, -5.4985e-18, 4.3868e-16, 3.7309e-18, 1.9329e-16,\n 7.6840e-18, 3.0683e-16, 6.2872e-16, -2.2913e-18, 1.2897e-17,\n 1.7602e-18, 3.1814e-16, -3.7612e-18, 4.7350e-16, 4.3062e-16,\n 3.1392e-16, 1.1672e-18, -1.3142e-17, 3.0200e-17, 5.6729e-16,\n 3.8147e-17, 4.6047e-16, -2.9211e-18, -2.0829e-17, 6.2796e-16,\n -1.4384e-18, 5.0119e-18, 3.5483e-16, 1.7435e-17, 2.8629e-17,\n -8.9422e-18, 7.4669e-16, 1.9112e-16, -2.6942e-18, -1.1382e-17,\n 6.5851e-18, 3.4438e-16, 4.9505e-16, 6.2637e-18, 1.4383e-16,\n 1.8757e-17, -8.1419e-17, 3.1313e-16, 8.3224e-16, 3.5325e-16,\n 2.7561e-16, 4.5441e-16, 6.8696e-19, -9.2177e-18, 5.5732e-16,\n -2.6579e-18, -5.5709e-18, 2.9843e-16, 2.2818e-17, -2.0588e-18,\n 3.0656e-20, 9.6456e-18, 5.2866e-16, -1.1473e-17], device='cuda:0')",
"exp_avg_sq": "tensor([7.9971e-14, 3.0998e-16, 3.7946e-14, 1.8229e-15, 4.4474e-13, 5.2376e-13,\n 2.0348e-13, 2.9474e-13, 2.6604e-15, 1.0042e-14, 8.8049e-14, 3.6169e-14,\n 1.1886e-12, 7.8471e-17, 3.1278e-13, 8.1901e-14, 3.2312e-14, 6.9217e-15,\n 1.1441e-13, 4.5827e-14, 8.5338e-14, 9.4866e-15, 9.1152e-15, 7.4905e-14,\n 8.4075e-13, 2.3991e-15, 2.2407e-13, 2.4214e-13, 6.2215e-13, 7.1783e-13,\n 1.4895e-14, 3.0107e-13, 1.7992e-13, 1.8942e-13, 5.5130e-13, 1.7686e-15,\n 8.5699e-13, 2.4418e-13, 2.4678e-14, 2.2470e-16, 7.7540e-13, 3.8396e-13,\n 9.2092e-14, 1.7765e-13, 3.5253e-14, 3.8334e-13, 4.4340e-14, 3.5401e-14,\n 1.0330e-12, 2.4296e-13, 2.2636e-14, 1.1120e-13, 1.0131e-13, 5.8622e-17,\n 1.5733e-13, 1.3317e-14, 6.5154e-15, 3.0896e-14, 4.3620e-13, 1.8597e-13,\n 4.0007e-14, 2.8619e-13, 3.0746e-13, 2.5628e-16], device='cuda:0')"
},
"31": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-4.0327e-16, -3.5989e-16, 2.3496e-15, -2.6941e-16, -5.2215e-16,\n 1.5664e-15, -4.6465e-16, 2.4335e-15, 1.1275e-16, 1.7188e-15,\n -2.2667e-16, 2.2381e-15, 2.3182e-15, 1.3115e-16, -2.9093e-16,\n -4.0834e-16, 3.4742e-15, -3.9946e-16, 3.6696e-15, 1.9735e-15,\n 6.2365e-16, -4.4825e-16, -4.0008e-16, -2.5080e-16, 2.3452e-15,\n -2.7390e-16, 2.2133e-15, 1.6616e-16, -8.4421e-17, 1.8919e-15,\n -3.3896e-16, -3.2846e-16, 1.0943e-15, -3.8424e-16, -3.9112e-16,\n -3.8888e-16, 2.5811e-15, 1.3650e-15, -3.4291e-16, -3.6096e-16,\n -3.5952e-16, 1.7925e-15, 2.8403e-15, -3.5354e-16, 9.9995e-16,\n -3.5078e-16, -5.6555e-16, 2.0855e-15, 3.4903e-15, 3.2481e-15,\n 1.8430e-15, 3.5716e-15, -3.7974e-16, -3.7975e-16, 2.4655e-15,\n -3.7914e-16, -3.4189e-16, 2.5101e-15, -2.9961e-16, -3.0334e-16,\n -4.0294e-16, -2.3478e-16, 2.1914e-15, -3.0290e-16],\n [ 4.0327e-16, 3.5989e-16, -2.3495e-15, 2.6941e-16, 5.2217e-16,\n -1.5664e-15, 4.6463e-16, -2.4334e-15, -1.1275e-16, -1.7188e-15,\n 2.2667e-16, -2.2380e-15, -2.3182e-15, -1.3115e-16, 2.9092e-16,\n 4.0833e-16, -3.4742e-15, 3.9945e-16, -3.6696e-15, -1.9734e-15,\n -6.2362e-16, 4.4825e-16, 4.0007e-16, 2.5079e-16, -2.3451e-15,\n 2.7389e-16, -2.2132e-15, -1.6616e-16, 8.4430e-17, -1.8918e-15,\n 3.3895e-16, 3.2845e-16, -1.0942e-15, 3.8423e-16, 3.9112e-16,\n 3.8887e-16, -2.5810e-15, -1.3650e-15, 3.4290e-16, 3.6096e-16,\n 3.5950e-16, -1.7924e-15, -2.8402e-15, 3.5355e-16, -9.9991e-16,\n 3.5078e-16, 5.6550e-16, -2.0854e-15, -3.4903e-15, -3.2480e-15,\n -1.8429e-15, -3.5716e-15, 3.7973e-16, 3.7975e-16, -2.4655e-15,\n 3.7912e-16, 3.4189e-16, -2.5101e-15, 2.9960e-16, 3.0331e-16,\n 4.0293e-16, 2.3477e-16, -2.1914e-15, 3.0289e-16]], device='cuda:0')",
"exp_avg_sq": "tensor([[6.5701e-12, 1.5625e-12, 8.2414e-15, 2.3847e-12, 1.0494e-11, 1.3311e-11,\n 1.7502e-11, 1.8726e-11, 1.3937e-13, 2.0783e-12, 2.6063e-12, 1.5306e-12,\n 1.5235e-11, 3.5907e-13, 5.1295e-12, 2.8953e-12, 7.8452e-12, 3.6691e-13,\n 6.1066e-12, 9.2339e-14, 8.9044e-15, 3.9311e-13, 7.6586e-12, 6.5409e-14,\n 5.2456e-12, 4.3135e-13, 3.8408e-12, 9.6527e-12, 3.9400e-12, 1.4279e-11,\n 1.4294e-12, 1.6003e-11, 3.1961e-12, 5.9376e-13, 4.6199e-12, 6.1205e-12,\n 2.0973e-11, 4.1351e-12, 2.2417e-11, 1.9083e-12, 4.4650e-11, 1.6387e-11,\n 3.9771e-12, 7.5901e-12, 1.6785e-12, 6.9044e-12, 4.4265e-13, 2.6879e-12,\n 7.4313e-12, 1.2417e-11, 6.9424e-13, 1.1464e-11, 1.0224e-11, 4.0836e-13,\n 7.0949e-13, 4.2844e-12, 9.4033e-12, 1.4084e-12, 1.0557e-11, 1.7155e-11,\n 3.4701e-12, 3.1519e-12, 5.7251e-12, 3.8565e-13],\n [6.5701e-12, 1.5625e-12, 8.2414e-15, 2.3847e-12, 1.0494e-11, 1.3311e-11,\n 1.7502e-11, 1.8726e-11, 1.3937e-13, 2.0783e-12, 2.6063e-12, 1.5306e-12,\n 1.5235e-11, 3.5907e-13, 5.1295e-12, 2.8953e-12, 7.8452e-12, 3.6691e-13,\n 6.1066e-12, 9.2339e-14, 8.9044e-15, 3.9311e-13, 7.6586e-12, 6.5409e-14,\n 5.2456e-12, 4.3135e-13, 3.8408e-12, 9.6527e-12, 3.9400e-12, 1.4279e-11,\n 1.4294e-12, 1.6003e-11, 3.1961e-12, 5.9376e-13, 4.6199e-12, 6.1205e-12,\n 2.0973e-11, 4.1351e-12, 2.2417e-11, 1.9083e-12, 4.4650e-11, 1.6387e-11,\n 3.9771e-12, 7.5901e-12, 1.6785e-12, 6.9044e-12, 4.4265e-13, 2.6879e-12,\n 7.4313e-12, 1.2417e-11, 6.9424e-13, 1.1464e-11, 1.0224e-11, 4.0836e-13,\n 7.0949e-13, 4.2844e-12, 9.4033e-12, 1.4084e-12, 1.0557e-11, 1.7155e-11,\n 3.4701e-12, 3.1519e-12, 5.7251e-12, 3.8565e-13]], device='cuda:0')"
},
"32": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 1.9503e-15, -1.9502e-15], device='cuda:0')",
"exp_avg_sq": "tensor([5.9527e-11, 5.9527e-11], device='cuda:0')"
},
"33": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 2.1743e-18, -6.1897e-18, -1.0596e-17, ..., -2.9035e-18,\n 1.6597e-17, -3.2396e-17],\n [ 8.7781e-19, -4.1219e-18, 5.7592e-18, ..., -2.3706e-18,\n 1.3670e-18, 9.6446e-19],\n [-8.1375e-18, -3.9040e-17, -2.2077e-18, ..., -2.2963e-17,\n 7.0850e-18, -5.1500e-18],\n ...,\n [-1.7058e-17, -3.0106e-17, 3.1943e-17, ..., 1.5647e-17,\n -1.0438e-16, 4.7068e-17],\n [ 3.7790e-18, 1.4492e-18, -4.1457e-19, ..., -7.7472e-19,\n 1.4349e-18, 2.2846e-18],\n [-2.5517e-18, -1.7524e-18, 1.9487e-18, ..., 1.4662e-18,\n 6.2489e-18, -4.7265e-18]], device='cuda:0')",
"exp_avg_sq": "tensor([[6.8866e-17, 3.8359e-19, 1.1058e-17, ..., 3.1496e-17, 2.5981e-16,\n 8.7401e-19],\n [6.3120e-14, 4.0511e-14, 1.1936e-14, ..., 2.9053e-14, 9.8134e-14,\n 2.0734e-13],\n [2.6464e-15, 7.5468e-15, 4.0568e-15, ..., 4.5742e-15, 1.7556e-14,\n 3.7168e-14],\n ...,\n [2.3564e-14, 2.6443e-14, 3.8659e-14, ..., 8.7840e-14, 2.4472e-13,\n 2.3190e-13],\n [6.3585e-15, 1.8016e-15, 1.4766e-14, ..., 2.1384e-15, 6.8025e-15,\n 2.3128e-14],\n [2.7466e-16, 9.2540e-17, 2.1357e-17, ..., 7.2919e-17, 1.5298e-15,\n 5.8603e-17]], device='cuda:0')"
},
"34": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-9.9624e-17, -1.2483e-17, -1.8348e-16, 2.9578e-17, 3.1559e-17,\n -1.2549e-17, 2.6279e-16, 2.5127e-18, 5.1979e-18, 5.4168e-16,\n 1.6359e-17, -1.3439e-16, -2.1947e-16, 3.0750e-17, -1.6505e-16,\n -1.8488e-16, 9.9467e-18, -1.8853e-16, -8.0850e-18, 3.5766e-16,\n -1.3153e-16, 6.4261e-17, -7.6842e-18, -2.6991e-16, 1.2884e-16,\n 1.9978e-17, -1.6341e-16, -5.8624e-17, 1.2944e-17, 3.1719e-17,\n -1.7821e-17, -9.1401e-18, 4.5080e-18, 2.5747e-17, -1.3820e-16,\n 1.2130e-16, 9.7604e-17, -2.6302e-18, 8.5923e-18, 2.0667e-17,\n 2.2196e-16, -1.7797e-17, -1.0407e-17, -9.7208e-17, 4.8047e-16,\n -3.2626e-16, -9.0326e-18, 4.0893e-17, -1.4999e-16, -7.9804e-17,\n -1.6719e-16, -7.5433e-17, -5.0733e-17, -1.1493e-17, 7.8548e-18,\n 4.1245e-16, -2.7618e-16, -2.2358e-17, 2.4084e-17, 3.7154e-17,\n 2.7485e-16, -5.9533e-17, 2.5208e-17, 1.1831e-17], device='cuda:0')",
"exp_avg_sq": "tensor([1.2168e-15, 8.8346e-11, 1.0181e-11, 1.9230e-13, 4.6719e-12, 3.3322e-13,\n 4.8126e-11, 1.3418e-14, 5.0938e-12, 1.1432e-10, 2.2013e-12, 3.2242e-13,\n 3.9081e-13, 1.7364e-11, 5.8796e-11, 2.1312e-12, 2.0898e-11, 5.8813e-13,\n 9.0641e-12, 2.5406e-11, 3.8789e-12, 6.3805e-11, 2.7773e-12, 1.5178e-11,\n 1.7254e-10, 1.8746e-11, 2.0487e-12, 6.8044e-12, 1.6093e-11, 2.6027e-12,\n 2.2062e-13, 9.5104e-11, 2.6367e-12, 3.9723e-12, 1.1235e-12, 7.3082e-11,\n 2.1948e-11, 2.5000e-11, 7.1244e-11, 4.4376e-13, 1.0952e-11, 7.6936e-11,\n 4.0061e-11, 2.6807e-12, 7.4945e-11, 3.5347e-12, 3.1653e-11, 8.8201e-11,\n 6.1745e-13, 1.1538e-12, 3.7295e-11, 3.2646e-14, 2.6463e-11, 7.7637e-13,\n 4.3572e-11, 6.0585e-11, 3.5456e-11, 4.2456e-11, 1.7323e-11, 1.6833e-14,\n 4.5761e-11, 1.1372e-10, 1.3381e-11, 2.3347e-13], device='cuda:0')"
},
"35": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 6.9833e-18, 4.1307e-18, -1.4934e-17, 4.7009e-18, 3.5202e-16,\n 1.6094e-18, 4.6752e-16, 9.1482e-18, -6.0397e-19, 7.7188e-16,\n 8.5438e-19, 1.0994e-16, -1.5960e-17, -1.0384e-17, -4.5122e-17,\n 1.2209e-16, -5.5307e-20, 1.6110e-16, -9.0319e-19, 3.9296e-16,\n 1.5449e-16, -3.2238e-17, -1.6457e-18, 1.0789e-16, 3.4478e-16,\n -3.5360e-18, 2.7265e-16, 2.7297e-16, -9.8064e-18, 2.9489e-19,\n 5.4034e-18, -7.1823e-18, 2.4580e-18, 4.6479e-20, 1.6135e-16,\n 6.0455e-16, 5.3472e-16, -1.7435e-18, -2.3967e-17, 4.5851e-18,\n 4.7826e-16, -6.5859e-19, -1.5253e-18, 1.6575e-16, 7.7849e-16,\n 3.3861e-17, 2.6266e-18, -1.3635e-17, 1.3721e-16, 2.0951e-16,\n 1.1932e-16, 3.2778e-17, 5.5504e-18, 1.0988e-17, -1.6642e-17,\n 7.2440e-16, 3.4542e-17, 6.1706e-18, -1.1239e-17, 7.6661e-18,\n 4.2882e-16, 1.4511e-16, -8.2222e-18, 4.0821e-18], device='cuda:0')",
"exp_avg_sq": "tensor([1.8411e-15, 6.1592e-13, 4.0261e-14, 2.3345e-15, 2.6967e-14, 5.0141e-15,\n 2.6597e-13, 2.4438e-15, 4.1341e-14, 6.3232e-13, 1.6348e-14, 1.5759e-16,\n 4.8555e-16, 1.1146e-13, 2.5811e-13, 9.1149e-15, 1.3447e-13, 5.3687e-16,\n 7.7073e-14, 1.3261e-13, 2.3166e-14, 4.7612e-13, 2.0532e-14, 7.5012e-14,\n 2.5572e-12, 1.5538e-13, 7.8718e-15, 2.3357e-14, 1.1500e-13, 1.8686e-14,\n 6.4792e-15, 7.9662e-13, 4.5012e-14, 2.5521e-14, 2.8992e-15, 3.8048e-13,\n 1.1981e-13, 2.7769e-13, 5.4664e-13, 4.4100e-15, 4.1571e-14, 8.4257e-13,\n 4.9785e-13, 1.3797e-14, 4.8054e-13, 1.5905e-14, 3.1573e-13, 5.4881e-13,\n 8.2733e-16, 4.0023e-15, 2.2258e-13, 1.9134e-18, 1.7306e-13, 6.2746e-15,\n 3.2636e-13, 3.9311e-13, 2.4168e-13, 5.0663e-13, 1.1211e-13, 1.8770e-15,\n 3.4298e-13, 8.8195e-13, 8.4329e-14, 3.0427e-15], device='cuda:0')"
},
"36": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 4.6393e-17, -1.9626e-18, 2.8847e-17, -4.5353e-18, 3.1550e-16,\n -2.3857e-18, 4.4559e-16, -1.5419e-17, 5.6449e-19, 6.5154e-16,\n -8.9713e-19, 1.2475e-16, 2.7006e-17, 9.5599e-18, -6.2661e-17,\n 1.0129e-16, 1.5782e-18, 1.3305e-16, 1.9219e-18, 4.6076e-16,\n 1.4586e-16, 2.8206e-17, 1.8882e-18, 7.2645e-17, 2.7888e-16,\n 3.8172e-18, 1.9062e-16, 2.2447e-16, 1.3015e-17, -2.3228e-19,\n -5.6924e-18, 1.0117e-17, -2.4535e-18, -4.4409e-20, 1.4852e-16,\n 4.0984e-16, 4.1584e-16, 2.1347e-18, 2.2047e-17, -4.0891e-18,\n 4.6265e-16, 1.3309e-18, 1.4077e-18, 1.5975e-16, 6.4851e-16,\n 2.7389e-17, -4.5580e-19, 1.1907e-17, 1.4473e-16, 2.0288e-16,\n 1.3939e-16, 9.4697e-17, -4.2594e-18, -1.1209e-17, 1.5703e-17,\n 5.8998e-16, -2.7195e-18, -5.2722e-18, 1.0419e-17, -3.3646e-17,\n 4.2569e-16, 2.0915e-16, 7.7750e-18, -3.1357e-18], device='cuda:0')",
"exp_avg_sq": "tensor([2.1977e-15, 9.4621e-13, 8.1098e-14, 4.0571e-15, 3.0923e-14, 9.1295e-15,\n 4.5745e-13, 3.3508e-15, 6.7377e-14, 1.0754e-12, 3.2851e-14, 2.1870e-16,\n 8.5506e-16, 1.9669e-13, 5.1916e-13, 1.0011e-14, 2.3469e-13, 6.5483e-16,\n 1.1951e-13, 2.0210e-13, 2.9371e-14, 6.7256e-13, 3.8819e-14, 1.1877e-13,\n 1.5264e-12, 2.1886e-13, 8.7607e-15, 4.9861e-14, 1.8208e-13, 3.3784e-14,\n 9.0871e-15, 1.0301e-12, 4.4508e-14, 5.3978e-14, 5.3313e-15, 6.3190e-13,\n 1.7891e-13, 3.0511e-13, 7.7762e-13, 7.9865e-15, 8.7396e-14, 8.2519e-13,\n 4.6652e-13, 2.0453e-14, 6.9121e-13, 1.7922e-14, 3.8891e-13, 9.2559e-13,\n 1.1847e-15, 4.4212e-15, 3.2241e-13, 3.4709e-18, 3.0168e-13, 1.0765e-14,\n 4.6010e-13, 5.6694e-13, 3.2508e-13, 4.8081e-13, 2.1459e-13, 2.6883e-15,\n 4.0584e-13, 9.7503e-13, 1.6069e-13, 4.6338e-15], device='cuda:0')"
},
"37": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-1.1495e-16, 2.7202e-16, 1.1272e-16, 4.8016e-17, -3.3352e-15,\n 3.3028e-16, -1.8585e-15, 3.2418e-16, 2.2366e-16, -2.3152e-15,\n 1.5769e-16, -1.5708e-15, 2.2204e-16, 1.9676e-16, 1.6235e-16,\n -1.3895e-15, 1.5448e-16, -2.2962e-15, 3.2213e-16, -1.5301e-15,\n -1.6543e-15, 9.8164e-17, 3.7922e-16, -4.9496e-16, -1.3314e-15,\n 2.8051e-16, -3.2238e-15, -1.8742e-15, 2.6946e-16, -1.2836e-16,\n 4.1842e-16, 3.9568e-16, 3.8281e-16, -8.0593e-17, -1.5823e-15,\n -2.0438e-15, -3.0338e-15, 2.5246e-16, 4.4787e-16, 1.4559e-16,\n -2.6553e-15, 2.2041e-16, 2.5986e-16, -1.0868e-15, -2.5730e-15,\n -3.8212e-16, 3.4426e-16, 1.5524e-16, -1.8263e-15, -2.6506e-15,\n -4.9891e-16, -5.1334e-16, 2.2837e-16, 8.7984e-17, 3.6462e-16,\n -3.0017e-15, -1.7371e-16, -4.4130e-17, 3.1766e-16, 5.3397e-16,\n -1.6991e-15, -4.8540e-16, 3.2324e-16, 1.4185e-16],\n [ 1.1513e-16, -2.7203e-16, -1.1291e-16, -4.7992e-17, 3.3349e-15,\n -3.3030e-16, 1.8584e-15, -3.2414e-16, -2.2366e-16, 2.3149e-15,\n -1.5767e-16, 1.5708e-15, -2.2215e-16, -1.9690e-16, -1.6245e-16,\n 1.3902e-15, -1.5446e-16, 2.2964e-15, -3.2208e-16, 1.5299e-15,\n 1.6543e-15, -9.8126e-17, -3.7922e-16, 4.9513e-16, 1.3315e-15,\n -2.8045e-16, 3.2237e-15, 1.8737e-15, -2.6947e-16, 1.2854e-16,\n -4.1838e-16, -3.9577e-16, -3.8278e-16, 8.0751e-17, 1.5821e-15,\n 2.0438e-15, 3.0335e-15, -2.5241e-16, -4.4780e-16, -1.4556e-16,\n 2.6552e-15, -2.2038e-16, -2.5984e-16, 1.0864e-15, 2.5736e-15,\n 3.8192e-16, -3.4424e-16, -1.5517e-16, 1.8265e-15, 2.6502e-15,\n 4.9880e-16, 5.1340e-16, -2.2839e-16, -8.7801e-17, -3.6459e-16,\n 3.0018e-15, 1.7270e-16, 4.4316e-17, -3.1778e-16, -5.3401e-16,\n 1.6990e-15, 4.8536e-16, -3.2330e-16, -1.4183e-16]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.2839e-11, 7.5618e-12, 1.0978e-12, 2.2779e-13, 1.3952e-11, 1.3118e-12,\n 4.8993e-12, 1.0907e-11, 3.6845e-12, 5.7311e-12, 2.8852e-12, 6.3261e-12,\n 2.5547e-12, 3.8084e-12, 3.2310e-12, 1.2963e-11, 3.5281e-12, 1.7417e-11,\n 5.3368e-12, 1.0474e-12, 5.5101e-14, 4.5437e-12, 6.6494e-13, 6.8527e-13,\n 6.9182e-11, 1.2782e-11, 1.3335e-11, 1.2244e-12, 6.1902e-13, 7.4267e-13,\n 1.0166e-11, 1.1270e-11, 2.1042e-11, 3.0961e-12, 3.2595e-13, 3.9785e-12,\n 6.2460e-12, 2.0640e-11, 9.6428e-12, 8.5815e-13, 9.2002e-13, 2.5060e-11,\n 2.8043e-11, 1.0740e-13, 5.1301e-12, 2.0324e-11, 1.1389e-11, 5.1122e-12,\n 9.0703e-12, 1.8055e-11, 4.6782e-12, 2.1698e-13, 4.3284e-12, 5.0868e-14,\n 8.6180e-12, 9.2516e-12, 6.6604e-12, 2.5271e-11, 3.2999e-12, 8.4189e-12,\n 6.7145e-12, 1.5837e-11, 4.2759e-12, 5.5560e-13],\n [1.2839e-11, 7.5618e-12, 1.0978e-12, 2.2779e-13, 1.3952e-11, 1.3118e-12,\n 4.8993e-12, 1.0907e-11, 3.6845e-12, 5.7311e-12, 2.8852e-12, 6.3261e-12,\n 2.5547e-12, 3.8084e-12, 3.2310e-12, 1.2963e-11, 3.5281e-12, 1.7417e-11,\n 5.3368e-12, 1.0474e-12, 5.5101e-14, 4.5437e-12, 6.6494e-13, 6.8527e-13,\n 6.9182e-11, 1.2782e-11, 1.3335e-11, 1.2244e-12, 6.1902e-13, 7.4267e-13,\n 1.0166e-11, 1.1270e-11, 2.1042e-11, 3.0961e-12, 3.2595e-13, 3.9785e-12,\n 6.2460e-12, 2.0640e-11, 9.6428e-12, 8.5815e-13, 9.2002e-13, 2.5060e-11,\n 2.8043e-11, 1.0740e-13, 5.1301e-12, 2.0324e-11, 1.1389e-11, 5.1122e-12,\n 9.0703e-12, 1.8055e-11, 4.6782e-12, 2.1698e-13, 4.3284e-12, 5.0868e-14,\n 8.6180e-12, 9.2516e-12, 6.6604e-12, 2.5271e-11, 3.2999e-12, 8.4189e-12,\n 6.7145e-12, 1.5837e-11, 4.2759e-12, 5.5560e-13]], device='cuda:0')"
},
"38": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-1.4727e-15, 1.4724e-15], device='cuda:0')",
"exp_avg_sq": "tensor([7.4491e-11, 7.4491e-11], device='cuda:0')"
},
"39": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 5.5119e-13, 8.3203e-13, -3.9151e-13, ..., -2.7099e-12,\n -4.2327e-13, -2.0786e-13],\n [-1.8088e-13, 4.9662e-14, 4.8814e-13, ..., 1.1678e-13,\n -2.3937e-14, -2.5037e-13],\n [ 1.2626e-12, 1.0179e-12, 2.5555e-12, ..., 3.7440e-12,\n -1.7107e-13, 2.4846e-12],\n ...,\n [ 6.9863e-13, -1.3867e-12, 5.8241e-13, ..., 2.0356e-12,\n -7.8000e-14, -1.3402e-13],\n [ 8.0772e-14, -2.6199e-14, 2.9046e-14, ..., -2.5823e-14,\n 2.1014e-14, 6.3164e-14],\n [-9.9222e-14, -3.1161e-15, 5.8856e-14, ..., -5.3185e-13,\n -1.0044e-13, -4.2711e-14]], device='cuda:0')",
"exp_avg_sq": "tensor([[3.1767e-14, 5.1253e-14, 4.0965e-14, ..., 1.1939e-14, 5.6462e-14,\n 9.2243e-14],\n [2.5723e-14, 1.0299e-13, 4.6605e-14, ..., 1.5773e-14, 1.7585e-14,\n 2.2104e-13],\n [1.1356e-14, 1.4518e-14, 3.2225e-15, ..., 1.3730e-15, 1.8172e-14,\n 1.6084e-14],\n ...,\n [1.7230e-15, 1.0154e-14, 7.6993e-15, ..., 6.5046e-16, 3.4626e-15,\n 1.1959e-14],\n [7.8023e-16, 4.2811e-15, 4.3336e-15, ..., 5.6994e-16, 5.5532e-17,\n 8.5510e-15],\n [2.5183e-15, 5.3953e-15, 8.9695e-15, ..., 5.7824e-16, 3.8993e-15,\n 7.1663e-15]], device='cuda:0')"
},
"40": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 6.2797e-12, -2.4535e-12, 2.8751e-11, -7.7267e-13, -4.7726e-12,\n 1.5168e-12, 8.2453e-14, -4.3707e-13, 1.5485e-11, -2.2903e-12,\n 1.9949e-12, 2.1249e-12, -8.4570e-13, -1.8416e-13, -2.0212e-12,\n 8.9644e-12, -1.0942e-12, 6.2756e-12, -1.2698e-13, -5.6595e-13,\n -2.9009e-12, 1.1343e-12, 5.2247e-13, 9.0514e-14, 5.7704e-12,\n -1.6063e-13, 6.1030e-12, 1.0192e-11, -2.3380e-11, 9.8043e-12,\n -3.0743e-11, 9.6459e-13, -5.2118e-11, 7.9549e-13, -1.9466e-12,\n -8.8766e-13, -1.1791e-12, 1.4025e-12, 1.8297e-12, -4.5821e-12,\n -5.8907e-13, 4.5137e-12, 3.3624e-12, -8.1315e-13, -1.9568e-12,\n -6.1358e-13, -7.7635e-13, 4.5374e-13, 2.2563e-13, -1.0249e-11,\n 5.4820e-12, 2.0914e-13, 5.3713e-12, 2.6240e-12, 1.2355e-12,\n -1.4300e-12, -5.7659e-13, 4.1976e-13, 1.0776e-11, -4.1417e-12,\n 2.9802e-12, 8.2234e-12, 1.7390e-13, -1.5273e-12], device='cuda:0')",
"exp_avg_sq": "tensor([6.0506e-11, 9.9222e-11, 1.4041e-11, 6.9040e-12, 7.4276e-13, 7.5157e-12,\n 4.3422e-12, 9.0784e-14, 5.5655e-12, 2.1442e-12, 2.3175e-11, 2.7975e-13,\n 2.8509e-12, 1.0214e-11, 1.7739e-11, 4.8738e-12, 1.0983e-11, 1.4991e-11,\n 1.4228e-11, 8.0104e-12, 2.5798e-11, 6.9478e-13, 1.2221e-12, 1.4829e-13,\n 4.0369e-11, 1.8298e-12, 3.0964e-11, 3.7022e-12, 1.2044e-12, 2.8397e-12,\n 9.3089e-12, 5.1344e-11, 4.1718e-12, 7.3056e-14, 4.1992e-11, 5.1542e-13,\n 3.8843e-12, 3.6534e-11, 2.1532e-11, 2.1707e-12, 1.0062e-11, 1.8413e-12,\n 1.0275e-12, 4.8104e-13, 1.0362e-12, 1.8177e-11, 4.6098e-12, 3.6842e-12,\n 2.4565e-12, 7.0207e-12, 5.6165e-13, 1.3336e-12, 1.6042e-11, 6.7747e-12,\n 3.8589e-14, 6.2067e-14, 4.5829e-11, 1.6643e-13, 2.2906e-12, 2.9783e-11,\n 6.1696e-12, 6.4608e-12, 4.1337e-12, 4.7782e-12], device='cuda:0')"
},
"41": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.9717e-11, 1.3466e-12, -1.4907e-11, 1.9072e-13, -4.0395e-12,\n -2.5425e-11, 2.6727e-13, -3.3262e-13, -1.7341e-11, 1.5051e-13,\n 2.5919e-13, 4.2720e-13, -1.3745e-11, -3.4233e-11, -5.4391e-11,\n -2.1672e-11, 2.4235e-13, -2.4837e-11, 2.8403e-13, -1.3659e-14,\n 1.1135e-12, -9.8914e-15, 1.3547e-14, -8.4016e-17, -2.7380e-11,\n -1.1856e-11, -3.6795e-11, -1.6953e-11, -4.4502e-11, -3.5452e-12,\n -8.1950e-11, 1.8069e-13, -1.2125e-10, -2.8140e-13, 1.3771e-12,\n -2.2953e-13, 1.0051e-13, -3.3149e-11, -5.4729e-15, -2.7029e-11,\n 1.5830e-13, -1.4185e-12, -6.9155e-12, -1.9635e-13, -1.2890e-13,\n 5.5727e-14, 9.3985e-14, 8.8534e-14, 8.4585e-14, -5.1552e-11,\n 6.5792e-13, -1.4124e-11, -3.1607e-11, -2.3620e-13, -6.6095e-14,\n -6.5557e-13, -3.6024e-11, -4.3870e-13, -5.5469e-12, 1.9671e-12,\n -2.1197e-11, -2.4269e-11, -1.8687e-15, 1.8587e-14], device='cuda:0')",
"exp_avg_sq": "tensor([9.9153e-13, 1.1967e-12, 1.1851e-13, 3.6319e-14, 8.8875e-15, 8.3469e-14,\n 1.7918e-14, 2.8165e-16, 6.6033e-14, 1.0335e-14, 1.0700e-13, 7.1315e-16,\n 4.0188e-14, 9.6259e-14, 2.1319e-13, 5.3177e-14, 8.5862e-14, 1.7606e-13,\n 6.8122e-14, 7.9025e-14, 1.8735e-13, 3.6079e-15, 5.2509e-15, 5.4975e-16,\n 5.1707e-13, 2.3317e-14, 2.7822e-13, 4.7280e-14, 1.9889e-14, 4.1401e-14,\n 9.2872e-14, 3.9617e-13, 5.4701e-14, 1.2018e-17, 2.6266e-13, 2.5798e-15,\n 2.4956e-14, 4.1311e-13, 1.5242e-13, 2.8601e-14, 7.4408e-14, 3.1424e-14,\n 1.0284e-14, 2.1626e-15, 4.8575e-15, 1.6522e-13, 2.7417e-14, 1.9228e-14,\n 1.0682e-14, 7.1715e-14, 4.6369e-15, 1.7717e-14, 1.7560e-13, 3.3219e-14,\n 3.7843e-18, 1.6950e-17, 5.9567e-13, 6.0488e-16, 3.1392e-14, 1.9149e-13,\n 7.0940e-14, 6.9091e-14, 2.8325e-14, 5.2030e-14], device='cuda:0')"
},
"42": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.2121e-11, -1.4490e-12, -8.1183e-12, -1.8629e-13, -1.8408e-11,\n -2.2855e-11, -3.4300e-13, 3.4229e-13, -1.4357e-11, -1.5536e-13,\n -3.7630e-13, -7.0973e-13, -2.0095e-11, -2.7263e-11, -3.6205e-11,\n -1.5657e-11, -2.7037e-13, -2.0540e-11, -3.6025e-13, 6.6914e-15,\n -9.6533e-13, 5.3045e-15, -1.1644e-14, 4.1384e-15, -2.1019e-11,\n -1.8833e-11, -2.7107e-11, -1.6571e-11, -4.7044e-11, -6.5542e-12,\n -5.9883e-11, -2.5576e-13, -8.1851e-11, 3.3436e-13, -1.4473e-12,\n 2.1198e-13, -9.5466e-14, -2.5368e-11, -1.2263e-15, -2.7929e-11,\n -1.6018e-13, -6.4726e-12, -1.4488e-11, 2.1382e-13, 1.2720e-13,\n -9.3010e-14, -9.8529e-14, -1.1143e-13, -1.0912e-13, -3.8702e-11,\n -1.2024e-12, -2.0631e-11, -2.3625e-11, 1.9895e-13, 6.9549e-14,\n 6.2149e-13, -2.8282e-11, 2.9232e-13, -8.1759e-12, -2.0342e-12,\n -1.9946e-11, -2.0061e-11, -2.3187e-15, -2.8238e-14], device='cuda:0')",
"exp_avg_sq": "tensor([6.8940e-13, 9.4555e-13, 2.1159e-13, 6.3859e-14, 1.5226e-14, 1.0908e-13,\n 4.3431e-14, 4.7646e-16, 9.3565e-14, 1.9460e-14, 2.2923e-13, 1.3906e-15,\n 4.5994e-14, 1.5922e-13, 2.8538e-13, 8.2301e-14, 1.0061e-13, 1.9540e-13,\n 1.3330e-13, 7.2839e-14, 2.5390e-13, 7.3540e-15, 1.0819e-14, 1.1457e-15,\n 4.8261e-13, 3.5231e-14, 4.1105e-13, 6.6330e-14, 3.3104e-14, 3.7371e-14,\n 1.5237e-13, 4.9513e-13, 9.0878e-14, 5.1534e-17, 4.1395e-13, 3.7748e-15,\n 3.5462e-14, 4.3091e-13, 1.9854e-13, 4.1272e-14, 9.4752e-14, 2.2751e-14,\n 2.0211e-14, 3.8190e-15, 8.1553e-15, 1.7193e-13, 4.1041e-14, 3.5557e-14,\n 2.4749e-14, 1.1742e-13, 7.4879e-15, 2.6991e-14, 2.2052e-13, 6.5012e-14,\n 1.3331e-17, 4.5376e-17, 5.5107e-13, 1.1074e-15, 3.5880e-14, 2.8816e-13,\n 8.8282e-14, 1.0022e-13, 3.5505e-14, 4.3146e-14], device='cuda:0')"
},
"43": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 1.2038e-10, -1.2847e-11, 5.0275e-11, -1.6451e-11, 3.9874e-11,\n 1.3300e-10, -1.4072e-11, -1.8782e-11, 7.6062e-11, -1.7194e-11,\n -1.8996e-11, -1.1002e-11, 8.8021e-11, 1.1994e-10, 1.4446e-10,\n 1.0021e-10, -2.2963e-11, 1.3040e-10, -1.6143e-11, -2.4158e-11,\n -1.5760e-11, -2.1045e-11, -1.5165e-11, -2.2370e-11, 1.1918e-10,\n 6.9066e-11, 1.0360e-10, 8.2063e-11, 1.8027e-10, 2.6989e-11,\n 2.8268e-10, -2.1565e-11, 3.5633e-10, -2.2988e-11, -1.4046e-11,\n -1.5618e-11, -2.0882e-11, 1.3206e-10, -2.3195e-11, 1.4708e-10,\n -2.1763e-11, 1.6248e-11, 5.0851e-11, -2.5039e-11, -1.6420e-11,\n -1.2735e-11, -1.6766e-11, -1.4921e-11, -1.6474e-11, 1.9080e-10,\n -7.9035e-12, 8.6545e-11, 1.2756e-10, -9.8678e-12, -1.2376e-11,\n -1.3493e-11, 1.4747e-10, -1.4238e-11, 4.5071e-11, -2.7372e-11,\n 1.0789e-10, 1.2037e-10, -2.5100e-11, -1.5199e-11],\n [-1.2645e-10, 1.1252e-11, -4.8567e-11, 1.6110e-11, -3.9446e-11,\n -1.2060e-10, 1.1416e-11, 2.0737e-11, -6.9855e-11, 1.7745e-11,\n 1.8798e-11, 9.3639e-12, -8.2581e-11, -1.1859e-10, -1.3986e-10,\n -7.7784e-11, 2.2413e-11, -1.0950e-10, 1.4834e-11, 2.2718e-11,\n 1.5509e-11, 2.2383e-11, 1.5009e-11, 2.2515e-11, -1.0392e-10,\n -7.0209e-11, -1.1503e-10, -7.6777e-11, -1.7520e-10, -1.8836e-11,\n -2.8610e-10, 1.9904e-11, -3.5411e-10, 2.1761e-11, 1.2145e-11,\n 1.6962e-11, 2.0097e-11, -1.4516e-10, 2.2991e-11, -1.4724e-10,\n 2.0551e-11, -1.0885e-11, -4.8396e-11, 2.5285e-11, 1.5447e-11,\n 1.1424e-11, 1.6211e-11, 1.6420e-11, 1.6587e-11, -1.8161e-10,\n 9.3693e-12, -8.3849e-11, -1.2361e-10, 9.8713e-12, 1.0728e-11,\n 1.3030e-11, -1.3789e-10, 1.4973e-11, -4.0633e-11, 2.8254e-11,\n -1.0748e-10, -1.1320e-10, 2.4739e-11, 1.4859e-11]], device='cuda:0')",
"exp_avg_sq": "tensor([[3.8134e-11, 2.1877e-11, 1.3799e-12, 4.1384e-12, 2.6846e-12, 7.6788e-12,\n 5.1713e-13, 3.7966e-12, 2.4078e-12, 1.4867e-12, 9.5604e-13, 8.1851e-13,\n 8.3369e-12, 1.7550e-12, 1.0426e-12, 2.2335e-12, 8.6010e-12, 9.7782e-12,\n 1.1574e-12, 1.8732e-11, 5.4475e-12, 3.7689e-13, 1.3585e-12, 6.3882e-13,\n 1.7524e-11, 3.3213e-12, 2.9619e-12, 2.5124e-12, 4.7363e-13, 2.1485e-11,\n 1.4941e-12, 8.2762e-12, 3.6430e-13, 5.8731e-13, 2.2951e-12, 5.8943e-12,\n 4.7280e-12, 1.5369e-11, 6.4978e-12, 3.2062e-12, 5.7458e-12, 3.0772e-11,\n 2.3438e-12, 3.2722e-12, 3.2151e-12, 1.4598e-11, 4.0341e-12, 1.2994e-12,\n 3.2824e-13, 1.3308e-12, 3.8691e-12, 2.3582e-12, 5.2096e-12, 1.5529e-12,\n 5.4466e-13, 2.5521e-12, 1.9438e-11, 3.8155e-13, 1.1453e-11, 2.0631e-12,\n 5.9322e-12, 4.3717e-12, 6.5840e-12, 1.8462e-11],\n [3.8134e-11, 2.1877e-11, 1.3800e-12, 4.1384e-12, 2.6846e-12, 7.6789e-12,\n 5.1713e-13, 3.7966e-12, 2.4080e-12, 1.4867e-12, 9.5605e-13, 8.1851e-13,\n 8.3370e-12, 1.7552e-12, 1.0428e-12, 2.2336e-12, 8.6009e-12, 9.7783e-12,\n 1.1574e-12, 1.8732e-11, 5.4475e-12, 3.7687e-13, 1.3586e-12, 6.3882e-13,\n 1.7524e-11, 3.3213e-12, 2.9620e-12, 2.5125e-12, 4.7373e-13, 2.1485e-11,\n 1.4943e-12, 8.2762e-12, 3.6450e-13, 5.8731e-13, 2.2952e-12, 5.8943e-12,\n 4.7280e-12, 1.5370e-11, 6.4978e-12, 3.2062e-12, 5.7458e-12, 3.0772e-11,\n 2.3439e-12, 3.2722e-12, 3.2151e-12, 1.4598e-11, 4.0341e-12, 1.2994e-12,\n 3.2825e-13, 1.3309e-12, 3.8691e-12, 2.3583e-12, 5.2097e-12, 1.5529e-12,\n 5.4466e-13, 2.5521e-12, 1.9438e-11, 3.8155e-13, 1.1453e-11, 2.0631e-12,\n 5.9323e-12, 4.3719e-12, 6.5840e-12, 1.8462e-11]], device='cuda:0')"
},
"44": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 1.0973e-10, -1.0696e-10], device='cuda:0')",
"exp_avg_sq": "tensor([4.7507e-11, 4.7507e-11], device='cuda:0')"
},
"45": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-1.1841e-16, 2.1617e-17, -7.2017e-16, ..., -2.6010e-16,\n 1.1178e-16, -3.0714e-16],\n [-8.3637e-17, -1.5473e-17, 8.7499e-17, ..., -4.9703e-18,\n -2.3738e-17, 6.7615e-17],\n [ 2.5622e-17, -9.3458e-18, -2.4324e-16, ..., -4.4636e-18,\n -8.7071e-18, -1.6297e-17],\n ...,\n [-5.1745e-17, -2.4365e-18, 5.1871e-16, ..., 5.9380e-17,\n -5.3728e-17, 1.3447e-16],\n [ 2.0339e-17, -9.4766e-18, 4.8977e-17, ..., -2.2832e-17,\n -3.0091e-18, -1.9416e-17],\n [ 4.8103e-18, 9.0920e-18, 4.6646e-17, ..., 1.8717e-17,\n -9.2601e-18, 3.9993e-17]], device='cuda:0')",
"exp_avg_sq": "tensor([[6.4945e-15, 2.5841e-15, 1.1563e-15, ..., 2.2206e-15, 5.7200e-15,\n 1.1195e-14],\n [3.5312e-16, 7.0015e-16, 3.2307e-16, ..., 6.6207e-16, 1.4008e-16,\n 5.2726e-15],\n [1.1014e-15, 1.1246e-15, 4.4919e-16, ..., 5.2876e-17, 4.3148e-15,\n 1.8807e-15],\n ...,\n [1.0340e-18, 5.4159e-20, 9.4317e-20, ..., 2.6154e-18, 1.1273e-16,\n 2.3559e-18],\n [3.1942e-15, 2.6725e-15, 1.4482e-15, ..., 2.4210e-15, 1.7604e-14,\n 8.0905e-15],\n [1.5584e-16, 4.3659e-17, 4.7262e-17, ..., 2.6897e-21, 8.8623e-17,\n 2.1179e-16]], device='cuda:0')"
},
"46": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-1.6244e-15, -1.2054e-16, -5.1112e-16, -1.3639e-15, 1.5085e-16,\n -8.6839e-17, 2.4929e-16, 6.6051e-16, 1.3844e-17, -3.4811e-17,\n -7.8472e-16, 7.2879e-16, -1.7641e-15, 1.7485e-16, 5.3529e-18,\n 1.4895e-16, 1.7869e-16, -1.8821e-16, -2.0843e-16, -2.0836e-17,\n 1.6868e-16, -8.7957e-18, -1.2544e-16, -2.8227e-16, 4.2576e-16,\n -2.2417e-17, 1.9881e-15, -3.4927e-16, -4.6901e-16, 7.8621e-16,\n -3.6937e-16, 5.9274e-16, 5.4099e-17, -6.0899e-17, 1.2927e-16,\n -1.5108e-16, 1.5907e-15, -1.6180e-16, -2.3174e-15, 6.4597e-16,\n -1.4358e-16, -1.5804e-16, -2.3996e-15, 7.5147e-16, 2.3815e-15,\n 1.8837e-16, 5.5000e-17, -3.4678e-16, 1.8974e-16, 8.9595e-17,\n 3.2083e-16, 5.0996e-17, 1.5909e-16, 1.8871e-16, 3.5631e-16,\n -2.5067e-16, -1.2531e-15, 1.4228e-17, 9.3978e-16, 1.2896e-16,\n 1.7852e-17, 7.0775e-16, 1.3063e-16, 2.1397e-16], device='cuda:0')",
"exp_avg_sq": "tensor([3.3806e-12, 7.3815e-13, 4.9038e-13, 5.8563e-12, 1.9456e-14, 1.1492e-12,\n 9.2367e-13, 1.2454e-13, 1.2397e-11, 2.4519e-12, 8.0481e-12, 7.8599e-13,\n 9.5360e-12, 1.2030e-11, 4.5783e-13, 2.3854e-12, 1.1454e-14, 2.8329e-13,\n 5.5609e-13, 9.3074e-14, 1.0743e-11, 1.5226e-12, 5.9662e-12, 1.8367e-13,\n 1.4875e-12, 6.7286e-14, 3.1805e-12, 2.5257e-13, 1.1452e-12, 2.1924e-13,\n 4.3664e-13, 3.7859e-13, 8.3293e-13, 8.4325e-13, 3.2641e-12, 6.4525e-13,\n 1.9034e-12, 1.3648e-12, 1.4314e-11, 1.3767e-12, 4.1070e-12, 1.1497e-12,\n 4.5147e-12, 1.2092e-14, 2.2932e-13, 2.0463e-14, 7.8283e-14, 4.8003e-12,\n 1.4194e-14, 2.3095e-13, 1.6828e-12, 5.2784e-12, 3.5606e-13, 5.0155e-14,\n 1.9156e-14, 2.4085e-13, 5.5810e-12, 1.0666e-11, 2.6615e-12, 4.4571e-13,\n 5.4009e-13, 8.4094e-17, 4.5675e-12, 4.7858e-14], device='cuda:0')"
},
"47": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-4.3684e-15, -1.5866e-15, 6.4186e-17, -4.3142e-15, 8.0466e-18,\n 2.6075e-17, -3.8234e-17, -1.2716e-15, 3.2217e-17, 4.7629e-18,\n -3.4579e-15, -1.9779e-15, -3.9835e-15, -1.5129e-15, -2.0284e-15,\n 3.8451e-18, 9.2113e-18, 1.3470e-16, -1.8322e-17, -1.1806e-17,\n 1.4530e-18, 1.3795e-17, 4.8722e-17, -3.8098e-17, -1.5718e-15,\n -1.5644e-17, 9.5619e-17, -1.7521e-15, -2.6536e-15, -9.4730e-16,\n -2.3268e-17, -2.0674e-15, 2.9674e-19, 1.6539e-17, -2.3446e-15,\n -4.3462e-18, -4.5214e-16, 6.6650e-17, -5.0127e-15, -1.9759e-15,\n 9.5339e-17, -2.2107e-15, -4.3113e-15, -1.1107e-15, -1.0446e-16,\n 1.2762e-17, 1.6716e-18, 1.8042e-16, 1.3856e-17, 2.3030e-18,\n -2.1030e-15, 5.5169e-17, 5.5927e-19, 7.8176e-18, 4.6630e-17,\n -4.2991e-17, -4.2684e-15, 4.8928e-18, -1.5014e-15, 1.7695e-18,\n -6.4803e-18, -9.7294e-17, -1.3949e-17, 1.8435e-17], device='cuda:0')",
"exp_avg_sq": "tensor([1.2941e-14, 3.7673e-15, 4.3005e-15, 4.1880e-14, 2.0352e-16, 6.3393e-15,\n 7.3641e-15, 2.8715e-16, 1.0681e-13, 2.5068e-14, 5.4122e-14, 4.8476e-15,\n 7.7926e-14, 1.0461e-13, 1.4169e-15, 1.2969e-14, 3.2200e-16, 2.7810e-15,\n 7.2534e-15, 1.0046e-15, 8.4989e-14, 1.0309e-14, 4.9498e-14, 2.0314e-15,\n 8.3304e-15, 1.2804e-15, 2.3553e-14, 2.3275e-15, 6.3312e-15, 1.7111e-15,\n 3.6050e-15, 1.7201e-15, 7.5635e-15, 5.9892e-15, 3.0333e-14, 7.1860e-15,\n 1.1614e-14, 8.5791e-15, 8.2568e-14, 6.9676e-15, 3.2712e-14, 7.0781e-15,\n 2.8925e-14, 1.3883e-17, 8.1809e-16, 1.3365e-15, 9.7029e-16, 3.9327e-14,\n 4.8776e-16, 1.7806e-15, 1.5884e-14, 3.3483e-14, 4.1095e-15, 5.3050e-16,\n 4.0154e-16, 2.9011e-15, 4.4206e-14, 7.0209e-14, 1.4125e-14, 4.2779e-15,\n 5.8301e-15, 4.6674e-16, 4.0413e-14, 1.1837e-15], device='cuda:0')"
},
"48": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-3.5623e-15, -1.6765e-15, -5.7856e-17, -3.2008e-15, -8.8696e-18,\n -3.1296e-17, 5.1570e-17, -1.1683e-15, -2.8536e-17, -5.9218e-18,\n -2.3182e-15, -1.4870e-15, -3.3115e-15, -1.3952e-15, -1.8144e-15,\n 4.9880e-18, -1.4763e-17, -1.2486e-16, 1.7951e-17, 1.0529e-17,\n 1.9886e-17, -1.6095e-17, -5.4466e-17, 3.6905e-17, -1.3298e-15,\n 1.5173e-17, 3.2676e-17, -1.9793e-15, -2.4828e-15, -1.0369e-15,\n 2.2143e-17, -1.6042e-15, 7.5419e-20, -2.0111e-17, -1.8332e-15,\n 4.8247e-18, -5.5164e-16, -6.8194e-17, -4.0330e-15, -1.4171e-15,\n -1.1245e-16, -2.0982e-15, -3.6392e-15, -1.1787e-15, 2.9437e-17,\n -1.6765e-17, -1.9474e-18, -2.0212e-16, -1.8056e-17, -1.5776e-18,\n -1.6291e-15, -6.4011e-17, -4.1119e-18, -7.7148e-18, -9.7270e-17,\n 4.2410e-17, -2.9633e-15, -1.9928e-17, -1.0102e-15, -1.7591e-18,\n 8.1153e-18, -4.9167e-16, 9.2921e-18, -2.2886e-17], device='cuda:0')",
"exp_avg_sq": "tensor([2.9801e-14, 5.5334e-15, 7.3417e-15, 5.1063e-14, 5.2360e-16, 1.4730e-14,\n 1.3219e-14, 6.8439e-16, 1.4011e-13, 3.0531e-14, 7.7672e-14, 5.5241e-15,\n 8.9687e-14, 1.1734e-13, 3.1259e-15, 2.9197e-14, 5.5009e-16, 3.6379e-15,\n 8.7687e-15, 2.1039e-15, 1.2123e-13, 1.8554e-14, 6.7341e-14, 3.2790e-15,\n 1.1699e-14, 1.6768e-15, 2.6882e-14, 2.2403e-15, 9.1888e-15, 1.8614e-15,\n 6.6025e-15, 2.0692e-15, 1.1668e-14, 1.0372e-14, 2.7124e-14, 8.8871e-15,\n 1.6162e-14, 1.7052e-14, 1.3741e-13, 1.0658e-14, 4.8236e-14, 1.0803e-14,\n 3.9900e-14, 1.3922e-17, 1.3369e-15, 1.1478e-15, 1.7021e-15, 5.8079e-14,\n 7.0332e-16, 3.6214e-15, 1.3645e-14, 6.6450e-14, 5.5455e-15, 8.7937e-16,\n 6.4575e-16, 4.4741e-15, 5.1546e-14, 1.1386e-13, 2.3389e-14, 6.7072e-15,\n 7.4264e-15, 3.9567e-16, 5.2248e-14, 1.5072e-15], device='cuda:0')"
},
"49": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 2.0929e-14, 1.3668e-14, -1.8617e-15, 2.0816e-14, -2.3369e-15,\n -2.6351e-15, -2.7613e-15, 1.2692e-14, -2.1755e-15, -1.0817e-15,\n 1.4881e-14, 1.8613e-14, 1.7030e-14, 5.3500e-15, 1.7397e-14,\n -2.5576e-15, -2.5078e-15, -2.2628e-15, -2.1374e-15, -2.2179e-15,\n -2.3115e-15, -2.8928e-15, -1.9366e-15, -1.9171e-15, 1.1079e-14,\n -1.5005e-15, -6.7179e-16, 1.1719e-14, 1.8337e-14, 1.5806e-14,\n -2.1810e-15, 2.2999e-14, -2.6705e-16, -3.2130e-15, 1.5755e-14,\n -2.1065e-15, 3.0624e-15, -2.9415e-15, 1.7270e-14, 1.3892e-14,\n -2.8858e-15, 1.3616e-14, 1.6140e-14, 1.5651e-14, 1.2227e-15,\n -2.9343e-15, -1.0636e-15, -2.7989e-15, -2.8206e-15, -2.3548e-15,\n 1.7576e-14, -1.2080e-15, -3.3819e-15, -2.5555e-15, -2.4234e-15,\n -2.3729e-15, 2.2122e-14, -1.8639e-15, 9.6113e-15, -1.7187e-15,\n -2.7775e-15, 1.5825e-15, 3.7935e-16, -1.1720e-15],\n [-2.0930e-14, -1.3668e-14, 1.8617e-15, -2.0816e-14, 2.3369e-15,\n 2.6351e-15, 2.7613e-15, -1.2693e-14, 2.1756e-15, 1.0817e-15,\n -1.4882e-14, -1.8613e-14, -1.7030e-14, -5.3502e-15, -1.7397e-14,\n 2.5576e-15, 2.5078e-15, 2.2628e-15, 2.1374e-15, 2.2179e-15,\n 2.3116e-15, 2.8929e-15, 1.9366e-15, 1.9171e-15, -1.1079e-14,\n 1.5006e-15, 6.7166e-16, -1.1719e-14, -1.8337e-14, -1.5806e-14,\n 2.1811e-15, -2.2999e-14, 2.6707e-16, 3.2130e-15, -1.5755e-14,\n 2.1065e-15, -3.0625e-15, 2.9416e-15, -1.7271e-14, -1.3892e-14,\n 2.8858e-15, -1.3616e-14, -1.6140e-14, -1.5651e-14, -1.2228e-15,\n 2.9344e-15, 1.0637e-15, 2.7989e-15, 2.8207e-15, 2.3549e-15,\n -1.7576e-14, 1.2080e-15, 3.3819e-15, 2.5556e-15, 2.4234e-15,\n 2.3729e-15, -2.2122e-14, 1.8639e-15, -9.6115e-15, 1.7187e-15,\n 2.7775e-15, -1.5825e-15, -3.7932e-16, 1.1720e-15]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.6611e-13, 8.9561e-13, 1.1166e-13, 1.2890e-12, 4.7356e-14, 9.1094e-14,\n 1.7808e-16, 5.6989e-14, 1.9184e-12, 2.0064e-12, 1.2417e-12, 2.0314e-12,\n 2.1653e-12, 1.7380e-12, 2.3232e-13, 4.3371e-13, 2.0842e-13, 2.8971e-15,\n 2.2155e-12, 2.1294e-13, 1.5935e-12, 6.1267e-13, 1.8637e-12, 6.6293e-13,\n 8.8971e-13, 1.6107e-12, 1.6938e-12, 2.6167e-14, 5.2602e-13, 1.6470e-15,\n 8.8816e-13, 2.2074e-12, 1.6862e-12, 1.9861e-13, 3.5786e-12, 1.4418e-12,\n 1.9587e-12, 7.1680e-14, 1.1465e-12, 6.3847e-13, 2.5630e-13, 3.0675e-13,\n 2.3207e-13, 2.5267e-12, 1.9351e-13, 4.5373e-12, 6.5755e-13, 2.3839e-13,\n 1.5104e-12, 3.5247e-13, 4.7616e-12, 2.3755e-13, 1.8148e-12, 4.5724e-14,\n 5.9434e-13, 1.2868e-12, 1.9856e-12, 5.3921e-13, 1.1069e-12, 7.1428e-13,\n 1.7558e-12, 6.1374e-12, 1.7824e-12, 1.6695e-12],\n [2.6611e-13, 8.9561e-13, 1.1166e-13, 1.2890e-12, 4.7356e-14, 9.1094e-14,\n 1.7808e-16, 5.6989e-14, 1.9184e-12, 2.0064e-12, 1.2417e-12, 2.0314e-12,\n 2.1653e-12, 1.7380e-12, 2.3232e-13, 4.3371e-13, 2.0842e-13, 2.8971e-15,\n 2.2155e-12, 2.1294e-13, 1.5935e-12, 6.1267e-13, 1.8637e-12, 6.6293e-13,\n 8.8971e-13, 1.6107e-12, 1.6938e-12, 2.6167e-14, 5.2602e-13, 1.6470e-15,\n 8.8816e-13, 2.2074e-12, 1.6862e-12, 1.9861e-13, 3.5786e-12, 1.4418e-12,\n 1.9587e-12, 7.1680e-14, 1.1465e-12, 6.3847e-13, 2.5630e-13, 3.0675e-13,\n 2.3207e-13, 2.5267e-12, 1.9351e-13, 4.5373e-12, 6.5755e-13, 2.3839e-13,\n 1.5104e-12, 3.5247e-13, 4.7616e-12, 2.3755e-13, 1.8148e-12, 4.5724e-14,\n 5.9434e-13, 1.2868e-12, 1.9856e-12, 5.3921e-13, 1.1069e-12, 7.1428e-13,\n 1.7558e-12, 6.1374e-12, 1.7824e-12, 1.6695e-12]], device='cuda:0')"
},
"50": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 1.2579e-14, -1.2579e-14], device='cuda:0')",
"exp_avg_sq": "tensor([1.1074e-11, 1.1074e-11], device='cuda:0')"
},
"51": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-1.7736e-19, -1.3360e-20, -1.2241e-20, ..., -2.4055e-19,\n -3.0410e-21, -1.2375e-19],\n [-3.3347e-19, 1.6279e-20, 1.8198e-20, ..., -2.5511e-19,\n -1.9983e-20, -6.3528e-20],\n [ 1.3333e-20, -4.2840e-20, 5.9964e-20, ..., 3.5946e-19,\n -8.4528e-21, 9.3035e-20],\n ...,\n [ 2.0946e-19, -4.9628e-21, -1.4346e-19, ..., -9.4583e-20,\n 3.4586e-20, 5.0546e-20],\n [-3.4659e-19, 2.6532e-22, 5.7149e-20, ..., -1.2383e-19,\n -1.4627e-20, -1.0173e-20],\n [-3.2653e-19, 2.0924e-20, 7.9654e-20, ..., -1.4787e-19,\n -3.7100e-20, 1.3880e-19]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.0350e-18, 2.1867e-19, 2.0379e-18, ..., 1.6747e-17, 2.2282e-17,\n 4.8762e-19],\n [3.6410e-20, 7.2671e-21, 1.8898e-20, ..., 5.3799e-19, 4.3298e-19,\n 2.0859e-22],\n [3.1015e-19, 5.1184e-21, 3.0650e-20, ..., 3.1794e-19, 1.1157e-18,\n 1.0768e-22],\n ...,\n [1.3065e-18, 4.6150e-20, 1.8894e-19, ..., 3.2705e-20, 3.0525e-18,\n 9.4522e-23],\n [2.2001e-18, 4.3561e-20, 1.3166e-17, ..., 1.9484e-21, 3.8783e-18,\n 8.2478e-19],\n [6.9584e-20, 4.3971e-20, 4.5710e-21, ..., 8.6304e-20, 1.5213e-21,\n 3.2333e-20]], device='cuda:0')"
},
"52": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 5.9866e-19, -1.0713e-20, 6.3806e-19, -2.8112e-19, -8.0696e-19,\n 3.1264e-19, 1.0452e-18, -2.4238e-19, -5.5763e-19, -7.0638e-19,\n 7.0336e-19, -1.4013e-18, 7.9903e-20, -8.6312e-20, 5.3174e-20,\n -1.4691e-18, -3.1279e-18, 1.2776e-19, 1.2683e-18, -1.1111e-18,\n 2.6011e-18, -6.8543e-19, 4.4068e-18, 5.9210e-19, -2.1754e-18,\n 1.4065e-18, -5.9701e-20, 5.7875e-19, 6.4677e-19, 2.4628e-19,\n -1.6427e-19, -1.3959e-18, 1.0323e-18, -6.3086e-20, -1.4626e-18,\n 3.8914e-19, 1.8344e-18, -7.4539e-19, -3.1722e-19, 6.7043e-19,\n -1.2257e-18, 3.2402e-19, 1.2024e-18, 8.0218e-19, -6.0630e-19,\n -3.4590e-19, 1.5783e-19, -8.3460e-19, 6.1888e-19, 8.3234e-20,\n 1.4600e-19, 2.6120e-19, -8.2425e-20, -1.0380e-18, -1.5743e-18,\n -2.6300e-19, 7.4539e-19, -6.6496e-19, -1.2648e-18, 1.7656e-18,\n -1.9594e-19, 2.6610e-19, -2.6425e-19, -3.7430e-19], device='cuda:0')",
"exp_avg_sq": "tensor([3.9158e-15, 4.9917e-17, 5.8516e-17, 1.0690e-17, 5.7338e-18, 3.1419e-18,\n 6.8335e-16, 4.4933e-17, 3.0319e-16, 5.7286e-16, 4.9652e-16, 2.4560e-17,\n 5.4880e-18, 2.0542e-16, 7.8291e-16, 7.7118e-16, 4.4526e-17, 5.6321e-17,\n 4.1837e-18, 9.1133e-17, 6.6417e-16, 9.6412e-17, 2.6881e-15, 2.4781e-16,\n 1.6575e-15, 6.2423e-16, 3.4552e-17, 1.2694e-15, 4.7591e-19, 8.3603e-17,\n 4.4958e-16, 9.2934e-17, 1.9607e-17, 2.2548e-16, 5.6442e-17, 1.2105e-16,\n 1.6728e-16, 8.1399e-17, 2.8725e-17, 2.8023e-17, 1.3609e-15, 1.5403e-19,\n 6.8802e-19, 6.3808e-16, 7.8402e-19, 6.1285e-16, 2.2330e-16, 1.1151e-16,\n 1.5137e-16, 2.8297e-16, 7.0788e-18, 2.7489e-17, 2.6259e-16, 2.2748e-15,\n 1.1892e-16, 2.6797e-19, 2.1586e-17, 5.1974e-16, 7.8433e-17, 8.5663e-16,\n 1.9944e-17, 1.9135e-16, 1.2155e-16, 6.4695e-18], device='cuda:0')"
},
"53": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.1947e-20, -5.3340e-21, 4.9574e-19, -1.3675e-19, -2.2218e-19,\n -2.6130e-19, -5.9071e-19, -5.9839e-20, 4.7559e-19, 1.1132e-19,\n 5.8323e-20, -4.2801e-19, 6.7410e-20, -1.6390e-20, -3.9526e-20,\n -3.8461e-19, -7.6432e-19, -7.2067e-20, -6.9963e-20, 7.8643e-20,\n 1.3539e-19, 2.3084e-19, 1.4198e-18, -3.2090e-20, 1.4398e-19,\n -4.0905e-20, -1.7688e-21, -2.2379e-19, 8.3592e-21, -9.1800e-20,\n -3.0490e-20, -2.1481e-19, 2.5530e-21, -1.4898e-23, -4.9737e-19,\n -9.1568e-20, 3.5534e-19, 6.6918e-20, -1.5824e-19, 8.8230e-20,\n -6.3723e-20, 1.8167e-20, 4.9302e-19, 9.4646e-21, -4.5631e-19,\n -5.9772e-20, -6.9355e-21, -7.9070e-19, -2.9256e-20, 7.1432e-19,\n 4.4060e-20, 5.2994e-19, -4.0530e-22, 9.5688e-21, -3.4154e-19,\n -1.5033e-20, -2.0419e-20, -2.5558e-20, -1.2386e-19, 1.1013e-19,\n -1.3157e-19, 1.1828e-19, -7.8376e-20, -1.9723e-20], device='cuda:0')",
"exp_avg_sq": "tensor([5.7646e-15, 7.2906e-17, 1.4429e-17, 1.8901e-17, 2.2706e-17, 2.6917e-19,\n 3.4876e-16, 7.7731e-18, 2.2707e-16, 9.1545e-16, 9.8139e-16, 4.1020e-17,\n 1.6328e-18, 8.7921e-18, 3.7447e-16, 1.0188e-16, 1.8300e-17, 9.6399e-17,\n 3.1274e-17, 5.6786e-17, 1.2675e-16, 6.1323e-17, 1.1754e-15, 4.7990e-17,\n 2.2828e-15, 2.6060e-16, 2.2449e-17, 1.0561e-15, 1.0068e-19, 9.3876e-17,\n 3.9078e-16, 9.5006e-17, 1.2364e-17, 3.4005e-16, 1.2035e-17, 6.5841e-17,\n 1.1178e-16, 1.2802e-16, 5.2727e-18, 1.8920e-17, 9.7959e-16, 1.0608e-20,\n 4.2751e-17, 7.9872e-16, 8.9039e-19, 4.4113e-16, 1.1143e-16, 9.8316e-17,\n 1.3237e-17, 2.9608e-16, 9.7432e-18, 5.8569e-17, 2.8838e-16, 8.4084e-16,\n 1.9612e-17, 4.5761e-19, 3.6592e-17, 4.9904e-16, 7.0871e-17, 6.9931e-16,\n 6.0665e-18, 4.1623e-16, 5.5467e-17, 2.8605e-21], device='cuda:0')"
},
"54": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 7.6637e-20, -3.2460e-22, 2.6252e-19, -8.0634e-20, -1.5683e-19,\n -2.3527e-20, 2.1451e-19, -1.0886e-20, -4.2231e-20, -1.1505e-19,\n 2.1268e-19, -4.0149e-19, 6.0603e-20, 1.2625e-20, -1.8296e-20,\n -3.7658e-19, -6.0441e-19, -1.2995e-19, 2.0490e-19, -1.9128e-19,\n 3.4990e-19, -9.4977e-20, 1.0824e-18, 5.3781e-20, -3.9796e-19,\n 3.5770e-19, 4.6480e-20, -9.3174e-20, -5.6178e-20, -1.4078e-20,\n 4.6355e-21, -3.9107e-19, 2.0649e-21, 1.6182e-22, -3.9703e-19,\n -1.8150e-20, 5.3190e-19, -1.4969e-19, -1.6511e-19, 6.1783e-20,\n -1.9167e-19, 3.3750e-20, 9.8151e-20, 1.0196e-19, -3.4127e-19,\n -9.5060e-20, 6.3497e-21, -3.2888e-19, -3.0097e-20, 1.9703e-19,\n -3.1225e-20, 1.7433e-19, -1.8336e-21, -2.1156e-19, -3.9468e-19,\n 7.3796e-21, 2.5852e-19, -1.1160e-19, -3.0895e-19, 1.8969e-19,\n -2.8105e-20, -5.4879e-20, -2.3887e-19, -6.6545e-20], device='cuda:0')",
"exp_avg_sq": "tensor([3.1888e-15, 4.6416e-17, 3.4077e-17, 1.4784e-17, 1.8212e-17, 9.2635e-19,\n 4.7077e-16, 2.8192e-17, 3.3592e-16, 6.4221e-16, 4.6703e-16, 2.6793e-17,\n 3.9782e-18, 1.8844e-16, 6.7384e-16, 9.1591e-16, 3.1466e-17, 4.5471e-17,\n 1.8212e-17, 8.2618e-17, 4.3241e-16, 1.2898e-16, 2.5262e-15, 2.3250e-16,\n 1.8827e-15, 5.9180e-16, 3.3438e-17, 9.1031e-16, 2.4970e-18, 4.1814e-17,\n 3.5214e-16, 1.2403e-16, 2.2659e-17, 2.1556e-16, 4.8068e-17, 9.1353e-17,\n 1.6309e-16, 1.2750e-16, 3.3415e-17, 1.6668e-17, 1.0381e-15, 2.7209e-20,\n 8.9444e-18, 6.2592e-16, 1.1313e-18, 6.4227e-16, 1.0970e-16, 1.0742e-16,\n 1.0795e-16, 3.1703e-16, 1.6723e-18, 4.1672e-17, 1.9830e-16, 1.9227e-15,\n 1.3223e-16, 3.3213e-19, 4.2325e-17, 3.7644e-16, 8.6797e-17, 9.4986e-16,\n 1.1240e-17, 2.1550e-16, 2.1578e-16, 2.7645e-20], device='cuda:0')"
},
"55": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 1.0160e-19, 1.5752e-18, -5.1544e-18, 1.6376e-18, 2.7448e-18,\n 4.9772e-18, 1.0305e-18, 2.3798e-18, -2.8453e-18, -5.2049e-19,\n -1.0363e-18, 3.9144e-18, -1.4140e-18, -2.0763e-19, -7.8540e-19,\n 2.6657e-18, -5.3682e-18, 1.7641e-18, 7.2047e-19, -7.9414e-19,\n 1.1699e-18, -2.1570e-18, -6.4031e-18, -2.6745e-19, -4.2778e-19,\n -7.0031e-19, -8.1037e-19, 1.2860e-18, 6.0894e-19, 4.9126e-20,\n 3.3359e-19, 1.2062e-18, 5.6221e-18, -6.6755e-19, 4.6537e-18,\n 3.6609e-18, -2.3933e-18, -3.3146e-19, 1.8648e-18, -2.0324e-18,\n -1.0742e-19, -3.4483e-19, 4.7790e-18, 7.1804e-19, 7.0866e-18,\n 1.1233e-18, -2.1969e-19, 5.8450e-18, 4.9753e-18, -5.4965e-18,\n -5.4341e-19, -5.2719e-18, -6.7620e-19, -8.7791e-19, 2.2036e-18,\n 2.4448e-20, -1.0829e-18, -1.1305e-18, 5.9333e-19, 2.3001e-18,\n 2.1766e-18, -1.5815e-19, 9.8529e-19, 1.8906e-19],\n [-1.0161e-19, -1.5752e-18, 5.1544e-18, -1.6376e-18, -2.7447e-18,\n -4.9772e-18, -1.0303e-18, -2.3798e-18, 2.8455e-18, 5.2057e-19,\n 1.0365e-18, -3.9143e-18, 1.4140e-18, 2.0780e-19, 7.8538e-19,\n -2.6656e-18, 5.3682e-18, -1.7641e-18, -7.2034e-19, 7.9433e-19,\n -1.1699e-18, 2.1569e-18, 6.4032e-18, 2.6747e-19, 4.2777e-19,\n 7.0035e-19, 8.1033e-19, -1.2858e-18, -6.0892e-19, -4.9132e-20,\n -3.3360e-19, -1.2061e-18, -5.6221e-18, 6.6753e-19, -4.6539e-18,\n -3.6609e-18, 2.3934e-18, 3.3151e-19, -1.8647e-18, 2.0324e-18,\n 1.0743e-19, 3.4480e-19, -4.7790e-18, -7.1806e-19, -7.0866e-18,\n -1.1232e-18, 2.1968e-19, -5.8450e-18, -4.9753e-18, 5.4966e-18,\n 5.4347e-19, 5.2719e-18, 6.7619e-19, 8.7789e-19, -2.2035e-18,\n -2.4489e-20, 1.0830e-18, 1.1305e-18, -5.9321e-19, -2.3002e-18,\n -2.1766e-18, 1.5821e-19, -9.8520e-19, -1.8907e-19]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.7500e-15, 6.7120e-17, 3.3276e-17, 6.5487e-16, 1.0421e-15, 1.9361e-18,\n 2.8896e-18, 7.6225e-18, 2.1095e-16, 8.5255e-16, 7.0344e-16, 8.5791e-17,\n 2.4853e-17, 4.3880e-20, 2.6180e-16, 1.0012e-19, 6.3701e-17, 1.8142e-15,\n 9.3055e-17, 8.4053e-17, 3.8494e-18, 9.4149e-17, 2.1124e-16, 6.0972e-18,\n 3.0351e-16, 4.5890e-17, 1.3100e-16, 1.2273e-16, 1.6758e-17, 6.2951e-19,\n 1.5454e-16, 1.9886e-17, 3.7359e-18, 7.1454e-16, 2.1352e-18, 2.8270e-16,\n 3.4491e-18, 4.5468e-16, 1.0029e-17, 1.0807e-16, 2.8314e-16, 6.9691e-17,\n 1.8164e-16, 6.9431e-16, 3.2123e-16, 3.6778e-16, 2.9302e-16, 6.5141e-17,\n 1.7871e-17, 4.1956e-16, 3.4428e-17, 1.8762e-16, 5.5988e-16, 1.5681e-16,\n 1.3965e-18, 2.5566e-16, 8.1014e-17, 1.9779e-16, 1.1570e-16, 4.6931e-16,\n 1.7518e-16, 1.2874e-16, 2.1610e-20, 1.5330e-17],\n [1.7500e-15, 6.7120e-17, 3.3276e-17, 6.5487e-16, 1.0421e-15, 1.9361e-18,\n 2.8896e-18, 7.6225e-18, 2.1095e-16, 8.5255e-16, 7.0344e-16, 8.5791e-17,\n 2.4853e-17, 4.3880e-20, 2.6180e-16, 1.0012e-19, 6.3701e-17, 1.8142e-15,\n 9.3055e-17, 8.4053e-17, 3.8494e-18, 9.4149e-17, 2.1124e-16, 6.0972e-18,\n 3.0351e-16, 4.5889e-17, 1.3100e-16, 1.2273e-16, 1.6758e-17, 6.2951e-19,\n 1.5454e-16, 1.9886e-17, 3.7359e-18, 7.1454e-16, 2.1352e-18, 2.8270e-16,\n 3.4491e-18, 4.5468e-16, 1.0029e-17, 1.0807e-16, 2.8314e-16, 6.9691e-17,\n 1.8164e-16, 6.9431e-16, 3.2123e-16, 3.6778e-16, 2.9302e-16, 6.5141e-17,\n 1.7871e-17, 4.1956e-16, 3.4428e-17, 1.8762e-16, 5.5988e-16, 1.5681e-16,\n 1.3965e-18, 2.5566e-16, 8.1014e-17, 1.9779e-16, 1.1570e-16, 4.6931e-16,\n 1.7518e-16, 1.2874e-16, 2.1610e-20, 1.5330e-17]], device='cuda:0')"
},
"56": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 6.2518e-19, -6.2495e-19], device='cuda:0')",
"exp_avg_sq": "tensor([1.6042e-15, 1.6042e-15], device='cuda:0')"
},
"57": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-3.5385e-16, 5.8232e-13, -5.0105e-13, -2.2946e-05, 1.8671e-15,\n 2.2645e-11, 5.0786e-18, 1.1625e-15],\n [ 3.1597e-18, -4.1265e-15, 5.8287e-15, 2.1496e-07, -1.4356e-17,\n -1.9020e-13, -4.2607e-20, -9.7572e-18],\n [-2.6146e-20, 4.7127e-16, 5.0265e-16, 2.2944e-09, 1.0580e-18,\n 6.4798e-15, 1.4603e-21, 3.3548e-19],\n [ 2.3991e-20, 4.2324e-17, 1.3748e-16, 2.3209e-09, 4.7981e-20,\n -6.2088e-16, -1.3725e-22, -3.1298e-20],\n [-3.4054e-16, 5.7738e-13, -4.6407e-13, -2.1935e-05, 1.8331e-15,\n 2.1985e-11, 4.9306e-18, 1.1287e-15],\n [ 4.6855e-20, -2.5561e-16, -1.5759e-16, 1.3856e-09, -6.3574e-19,\n -5.0154e-15, -1.1262e-21, -2.5847e-19],\n [-2.6915e-16, 4.5246e-13, -3.7048e-13, -1.7374e-05, 1.4400e-15,\n 1.7336e-11, 3.8874e-18, 8.9001e-16],\n [-1.0284e-19, 9.6979e-16, 8.6342e-16, 7.9144e-10, 2.2616e-18,\n 1.5568e-14, 3.5043e-21, 8.0451e-19]], device='cuda:0')",
"exp_avg_sq": "tensor([[8.0652e-10, 3.4493e-10, 6.0163e-10, 2.7763e-05, 2.6057e-09, 8.4797e-10,\n 1.3261e-10, 5.9841e-10],\n [8.5259e-11, 4.5280e-11, 8.6142e-11, 1.7176e-07, 3.5916e-10, 9.6372e-11,\n 2.1248e-11, 9.7630e-11],\n [3.8180e-10, 1.5875e-10, 2.8414e-10, 4.8563e-07, 1.4004e-09, 3.7944e-10,\n 7.8904e-11, 3.6024e-10],\n [3.2888e-11, 1.4711e-11, 2.2531e-11, 1.2713e-07, 1.0962e-10, 3.2475e-11,\n 5.0046e-12, 2.3496e-11],\n [4.3064e-10, 2.1346e-10, 3.8423e-10, 1.9021e-05, 1.7456e-09, 4.7022e-10,\n 1.0311e-10, 4.6435e-10],\n [1.6450e-10, 7.2112e-11, 1.2089e-10, 1.4209e-07, 4.8834e-10, 1.4844e-10,\n 3.0641e-11, 1.2949e-10],\n [1.1951e-14, 1.7989e-14, 4.4937e-14, 1.1421e-05, 7.0876e-14, 1.8203e-13,\n 8.9126e-15, 1.5034e-14],\n [5.1535e-11, 2.6109e-11, 4.5987e-11, 3.0070e-07, 2.4638e-10, 6.5590e-11,\n 1.3607e-11, 6.2602e-11]], device='cuda:0')"
},
"58": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.2952e-05, 2.1492e-07, 2.2939e-09, 2.3207e-09, -2.1934e-05,\n 1.3831e-09, -1.7367e-05, 7.9400e-10], device='cuda:0')",
"exp_avg_sq": "tensor([2.7820e-05, 1.7853e-07, 5.1005e-07, 1.2900e-07, 1.9054e-05, 1.5226e-07,\n 1.1421e-05, 3.0501e-07], device='cuda:0')"
},
"59": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[ 1.1380e-05, -1.3898e-06, -1.3978e-06, -1.3978e-06, 1.2151e-05,\n -1.3978e-06, 1.3969e-05, -1.3978e-06],\n [-1.4512e-05, 1.7723e-06, 1.7825e-06, 1.7825e-06, -1.5495e-05,\n 1.7825e-06, -1.7815e-05, 1.7826e-06],\n [ 1.5846e-06, -1.9352e-07, -1.9463e-07, -1.9463e-07, 1.6920e-06,\n -1.9463e-07, 1.9452e-06, -1.9463e-07],\n [ 1.5466e-06, -1.8888e-07, -1.8997e-07, -1.8997e-07, 1.6514e-06,\n -1.8997e-07, 1.8986e-06, -1.8997e-07]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.5243e-06, 4.5383e-07, 3.1419e-06, 2.5648e-06, 5.9543e-06, 4.2099e-06,\n 1.3224e-05, 2.8143e-06],\n [3.3185e-07, 3.3620e-08, 1.6034e-07, 1.2839e-07, 3.6280e-07, 2.5697e-07,\n 6.9683e-07, 1.3571e-07],\n [1.2835e-06, 8.3692e-08, 5.9255e-07, 4.9121e-07, 1.3853e-06, 7.4446e-07,\n 2.9438e-06, 5.4124e-07],\n [1.1358e-06, 7.3981e-08, 5.0145e-07, 4.1773e-07, 1.2260e-06, 6.2784e-07,\n 2.5771e-06, 4.5878e-07]], device='cuda:0')"
},
"60": {
"step": "tensor(5006.)",
"exp_avg": "tensor([ 5.4949e-06, -8.4146e-06, 1.4776e-06, 1.4421e-06], device='cuda:0')",
"exp_avg_sq": "tensor([1.5446e-04, 9.8517e-06, 2.6850e-05, 2.2638e-05], device='cuda:0')"
},
"61": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-2.4894e-12, -1.4336e-11, -7.0961e-14, ..., -1.9309e-12,\n -1.0626e-13, -3.3520e-11],\n [-1.8415e-12, -1.1691e-11, -4.1220e-14, ..., -1.4439e-12,\n 2.2560e-14, -2.6862e-11],\n [-1.3228e-13, 5.4178e-13, -3.8900e-15, ..., -6.0158e-14,\n 2.1374e-15, 5.5073e-12],\n ...,\n [-7.9676e-14, 6.5010e-13, -5.3836e-15, ..., -2.4179e-14,\n -1.0283e-14, 4.5259e-12],\n [-1.6673e-12, -9.4400e-12, -4.8657e-14, ..., -1.2446e-12,\n -5.5686e-14, -2.1469e-11],\n [-1.1386e-13, 1.3942e-12, -4.6386e-15, ..., -1.1350e-14,\n -2.3326e-16, 5.2485e-12]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.4599e-11, 2.3194e-11, 2.8442e-11, ..., 1.5921e-11, 4.0086e-11,\n 4.7554e-11],\n [1.5829e-11, 9.5736e-12, 1.2407e-11, ..., 6.1236e-12, 1.4088e-11,\n 1.8230e-11],\n [5.6005e-12, 3.8339e-12, 4.8110e-12, ..., 1.8783e-12, 7.3860e-12,\n 7.5790e-12],\n ...,\n [1.6238e-12, 1.1510e-12, 1.0465e-12, ..., 5.8995e-13, 2.2870e-12,\n 2.4847e-12],\n [2.5528e-12, 1.5148e-12, 2.2202e-12, ..., 1.1897e-12, 1.7836e-12,\n 2.7297e-12],\n [4.7682e-12, 3.3377e-12, 3.3648e-12, ..., 1.3245e-12, 5.4075e-12,\n 4.6877e-12]], device='cuda:0')"
},
"62": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-5.5138e-08, -4.6102e-08, 1.3923e-08, 9.1736e-09, 1.5554e-08,\n -5.2015e-08, 1.4397e-08, 1.5964e-08, -3.6599e-08, -4.7789e-08,\n -3.8973e-08, -3.3079e-08, -4.8816e-08, -4.7098e-08, 8.2303e-09,\n -5.3817e-08, -3.5950e-08, 1.3831e-08, -4.6887e-08, -3.6243e-08,\n -3.9487e-08, -4.7708e-08, -5.3213e-08, 7.7841e-09, 1.1791e-08,\n 1.0017e-08, -6.2604e-08, 8.8038e-09, 1.2282e-08, -3.3439e-08,\n 1.0682e-08, -4.2956e-08, -3.7600e-08, -4.1769e-08, -8.5605e-09,\n 7.6397e-09, 1.0221e-08, -5.9867e-08, 8.2911e-09, 1.3814e-08,\n 8.4936e-09, 9.4709e-09, 9.4113e-09, 9.2606e-09, 8.5803e-09,\n 8.3813e-09, 9.7971e-09, -4.4680e-08, -3.7809e-08, 1.1612e-08,\n -5.4682e-08, 8.5946e-09, 7.1759e-09, -4.3175e-08, 1.5093e-08,\n 1.5153e-08, -3.8577e-08, 9.3245e-09, -3.4137e-08, -5.2386e-08,\n 8.9232e-09, -4.3834e-08, 1.1278e-08, -3.8833e-08, -5.9772e-08,\n 1.0405e-08, 1.0063e-08, 9.5713e-09, -5.6958e-08, -3.5318e-08,\n -4.3324e-08, -4.6535e-08, -4.3017e-08, 1.0654e-08, 1.0534e-08,\n -3.4632e-08, 1.0663e-08, -3.1762e-08, -4.2007e-08, -4.1731e-08,\n -4.4553e-08, 9.3405e-09, 1.4680e-08, -5.3332e-08, 6.2805e-09,\n -6.4170e-08, -3.7293e-08, 1.1611e-08, -5.1835e-08, 9.2804e-09,\n 8.7687e-09, 8.6013e-09, -4.6335e-08, -5.2371e-08, 1.0587e-08,\n -4.8988e-08, -4.7626e-08, -5.2319e-08, 9.2041e-09, -3.4227e-08,\n 1.0039e-08, 1.1073e-08, -4.7813e-08, 7.7591e-09, -4.3887e-08,\n 9.6214e-09, -4.2594e-08, -3.6209e-08, 8.5078e-09, 8.8106e-09,\n 9.6383e-09, -3.6469e-08, 1.6419e-08, -4.4422e-08, -3.2824e-08,\n -4.1438e-08, 1.5646e-08, 1.2863e-08, 9.5828e-09, 1.2671e-08,\n -4.2259e-08, 1.0279e-08, 9.1091e-09, 8.8078e-09, -3.7766e-08,\n -3.3333e-08, 1.1847e-08, 7.6178e-09, 1.0830e-08, 1.1457e-08,\n -3.8663e-09, 1.2834e-08, -4.5605e-08, 9.8589e-09, -3.5555e-08,\n -4.6324e-08, -3.1660e-08, -3.3804e-08, -3.6194e-08, 8.1141e-09,\n 8.7303e-09, 1.0852e-08, 1.1100e-08, -5.5599e-08, -4.1848e-08,\n 9.5365e-09, -4.0017e-08, -4.0781e-08, -4.6383e-08, 8.7914e-09,\n -3.8144e-08, -4.8574e-08, -5.1735e-08, 8.7825e-09, -5.2740e-08,\n 7.7782e-09, 9.1529e-09, 1.4930e-08, -4.7059e-08, -1.8825e-08,\n -5.1735e-08, 1.1988e-08, -3.8510e-08, -3.5931e-08, -3.6683e-08,\n -3.6350e-08, -3.7091e-08, -5.3257e-08, -3.7409e-08, -6.6047e-09,\n 8.4992e-09, 7.4443e-09, -2.6825e-08, 1.1032e-08, 1.0443e-08,\n 9.4380e-09, -3.8514e-08, -3.8054e-08, 1.2948e-08, -5.3466e-08,\n -4.3961e-08, 1.1329e-08, 8.7663e-09, -3.9601e-08, -3.5720e-08,\n -4.8193e-08, -4.7404e-08, -4.0343e-08, 1.0127e-08, 1.0717e-08,\n -3.6864e-08, 1.2669e-08], device='cuda:0')",
"exp_avg_sq": "tensor([2.8993e-08, 1.1178e-08, 4.6578e-09, 4.3273e-10, 7.6076e-09, 2.0784e-08,\n 4.4764e-09, 5.4744e-09, 2.6678e-09, 1.6353e-08, 4.0258e-09, 5.8471e-10,\n 1.5644e-08, 1.1690e-08, 1.6451e-10, 2.5478e-08, 1.4527e-09, 3.7518e-09,\n 1.1608e-08, 1.8194e-09, 3.4356e-09, 1.4516e-08, 2.3383e-08, 1.9893e-10,\n 1.7815e-09, 2.6562e-10, 4.3796e-08, 2.0337e-10, 3.0869e-09, 1.5481e-09,\n 2.4680e-09, 7.0775e-09, 1.7566e-09, 6.3018e-09, 3.0560e-11, 1.8519e-10,\n 1.3136e-09, 3.7505e-08, 1.4357e-10, 7.7874e-09, 5.2845e-10, 4.8940e-10,\n 1.5125e-10, 1.1701e-09, 1.4281e-10, 1.3389e-10, 5.4160e-10, 9.1850e-09,\n 2.5892e-09, 1.1378e-09, 2.5880e-08, 7.7722e-11, 1.2146e-10, 1.0973e-08,\n 5.6666e-09, 6.3091e-09, 2.8074e-09, 4.1428e-10, 1.1879e-09, 2.2940e-08,\n 7.6099e-10, 7.7661e-09, 6.6831e-10, 3.5134e-09, 4.4145e-08, 1.1108e-09,\n 3.9430e-10, 7.3768e-10, 3.2411e-08, 1.4589e-09, 9.1465e-09, 1.3270e-08,\n 9.1015e-09, 1.0108e-09, 6.1784e-10, 2.3110e-09, 6.1610e-10, 3.5957e-10,\n 6.6928e-09, 7.3597e-09, 7.3700e-09, 1.7157e-10, 6.2863e-09, 2.2476e-08,\n 1.1155e-10, 5.4298e-08, 2.3723e-09, 2.1202e-09, 1.6711e-08, 2.7400e-10,\n 7.5588e-10, 8.3218e-10, 9.9666e-09, 2.0926e-08, 2.7075e-09, 1.8369e-08,\n 1.2566e-08, 2.0177e-08, 2.3898e-10, 7.5910e-10, 1.1949e-09, 1.5009e-09,\n 1.7231e-08, 2.9757e-10, 9.3437e-09, 1.0984e-09, 7.9400e-09, 1.3650e-09,\n 1.4974e-10, 3.2941e-10, 1.5900e-10, 2.8274e-09, 6.6402e-09, 1.2034e-08,\n 4.1601e-10, 6.7572e-09, 7.3141e-09, 4.4039e-09, 1.0836e-09, 2.2403e-09,\n 5.8429e-09, 3.6050e-10, 6.7170e-10, 3.5903e-10, 2.2202e-09, 8.2545e-10,\n 8.6442e-10, 2.7548e-10, 1.1075e-09, 1.6181e-09, 3.1211e-11, 1.3920e-09,\n 1.1520e-08, 8.1573e-10, 1.2700e-09, 1.2632e-08, 1.7312e-10, 5.7202e-10,\n 3.0832e-09, 2.5042e-11, 3.6708e-11, 8.1891e-10, 8.6969e-10, 3.0562e-08,\n 4.8849e-09, 7.0243e-10, 2.7885e-09, 6.2513e-09, 9.3911e-09, 1.0988e-10,\n 5.2708e-09, 1.6360e-08, 2.1865e-08, 1.6158e-10, 2.4973e-08, 2.7023e-10,\n 2.7974e-09, 8.7888e-09, 1.2932e-08, 4.6774e-11, 1.9071e-08, 1.9136e-09,\n 2.9148e-09, 2.0726e-09, 8.8062e-10, 1.3775e-09, 2.7739e-09, 2.8462e-08,\n 2.5817e-09, 2.7303e-11, 7.0980e-11, 7.3127e-11, 8.1678e-11, 1.0777e-09,\n 1.5635e-09, 6.7141e-10, 4.7306e-09, 2.3263e-09, 2.9368e-09, 2.5027e-08,\n 1.0718e-08, 5.8374e-09, 4.1628e-11, 4.1032e-09, 2.5959e-09, 1.4980e-08,\n 1.3740e-08, 3.9797e-09, 7.0858e-10, 1.2910e-09, 1.8495e-09, 3.5622e-09],\n device='cuda:0')"
},
"63": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-9.4749e-08, -9.5594e-08, 4.4944e-08, 4.1445e-08, 4.3699e-08,\n -9.3526e-08, 4.3177e-08, 4.0693e-08, -9.2851e-08, -8.2623e-08,\n -9.5208e-08, -9.8553e-08, -9.3632e-08, -9.7626e-08, 4.5974e-08,\n -9.1674e-08, -9.5507e-08, 4.0069e-08, -1.0143e-07, -9.7453e-08,\n -9.7340e-08, -8.9762e-08, -9.2996e-08, 4.7237e-08, 4.0127e-08,\n 4.1146e-08, -9.4601e-08, 4.2235e-08, 4.2719e-08, -8.9229e-08,\n 4.7631e-08, -9.8128e-08, -1.0677e-07, -9.0681e-08, -5.0695e-08,\n 4.5644e-08, 4.5515e-08, -1.0030e-07, 4.1727e-08, 4.6254e-08,\n 4.6865e-08, 4.2958e-08, 3.9924e-08, 4.7523e-08, 4.2931e-08,\n 4.2241e-08, 4.4719e-08, -9.5939e-08, -1.0251e-07, 3.9681e-08,\n -1.0090e-07, 4.3084e-08, 4.7324e-08, -7.8169e-08, 4.2245e-08,\n 3.7938e-08, -1.0485e-07, 4.2474e-08, -8.2686e-08, -9.2718e-08,\n 4.7441e-08, -9.4887e-08, 4.0252e-08, -9.5386e-08, -7.8277e-08,\n 4.5079e-08, 4.1586e-08, 4.2118e-08, -8.9404e-08, -9.8257e-08,\n -8.9353e-08, -8.8530e-08, -8.1668e-08, 4.5527e-08, 4.1291e-08,\n -8.0471e-08, 3.9283e-08, -9.8022e-08, -8.8892e-08, -8.3194e-08,\n -1.0623e-07, 4.3349e-08, 4.1911e-08, -9.6144e-08, 5.0423e-08,\n -9.4749e-08, -1.0178e-07, 4.2044e-08, -1.0848e-07, 4.2026e-08,\n 4.5719e-08, 4.2229e-08, -1.0203e-07, -9.5974e-08, 4.3437e-08,\n -8.4653e-08, -9.6946e-08, -9.8451e-08, 4.4630e-08, -1.0402e-07,\n 4.6472e-08, 4.1022e-08, -8.0110e-08, 4.6473e-08, -8.9939e-08,\n 4.6934e-08, -8.8059e-08, -9.9998e-08, 3.6813e-08, 4.6017e-08,\n 3.9825e-08, -8.9420e-08, 4.2333e-08, -7.9836e-08, -1.0737e-07,\n -8.9761e-08, 4.2942e-08, 4.4539e-08, 4.5534e-08, 4.3991e-08,\n -9.9045e-08, 4.0512e-08, 4.4020e-08, 4.3845e-08, -9.8396e-08,\n -1.0220e-07, 4.2449e-08, 4.7040e-08, 4.4357e-08, 4.4217e-08,\n -3.8140e-09, 4.0351e-08, -8.8743e-08, 4.5719e-08, -1.0073e-07,\n -8.7693e-08, -9.1710e-08, -1.1095e-07, -8.7824e-08, 4.1233e-08,\n 3.3934e-08, 4.3227e-08, 4.2512e-08, -8.4674e-08, -1.0545e-07,\n 4.6126e-08, -1.1424e-07, -8.7614e-08, -1.0530e-07, 4.2126e-08,\n -7.5539e-08, -8.5221e-08, -9.2665e-08, 4.2341e-08, -8.5475e-08,\n 4.5117e-08, 4.7000e-08, 4.3483e-08, -9.2861e-08, -8.1359e-08,\n -9.9026e-08, 4.4155e-08, -9.6567e-08, -8.5017e-08, -1.1100e-07,\n -1.0031e-07, -8.6897e-08, -7.8410e-08, -9.1638e-08, -2.9992e-08,\n 4.2742e-08, 4.5821e-08, -1.0008e-07, 4.3817e-08, 4.5731e-08,\n 4.3061e-08, -8.5848e-08, -1.0183e-07, 4.2107e-08, -8.8730e-08,\n -8.3384e-08, 4.5024e-08, 3.6688e-08, -9.7275e-08, -8.5684e-08,\n -9.2884e-08, -9.1905e-08, -1.0077e-07, 4.5929e-08, 4.3114e-08,\n -9.7286e-08, 4.5479e-08],\n [ 2.9272e-08, 2.9508e-08, -1.5492e-08, -1.3844e-08, -1.4397e-08,\n 3.0587e-08, -1.4256e-08, -1.3574e-08, 3.0780e-08, 2.4147e-08,\n 3.0297e-08, 3.1332e-08, 3.0106e-08, 3.1011e-08, -1.5348e-08,\n 3.0392e-08, 3.1217e-08, -1.3069e-08, 3.3000e-08, 3.2662e-08,\n 3.0861e-08, 2.6889e-08, 2.9845e-08, -1.6301e-08, -1.3427e-08,\n -1.3462e-08, 2.9630e-08, -1.4163e-08, -1.4225e-08, 2.8954e-08,\n -1.6528e-08, 2.9736e-08, 3.3699e-08, 2.8362e-08, 1.6873e-08,\n -1.5380e-08, -1.5060e-08, 3.2264e-08, -1.3723e-08, -1.5704e-08,\n -1.5664e-08, -1.4460e-08, -1.3414e-08, -1.6132e-08, -1.4172e-08,\n -1.3625e-08, -1.4867e-08, 3.2112e-08, 3.2503e-08, -1.3262e-08,\n 3.3797e-08, -1.4280e-08, -1.5952e-08, 2.3277e-08, -1.4431e-08,\n -1.2400e-08, 3.3245e-08, -1.4103e-08, 2.6912e-08, 2.9477e-08,\n -1.5927e-08, 2.9988e-08, -1.3196e-08, 3.2233e-08, 2.5182e-08,\n -1.5108e-08, -1.4188e-08, -1.3914e-08, 2.9099e-08, 3.2164e-08,\n 2.8804e-08, 2.6187e-08, 2.4770e-08, -1.5051e-08, -1.3940e-08,\n 2.5819e-08, -1.2917e-08, 3.0401e-08, 2.7817e-08, 2.5767e-08,\n 3.4433e-08, -1.4209e-08, -1.3737e-08, 3.0117e-08, -1.6979e-08,\n 2.8894e-08, 3.3654e-08, -1.4014e-08, 3.5369e-08, -1.3643e-08,\n -1.6125e-08, -1.4372e-08, 3.2600e-08, 2.9973e-08, -1.4201e-08,\n 2.7704e-08, 3.0528e-08, 3.2046e-08, -1.4909e-08, 3.3737e-08,\n -1.5583e-08, -1.3497e-08, 2.3968e-08, -1.5483e-08, 2.9186e-08,\n -1.5806e-08, 2.8944e-08, 3.2193e-08, -1.2683e-08, -1.5440e-08,\n -1.3629e-08, 2.8503e-08, -1.4074e-08, 2.5317e-08, 3.5524e-08,\n 2.8798e-08, -1.4018e-08, -1.4938e-08, -1.5074e-08, -1.4992e-08,\n 3.1158e-08, -1.3448e-08, -1.4819e-08, -1.4661e-08, 3.1740e-08,\n 3.2102e-08, -1.4520e-08, -1.5865e-08, -1.4663e-08, -1.4565e-08,\n -1.9519e-10, -1.3173e-08, 2.9331e-08, -1.5316e-08, 3.2737e-08,\n 2.7851e-08, 2.9608e-08, 3.4914e-08, 2.9090e-08, -1.3499e-08,\n -1.1294e-08, -1.4521e-08, -1.4315e-08, 2.6773e-08, 3.4982e-08,\n -1.5342e-08, 3.6872e-08, 2.7369e-08, 3.4254e-08, -1.4244e-08,\n 2.3281e-08, 2.7343e-08, 2.9210e-08, -1.4244e-08, 2.7071e-08,\n -1.5160e-08, -1.5890e-08, -1.4539e-08, 3.0302e-08, 2.5675e-08,\n 3.2044e-08, -1.4813e-08, 2.9869e-08, 2.6394e-08, 3.6481e-08,\n 3.1039e-08, 2.8234e-08, 2.5521e-08, 2.8674e-08, 1.0705e-08,\n -1.4091e-08, -1.5544e-08, 3.3026e-08, -1.4534e-08, -1.5405e-08,\n -1.4325e-08, 2.7395e-08, 3.1952e-08, -1.4062e-08, 2.7947e-08,\n 2.8054e-08, -1.5533e-08, -1.2336e-08, 3.0325e-08, 2.6301e-08,\n 2.9476e-08, 3.1134e-08, 3.1773e-08, -1.5557e-08, -1.4739e-08,\n 3.0978e-08, -1.5367e-08],\n [ 3.4885e-08, 3.5207e-08, -1.5670e-08, -1.4686e-08, -1.5590e-08,\n 3.3529e-08, -1.5387e-08, -1.4429e-08, 3.3069e-08, 3.1159e-08,\n 3.4583e-08, 3.5818e-08, 3.3842e-08, 3.5486e-08, -1.6294e-08,\n 3.2652e-08, 3.4254e-08, -1.4367e-08, 3.6454e-08, 3.4519e-08,\n 3.5417e-08, 3.3501e-08, 3.3643e-08, -1.6458e-08, -1.4207e-08,\n -1.4730e-08, 3.4611e-08, -1.4936e-08, -1.5161e-08, 3.2115e-08,\n -1.6547e-08, 3.6438e-08, 3.8923e-08, 3.3201e-08, 1.8007e-08,\n -1.6101e-08, -1.6203e-08, 3.6239e-08, -1.4900e-08, -1.6254e-08,\n -1.6600e-08, -1.5163e-08, -1.4106e-08, -1.6701e-08, -1.5301e-08,\n -1.5226e-08, -1.5882e-08, 3.4004e-08, 3.7297e-08, -1.4057e-08,\n 3.5747e-08, -1.5325e-08, -1.6690e-08, 2.9254e-08, -1.4799e-08,\n -1.3589e-08, 3.8147e-08, -1.5095e-08, 2.9722e-08, 3.3695e-08,\n -1.6766e-08, 3.4574e-08, -1.4396e-08, 3.3649e-08, 2.8296e-08,\n -1.5945e-08, -1.4578e-08, -1.5007e-08, 3.2123e-08, 3.5213e-08,\n 3.2263e-08, 3.3218e-08, 3.0318e-08, -1.6214e-08, -1.4553e-08,\n 2.9125e-08, -1.4029e-08, 3.6021e-08, 3.2542e-08, 3.0595e-08,\n 3.8243e-08, -1.5504e-08, -1.4991e-08, 3.5176e-08, -1.7792e-08,\n 3.5084e-08, 3.6290e-08, -1.4914e-08, 3.8940e-08, -1.5101e-08,\n -1.5745e-08, -1.4822e-08, 3.6984e-08, 3.5155e-08, -1.5555e-08,\n 3.0341e-08, 3.5381e-08, 3.5377e-08, -1.5812e-08, 3.7447e-08,\n -1.6434e-08, -1.4645e-08, 2.9916e-08, -1.6488e-08, 3.2366e-08,\n -1.6561e-08, 3.1504e-08, 3.6121e-08, -1.2841e-08, -1.6268e-08,\n -1.3938e-08, 3.2459e-08, -1.5036e-08, 2.9057e-08, 3.8277e-08,\n 3.2476e-08, -1.5389e-08, -1.5749e-08, -1.6206e-08, -1.5429e-08,\n 3.6169e-08, -1.4400e-08, -1.5536e-08, -1.5527e-08, 3.5509e-08,\n 3.7335e-08, -1.4860e-08, -1.6586e-08, -1.5798e-08, -1.5776e-08,\n 2.1378e-09, -1.4461e-08, 3.1656e-08, -1.6175e-08, 3.6224e-08,\n 3.1878e-08, 3.3081e-08, 4.0503e-08, 3.1294e-08, -1.4757e-08,\n -1.2049e-08, -1.5273e-08, -1.5003e-08, 3.0850e-08, 3.7538e-08,\n -1.6378e-08, 4.1204e-08, 3.2100e-08, 3.7843e-08, -1.4835e-08,\n 2.7853e-08, 3.0840e-08, 3.3807e-08, -1.4950e-08, 3.1117e-08,\n -1.5939e-08, -1.6552e-08, -1.5400e-08, 3.3332e-08, 2.9654e-08,\n 3.5681e-08, -1.5611e-08, 3.5532e-08, 3.1240e-08, 3.9689e-08,\n 3.6899e-08, 3.1260e-08, 2.8180e-08, 3.3546e-08, 1.0269e-08,\n -1.5243e-08, -1.6109e-08, 3.5712e-08, -1.5580e-08, -1.6134e-08,\n -1.5289e-08, 3.1144e-08, 3.7224e-08, -1.4922e-08, 3.2381e-08,\n 2.9482e-08, -1.5691e-08, -1.2959e-08, 3.5666e-08, 3.1644e-08,\n 3.3779e-08, 3.2382e-08, 3.6752e-08, -1.6158e-08, -1.5097e-08,\n 3.5328e-08, -1.6021e-08],\n [ 3.0629e-08, 3.0915e-08, -1.3782e-08, -1.2916e-08, -1.3712e-08,\n 2.9445e-08, -1.3534e-08, -1.2690e-08, 2.9037e-08, 2.7353e-08,\n 3.0361e-08, 3.1441e-08, 2.9717e-08, 3.1165e-08, -1.4332e-08,\n 2.8666e-08, 3.0072e-08, -1.2634e-08, 3.2013e-08, 3.0310e-08,\n 3.1095e-08, 2.9412e-08, 2.9541e-08, -1.4477e-08, -1.2494e-08,\n -1.2955e-08, 3.0393e-08, -1.3136e-08, -1.3333e-08, 2.8199e-08,\n -1.4555e-08, 3.1993e-08, 3.4190e-08, 2.9157e-08, 1.5823e-08,\n -1.4162e-08, -1.4252e-08, 3.1828e-08, -1.3105e-08, -1.4297e-08,\n -1.4601e-08, -1.3335e-08, -1.2405e-08, -1.4690e-08, -1.3458e-08,\n -1.3391e-08, -1.3970e-08, 2.9857e-08, 3.2750e-08, -1.2363e-08,\n 3.1389e-08, -1.3479e-08, -1.4681e-08, 2.5678e-08, -1.3015e-08,\n -1.1950e-08, 3.3502e-08, -1.3276e-08, 2.6091e-08, 2.9584e-08,\n -1.4747e-08, 3.0362e-08, -1.2661e-08, 2.9544e-08, 2.4835e-08,\n -1.4025e-08, -1.2821e-08, -1.3198e-08, 2.8214e-08, 3.0919e-08,\n 2.8324e-08, 2.9165e-08, 2.6615e-08, -1.4262e-08, -1.2799e-08,\n 2.5565e-08, -1.2338e-08, 3.1635e-08, 2.8569e-08, 2.6865e-08,\n 3.3591e-08, -1.3636e-08, -1.3184e-08, 3.0888e-08, -1.5651e-08,\n 3.0810e-08, 3.1873e-08, -1.3117e-08, 3.4205e-08, -1.3282e-08,\n -1.3848e-08, -1.3036e-08, 3.2485e-08, 3.0881e-08, -1.3681e-08,\n 2.6643e-08, 3.1073e-08, 3.1064e-08, -1.3908e-08, 3.2874e-08,\n -1.4455e-08, -1.2880e-08, 2.6261e-08, -1.4502e-08, 2.8423e-08,\n -1.4567e-08, 2.7647e-08, 3.1718e-08, -1.1291e-08, -1.4309e-08,\n -1.2258e-08, 2.8492e-08, -1.3224e-08, 2.5502e-08, 3.3607e-08,\n 2.8522e-08, -1.3535e-08, -1.3852e-08, -1.4254e-08, -1.3570e-08,\n 3.1757e-08, -1.2664e-08, -1.3665e-08, -1.3657e-08, 3.1183e-08,\n 3.2796e-08, -1.3069e-08, -1.4588e-08, -1.3896e-08, -1.3876e-08,\n 1.8742e-09, -1.2718e-08, 2.7793e-08, -1.4227e-08, 3.1810e-08,\n 2.8000e-08, 2.9050e-08, 3.5570e-08, 2.7475e-08, -1.2978e-08,\n -1.0593e-08, -1.3433e-08, -1.3195e-08, 2.7086e-08, 3.2965e-08,\n -1.4406e-08, 3.6199e-08, 2.8185e-08, 3.3235e-08, -1.3047e-08,\n 2.4445e-08, 2.7075e-08, 2.9686e-08, -1.3148e-08, 2.7323e-08,\n -1.4019e-08, -1.4558e-08, -1.3544e-08, 2.9263e-08, 2.6053e-08,\n 3.1338e-08, -1.3731e-08, 3.1208e-08, 2.7419e-08, 3.4866e-08,\n 3.2410e-08, 2.7442e-08, 2.4743e-08, 2.9455e-08, 9.0262e-09,\n -1.3407e-08, -1.4169e-08, 3.1370e-08, -1.3703e-08, -1.4191e-08,\n -1.3447e-08, 2.7344e-08, 3.2691e-08, -1.3123e-08, 2.8441e-08,\n 2.5883e-08, -1.3800e-08, -1.1395e-08, 3.1319e-08, 2.7778e-08,\n 2.9663e-08, 2.8424e-08, 3.2278e-08, -1.4213e-08, -1.3278e-08,\n 3.1016e-08, -1.4091e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[3.1078e-07, 2.6179e-07, 7.3243e-08, 4.4242e-08, 6.3196e-08, 1.9871e-07,\n 6.9307e-08, 6.1166e-08, 1.4565e-07, 2.2440e-07, 1.6625e-07, 1.5542e-07,\n 2.5470e-07, 2.2637e-07, 6.2392e-08, 2.1723e-07, 1.9934e-07, 4.7948e-08,\n 1.8078e-07, 1.6982e-07, 2.0927e-07, 2.2744e-07, 1.8265e-07, 5.7374e-08,\n 4.4953e-08, 5.0909e-08, 1.8538e-07, 4.5227e-08, 5.2122e-08, 1.0245e-07,\n 7.2654e-08, 1.7303e-07, 2.1870e-07, 1.9596e-07, 1.0058e-08, 5.3176e-08,\n 6.1389e-08, 2.7773e-07, 4.0827e-08, 5.9814e-08, 6.0638e-08, 4.7725e-08,\n 4.6321e-08, 6.3544e-08, 4.6294e-08, 4.3611e-08, 6.2761e-08, 1.6542e-07,\n 1.6892e-07, 4.9834e-08, 3.0324e-07, 4.4376e-08, 5.9107e-08, 1.7584e-07,\n 5.7780e-08, 4.2410e-08, 1.7367e-07, 5.0328e-08, 1.9374e-07, 2.9053e-07,\n 7.0965e-08, 1.9915e-07, 5.4216e-08, 2.4193e-07, 1.8036e-07, 5.9468e-08,\n 5.6587e-08, 4.4264e-08, 1.5571e-07, 1.6629e-07, 1.7207e-07, 2.4633e-07,\n 1.9614e-07, 7.3484e-08, 5.4872e-08, 1.3397e-07, 4.4380e-08, 1.0442e-07,\n 1.9355e-07, 2.0025e-07, 2.3085e-07, 5.6627e-08, 5.5308e-08, 2.0802e-07,\n 8.1443e-08, 3.1143e-07, 1.6177e-07, 5.1638e-08, 2.3047e-07, 5.1167e-08,\n 5.1847e-08, 3.8284e-08, 2.1289e-07, 2.3281e-07, 4.4928e-08, 1.5381e-07,\n 2.1336e-07, 2.3276e-07, 6.1622e-08, 1.5620e-07, 7.4434e-08, 4.8138e-08,\n 1.5110e-07, 5.2852e-08, 1.3638e-07, 6.4273e-08, 1.7701e-07, 1.8589e-07,\n 3.0446e-08, 6.5781e-08, 4.5791e-08, 1.2697e-07, 6.5389e-08, 1.4363e-07,\n 1.3357e-07, 1.5220e-07, 6.2386e-08, 5.8534e-08, 5.7452e-08, 7.2302e-08,\n 2.5691e-07, 5.0073e-08, 5.0158e-08, 4.8980e-08, 2.2966e-07, 1.2757e-07,\n 7.7571e-08, 5.8353e-08, 6.1978e-08, 7.0080e-08, 7.5544e-09, 6.2019e-08,\n 2.6356e-07, 7.5199e-08, 1.7037e-07, 1.8876e-07, 4.1485e-08, 1.3426e-07,\n 1.1363e-07, 1.6942e-08, 8.1720e-09, 6.3154e-08, 5.8405e-08, 1.8841e-07,\n 2.6083e-07, 7.1795e-08, 3.0406e-07, 1.4755e-07, 1.9693e-07, 4.3757e-08,\n 1.4870e-07, 1.8514e-07, 2.8899e-07, 4.2443e-08, 2.3443e-07, 4.4098e-08,\n 4.6213e-08, 5.4518e-08, 1.6825e-07, 1.2286e-08, 1.7301e-07, 6.6866e-08,\n 2.0428e-07, 1.7646e-07, 1.3522e-07, 1.7204e-07, 2.0495e-07, 1.4168e-07,\n 1.9554e-07, 5.9220e-09, 5.3545e-08, 4.0813e-08, 1.4313e-08, 6.3260e-08,\n 6.1180e-08, 4.7832e-08, 1.3581e-07, 2.7399e-07, 5.5565e-08, 1.6023e-07,\n 1.5089e-07, 4.0833e-08, 9.4105e-09, 1.8088e-07, 1.3350e-07, 1.6275e-07,\n 2.2735e-07, 2.2061e-07, 8.3636e-08, 5.5043e-08, 2.0650e-07, 7.2788e-08],\n [3.5063e-08, 2.9421e-08, 8.1132e-09, 4.9681e-09, 7.0034e-09, 2.2334e-08,\n 7.6780e-09, 6.7839e-09, 1.6488e-08, 2.5137e-08, 1.8850e-08, 1.7493e-08,\n 2.8770e-08, 2.5381e-08, 6.9040e-09, 2.4513e-08, 2.2403e-08, 5.3378e-09,\n 2.0423e-08, 1.9166e-08, 2.3485e-08, 2.5640e-08, 2.0663e-08, 6.3998e-09,\n 5.0347e-09, 5.6662e-09, 2.0872e-08, 5.0627e-09, 5.8094e-09, 1.1668e-08,\n 8.0617e-09, 1.9579e-08, 2.4598e-08, 2.2116e-08, 1.0932e-09, 5.9164e-09,\n 6.7954e-09, 3.1233e-08, 4.6015e-09, 6.6333e-09, 6.7570e-09, 5.3293e-09,\n 5.1759e-09, 7.0599e-09, 5.1489e-09, 4.8962e-09, 6.9648e-09, 1.8665e-08,\n 1.9115e-08, 5.5640e-09, 3.4260e-08, 4.9216e-09, 6.5649e-09, 1.9966e-08,\n 6.4513e-09, 4.7692e-09, 1.9643e-08, 5.6437e-09, 2.1659e-08, 3.2600e-08,\n 7.8747e-09, 2.2332e-08, 6.0158e-09, 2.7182e-08, 2.0354e-08, 6.5984e-09,\n 6.3052e-09, 4.9678e-09, 1.7568e-08, 1.8892e-08, 1.9438e-08, 2.7689e-08,\n 2.2082e-08, 8.1485e-09, 6.1014e-09, 1.5198e-08, 4.9672e-09, 1.1780e-08,\n 2.1855e-08, 2.2568e-08, 2.5981e-08, 6.2767e-09, 6.1997e-09, 2.3474e-08,\n 9.0185e-09, 3.4834e-08, 1.8344e-08, 5.7907e-09, 2.5986e-08, 5.7114e-09,\n 5.7880e-09, 4.3101e-09, 2.4032e-08, 2.6130e-08, 5.0542e-09, 1.7337e-08,\n 2.3988e-08, 2.6191e-08, 6.8433e-09, 1.7661e-08, 8.2536e-09, 5.3913e-09,\n 1.7099e-08, 5.8938e-09, 1.5356e-08, 7.1341e-09, 1.9907e-08, 2.0805e-08,\n 3.4613e-09, 7.2888e-09, 5.1002e-09, 1.4356e-08, 7.2587e-09, 1.6238e-08,\n 1.5085e-08, 1.7304e-08, 6.9428e-09, 6.5063e-09, 6.3972e-09, 7.9941e-09,\n 2.8832e-08, 5.5693e-09, 5.6031e-09, 5.4539e-09, 2.5752e-08, 1.4461e-08,\n 8.5826e-09, 6.4759e-09, 6.8748e-09, 7.7639e-09, 8.0366e-10, 6.8686e-09,\n 2.9530e-08, 8.3136e-09, 1.9137e-08, 2.1244e-08, 4.6403e-09, 1.5220e-08,\n 1.2905e-08, 1.8497e-09, 8.7158e-10, 7.0234e-09, 6.5090e-09, 2.1196e-08,\n 2.9315e-08, 7.9572e-09, 3.4019e-08, 1.6644e-08, 2.2219e-08, 4.8819e-09,\n 1.6920e-08, 2.0853e-08, 3.2396e-08, 4.7444e-09, 2.6264e-08, 4.9451e-09,\n 5.1831e-09, 6.0738e-09, 1.9046e-08, 1.3743e-09, 1.9583e-08, 7.4064e-09,\n 2.2920e-08, 1.9888e-08, 1.5261e-08, 1.9351e-08, 2.3011e-08, 1.6102e-08,\n 2.2019e-08, 6.6484e-10, 5.9403e-09, 4.5177e-09, 1.6501e-09, 7.0204e-09,\n 6.8040e-09, 5.3791e-09, 1.5480e-08, 3.0722e-08, 6.1854e-09, 1.8137e-08,\n 1.7127e-08, 4.6073e-09, 1.0102e-09, 2.0393e-08, 1.5206e-08, 1.8420e-08,\n 2.5537e-08, 2.4732e-08, 9.2522e-09, 6.1275e-09, 2.3220e-08, 8.0556e-09],\n [4.0232e-08, 3.4066e-08, 9.4216e-09, 5.7136e-09, 8.1604e-09, 2.5946e-08,\n 8.9352e-09, 7.9132e-09, 1.9063e-08, 2.9366e-08, 2.1675e-08, 2.0303e-08,\n 3.3122e-08, 2.9610e-08, 8.0565e-09, 2.8220e-08, 2.5994e-08, 6.2254e-09,\n 2.3611e-08, 2.2250e-08, 2.7351e-08, 2.9486e-08, 2.3806e-08, 7.3949e-09,\n 5.8117e-09, 6.5745e-09, 2.4189e-08, 5.8540e-09, 6.7329e-09, 1.3412e-08,\n 9.3275e-09, 2.2548e-08, 2.8423e-08, 2.5477e-08, 1.3021e-09, 6.8764e-09,\n 7.9410e-09, 3.6136e-08, 5.2598e-09, 7.7394e-09, 7.8236e-09, 6.1754e-09,\n 5.9865e-09, 8.1923e-09, 5.9960e-09, 5.6193e-09, 8.1001e-09, 2.1652e-08,\n 2.2026e-08, 6.4449e-09, 3.9224e-08, 5.7419e-09, 7.6337e-09, 2.2910e-08,\n 7.4633e-09, 5.4779e-09, 2.2705e-08, 6.4954e-09, 2.5335e-08, 3.7831e-08,\n 9.1366e-09, 2.6064e-08, 7.0233e-09, 3.1423e-08, 2.3560e-08, 7.6935e-09,\n 7.3016e-09, 5.7228e-09, 2.0329e-08, 2.1619e-08, 2.2453e-08, 3.2070e-08,\n 2.5685e-08, 9.4592e-09, 7.0963e-09, 1.7500e-08, 5.7388e-09, 1.3689e-08,\n 2.5344e-08, 2.6180e-08, 2.9946e-08, 7.3245e-09, 7.1235e-09, 2.7017e-08,\n 1.0442e-08, 4.0481e-08, 2.1170e-08, 6.6492e-09, 2.9981e-08, 6.6156e-09,\n 6.6949e-09, 4.9438e-09, 2.7628e-08, 3.0306e-08, 5.7972e-09, 2.0168e-08,\n 2.7864e-08, 3.0277e-08, 7.9424e-09, 2.0344e-08, 9.5679e-09, 6.2258e-09,\n 1.9767e-08, 6.8191e-09, 1.7904e-08, 8.2815e-09, 2.3192e-08, 2.4213e-08,\n 3.9136e-09, 8.4921e-09, 5.9316e-09, 1.6637e-08, 8.4146e-09, 1.8809e-08,\n 1.7401e-08, 1.9872e-08, 8.0312e-09, 7.5601e-09, 7.4203e-09, 9.3077e-09,\n 3.3530e-08, 6.4889e-09, 6.4757e-09, 6.3450e-09, 3.0072e-08, 1.6657e-08,\n 9.9701e-09, 7.5339e-09, 7.9950e-09, 9.0250e-09, 9.4629e-10, 8.0137e-09,\n 3.4351e-08, 9.6814e-09, 2.2371e-08, 2.4548e-08, 5.3132e-09, 1.7490e-08,\n 1.4870e-08, 2.1883e-09, 1.0405e-09, 8.1450e-09, 7.5323e-09, 2.4662e-08,\n 3.3893e-08, 9.2430e-09, 3.9560e-08, 1.9308e-08, 2.5753e-08, 5.6637e-09,\n 1.9374e-08, 2.4247e-08, 3.7508e-08, 5.4778e-09, 3.0529e-08, 5.6980e-09,\n 5.9665e-09, 7.0494e-09, 2.1964e-08, 1.6140e-09, 2.2516e-08, 8.6334e-09,\n 2.6647e-08, 2.3055e-08, 1.7626e-08, 2.2394e-08, 2.6653e-08, 1.8520e-08,\n 2.5485e-08, 7.3979e-10, 6.9197e-09, 5.2962e-09, 1.8840e-09, 8.1703e-09,\n 7.9075e-09, 6.1649e-09, 1.7669e-08, 3.5585e-08, 7.1774e-09, 2.0919e-08,\n 1.9655e-08, 5.2594e-09, 1.1842e-09, 2.3589e-08, 1.7380e-08, 2.1227e-08,\n 2.9703e-08, 2.8763e-08, 1.0723e-08, 7.1190e-09, 2.6829e-08, 9.3701e-09],\n [2.8824e-08, 2.4237e-08, 6.9826e-09, 4.1358e-09, 5.9950e-09, 1.8322e-08,\n 6.5892e-09, 5.7841e-09, 1.3282e-08, 2.0711e-08, 1.5203e-08, 1.4299e-08,\n 2.3463e-08, 2.0880e-08, 5.9289e-09, 2.0061e-08, 1.8411e-08, 4.4971e-09,\n 1.6566e-08, 1.5520e-08, 1.9304e-08, 2.1077e-08, 1.6750e-08, 5.4154e-09,\n 4.2096e-09, 4.8058e-09, 1.7072e-08, 4.2313e-09, 4.9117e-09, 9.2757e-09,\n 6.9297e-09, 1.5870e-08, 2.0261e-08, 1.8076e-08, 9.7315e-10, 5.0145e-09,\n 5.8196e-09, 2.5697e-08, 3.8117e-09, 5.6565e-09, 5.7228e-09, 4.4787e-09,\n 4.3498e-09, 6.0225e-09, 4.3595e-09, 4.0887e-09, 5.9486e-09, 1.5139e-08,\n 1.5478e-08, 4.6799e-09, 2.8115e-08, 4.1968e-09, 5.5926e-09, 1.6065e-08,\n 5.4334e-09, 3.9581e-09, 1.5873e-08, 4.7144e-09, 1.7937e-08, 2.6928e-08,\n 6.7463e-09, 1.8358e-08, 5.1175e-09, 2.2458e-08, 1.6545e-08, 5.6216e-09,\n 5.3403e-09, 4.1350e-09, 1.4297e-08, 1.5222e-08, 1.5789e-08, 2.2787e-08,\n 1.7989e-08, 6.9930e-09, 5.1776e-09, 1.2215e-08, 4.1590e-09, 9.5423e-09,\n 1.7691e-08, 1.8378e-08, 2.1419e-08, 5.3606e-09, 5.1962e-09, 1.9214e-08,\n 7.7981e-09, 2.9027e-08, 1.4723e-08, 4.8502e-09, 2.1264e-08, 4.8082e-09,\n 4.8785e-09, 3.5699e-09, 1.9676e-08, 2.1577e-08, 4.1956e-09, 1.4067e-08,\n 1.9657e-08, 2.1527e-08, 5.8453e-09, 1.4348e-08, 7.0938e-09, 4.5050e-09,\n 1.3790e-08, 4.9846e-09, 1.2464e-08, 6.1025e-09, 1.6239e-08, 1.7273e-08,\n 2.8259e-09, 6.2435e-09, 4.3039e-09, 1.1579e-08, 6.2166e-09, 1.3108e-08,\n 1.2280e-08, 1.3851e-08, 5.9121e-09, 5.5325e-09, 5.4199e-09, 6.9013e-09,\n 2.3742e-08, 4.7114e-09, 4.7173e-09, 4.6055e-09, 2.1156e-08, 1.1652e-08,\n 7.4125e-09, 5.5282e-09, 5.8809e-09, 6.6713e-09, 7.7435e-10, 5.8832e-09,\n 2.4439e-08, 7.1776e-09, 1.5614e-08, 1.7461e-08, 3.9359e-09, 1.2294e-08,\n 1.0326e-08, 1.6342e-09, 8.2042e-10, 5.9761e-09, 5.5141e-09, 1.7301e-08,\n 2.4191e-08, 6.8352e-09, 2.8299e-08, 1.3512e-08, 1.8044e-08, 4.1090e-09,\n 1.3555e-08, 1.6966e-08, 2.6917e-08, 3.9910e-09, 2.1764e-08, 4.1254e-09,\n 4.3263e-09, 5.1335e-09, 1.5392e-08, 1.1320e-09, 1.5886e-08, 6.3480e-09,\n 1.8896e-08, 1.6208e-08, 1.2435e-08, 1.5905e-08, 1.9011e-08, 1.2884e-08,\n 1.8032e-08, 5.7755e-10, 5.0691e-09, 3.8556e-09, 1.2716e-09, 5.9903e-09,\n 5.7741e-09, 4.4739e-09, 1.2377e-08, 2.5486e-08, 5.2431e-09, 1.4656e-08,\n 1.3794e-08, 3.8094e-09, 9.5117e-10, 1.6644e-08, 1.2166e-08, 1.4906e-08,\n 2.0960e-08, 2.0436e-08, 8.0176e-09, 5.1850e-09, 1.9144e-08, 6.9404e-09]],\n device='cuda:0')"
},
"64": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-3.3476e-07, 1.1548e-07, 1.1667e-07, 1.0264e-07], device='cuda:0')",
"exp_avg_sq": "tensor([6.8911e-06, 7.6474e-07, 8.6192e-07, 6.7798e-07], device='cuda:0')"
}
},
"param_groups": [
{
"lr": 0.00654543046337755,
"name": "shared",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
0,
1
]
},
{
"lr": 0.00654543046337755,
"name": "scale_256",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
2,
3,
4
]
},
{
"lr": 0.00654543046337755,
"name": "scale_512",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
5,
6,
7
]
},
{
"lr": 0.00654543046337755,
"name": "scale_768",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
8,
9,
10
]
},
{
"lr": 0.00654543046337755,
"name": "scale_1024",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
11,
12,
13
]
},
{
"lr": 0.0032728879774401812,
"name": "fusion",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.005,
"params": [
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64
]
}
]
},
"scheduler_state_dict": {
"T_0": 10,
"T_i": 10,
"T_mult": 2,
"eta_min": 1e-06,
"T_cur": 4,
"base_lrs": [
0.01,
0.01,
0.01,
0.01,
0.01,
0.005
],
"last_epoch": 4,
"_step_count": 0,
"_is_initial": false,
"_get_lr_called_within_step": false,
"_last_lr": [
0.00654543046337755,
0.00654543046337755,
0.00654543046337755,
0.00654543046337755,
0.00654543046337755,
0.0032728879774401812
]
},
"metrics": {
"best_val_acc": 74.733,
"best_epoch": 3,
"scale_accuracies": {
"256": 72.935,
"512": 74.481
},
"training_history": {
"epochs": [
1,
2,
3,
4
],
"train_loss": [
3.078377773987499,
1.970284724826104,
2.077278166926007,
1.7731811365888825
],
"train_acc": [
58.631622575355124,
71.63597719891318,
75.18789509876542,
77.50847469533636
],
"val_acc": [
69.481,
71.403,
73.618,
74.733
],
"scale_accs": {
"256": [
69.481,
71.403,
72.534,
72.935
],
"512": [
73.041,
74.481
]
},
"lr": [
0.00975530705321762,
0.00904518046337755,
0.00793913236883622,
0.00654543046337755
]
}
},
"train_config": {
"name": "david_training",
"run_id": "20251012_194945",
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
"model_variant": [
"clip_vit_b16",
"clip_vit_laion_b32"
],
"num_classes": 1000,
"preset": "balanced",
"custom_config_path": null,
"num_classes_override": null,
"use_belly_override": null,
"belly_expand_override": null,
"progressive_training_override": true,
"scale_warmup_epochs_override": {
"256": 0,
"512": 2,
"768": 5,
"1024": 8
},
"num_epochs": 10,
"batch_size": 1024,
"learning_rate": 0.01,
"weight_decay": 1e-05,
"warmup_epochs": 3,
"use_rose_loss": true,
"rose_initial_weight": 0.2,
"rose_max_weight": 0.8,
"rose_weight_schedule": "adaptive",
"use_cayley_loss": true,
"cayley_weight": 0.01,
"scale_loss_balance": null,
"use_mixed_precision": false,
"gradient_clip": 10.0,
"scheduler_type": "cosine_restarts",
"min_lr": 1e-06,
"freeze_strategy": "never",
"freeze_threshold": 90.0,
"unfreeze_on_plateau": true,
"patience": 10,
"track_gradients": true,
"gradient_scale_threshold": 1e-05,
"gradient_scale_multiplier": 10.0,
"log_interval": 50,
"val_interval": 1,
"save_interval": 5,
"log_fusion_weights": true,
"log_loss_components": true,
"save_format": "safetensors",
"hf_repo": "AbstractPhil/david-shared-space",
"upload_to_hub": true,
"base_dir": "./david_training",
"num_workers": 10,
"pin_memory": true,
"prefetch_factor": 4,
"persistent_workers": true
}
}