AbstractPhil's picture
Update best_model_acc75.74_metadata.json - Run 20251012_194945
c9cba98 verified
{
"epoch": 9,
"optimizer_state_dict": {
"state": {
"0": {
"step": "tensor(25030.)",
"exp_avg": "tensor([[-6.2919e-06, 1.0013e-05, -9.9932e-05, ..., 2.1619e-05,\n 1.7618e-05, 6.4771e-05],\n [ 1.1803e-05, 9.2438e-05, 3.5995e-05, ..., -6.6299e-06,\n 1.8790e-05, 1.8174e-05],\n [-7.4797e-05, -1.5104e-04, 5.3033e-05, ..., -1.8053e-05,\n 3.1169e-05, 9.8736e-06],\n ...,\n [-9.6427e-05, 5.3696e-05, -5.7858e-05, ..., 3.2176e-05,\n -7.0185e-05, 4.1728e-06],\n [ 5.0344e-05, -3.9465e-05, -4.4415e-05, ..., 1.5531e-06,\n -5.9903e-06, -1.7012e-05],\n [-3.9471e-05, 1.0801e-04, -2.3330e-05, ..., 1.7045e-05,\n -5.6475e-05, -2.4730e-07]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.2631e-08, 1.6505e-07, 6.3903e-08, ..., 1.7547e-08, 1.0013e-08,\n 1.9447e-08],\n [3.5573e-08, 7.7625e-08, 3.6296e-08, ..., 1.5250e-08, 1.3575e-08,\n 1.4376e-08],\n [1.5144e-08, 1.3092e-07, 3.9439e-08, ..., 1.0120e-08, 7.0511e-09,\n 8.0278e-09],\n ...,\n [8.0546e-08, 8.2534e-08, 1.1634e-08, ..., 1.0709e-08, 1.9536e-08,\n 1.1572e-08],\n [2.7425e-08, 6.3754e-08, 3.1446e-08, ..., 8.2951e-09, 8.7378e-09,\n 7.7105e-09],\n [8.8053e-08, 7.3712e-08, 1.6761e-08, ..., 1.6529e-08, 2.5850e-08,\n 1.4750e-08]], device='cuda:0')"
},
"1": {
"step": "tensor(25030.)",
"exp_avg": "tensor([ 7.9983e-04, 8.0063e-04, -9.8581e-04, 1.8882e-04, 2.9675e-04,\n 4.9042e-04, 5.6052e-45, 5.5825e-04, 1.1364e-03, -1.2248e-03,\n 2.2197e-04, 3.2142e-04, -1.4227e-04, -1.6236e-03, 1.0334e-03,\n 2.1748e-04, 3.8554e-04, -2.9640e-03, 3.3317e-04, -3.0551e-04,\n 7.5790e-04, -1.3134e-03, -2.4832e-04, 7.6575e-04, 1.5152e-03,\n -4.7339e-05, -5.6058e-04, -1.4234e-03, -1.5254e-03, -1.5395e-04,\n -3.4123e-03, 4.1186e-04, 7.1988e-04, -1.6996e-03, 1.3367e-03,\n -6.9721e-04, 1.9242e-04, 2.9460e-04, -1.2235e-03, 1.1535e-04,\n 1.0889e-03, 2.9835e-04, -1.1607e-03, -1.1146e-03, 2.9673e-04,\n -3.3083e-03, 2.7067e-03, 7.1326e-05, 5.6159e-04, -4.0775e-04,\n 8.1892e-04, 2.1895e-03, -2.2499e-04, -1.3438e-03, 7.8022e-04,\n 9.2620e-04, 8.0605e-04, -1.4633e-04, 4.4383e-04, 2.1116e-03,\n -1.0618e-04, -1.3566e-03, -6.4058e-04, -2.0942e-04, 1.0846e-03,\n 2.5780e-04, 4.6419e-05, -2.1915e-04, -2.3139e-03, 4.5580e-04,\n 2.8449e-04, -1.8341e-03, 1.1721e-04, -5.9132e-04, -3.3722e-05,\n 3.4723e-05, 1.7194e-04, 9.3790e-05, -3.6204e-04, -1.9053e-04,\n -9.0577e-04, 1.2991e-03, -1.3745e-03, 6.9077e-04, 1.2840e-03,\n -2.8500e-03, 5.5868e-04, 5.6082e-04, 2.2439e-03, 3.9990e-04,\n 2.1363e-04, -1.1489e-05, 3.7407e-04, 1.6349e-03, 4.2038e-04,\n -1.3962e-03, -6.9135e-04, -3.9485e-04, -6.3909e-05, 2.2149e-04,\n 1.7557e-03, -1.6035e-03, -4.7295e-04, -6.9182e-04, 2.9723e-03,\n 1.2864e-03, -1.7077e-03, 2.2606e-05, -5.3085e-04, -3.4924e-05,\n -7.2629e-04, 2.7521e-04, -1.7092e-03, -1.3422e-03, 1.8046e-03,\n 6.1404e-04, 1.4190e-03, -5.6740e-04, -3.3296e-03, 2.6722e-04,\n -8.0608e-05, 3.0874e-04, -5.9278e-04, -3.9335e-04, 2.6415e-04,\n -4.6236e-04, 1.8736e-03, -5.4773e-05, -1.1771e-03, -4.1220e-04,\n -2.5466e-05, 4.7170e-04, 1.9626e-05, 1.2207e-03, -8.0210e-04,\n 1.0472e-03, 2.6955e-04, -1.7988e-04, 1.2784e-04, -1.1345e-03,\n -2.0396e-03, -1.5763e-03, 1.7819e-03, 2.6843e-05, -2.0597e-04,\n -1.2044e-03, 6.2408e-04, 4.9042e-04, 1.2589e-03, -8.9527e-04,\n -1.6258e-03, 1.4965e-04, 4.8453e-04, 1.9437e-03, 7.0646e-04,\n 7.1072e-04, -2.0071e-04, 1.2872e-03, 1.5114e-03, 6.5026e-04,\n -3.1184e-04, -4.0544e-04, -3.0286e-03, 1.0086e-04, -2.5109e-04,\n -2.9108e-03, 1.1540e-03, -1.8546e-03, -6.4586e-04, -1.0061e-04,\n -1.7154e-03, -2.7993e-03, 8.2482e-04, 1.2674e-04, 8.6023e-05,\n 1.0252e-05, 1.5493e-03, -5.7324e-04, 1.4971e-03, 5.0112e-04,\n -7.3038e-04, 2.3907e-04, 3.7605e-04, 2.6232e-04, 1.8681e-03,\n -1.8482e-03, -1.2070e-03, -1.5871e-03, 1.0440e-03, 1.5408e-04,\n 6.9338e-04, -1.7411e-03, -2.8084e-04, 5.6057e-04, 1.1883e-03,\n -1.5181e-03, -9.3456e-04, -7.2626e-06, -4.9187e-05, 3.4699e-04,\n -1.3585e-03, 7.3996e-05, 8.4771e-04, -5.9847e-04, 1.0433e-03,\n 9.7113e-05, 9.4662e-04, -3.1713e-03, -6.0138e-04, 9.4770e-04,\n -2.8944e-04, 3.9036e-04, -4.6556e-04, 1.5058e-03, 1.4521e-03,\n -7.8728e-04, 7.1576e-05, -5.7935e-04, 9.5661e-04, -5.9737e-04,\n 3.0034e-04, -3.8112e-04, -1.9275e-03, -1.1778e-03, 3.1816e-04,\n 4.0038e-04, 3.7826e-04, -3.1439e-04, 7.9201e-06, 2.4945e-04,\n -2.6997e-04, 2.0608e-04, -4.3139e-05, -2.9423e-04, -8.4025e-04,\n -1.1325e-03, -1.9649e-04, -5.5455e-04, 1.1044e-03, -1.2294e-03,\n 1.0527e-03, -3.5259e-04, -4.2518e-04, 3.8201e-04, -1.5847e-04,\n -1.1714e-05, -1.8132e-03, 4.4274e-04, 7.8887e-04, 1.6198e-04,\n -1.5790e-03, 1.3062e-03, 2.4119e-03, 2.4654e-04, -1.4194e-04,\n -6.6182e-04, -5.3890e-04, 1.0084e-03, -1.6670e-03, 3.1296e-04,\n -1.4503e-03, -2.3758e-04, -4.2679e-04, 3.5867e-03, 2.2028e-03,\n -3.4169e-04, -1.1224e-03, -3.7069e-04, -1.5715e-03, -4.8718e-04,\n -4.9678e-04, -9.8161e-04, -4.8192e-04, 1.0934e-03, 3.1590e-04,\n 4.2662e-05, -2.1412e-03, 5.3633e-04, 2.0706e-03, 8.5035e-05,\n -3.8599e-04, 3.3671e-04, -1.5031e-03, 1.5659e-03, 4.0214e-05,\n 5.5436e-04, -4.6917e-04, 2.7373e-04, 1.4510e-04, 5.6052e-45,\n -7.5638e-04, 3.7352e-04, -9.2232e-04, -5.8025e-04, 3.0458e-03,\n 1.5578e-05, -1.3928e-03, 1.3624e-03, -2.6514e-03, -9.3491e-05,\n -1.5637e-03, -3.0003e-04, -3.9031e-04, -1.3261e-03, -9.6766e-04,\n 2.1176e-04, 1.0318e-03, 1.0760e-04, -3.9057e-04, 1.4816e-03,\n 1.1465e-04, -1.8946e-03, -3.4072e-04, -1.7863e-03, 2.6365e-03,\n 5.4237e-04, 1.5465e-03, -1.4310e-03, 8.9282e-04, -2.4541e-04,\n -1.6350e-03, -2.7257e-03, -6.9499e-05, -4.4406e-05, 6.8737e-04,\n 1.6002e-03, 1.0249e-03, 1.8451e-03, -3.7031e-05, 9.6601e-04,\n -1.0480e-04, 1.3035e-04, 2.9004e-04, 2.4481e-04, -4.1038e-03,\n -1.0090e-03, 8.0402e-04, -1.4494e-03, -6.6215e-05, -5.5787e-05,\n 1.3031e-03, -5.6479e-04, -4.4965e-04, -6.0618e-04, -1.2703e-03,\n 1.0261e-03, -2.0553e-03, 1.7247e-04, -1.6572e-03, 5.4412e-04,\n 1.7190e-05, 2.1281e-03, 1.1473e-03, 1.1373e-04, -1.5463e-04,\n 4.0323e-04, -1.1157e-04, 8.5353e-05, 1.8116e-04, 3.9786e-04,\n 7.8150e-04, -1.0130e-04, 5.6052e-45, 1.3339e-03, -8.6783e-04,\n -4.9198e-06, -1.8015e-03, 4.5443e-05, -1.4106e-04, 9.5843e-04,\n 8.6038e-05, 6.4942e-04, -1.9569e-03, -4.4860e-04, 5.3198e-04,\n 2.8954e-04, 1.2009e-04, 2.1346e-04, -7.4199e-04, 9.5429e-04,\n 1.8802e-03, -4.2577e-04, -7.5480e-04, 1.4950e-03, -9.6271e-05,\n -6.0823e-04, 7.6897e-04, 8.2148e-04, 5.2975e-04, 4.7292e-04,\n 1.3361e-05, -6.2657e-04, -1.1144e-03, -4.0825e-04, 1.8236e-04,\n 2.2581e-03, -8.9857e-04, -1.8408e-03, 1.6713e-03, -1.2141e-03,\n -9.7518e-04, -5.3200e-04, -3.7171e-04, -3.2499e-04, 1.1745e-03,\n 1.4599e-03, 2.4018e-03, -5.4427e-04, 8.5966e-04, -1.7846e-03,\n -4.0078e-04, 1.0246e-03, -3.9109e-04, 2.0758e-03, 1.0714e-05,\n -1.9483e-03, -1.3929e-03, -6.0378e-05, 5.4854e-04, 1.9208e-03,\n 2.9210e-04, -2.7499e-04, 4.4067e-04, 2.8751e-04, -3.5363e-05,\n -8.9292e-04, 8.7968e-04, -1.9811e-04, -3.4615e-04, 4.7374e-04,\n -1.3262e-04, 1.8968e-03, 7.2346e-04, -5.5066e-04, -1.4815e-03,\n 4.5356e-04, -7.6457e-05, 1.5263e-04, -9.8358e-04, 7.3764e-05,\n -5.5382e-04, -3.2377e-03, 9.1220e-04, 1.6598e-03, 9.1260e-04,\n -8.0147e-04, -8.5847e-04, 2.3507e-03, -1.8797e-03, -1.0002e-03,\n 1.5247e-03, 4.9850e-04, 1.4032e-03, 6.8864e-04, 2.4742e-04,\n -1.4332e-03, 3.8403e-04, 9.0814e-05, 9.9362e-04, -1.1887e-04,\n -1.7140e-03, -1.2839e-03, -1.0429e-04, 1.1450e-05, 5.7652e-04,\n -4.8510e-04, -1.4655e-03, -1.0372e-03, 9.1219e-05, -4.3112e-04,\n 7.3111e-04, 7.2956e-04, 9.3722e-04, 1.9324e-03, 8.3233e-04,\n 1.0860e-03, -2.6062e-04, -1.3915e-03, 5.6052e-45, -7.3756e-04,\n -5.5800e-04, 3.7547e-03, -1.4339e-03, 1.1591e-05, 1.6483e-03,\n 5.8383e-04, -1.0625e-03, -1.5684e-03, 3.1511e-04, 1.7195e-03,\n 1.3577e-03, 1.1977e-04, 5.7738e-04, -6.7715e-04, 1.2636e-03,\n 1.3446e-03, 5.3626e-04, 6.7720e-04, 2.2937e-03, -8.6590e-04,\n 1.0598e-04, 6.5748e-04, -1.8696e-03, 2.9006e-04, -7.9544e-04,\n 1.1421e-03, -4.6670e-04, 5.3831e-04, 3.2174e-03, -2.4692e-05,\n 2.3192e-04, 2.5171e-04, 1.1485e-03, 2.8347e-04, -6.9133e-04,\n -5.9610e-04, -1.9937e-03, -8.9382e-04, 5.6972e-04, -1.3597e-03,\n -1.8648e-03, -2.2042e-03, 9.5893e-05, 1.6584e-03, 5.6052e-45,\n 7.7561e-04, -2.3263e-04, 5.3366e-04, -1.7987e-03, -2.0214e-04,\n 5.0771e-05, 1.1368e-03, -1.4359e-04, -9.2052e-04, -6.8988e-04,\n -1.8458e-03, 7.0403e-04, -1.0605e-03, -3.0052e-05, 7.0883e-05,\n 1.5671e-03, 1.0140e-03, 3.7160e-04, 1.1855e-03, 8.2738e-04,\n 3.3404e-04, -2.8339e-04, -6.7094e-04, 5.6741e-05, -1.6220e-04,\n -9.1303e-05, 2.0998e-03, -1.4664e-03, 1.2323e-03, -1.0723e-03,\n 8.9767e-05, -5.0246e-04, -4.5477e-04, -1.2441e-04, 9.7451e-05,\n 2.7428e-04, -2.9231e-03, 1.8413e-03, 4.1255e-04, -1.1032e-03,\n 9.9999e-04, -4.4035e-04, 1.8335e-05, 1.2014e-04, 2.0306e-03,\n 8.0410e-04, 9.4402e-04, 1.9621e-04, 2.3590e-04, 6.9340e-04,\n 9.3947e-04, -2.1698e-04, -8.5069e-04, 3.8291e-04, -5.9366e-05,\n 6.7063e-05, -1.1633e-04, -3.2420e-04, -1.2278e-03, -1.0307e-04,\n -1.0326e-03, 2.0934e-03, 3.7052e-05, -7.4112e-04, -2.6446e-03,\n -1.3380e-03, -9.6510e-04, -1.5712e-05, 1.4089e-03, 9.0694e-04,\n 1.5589e-04, -6.0421e-05, -6.4810e-04, 1.0558e-03, -1.3650e-03,\n -3.8341e-04, -8.1666e-04, 6.7045e-04, 1.5658e-03, -1.7754e-03,\n 6.5267e-04, 1.1156e-03, 3.1881e-04, -1.1866e-03, -4.7908e-04,\n 4.3486e-04, 6.9220e-07, -8.6625e-04, -9.6577e-04, -7.7262e-04,\n -1.4790e-03, -8.4920e-04, -8.2405e-04, -3.9033e-04, 9.2642e-04,\n 2.0806e-04, -1.0255e-03, 1.3074e-03, -9.1285e-04, 1.7819e-03,\n -1.6574e-04, -6.5158e-04, -6.1102e-04, -8.9023e-04, -5.5093e-04,\n -1.3496e-03, 1.6028e-03, -1.4632e-04, -7.8553e-04, -2.8621e-04,\n 6.2157e-04, 6.3230e-04, -3.0073e-03, 1.0797e-03, 1.4020e-03,\n -6.4888e-04, -1.1584e-03, -7.1249e-04, -2.3275e-04, -2.0919e-04,\n 2.1483e-04, 3.8421e-04, -2.6238e-03, 2.8413e-04, 8.0011e-04,\n 6.4212e-04, 2.6656e-03, -1.4033e-03, 1.0188e-03, 3.3846e-04,\n 5.2716e-04, 1.0264e-03, -1.1837e-03, -4.5940e-04, 2.3478e-04,\n -1.4267e-03, 1.1231e-03, 1.3843e-04, -1.1400e-04, -1.0946e-03,\n 1.7158e-03, 5.9585e-06, -8.3432e-05, -1.2252e-03, -8.3683e-04,\n -1.5730e-03, 9.0591e-04, -4.5706e-04, 2.5805e-04, -1.1767e-03,\n 3.0371e-03, -1.0592e-03, -4.2514e-04, 4.0533e-03, 6.3128e-04,\n 5.3276e-04, -7.5342e-04, 3.1164e-04, 5.8583e-04, -1.6711e-03,\n -4.7821e-04, -8.3085e-04, 8.4213e-04, 3.8757e-04, -1.8007e-03,\n -7.5723e-04, 2.7998e-04, 6.5853e-04, 1.5350e-04, -1.8766e-03,\n 8.4358e-04, -6.5402e-04, -1.2636e-03, 7.2984e-05, 8.1001e-04,\n -1.1478e-03, -1.3656e-03, 2.3236e-03, -2.8322e-05, 2.2018e-03,\n 2.0905e-03, -7.0009e-04, -2.5474e-03, 4.9352e-04, -1.0206e-03,\n -2.8639e-04, -1.1847e-03, -1.0340e-04, 6.4957e-04, 4.6806e-04,\n 1.8080e-03, -8.3908e-04, -2.6636e-04, -1.6207e-04, 1.8032e-04,\n -1.2480e-03, -3.7098e-03, -2.3367e-03, -1.2381e-03, 1.2113e-03,\n 1.0608e-03, -1.6000e-04, 1.2123e-03, 2.1694e-04, -1.3611e-04,\n -4.6727e-04, -3.0906e-03, 9.8517e-04, -3.5683e-04, 5.3170e-04,\n 4.9222e-04, -5.2281e-04, 1.9248e-04, 9.6928e-04, 4.2321e-05,\n 7.0495e-04, -7.2237e-05, 1.6401e-04, 9.2230e-04, -6.7522e-04,\n -5.3071e-04, -4.8024e-04, -8.4599e-04, -8.1523e-04, 5.8322e-05,\n -1.0562e-03, -5.9788e-04, 4.6086e-04, -4.6877e-05, -2.5921e-03,\n -1.9743e-03, 2.3618e-04, -1.7344e-03, -6.4165e-04, -3.0567e-04,\n -2.8551e-04, -1.0962e-03, 2.2907e-03, -4.4141e-04, 3.6014e-04,\n -2.9145e-03, 1.1462e-03, -5.8112e-04], device='cuda:0')",
"exp_avg_sq": "tensor([1.6990e-05, 1.7899e-05, 1.0537e-05, 2.5534e-05, 2.2816e-05, 1.2718e-05,\n 9.5910e-18, 5.5501e-06, 1.9357e-05, 1.2486e-05, 2.7262e-05, 1.7683e-05,\n 8.2033e-06, 1.8065e-05, 1.8693e-05, 7.7530e-06, 1.9244e-05, 3.1211e-05,\n 8.0213e-06, 2.0504e-05, 1.7554e-05, 2.6998e-05, 1.7577e-05, 1.6359e-05,\n 1.9458e-05, 2.8303e-05, 2.7464e-05, 2.7571e-05, 2.0019e-05, 1.5430e-05,\n 2.4597e-05, 3.1442e-05, 1.6667e-05, 3.0793e-05, 1.3574e-05, 9.6796e-06,\n 2.5766e-05, 1.5140e-05, 2.2068e-05, 1.2212e-05, 9.0884e-06, 1.0967e-05,\n 1.3833e-05, 1.5813e-05, 5.2488e-06, 2.6344e-05, 1.9849e-05, 1.0322e-05,\n 1.5765e-05, 1.8492e-05, 8.0104e-06, 1.8113e-05, 1.5394e-05, 3.0834e-05,\n 2.2439e-05, 2.2263e-05, 1.2642e-05, 9.8827e-06, 1.2192e-05, 2.0790e-05,\n 2.4693e-05, 2.3565e-05, 1.9800e-05, 1.9909e-05, 1.3523e-05, 1.5368e-05,\n 6.8278e-06, 2.5105e-05, 2.1767e-05, 2.7935e-05, 8.4296e-06, 4.1690e-05,\n 1.8861e-05, 1.5980e-05, 2.0553e-05, 7.5991e-06, 1.5784e-05, 2.8222e-05,\n 1.3426e-05, 7.6158e-06, 5.5861e-06, 1.9130e-05, 1.4026e-05, 2.0479e-05,\n 4.2296e-05, 1.8019e-05, 1.5732e-05, 7.0062e-06, 1.9909e-05, 8.3963e-06,\n 2.2520e-05, 2.0560e-05, 1.8178e-05, 3.2600e-05, 3.6635e-05, 1.2936e-05,\n 9.6002e-06, 1.9562e-05, 7.0812e-06, 1.0908e-05, 2.2527e-05, 8.2928e-06,\n 9.0733e-06, 2.2029e-05, 1.7949e-05, 2.2648e-05, 1.9205e-05, 1.2883e-06,\n 6.2978e-06, 6.0064e-07, 2.8922e-05, 9.9613e-06, 3.0879e-05, 1.4445e-05,\n 1.2858e-05, 1.8724e-05, 2.0536e-05, 1.8394e-05, 2.6297e-05, 1.6047e-05,\n 2.0921e-05, 1.2906e-05, 2.6831e-05, 1.4095e-05, 1.3861e-05, 1.8392e-05,\n 2.6285e-05, 4.4965e-06, 1.1874e-05, 2.1176e-05, 7.7928e-06, 5.7021e-06,\n 2.1361e-05, 2.6443e-05, 1.7811e-05, 1.7815e-05, 1.2535e-05, 1.7273e-05,\n 1.7891e-05, 1.4795e-05, 2.3721e-05, 1.6328e-05, 2.7172e-05, 2.3425e-05,\n 6.3788e-06, 2.5758e-05, 1.3994e-05, 1.7273e-05, 1.7027e-05, 2.0490e-05,\n 2.2925e-05, 3.5646e-05, 1.9413e-05, 2.8965e-05, 2.9219e-05, 3.3607e-05,\n 1.5201e-05, 2.6031e-05, 2.2095e-05, 1.5319e-05, 1.0714e-05, 1.4033e-05,\n 3.4082e-05, 7.3036e-06, 1.1765e-05, 2.2372e-05, 1.6408e-05, 2.6093e-05,\n 2.9366e-05, 9.4723e-06, 2.8426e-05, 2.4713e-05, 1.7822e-05, 1.5364e-05,\n 1.2015e-05, 1.6753e-05, 1.6273e-05, 2.7230e-05, 1.6696e-05, 1.1653e-05,\n 2.7312e-05, 2.0096e-05, 1.1889e-05, 1.7173e-05, 2.3148e-05, 2.2458e-05,\n 1.8050e-05, 1.9907e-05, 1.8976e-05, 1.1336e-05, 7.6575e-06, 1.7527e-05,\n 2.2392e-05, 1.3147e-05, 9.4309e-06, 1.9594e-05, 1.9236e-05, 1.7510e-05,\n 1.4884e-05, 1.1214e-05, 1.7131e-05, 2.2004e-05, 1.7120e-05, 2.7114e-05,\n 1.5732e-05, 2.1722e-05, 2.5027e-05, 1.5962e-05, 8.0092e-06, 9.9325e-06,\n 2.1970e-05, 1.6441e-05, 2.7500e-05, 3.1100e-05, 1.6810e-05, 1.3342e-05,\n 2.3343e-05, 2.4428e-05, 2.1053e-05, 1.1240e-05, 1.0656e-05, 2.0187e-05,\n 2.3760e-05, 3.9485e-05, 6.6944e-06, 1.2471e-05, 1.8656e-05, 1.6922e-05,\n 6.1631e-06, 7.7593e-06, 2.7093e-05, 4.8398e-07, 2.5782e-05, 1.5817e-05,\n 1.2748e-05, 1.7970e-05, 1.3455e-05, 1.9391e-05, 2.3564e-05, 1.0755e-05,\n 1.0491e-05, 9.4307e-06, 2.6559e-05, 1.6315e-05, 1.0977e-05, 1.6158e-05,\n 1.7998e-05, 2.2504e-05, 1.9023e-05, 5.5621e-06, 2.9166e-05, 2.4259e-05,\n 3.7776e-05, 8.3274e-06, 1.6562e-05, 2.9791e-05, 2.2913e-05, 2.2110e-05,\n 1.5169e-05, 6.5462e-06, 7.3321e-06, 9.9313e-06, 2.2025e-05, 3.3637e-05,\n 2.0786e-05, 1.4176e-05, 1.2428e-05, 1.4453e-05, 1.2446e-05, 2.7167e-05,\n 1.7618e-05, 1.8098e-05, 1.7334e-05, 2.8242e-05, 2.1313e-05, 1.4787e-05,\n 1.6699e-05, 9.7308e-06, 1.1580e-05, 8.8703e-06, 2.3103e-05, 1.7405e-05,\n 1.2214e-05, 2.8276e-05, 1.2932e-05, 7.5455e-06, 1.8422e-05, 1.3312e-05,\n 1.5876e-05, 1.2950e-18, 1.9474e-05, 1.4946e-05, 1.1934e-05, 1.1433e-06,\n 3.1028e-05, 2.0153e-05, 2.0411e-05, 1.0520e-05, 1.0038e-05, 6.0533e-06,\n 1.8927e-05, 1.7108e-05, 2.4009e-05, 1.5186e-05, 9.7761e-06, 3.5103e-05,\n 9.5148e-06, 2.0846e-05, 1.6599e-05, 2.7209e-05, 3.0580e-05, 1.8382e-05,\n 8.2228e-06, 1.1097e-05, 2.3335e-05, 1.7188e-06, 8.9664e-06, 1.5647e-05,\n 2.9239e-05, 3.2466e-05, 7.2724e-06, 2.8598e-05, 1.0064e-05, 1.1732e-05,\n 1.5953e-05, 1.6429e-05, 2.2269e-05, 3.0391e-05, 9.3573e-06, 2.0953e-05,\n 8.7494e-06, 1.1772e-05, 8.4859e-06, 3.4008e-05, 2.3581e-05, 1.6352e-05,\n 2.0030e-05, 1.7807e-05, 2.6652e-05, 5.0179e-06, 1.9023e-05, 1.9267e-05,\n 1.7868e-05, 1.0023e-05, 1.1540e-05, 1.5158e-05, 1.6751e-05, 2.1856e-05,\n 1.7738e-05, 1.2933e-05, 1.5298e-05, 2.3311e-05, 1.8812e-05, 3.0575e-05,\n 9.7197e-06, 9.2601e-06, 2.2569e-05, 1.1751e-05, 1.6553e-05, 1.4959e-05,\n 9.9767e-06, 2.2741e-05, 9.0756e-18, 2.2665e-05, 1.6370e-05, 1.3481e-05,\n 2.1559e-05, 2.1117e-05, 1.6633e-05, 2.5579e-05, 2.3316e-05, 1.7105e-05,\n 2.1572e-05, 2.0860e-05, 1.4868e-05, 1.8372e-05, 2.2991e-05, 2.5729e-05,\n 2.2328e-05, 1.4836e-05, 1.1881e-05, 6.2106e-06, 1.8868e-05, 1.7318e-05,\n 3.1552e-06, 1.5226e-05, 6.5191e-06, 5.9639e-06, 2.1269e-05, 2.2449e-05,\n 1.9604e-05, 1.9591e-05, 2.4604e-05, 2.4725e-05, 1.2307e-05, 1.2452e-05,\n 1.9166e-05, 1.9571e-05, 1.4964e-05, 2.2580e-05, 3.1411e-05, 1.2866e-05,\n 7.2901e-06, 2.8412e-05, 1.5768e-05, 3.2502e-05, 3.2061e-05, 6.9915e-06,\n 1.0889e-05, 2.6825e-05, 6.4173e-06, 9.9443e-06, 1.1515e-05, 2.0121e-05,\n 2.3206e-05, 8.3154e-06, 1.9904e-05, 3.2610e-05, 1.8043e-05, 7.4446e-06,\n 2.4971e-05, 2.6220e-05, 2.9397e-06, 1.7666e-05, 9.6477e-06, 1.2566e-05,\n 1.1622e-05, 1.6695e-05, 2.5078e-05, 1.4194e-05, 1.6835e-05, 1.7918e-05,\n 1.7340e-05, 2.0890e-05, 1.1746e-05, 3.0161e-05, 2.0091e-05, 1.0276e-05,\n 1.2495e-05, 1.5547e-05, 2.4548e-05, 2.2898e-05, 6.8485e-06, 1.5975e-05,\n 2.8824e-05, 2.3973e-05, 2.9937e-05, 3.1045e-05, 2.2338e-05, 2.8298e-05,\n 2.0441e-05, 2.9562e-05, 2.4850e-05, 1.6349e-05, 1.8142e-05, 1.2815e-05,\n 2.8469e-05, 1.0657e-05, 1.7441e-05, 1.1724e-05, 1.4926e-05, 1.6773e-05,\n 1.4104e-05, 1.3488e-05, 1.3748e-05, 2.1155e-05, 1.6686e-05, 7.0719e-06,\n 5.3632e-06, 1.2178e-05, 1.3545e-05, 1.6944e-05, 1.5927e-05, 1.8272e-05,\n 1.2156e-05, 2.4628e-05, 2.0076e-06, 1.2538e-05, 1.4887e-19, 1.1132e-05,\n 7.0811e-06, 2.1625e-05, 2.6641e-05, 1.1094e-05, 8.3711e-06, 1.9528e-05,\n 1.9133e-05, 1.3551e-05, 2.2315e-05, 1.9737e-05, 1.6571e-05, 1.0153e-05,\n 7.0343e-06, 2.7886e-05, 5.5528e-06, 1.1628e-05, 4.9646e-06, 3.5017e-05,\n 4.3405e-05, 2.8734e-05, 1.3936e-05, 2.1482e-05, 5.9549e-06, 2.7142e-05,\n 1.7754e-05, 1.0881e-05, 1.4740e-05, 1.5658e-05, 3.5209e-05, 8.4996e-06,\n 1.3616e-05, 2.1266e-05, 2.2437e-05, 1.7285e-06, 2.0819e-05, 2.1454e-05,\n 2.3173e-05, 1.3583e-05, 1.4569e-05, 2.5835e-05, 1.8832e-05, 1.4754e-05,\n 1.6953e-05, 4.0199e-05, 1.4365e-19, 3.0862e-05, 4.0877e-06, 1.7826e-05,\n 2.4776e-05, 2.8939e-05, 1.8578e-05, 1.9287e-05, 6.2899e-06, 2.5057e-05,\n 2.6226e-05, 2.7914e-05, 1.5602e-05, 1.5078e-05, 1.2102e-05, 6.2138e-06,\n 2.1695e-05, 1.1541e-05, 1.0153e-05, 1.5384e-05, 2.7690e-05, 1.8821e-05,\n 1.7312e-05, 2.8398e-05, 1.9600e-05, 1.8477e-05, 2.6138e-05, 3.1315e-05,\n 2.5404e-05, 2.0162e-05, 3.1772e-05, 1.1792e-05, 2.5652e-05, 1.7379e-05,\n 2.4041e-05, 2.0980e-05, 1.3570e-05, 1.6876e-05, 1.6233e-05, 2.3933e-05,\n 3.1477e-05, 1.7864e-05, 2.3508e-05, 1.3179e-05, 1.4709e-05, 2.7712e-05,\n 1.1482e-05, 1.9948e-05, 2.0257e-05, 3.0444e-05, 2.2266e-05, 3.4792e-05,\n 1.7331e-05, 1.4633e-05, 1.6484e-05, 2.5342e-05, 1.1475e-05, 1.4485e-05,\n 2.3898e-05, 2.5043e-05, 9.0727e-06, 1.5459e-05, 3.4590e-05, 2.5734e-05,\n 6.1016e-06, 2.1939e-05, 2.2347e-05, 1.6030e-05, 2.3965e-05, 2.6925e-05,\n 2.4542e-05, 1.9331e-05, 1.6753e-05, 2.6474e-05, 1.3262e-05, 1.6351e-05,\n 9.5757e-06, 2.6617e-05, 2.8374e-05, 1.6638e-05, 1.8254e-05, 1.4009e-05,\n 8.6242e-06, 1.1261e-05, 2.0648e-05, 2.9073e-06, 2.5413e-05, 1.9298e-05,\n 2.2170e-05, 1.4301e-05, 1.2160e-05, 2.0426e-05, 2.0200e-05, 1.3614e-05,\n 1.1096e-05, 7.1882e-06, 2.2194e-05, 2.2856e-05, 1.6351e-05, 1.8459e-05,\n 1.4252e-05, 1.6185e-05, 2.8023e-05, 1.2210e-05, 1.7632e-05, 2.2592e-05,\n 2.9480e-05, 2.0219e-05, 2.1987e-05, 1.1319e-05, 2.2722e-05, 1.5933e-05,\n 1.3945e-05, 3.1503e-05, 1.3617e-05, 3.4010e-05, 3.1699e-05, 1.4141e-05,\n 2.9481e-05, 9.3002e-06, 1.2283e-05, 1.3469e-05, 9.9006e-06, 1.6710e-05,\n 1.8191e-05, 1.8573e-05, 1.5021e-05, 1.9837e-05, 2.3612e-05, 1.3306e-05,\n 2.1838e-05, 1.1115e-05, 1.6551e-05, 1.4377e-05, 7.4406e-06, 1.0821e-05,\n 1.0783e-05, 1.7228e-05, 9.3364e-06, 7.2259e-07, 1.5371e-05, 2.8200e-05,\n 2.0572e-05, 1.4557e-05, 1.4298e-05, 7.4558e-06, 2.0139e-05, 1.4411e-05,\n 9.9607e-06, 8.2375e-06, 1.5300e-05, 2.8978e-05, 1.7306e-05, 1.2996e-05,\n 2.9711e-05, 1.7078e-05, 2.2920e-05, 1.5418e-05, 1.6640e-05, 2.6542e-05,\n 1.8015e-05, 2.9983e-05, 1.6715e-05, 1.8900e-05, 1.0453e-05, 1.1761e-05,\n 2.4368e-05, 9.8478e-06, 2.4068e-05, 1.3517e-05, 1.1437e-05, 1.4615e-05,\n 1.3179e-05, 2.5537e-05, 1.8880e-05, 1.9888e-05, 2.9455e-05, 1.0856e-05,\n 2.1537e-05, 2.1511e-05, 1.3003e-05, 1.6493e-05, 2.2751e-05, 2.0442e-05,\n 2.3364e-05, 1.6617e-05, 3.7077e-06, 3.1293e-05, 2.2858e-05, 2.1223e-05,\n 2.3460e-05, 1.9382e-05, 6.9256e-06, 1.2779e-05, 1.5103e-05, 3.3332e-05,\n 1.6736e-05, 2.1514e-05, 2.4347e-05, 2.2351e-05, 9.3130e-06, 2.4987e-05,\n 8.3927e-06, 1.6849e-05, 1.6204e-05, 9.0486e-06, 1.0639e-05, 2.6235e-05,\n 2.1827e-05, 2.4758e-05, 2.2736e-05, 1.0160e-05, 2.3193e-05, 1.8941e-05,\n 1.4194e-05, 2.9966e-05, 1.7052e-05, 3.6449e-05, 2.9556e-05, 1.9774e-05,\n 1.2420e-05, 1.2722e-05, 1.6261e-05, 2.3057e-05, 2.0669e-05, 6.0142e-06,\n 1.2261e-05, 1.1540e-05, 9.7171e-06, 2.4836e-05, 2.4205e-05, 2.3052e-05,\n 2.1386e-05, 2.1412e-05, 9.3868e-06, 2.5699e-05, 1.8229e-05, 2.7843e-05,\n 2.9978e-05, 1.5172e-05, 7.2864e-06, 1.8452e-05, 9.8679e-06, 2.4810e-05],\n device='cuda:0')"
},
"2": {
"step": "tensor(25030.)",
"exp_avg": "tensor([[-6.3906e-06, 1.1369e-08, -1.1024e-07, ..., 2.2519e-07,\n -6.8898e-06, 2.2879e-07],\n [ 1.9717e-06, 3.0004e-06, 9.3756e-07, ..., -3.0797e-06,\n -2.8172e-07, 1.3778e-06],\n [ 3.1404e-07, 1.1187e-06, -1.3415e-06, ..., -5.9015e-07,\n 1.3964e-06, 1.9524e-05],\n ...,\n [ 2.5073e-06, 2.8094e-07, 3.2614e-07, ..., 2.2778e-07,\n 4.1787e-07, 1.3539e-07],\n [ 3.9046e-06, 2.7940e-07, 1.3852e-05, ..., -2.8302e-06,\n -1.2856e-06, -3.1763e-07],\n [-2.0780e-07, 5.7776e-07, 4.2506e-06, ..., -3.9929e-06,\n 3.8202e-06, -3.3503e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.5243e-10, 5.7844e-10, 3.7350e-11, ..., 8.0666e-11, 5.4811e-09,\n 2.6954e-10],\n [1.9410e-10, 2.0233e-10, 1.1579e-10, ..., 1.5026e-10, 1.6756e-11,\n 6.9123e-10],\n [8.7254e-10, 5.2411e-10, 2.4227e-10, ..., 4.0984e-11, 3.7696e-11,\n 2.2668e-10],\n ...,\n [8.8997e-10, 2.7353e-11, 3.7259e-11, ..., 4.0612e-10, 3.7691e-11,\n 9.1834e-10],\n [7.8965e-10, 3.6015e-10, 1.0325e-09, ..., 3.3602e-10, 1.0379e-10,\n 4.5461e-10],\n [1.5774e-10, 9.1707e-11, 1.6985e-10, ..., 4.2201e-10, 9.5753e-10,\n 1.5869e-10]], device='cuda:0')"
},
"3": {
"step": "tensor(25030.)",
"exp_avg": "tensor([-1.5532e-06, 1.3641e-06, 1.3810e-05, 4.0360e-05, -7.5290e-06,\n 5.6052e-45, 6.2604e-05, 5.0437e-05, 5.6052e-45, 2.6732e-05,\n 2.9818e-05, 2.3016e-05, 3.4169e-05, -4.9857e-05, 5.1890e-06,\n 2.3976e-05, -3.7288e-05, -5.3288e-05, -5.9664e-06, 6.4284e-06,\n -1.4171e-05, -3.9787e-06, -7.7966e-05, 6.7507e-05, -3.9102e-05,\n -1.0559e-05, 2.1888e-05, -8.6939e-06, -3.3221e-06, -4.4676e-05,\n 1.5569e-05, -3.9756e-05, 4.0226e-06, -7.3283e-06, 5.6052e-45,\n 5.6052e-45, -3.2224e-05, -4.9995e-05, 3.4237e-05, 2.2920e-05,\n -8.6845e-05, -1.0617e-05, 9.6708e-06, 4.4829e-06, 5.6052e-45,\n -1.2511e-05, 1.1518e-05, 5.4320e-06, 1.3972e-05, 1.2826e-05,\n -3.8298e-05, 6.7088e-06, 1.6660e-05, -1.4574e-05, -7.2954e-05,\n -1.0753e-05, 2.0893e-05, 8.7996e-06, 4.6933e-06, -3.6449e-06,\n -1.4808e-05, -3.8119e-05, -1.0179e-05, 2.2893e-05, 4.7607e-05,\n -1.8585e-05, 5.6052e-45, -2.6434e-05, 3.0247e-05, -1.1503e-05,\n 4.4390e-05, -2.4203e-05, -3.1325e-05, 2.7484e-06, 1.4413e-06,\n -8.7992e-05, -1.5455e-06, 6.4076e-06, -3.0121e-06, -1.6013e-05,\n -2.7272e-05, -3.5306e-05, -1.3530e-05, -3.8729e-06, -3.6165e-06,\n -1.1579e-06, -2.1886e-05, -2.7731e-05, -1.8179e-05, 9.0374e-06,\n -1.1716e-05, -2.8905e-05, 3.6840e-05, 1.1840e-05, -2.5399e-05,\n -3.7308e-07, -1.6511e-05, -1.3502e-07, 4.7049e-05, 5.8477e-05,\n -8.6270e-07, -3.5487e-05, -3.6207e-05, 5.6052e-45, 3.3465e-05,\n -5.4766e-06, -5.6512e-05, 1.6599e-05, -5.4257e-05, 2.7117e-05,\n -1.2160e-05, 8.7942e-06, 2.8582e-05, -1.2648e-04, 5.6052e-45,\n 1.0941e-05, 2.3705e-05, -2.8559e-05, -8.7142e-06, -2.6913e-05,\n -5.6052e-45, 3.6533e-05, -1.7131e-05, 2.7563e-05, -2.1467e-05,\n 3.5139e-06, -9.1499e-06, 5.6052e-45, -1.3245e-05, -5.5171e-05,\n -1.1080e-05, -9.0526e-06, 1.4086e-05, 2.4300e-05, -2.7464e-06,\n 1.1485e-05, 5.2966e-05, 5.6512e-05, 1.3155e-05, 1.4536e-05,\n -1.9038e-05, 2.2021e-05, -1.8400e-05, 4.9152e-06, 1.3740e-05,\n -1.8815e-06, 3.8578e-05, 5.6052e-45, 2.7829e-05, 2.0107e-07,\n 1.6897e-05, 5.6052e-45, 6.0270e-06, 5.6052e-45, -3.7751e-05,\n 1.0077e-05, 1.4529e-05, 5.1549e-06, 5.6052e-45, -4.4937e-06,\n 5.7798e-05, 2.3901e-05, 2.0000e-05, 3.3101e-06, 5.6052e-45,\n -5.1424e-05, 7.3265e-06, -8.7206e-06, 5.3238e-05, -5.6052e-45,\n 5.6052e-45, 4.1872e-05, 2.0787e-07, -4.1413e-05, 3.9063e-05,\n 3.0743e-05, 1.0816e-05, 3.9479e-05, 3.7600e-05, 5.6052e-45,\n -1.1247e-05, 3.7698e-05, -1.0611e-05, 4.4312e-05, 5.6052e-45,\n -9.4502e-06, 5.6052e-45, 5.6052e-45, -6.9620e-05, 3.5580e-05,\n -3.3928e-05, 6.2709e-05, -1.1954e-05, 1.4752e-05, -3.7193e-05,\n 3.8667e-05, 5.6052e-45, -3.3454e-05, 5.6052e-45, -1.9350e-05,\n -7.9059e-06, 8.8573e-06, -5.9242e-06, -9.1914e-06, -1.2059e-05,\n 1.7719e-05, -2.6013e-05, -5.8602e-05, 3.1873e-05, 1.0457e-05,\n -1.3929e-05, 4.5841e-05, -2.6160e-05, -2.3046e-05, -4.0007e-05,\n 1.2751e-05, 4.3158e-05, -1.2012e-06, 1.5106e-05, 3.1071e-05,\n 1.0337e-05, -1.8460e-05, 5.6052e-45, 6.1686e-06, 3.8582e-05,\n -4.4393e-05, 5.6052e-45, -1.9801e-05, -5.3661e-05, 1.0721e-05,\n 1.8499e-05, -1.6254e-05, 6.6031e-05, 5.6052e-45, -7.0499e-06,\n -5.4065e-06, -7.4356e-05, 8.7581e-06, 2.7564e-06, 6.6149e-05,\n 9.7932e-06, 5.4058e-07, -1.5741e-05, 7.9690e-06, -1.0155e-05,\n -1.1956e-06, 5.5822e-05, 5.6052e-45, 1.2872e-05, 5.6052e-45,\n 7.1673e-06, 8.2971e-06, 1.8562e-05, 5.6052e-45, -2.8332e-05,\n 5.9607e-06, 2.0218e-05, 2.1416e-05, 5.6052e-45, -3.2608e-05,\n -1.8235e-05, 1.0593e-05, 2.6248e-05, -1.9805e-06, 2.7673e-05,\n 1.5039e-05, -2.2942e-05, 1.0070e-06, -7.3114e-06, 5.9506e-05,\n 5.4415e-05, -5.3054e-06, 5.6052e-45, 3.5226e-06, 3.2647e-05,\n 1.2797e-05, 1.9934e-05, -4.6253e-05, 5.6052e-45, 5.5791e-05,\n 2.0929e-05, 9.7199e-06, 1.7278e-05, 2.0150e-05, 5.6052e-45,\n -7.9129e-06, 9.9507e-07, 5.6052e-45, -1.7710e-05, -1.7280e-05,\n -1.3680e-05, -2.1318e-05, -1.2969e-05, -3.5015e-05, 1.3282e-05,\n -3.7411e-05, -2.9820e-05, -7.6559e-06, 5.6052e-45, 9.4900e-05,\n 3.7034e-05, 9.8073e-06, 4.6211e-06, -1.2729e-05, 5.6052e-45,\n -1.7818e-05, 1.4897e-05, -2.8854e-05, -7.8208e-06, 2.0048e-05,\n 3.0491e-05, 6.3377e-07, 1.7880e-05, -2.1428e-05, 1.1770e-05,\n 4.7485e-05, -1.6723e-05, -1.4461e-04, -2.3329e-05, -5.4445e-05,\n -5.1253e-05, 3.8712e-06, -3.5108e-06, 1.6297e-05, 2.4213e-05,\n -2.4917e-05, 5.6052e-45, 2.9558e-05, 1.9037e-05, -1.4092e-05,\n -3.2274e-06, 6.9478e-06, -2.0050e-05, 2.7369e-06, -1.8488e-05,\n -5.1251e-05, -4.5300e-05, -2.4890e-05, 2.9784e-06, 5.6052e-45,\n 5.6052e-45, -2.1240e-05, -5.0983e-06, 1.1920e-05, -2.2834e-05,\n 6.0355e-06, 1.7146e-05, -5.5055e-06, -4.5910e-05, 5.0362e-05,\n 1.7531e-05, -2.2350e-05, 6.5908e-05, -2.5865e-05, -2.5978e-06,\n 3.2278e-05, 5.6052e-45, 9.4731e-05, 5.6052e-45, 3.9547e-05,\n -6.4186e-06, -5.3757e-05, -5.1269e-05, 3.2874e-05, 2.3089e-05,\n 8.6022e-06, -5.4142e-06, -4.9966e-06, 2.2455e-05, -1.0578e-05,\n 2.1114e-05, -1.5836e-05, -1.8765e-05, -4.9209e-05, -1.2633e-05,\n -3.1446e-05, -3.5338e-05, 1.9139e-05, 1.4470e-05, 4.8311e-05,\n 3.0392e-05, -4.3695e-05, -5.6574e-06, 3.8992e-05, 5.6052e-45,\n 3.7474e-05, -6.7510e-06, 4.0166e-05, 1.2163e-05, 3.4485e-05,\n 2.3529e-05, 3.2034e-05, 1.3397e-05, 5.6052e-45, -9.8048e-06,\n 5.4528e-05, 5.6052e-45, 3.8660e-05, 8.6061e-06, -4.7263e-05,\n -2.2298e-05, -3.7056e-06, 7.4046e-07, -9.7144e-08, 5.6052e-45,\n -4.8929e-05, 2.9198e-05, 3.0006e-05, -8.7401e-06, 7.7096e-06,\n 5.6052e-45, 2.1279e-05, 5.6052e-45, 2.5067e-05, -4.0968e-05,\n 4.3038e-05, -6.3173e-06, 4.9858e-06, 1.3439e-05, 2.3642e-05,\n -5.7995e-05, -1.9621e-05, -5.5984e-05, 9.2107e-06, 5.6052e-45,\n -7.8220e-06, 2.8638e-06, -3.8174e-06, 5.6052e-45, -4.6995e-05,\n 6.1436e-06, 1.5616e-05, 1.2044e-05, -1.6651e-05, 2.9592e-05,\n -2.9161e-05, -1.1624e-05, -9.1898e-06, 6.3077e-05, -5.8889e-06,\n 1.0936e-05, -2.4246e-05, 8.4425e-06, 5.8892e-06, 5.2331e-05,\n 2.9009e-06, 5.6052e-45, -1.6834e-05, -2.1642e-05, 3.0959e-05,\n -6.1942e-06, 5.6052e-45, 5.6052e-45, -4.1630e-05, -4.6973e-05,\n 2.8629e-06, -9.1660e-06, -2.7336e-05, -7.8880e-06, 1.8560e-05,\n -1.5946e-05, 1.2696e-05, 2.1647e-06, -3.7804e-05, -2.4993e-06,\n -9.7643e-06, -2.4939e-05, -2.3945e-05, -4.4985e-07, 3.1700e-06,\n -2.6285e-05, 5.6052e-45, 5.6052e-45, 1.2923e-05, 5.6052e-45,\n -1.4293e-05, -1.0265e-04, -4.4123e-06, -7.1711e-06, -2.6337e-05,\n 3.5358e-05, -1.4589e-05, 1.8487e-05, 1.5602e-05, -1.1403e-07,\n -3.2254e-05, 6.5042e-05, 1.9641e-06, 1.4321e-05, 5.6052e-45,\n -2.4551e-05, 4.6506e-05, -2.6211e-05, 6.7799e-06, -3.7910e-06,\n 1.4256e-05, -5.3249e-06, 1.2317e-05, 4.7621e-05, 3.3116e-05,\n 1.0622e-05, 1.9098e-05, -1.0250e-05, 2.8285e-05, 3.2545e-05,\n -1.6976e-05, 2.7047e-05, 9.5931e-07, -2.4369e-06, 6.9677e-05,\n -5.5940e-06, 3.2288e-06], device='cuda:0')",
"exp_avg_sq": "tensor([5.7506e-09, 9.2275e-09, 8.8515e-09, 1.1270e-08, 1.5198e-08, 3.9361e-16,\n 1.7644e-08, 1.5685e-08, 1.8211e-22, 1.2512e-08, 1.1719e-08, 1.7018e-08,\n 1.9939e-08, 1.1756e-08, 1.2282e-08, 1.0247e-08, 1.7848e-08, 1.2463e-08,\n 9.3071e-09, 1.4882e-08, 1.2621e-08, 1.3423e-08, 1.1373e-08, 1.7384e-08,\n 1.2799e-08, 6.2746e-09, 5.0420e-09, 1.3530e-08, 1.0031e-08, 1.1281e-08,\n 1.0929e-08, 1.4247e-08, 8.2066e-09, 1.6979e-08, 3.2819e-24, 4.9345e-16,\n 1.3880e-08, 1.5346e-08, 1.1490e-08, 1.0289e-08, 1.3907e-08, 1.4335e-08,\n 1.3507e-08, 1.3906e-08, 1.6146e-15, 1.1749e-08, 1.5023e-08, 1.2010e-08,\n 1.3051e-08, 1.0797e-08, 1.2268e-08, 1.3703e-08, 1.8438e-08, 1.6191e-08,\n 1.3809e-08, 1.0940e-08, 1.7752e-08, 1.4410e-08, 1.3897e-08, 1.4418e-08,\n 9.8333e-09, 1.5297e-08, 9.5278e-09, 2.2295e-08, 2.4814e-08, 1.2619e-08,\n 8.8930e-16, 1.1387e-08, 1.0844e-08, 1.3739e-08, 1.0372e-08, 1.1791e-08,\n 1.1662e-08, 1.0673e-08, 1.8460e-08, 1.3051e-08, 1.9449e-08, 1.0659e-08,\n 1.3667e-08, 1.3505e-08, 1.0015e-08, 1.6041e-08, 1.1531e-08, 4.6737e-09,\n 2.0711e-08, 1.1698e-08, 1.0873e-08, 1.7552e-08, 1.5835e-08, 2.2883e-08,\n 1.1797e-08, 1.2830e-08, 1.7548e-08, 9.2030e-09, 1.4502e-08, 1.5404e-08,\n 1.3350e-08, 8.0701e-09, 1.2360e-08, 1.2901e-08, 1.5638e-08, 1.1352e-08,\n 7.0374e-09, 3.0175e-16, 8.0314e-09, 1.8277e-08, 1.4101e-08, 1.1006e-08,\n 1.5568e-08, 1.5830e-08, 1.3154e-08, 1.3791e-08, 1.8129e-08, 1.3732e-08,\n 7.1898e-17, 1.4506e-08, 1.4933e-08, 1.4175e-08, 1.0773e-08, 1.9219e-08,\n 1.7998e-15, 1.4638e-08, 1.0276e-08, 1.5805e-08, 1.1066e-08, 2.1223e-08,\n 1.4632e-08, 9.1295e-16, 9.8995e-09, 1.4541e-08, 1.1933e-08, 9.2403e-09,\n 1.5364e-08, 1.2835e-08, 1.3361e-08, 1.2645e-08, 1.1522e-08, 2.4470e-08,\n 6.6163e-09, 1.1667e-08, 1.0504e-08, 1.4763e-08, 1.2455e-08, 1.5923e-08,\n 1.3303e-08, 8.0208e-09, 2.5964e-08, 1.6236e-16, 1.3919e-08, 1.0391e-08,\n 1.1972e-08, 2.4111e-19, 1.1999e-08, 3.3108e-16, 1.0973e-08, 1.6309e-08,\n 9.2645e-09, 1.2266e-08, 7.4406e-15, 9.9339e-09, 1.2635e-08, 1.0394e-08,\n 1.6831e-08, 1.1118e-08, 2.7298e-21, 1.6290e-08, 8.4838e-09, 1.1666e-08,\n 2.2407e-08, 4.0650e-18, 1.4592e-15, 1.1946e-08, 1.0112e-08, 8.7033e-09,\n 1.0431e-08, 1.3081e-08, 1.0243e-08, 8.8208e-09, 1.0635e-08, 2.1442e-18,\n 1.1321e-08, 8.8100e-09, 1.0388e-08, 8.0686e-09, 2.5636e-15, 8.8212e-09,\n 1.0444e-15, 3.0064e-16, 1.7111e-08, 1.1638e-08, 1.3971e-08, 1.5412e-08,\n 8.0750e-09, 7.5766e-09, 1.1640e-08, 1.0667e-08, 2.4020e-17, 7.6841e-09,\n 3.6301e-15, 6.7732e-09, 9.3576e-09, 1.5704e-08, 6.0391e-09, 1.1018e-08,\n 1.0102e-08, 1.0049e-08, 1.9634e-08, 1.1188e-08, 8.0062e-09, 9.5328e-09,\n 1.0594e-08, 2.1283e-08, 1.2285e-08, 1.7662e-08, 1.6639e-08, 1.1975e-08,\n 1.3496e-08, 1.1523e-08, 1.1610e-08, 1.2016e-08, 1.4740e-08, 1.7826e-08,\n 2.0759e-15, 1.2434e-08, 1.6922e-08, 1.2904e-08, 1.9982e-19, 1.1311e-08,\n 1.9792e-08, 9.6458e-09, 1.2120e-08, 1.2847e-08, 2.0122e-08, 5.1128e-17,\n 1.5412e-08, 6.2836e-09, 1.8259e-08, 7.1226e-09, 1.3464e-08, 1.3574e-08,\n 7.4874e-09, 9.1459e-09, 1.8289e-08, 1.2195e-08, 1.0013e-08, 7.8164e-09,\n 1.4472e-08, 1.7863e-15, 1.2881e-08, 3.3905e-21, 1.0611e-08, 1.2278e-08,\n 7.7714e-09, 4.6373e-20, 1.5544e-08, 8.7641e-09, 1.2469e-08, 1.6727e-08,\n 4.7104e-20, 1.2762e-08, 1.1886e-08, 1.3706e-08, 1.2848e-08, 2.0667e-08,\n 9.4341e-09, 1.0918e-08, 1.0628e-08, 1.3773e-08, 1.4277e-08, 1.6945e-08,\n 1.8511e-08, 6.3379e-09, 1.2417e-16, 1.5035e-08, 7.2409e-09, 9.1906e-09,\n 1.3902e-08, 8.8494e-09, 7.3537e-16, 1.5593e-08, 1.7396e-08, 1.0019e-08,\n 2.0258e-08, 2.0294e-08, 2.4492e-15, 1.4060e-08, 9.2846e-09, 5.7655e-17,\n 1.7995e-08, 1.3817e-08, 8.5326e-09, 1.5143e-08, 1.6261e-08, 1.0651e-08,\n 2.0190e-08, 9.4907e-09, 2.4420e-08, 1.2583e-08, 5.7598e-16, 1.1786e-08,\n 8.4822e-09, 1.8583e-08, 1.2614e-08, 1.2048e-08, 3.1019e-17, 1.3250e-08,\n 9.0643e-09, 2.1586e-08, 1.1169e-08, 1.4046e-08, 2.0214e-08, 1.6058e-08,\n 9.5788e-09, 2.1655e-08, 1.0019e-08, 1.3630e-08, 7.5684e-09, 1.8216e-08,\n 1.2493e-08, 2.0313e-08, 1.2193e-08, 1.3160e-08, 1.2128e-08, 2.2821e-08,\n 9.0259e-09, 1.2240e-08, 8.2919e-16, 1.2352e-08, 1.0516e-08, 6.9085e-09,\n 1.1062e-08, 1.0103e-08, 1.3090e-08, 9.8742e-09, 1.1326e-08, 1.1706e-08,\n 2.1429e-08, 2.1994e-08, 1.0947e-08, 2.1479e-24, 5.4315e-16, 1.0801e-08,\n 1.1359e-08, 1.0919e-08, 8.5128e-09, 1.5983e-08, 1.3505e-08, 8.8859e-09,\n 1.2133e-08, 2.1074e-08, 1.0667e-08, 6.2691e-09, 1.1886e-08, 1.6683e-08,\n 7.3846e-09, 1.4400e-08, 8.2478e-18, 9.7777e-09, 4.8330e-17, 1.1009e-08,\n 1.5855e-08, 7.2251e-09, 1.0868e-08, 1.1854e-08, 1.5664e-08, 3.3339e-09,\n 6.6973e-09, 1.5675e-08, 1.0987e-08, 1.5931e-08, 6.6135e-09, 1.2702e-08,\n 2.0689e-08, 1.6934e-08, 1.7496e-08, 8.6565e-09, 1.1698e-08, 1.2497e-08,\n 1.5123e-08, 2.4288e-08, 1.0532e-08, 1.2423e-08, 1.4374e-08, 1.5910e-08,\n 1.1060e-16, 1.4703e-08, 8.1824e-09, 1.4416e-08, 1.0424e-08, 1.3467e-08,\n 1.2235e-08, 1.4905e-08, 1.3870e-08, 1.0349e-16, 2.0475e-08, 1.4212e-08,\n 1.3978e-21, 1.3430e-08, 1.4786e-08, 7.9016e-09, 7.8210e-09, 1.3674e-08,\n 1.2765e-08, 1.5207e-08, 4.6765e-16, 1.2821e-08, 9.8351e-09, 8.0110e-09,\n 1.0979e-08, 1.2905e-08, 9.3405e-16, 9.6616e-09, 1.1492e-16, 9.8154e-09,\n 1.3100e-08, 1.1311e-08, 1.2635e-08, 1.4606e-08, 8.8597e-09, 1.3034e-08,\n 9.8236e-09, 1.3955e-08, 2.1011e-08, 1.5942e-08, 3.3846e-17, 1.0399e-08,\n 1.3152e-08, 9.8512e-09, 2.5167e-19, 1.2429e-08, 1.3656e-08, 2.0542e-08,\n 1.2487e-08, 1.5122e-08, 1.4112e-08, 1.1907e-08, 1.3778e-08, 1.1365e-08,\n 2.0188e-08, 1.9541e-08, 1.1248e-08, 1.0657e-08, 1.1056e-08, 1.1876e-08,\n 1.3289e-08, 1.1998e-08, 3.8651e-17, 2.2798e-08, 5.9575e-09, 1.0099e-08,\n 1.1046e-08, 8.5720e-17, 9.5405e-18, 7.8060e-09, 1.9373e-08, 9.7701e-09,\n 1.9767e-08, 1.1633e-08, 6.7541e-09, 1.2541e-08, 1.4000e-08, 8.7877e-09,\n 8.2308e-09, 1.3629e-08, 8.6444e-09, 1.2423e-08, 9.8187e-09, 1.2029e-08,\n 6.6888e-09, 1.0991e-08, 1.4392e-08, 2.9381e-16, 1.9707e-18, 1.3444e-08,\n 1.2660e-16, 9.6722e-09, 1.4485e-08, 2.0421e-08, 1.1796e-08, 1.5138e-08,\n 1.2234e-08, 1.2064e-08, 1.1160e-08, 2.1521e-08, 2.6334e-08, 8.4276e-09,\n 1.1721e-08, 1.0375e-08, 1.0908e-08, 1.1472e-22, 1.2824e-08, 2.0531e-08,\n 1.5203e-08, 1.4945e-08, 1.3390e-08, 1.2860e-08, 1.1478e-08, 1.0225e-08,\n 7.5401e-09, 1.0857e-08, 1.2446e-08, 1.1439e-08, 1.7961e-08, 1.3614e-08,\n 1.6028e-08, 1.2108e-08, 9.9800e-09, 9.9764e-09, 8.2776e-09, 2.0563e-08,\n 8.8752e-09, 5.2960e-09], device='cuda:0')"
},
"4": {
"step": "tensor(25030.)",
"exp_avg": "tensor([[-8.8849e-07, 1.4487e-05, -2.8260e-06, ..., -8.6488e-06,\n -1.9944e-06, 2.9425e-07],\n [-1.4100e-06, 1.0873e-06, -4.6981e-07, ..., -7.5205e-06,\n -1.3643e-06, 4.6396e-07],\n [ 6.1841e-07, 2.5608e-06, 2.4288e-06, ..., -9.6304e-06,\n -2.4920e-06, -1.0506e-06],\n ...,\n [ 8.6556e-07, 1.5519e-06, 7.8224e-06, ..., 2.0756e-06,\n 3.8751e-07, -1.8783e-06],\n [ 9.5254e-08, -1.8066e-06, -2.5443e-06, ..., -8.8996e-06,\n -3.1291e-06, 2.2515e-06],\n [-6.1196e-07, -3.7998e-06, 6.6844e-06, ..., -8.4606e-06,\n 2.4809e-06, 3.3585e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[9.6940e-12, 3.9633e-10, 7.3873e-11, ..., 1.2281e-09, 1.2278e-10,\n 5.6007e-11],\n [3.4046e-11, 1.7188e-10, 1.9552e-10, ..., 1.6688e-09, 5.3220e-10,\n 5.6190e-11],\n [3.1896e-11, 1.7805e-10, 6.9490e-10, ..., 7.5460e-10, 5.2078e-10,\n 5.8268e-11],\n ...,\n [4.6227e-11, 1.3593e-10, 1.3428e-10, ..., 1.9931e-09, 2.8504e-10,\n 1.3064e-10],\n [3.0990e-11, 2.5092e-10, 1.9492e-10, ..., 1.6370e-09, 2.7732e-10,\n 2.0686e-10],\n [2.2425e-11, 1.6777e-09, 2.9899e-10, ..., 1.0914e-09, 2.4384e-10,\n 6.7989e-10]], device='cuda:0')"
},
"5": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 8.2377e-06, -6.1206e-06, 8.3149e-06, ..., 1.9468e-06,\n 1.0763e-05, 2.3777e-06],\n [-2.0163e-06, 8.7859e-06, -6.2222e-06, ..., 4.3376e-06,\n 2.8483e-07, -1.1803e-06],\n [ 2.3553e-05, 4.1020e-06, 1.4998e-07, ..., 8.1157e-06,\n 8.1554e-07, -3.8363e-06],\n ...,\n [ 1.1281e-05, 2.3790e-06, 1.2723e-05, ..., 1.2674e-05,\n 4.8931e-06, 7.5615e-07],\n [ 1.4636e-07, -2.1320e-05, 1.6166e-06, ..., -5.3890e-06,\n 1.3231e-07, 6.9085e-06],\n [-6.3124e-07, 4.3427e-05, 2.3802e-07, ..., -6.5902e-07,\n -3.0607e-06, 1.3425e-05]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.7823e-08, 7.7352e-09, 7.9315e-10, ..., 2.7997e-10, 5.1188e-09,\n 5.6573e-10],\n [5.4849e-09, 1.7327e-09, 3.8915e-09, ..., 1.4788e-09, 1.0018e-09,\n 4.1312e-09],\n [3.5082e-08, 5.4208e-09, 1.2854e-09, ..., 3.2325e-10, 7.7536e-10,\n 3.2936e-09],\n ...,\n [2.5386e-09, 1.5697e-09, 9.9673e-09, ..., 3.6004e-09, 6.0716e-09,\n 1.0486e-09],\n [1.9191e-10, 1.9702e-09, 3.6070e-10, ..., 5.3106e-09, 4.7129e-10,\n 1.3351e-09],\n [3.6087e-09, 2.8246e-09, 2.8827e-09, ..., 7.4576e-10, 5.6754e-10,\n 1.9438e-08]], device='cuda:0')"
},
"6": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 1.3893e-04, 1.3631e-04, -7.3058e-05, ..., 1.2525e-04,\n -1.3658e-04, 1.5664e-05], device='cuda:0')",
"exp_avg_sq": "tensor([1.4291e-07, 1.2436e-07, 1.8828e-07, ..., 2.0344e-07, 1.1766e-07,\n 1.4554e-07], device='cuda:0')"
},
"7": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[-1.4505e-06, 5.5969e-06, 1.0612e-06, ..., 9.3652e-06,\n -4.4553e-07, 4.8973e-07],\n [-6.2838e-06, 1.5264e-05, -3.6018e-06, ..., -8.0006e-06,\n 7.9882e-06, 6.9780e-06],\n [ 1.3122e-05, 6.5722e-06, -1.2042e-05, ..., -1.8063e-05,\n -1.2107e-05, 2.7313e-06],\n ...,\n [ 1.1124e-05, 5.2344e-06, 3.2160e-06, ..., -1.0422e-05,\n -1.3859e-05, -4.3237e-06],\n [ 5.4144e-06, 5.1929e-07, 1.1280e-06, ..., -1.1434e-05,\n -1.3713e-05, -3.4398e-07],\n [-3.2838e-06, 1.7559e-05, 7.5401e-06, ..., 5.9797e-06,\n 3.1450e-06, 2.4699e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.2244e-10, 3.2385e-10, 1.7267e-10, ..., 2.4792e-10, 1.5273e-10,\n 1.1196e-10],\n [6.8456e-10, 1.7963e-10, 5.7821e-10, ..., 6.4322e-10, 4.7532e-10,\n 7.4898e-10],\n [1.3092e-09, 2.0293e-10, 1.6155e-09, ..., 1.6853e-09, 4.1880e-10,\n 6.6674e-10],\n ...,\n [1.2458e-09, 3.3258e-10, 9.8660e-10, ..., 4.2837e-10, 1.1733e-09,\n 1.2729e-09],\n [1.0812e-09, 2.9138e-10, 5.8904e-10, ..., 6.6020e-10, 1.4730e-09,\n 2.0583e-10],\n [7.1274e-10, 2.0600e-10, 6.0443e-10, ..., 4.3296e-10, 4.5109e-10,\n 1.6333e-10]], device='cuda:0')"
},
"14": {
"step": "tensor(20024.)",
"exp_avg": "tensor(-0.0003, device='cuda:0')",
"exp_avg_sq": "tensor(3.8975e-07, device='cuda:0')"
},
"15": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 5.7950e-15, -2.0496e-14, 3.1491e-15, ..., 1.2017e-15,\n 2.6794e-15, 9.3323e-16],\n [ 9.5179e-15, 2.5032e-14, 2.5023e-14, ..., 4.6325e-15,\n 1.1912e-14, -3.0879e-14],\n [-1.4408e-15, 1.0625e-15, 2.6997e-15, ..., 3.3008e-16,\n -7.5289e-17, -6.8340e-16],\n ...,\n [-4.9994e-15, -6.3500e-14, -1.0368e-14, ..., 1.5155e-15,\n -1.6402e-14, 3.7352e-14],\n [ 1.4922e-15, -1.3612e-15, 1.3419e-15, ..., 4.0365e-17,\n 1.2330e-15, -7.0703e-16],\n [ 7.9675e-16, 2.6735e-16, 2.2877e-16, ..., 1.9611e-15,\n 4.1329e-15, -1.4599e-14]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.4951e-20, 2.7015e-20, 7.7060e-20, ..., 9.3450e-21, 1.0215e-19,\n 5.7079e-20],\n [1.4283e-20, 9.4804e-21, 1.7362e-20, ..., 3.3872e-21, 1.2926e-20,\n 2.6650e-20],\n [3.0425e-21, 1.1501e-20, 4.3734e-21, ..., 9.5968e-22, 1.7088e-20,\n 1.9500e-21],\n ...,\n [7.8248e-20, 2.3626e-20, 9.0881e-20, ..., 6.6003e-21, 9.2131e-20,\n 2.8146e-21],\n [5.1036e-22, 1.0577e-21, 2.7826e-21, ..., 1.1135e-22, 3.1433e-21,\n 7.2420e-22],\n [1.8793e-20, 1.2155e-20, 1.8100e-20, ..., 4.5268e-21, 2.7264e-20,\n 4.6816e-20]], device='cuda:0')"
},
"16": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 8.8804e-15, 2.2692e-14, -8.0976e-15, -6.3692e-15, -4.6147e-14,\n -5.1077e-15, 1.1243e-14, -6.0171e-15, 5.8062e-15, -6.2215e-15,\n -7.5181e-16, -3.7554e-14, 3.2041e-14, -9.2131e-16, -7.0132e-15,\n -3.7756e-15, -4.8008e-15, -3.4027e-15, 9.8109e-15, -8.4095e-15,\n -5.7574e-15, -1.5794e-15, -1.9214e-14, -2.5991e-14, -2.3306e-13,\n 1.4642e-14, -7.3866e-15, -6.7458e-14, 6.5467e-14, -1.2303e-14,\n 9.8306e-15, -5.8725e-15, 1.5958e-14, 6.2871e-14, -8.8319e-15,\n -5.9028e-15, 7.8251e-14, 4.1789e-15, -9.6679e-16, 1.0861e-14,\n -5.5987e-16, 1.6018e-14, 9.6158e-15, -6.9706e-16, 1.3209e-13,\n 6.9287e-15, -3.3600e-15, -4.0113e-15, 9.8542e-14, 8.0041e-15,\n -1.3087e-14, -3.2024e-15, -6.9260e-16, -2.2418e-16, -4.6033e-16,\n -1.5519e-15, 9.6294e-16, -7.6555e-15, 7.7558e-16, 6.4013e-15,\n -3.4164e-15, -5.0049e-14, 3.5091e-16, -4.3426e-15], device='cuda:0')",
"exp_avg_sq": "tensor([3.4616e-17, 1.2343e-17, 4.5788e-18, 2.6800e-17, 2.0520e-18, 1.4346e-18,\n 9.8045e-17, 1.3075e-17, 1.6215e-17, 4.5172e-17, 1.2270e-19, 1.1753e-16,\n 7.4601e-18, 2.0859e-17, 1.1071e-16, 8.2253e-19, 7.5025e-17, 2.5171e-17,\n 1.4450e-17, 4.1633e-18, 2.7048e-18, 6.8672e-20, 3.0014e-18, 1.5490e-18,\n 1.6550e-17, 1.4409e-16, 3.8281e-18, 2.5403e-17, 4.6231e-18, 1.6656e-16,\n 5.6706e-18, 1.8145e-18, 1.2955e-18, 2.0481e-18, 1.5569e-17, 3.2521e-17,\n 2.8190e-16, 6.8215e-18, 4.3631e-18, 3.8741e-17, 2.7289e-17, 8.7858e-18,\n 2.1735e-17, 6.4728e-18, 1.1123e-16, 1.1012e-16, 1.7850e-18, 2.9004e-17,\n 4.0574e-17, 3.9549e-17, 1.2414e-18, 6.9763e-20, 1.5220e-16, 6.3440e-17,\n 3.2443e-18, 1.5696e-17, 1.8854e-17, 5.9628e-19, 1.3345e-18, 8.4332e-18,\n 1.1424e-17, 4.7054e-17, 1.0399e-18, 1.7863e-17], device='cuda:0')"
},
"17": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 7.1064e-14, 7.7558e-14, 9.4642e-16, 3.3548e-16, 5.6550e-15,\n -7.4339e-17, -6.0945e-15, -1.3992e-17, -3.6024e-15, 4.1215e-14,\n 3.3882e-16, 2.3450e-14, 7.5016e-14, -7.7852e-16, 9.0606e-16,\n 2.3100e-15, 1.9062e-16, -2.8987e-17, 3.5828e-14, 1.7139e-15,\n -5.2812e-17, 5.6449e-16, 5.7364e-15, -3.4393e-15, -2.7924e-13,\n 5.8631e-14, 5.0114e-16, -4.3287e-14, 6.1621e-14, 2.8070e-14,\n 3.9691e-14, 4.3137e-17, 8.6447e-15, 4.2618e-14, 8.5022e-16,\n -2.0466e-16, 1.4256e-13, -6.9748e-16, 3.2944e-16, 4.8325e-14,\n -5.3002e-16, 3.6575e-14, 4.5312e-14, 3.7289e-14, 2.0376e-13,\n -3.8837e-15, -1.8832e-16, 1.0995e-16, 1.3836e-13, -3.8536e-15,\n 6.0734e-15, 3.1820e-16, -1.4368e-15, -1.3972e-15, 1.6407e-16,\n -6.9928e-16, -9.5888e-16, -5.5720e-15, 6.3050e-15, -1.9640e-15,\n 7.9195e-17, 4.4860e-16, 3.8246e-16, 8.1322e-15], device='cuda:0')",
"exp_avg_sq": "tensor([2.1812e-19, 5.9118e-20, 3.4122e-20, 2.5930e-19, 7.8332e-21, 2.9448e-20,\n 5.8950e-19, 1.7680e-19, 9.7062e-20, 2.7214e-19, 9.4786e-21, 6.4853e-19,\n 4.7640e-20, 1.1912e-19, 5.8265e-19, 1.6468e-21, 4.8345e-19, 3.3852e-19,\n 8.7642e-20, 5.8865e-20, 2.3068e-20, 1.5740e-21, 1.1684e-20, 3.3779e-21,\n 1.6399e-19, 1.6872e-18, 2.9137e-20, 1.5984e-19, 2.5800e-20, 1.5247e-18,\n 2.2607e-20, 2.1007e-20, 2.6566e-21, 8.1548e-21, 1.8003e-19, 2.9978e-19,\n 2.4663e-18, 6.9651e-20, 1.0166e-19, 2.7334e-19, 2.7731e-19, 4.5174e-20,\n 1.2537e-19, 5.6856e-20, 7.1225e-19, 6.7569e-19, 4.1822e-20, 5.0378e-19,\n 2.6392e-19, 2.3607e-19, 1.2835e-21, 1.2126e-21, 1.7776e-18, 3.4187e-19,\n 7.0113e-20, 8.6653e-20, 1.0785e-19, 3.7526e-22, 1.8483e-21, 6.4207e-20,\n 1.0899e-19, 3.8191e-19, 2.4604e-20, 1.2019e-19], device='cuda:0')"
},
"18": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 5.0214e-14, 6.1956e-14, -1.2144e-15, -3.9086e-16, -1.1097e-14,\n 8.1285e-17, 7.3322e-15, 7.6310e-17, 3.8997e-15, 3.0818e-14,\n -2.2070e-16, 1.3245e-14, 6.1044e-14, 7.3253e-16, -1.1555e-15,\n 1.4784e-14, -3.1091e-16, 1.3578e-17, 3.4968e-14, -4.6350e-15,\n 4.3426e-17, -5.0130e-16, 5.7534e-15, -6.2169e-15, -1.6611e-13,\n 4.4687e-14, -8.2706e-16, -3.4814e-14, 7.9863e-14, 2.0806e-14,\n 3.5235e-14, -5.1122e-17, 3.2790e-14, 7.1130e-14, -5.1539e-16,\n 2.4118e-16, 1.0734e-13, 6.9931e-16, -2.8381e-16, 3.8377e-14,\n 6.1855e-16, 4.0282e-14, 3.7428e-14, 3.2685e-14, 1.4769e-13,\n 4.1580e-15, 1.5854e-16, -8.3307e-17, 1.1405e-13, 4.8259e-15,\n 8.7897e-15, -3.5283e-16, 1.6268e-15, 1.3715e-15, -1.5316e-16,\n 6.5880e-16, 7.5394e-16, -3.6273e-15, 1.7813e-14, 2.2596e-15,\n -3.3139e-17, -9.6897e-15, -4.1437e-16, 1.7715e-14], device='cuda:0')",
"exp_avg_sq": "tensor([3.2231e-19, 1.1972e-19, 7.4005e-20, 3.3934e-19, 1.3524e-20, 3.7930e-20,\n 1.0965e-18, 1.9549e-19, 2.0783e-19, 4.1971e-19, 1.1459e-20, 1.1864e-18,\n 7.7888e-20, 2.4736e-19, 1.2607e-18, 3.6398e-21, 8.6276e-19, 3.4221e-19,\n 1.0158e-19, 5.8066e-20, 5.1169e-20, 3.3538e-21, 1.4714e-20, 6.4789e-21,\n 2.1656e-19, 1.2967e-18, 5.4831e-20, 2.0741e-19, 4.0891e-20, 1.5104e-18,\n 4.0449e-20, 3.7863e-20, 4.3792e-21, 1.4425e-20, 2.1998e-19, 4.1309e-19,\n 2.7928e-18, 1.0981e-19, 8.8513e-20, 3.2461e-19, 3.6530e-19, 6.5046e-20,\n 1.9705e-19, 6.5180e-20, 1.0951e-18, 1.3215e-18, 4.4078e-20, 3.9980e-19,\n 3.8253e-19, 4.6335e-19, 2.4104e-21, 1.9508e-21, 1.8163e-18, 7.7441e-19,\n 7.0068e-20, 2.0775e-19, 2.5130e-19, 1.1381e-21, 3.2464e-21, 1.1374e-19,\n 1.5918e-19, 5.2493e-19, 2.9283e-20, 1.4035e-19], device='cuda:0')"
},
"19": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 3.0799e-13, 3.0544e-13, 1.9664e-14, -2.0251e-14, 2.3681e-14,\n -4.5624e-14, -3.7731e-14, -6.2476e-14, -2.2245e-14, 1.9145e-13,\n -2.4169e-14, 7.2281e-14, 2.6574e-13, -4.8553e-14, -2.9588e-14,\n 4.7432e-14, -3.2133e-14, -5.3582e-14, 2.5884e-13, -2.2214e-14,\n -5.5234e-15, -4.2912e-14, 6.0429e-14, -3.7693e-14, -6.5832e-13,\n 2.5948e-13, -4.9684e-14, -2.4890e-13, 3.4884e-13, 1.1536e-13,\n 2.7702e-13, 9.9293e-15, 1.3564e-13, 3.4138e-13, -4.6970e-14,\n -3.7316e-14, 4.3480e-13, -4.6319e-14, -2.8894e-14, 2.4719e-13,\n -7.6942e-14, 2.2887e-13, 2.3496e-13, 2.2384e-13, 7.9021e-13,\n -2.6545e-14, -1.5820e-14, -3.5669e-14, 6.1088e-13, -1.2112e-14,\n 7.8303e-14, -3.5996e-14, -4.2146e-14, -3.8995e-14, -3.0154e-14,\n -4.8668e-14, -3.3065e-14, -6.9583e-14, 1.0169e-13, -1.6424e-14,\n -1.3285e-14, 1.7836e-15, -2.5030e-14, 4.7271e-14],\n [-3.1297e-13, -2.9367e-13, -1.8196e-14, 2.1220e-14, -3.1663e-14,\n 4.7253e-14, 3.9904e-14, 6.2747e-14, 2.4193e-14, -1.9853e-13,\n 2.5564e-14, -9.0576e-14, -2.8350e-13, 4.8846e-14, 3.0764e-14,\n -4.6398e-14, 3.2458e-14, 5.4324e-14, -2.6875e-13, 2.2181e-14,\n 6.3264e-15, 4.5119e-14, -7.3945e-14, 4.1144e-14, 6.7950e-13,\n -2.7145e-13, 5.0372e-14, 2.5037e-13, -3.5278e-13, -1.0919e-13,\n -2.5866e-13, -8.9227e-15, -1.3978e-13, -3.4012e-13, 4.7184e-14,\n 3.7218e-14, -4.4555e-13, 4.6240e-14, 2.7386e-14, -2.5183e-13,\n 7.5725e-14, -2.4156e-13, -2.4104e-13, -2.1097e-13, -7.9356e-13,\n 2.6965e-14, 1.5718e-14, 3.5800e-14, -6.1176e-13, 1.3887e-14,\n -7.9487e-14, 3.7429e-14, 4.3227e-14, 4.0263e-14, 3.1703e-14,\n 4.9917e-14, 3.2101e-14, 6.8925e-14, -1.1047e-13, 1.6997e-14,\n 1.3870e-14, -2.3885e-14, 2.7516e-14, -6.1199e-14]], device='cuda:0')",
"exp_avg_sq": "tensor([[7.8811e-18, 1.5704e-18, 5.3122e-19, 1.7070e-17, 6.4925e-18, 2.2527e-17,\n 3.2578e-18, 3.1490e-17, 3.3616e-19, 1.5193e-17, 3.1856e-17, 9.6928e-18,\n 1.0876e-18, 3.4035e-19, 4.3265e-18, 6.2563e-18, 6.1269e-18, 5.3496e-17,\n 4.1252e-17, 4.6948e-20, 3.0262e-18, 2.6006e-18, 3.0056e-17, 1.0697e-17,\n 3.9495e-19, 8.2511e-17, 2.4434e-19, 2.0679e-17, 3.8157e-18, 4.7981e-17,\n 1.0039e-17, 3.8468e-18, 2.0421e-17, 4.4934e-18, 9.0302e-18, 1.7292e-17,\n 3.7231e-17, 7.6748e-18, 6.7360e-17, 2.1866e-17, 2.4025e-17, 1.1018e-17,\n 1.1749e-17, 8.6142e-18, 1.9703e-17, 9.2872e-18, 3.4803e-17, 7.1196e-17,\n 1.1969e-17, 1.8224e-18, 2.3100e-17, 1.2563e-18, 4.4630e-17, 5.4061e-18,\n 4.9535e-17, 4.0308e-18, 4.3667e-18, 5.9477e-18, 2.5787e-17, 1.6779e-18,\n 1.2860e-17, 1.1484e-18, 1.8531e-17, 3.3620e-17],\n [7.8811e-18, 1.5703e-18, 5.3122e-19, 1.7070e-17, 6.4925e-18, 2.2527e-17,\n 3.2578e-18, 3.1490e-17, 3.3616e-19, 1.5193e-17, 3.1856e-17, 9.6928e-18,\n 1.0876e-18, 3.4035e-19, 4.3265e-18, 6.2563e-18, 6.1269e-18, 5.3496e-17,\n 4.1252e-17, 4.6947e-20, 3.0262e-18, 2.6006e-18, 3.0056e-17, 1.0697e-17,\n 3.9488e-19, 8.2511e-17, 2.4434e-19, 2.0679e-17, 3.8157e-18, 4.7981e-17,\n 1.0039e-17, 3.8468e-18, 2.0421e-17, 4.4934e-18, 9.0302e-18, 1.7292e-17,\n 3.7231e-17, 7.6748e-18, 6.7360e-17, 2.1866e-17, 2.4025e-17, 1.1018e-17,\n 1.1749e-17, 8.6142e-18, 1.9703e-17, 9.2872e-18, 3.4803e-17, 7.1196e-17,\n 1.1969e-17, 1.8224e-18, 2.3100e-17, 1.2563e-18, 4.4630e-17, 5.4061e-18,\n 4.9535e-17, 4.0308e-18, 4.3667e-18, 5.9477e-18, 2.5787e-17, 1.6779e-18,\n 1.2860e-17, 1.1484e-18, 1.8531e-17, 3.3620e-17]], device='cuda:0')"
},
"20": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 1.9999e-13, -2.0463e-13], device='cuda:0')",
"exp_avg_sq": "tensor([1.4566e-16, 1.4566e-16], device='cuda:0')"
},
"21": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 5.5260e-17, -1.2954e-16, 1.4850e-16, ..., -5.9379e-16,\n 6.4860e-18, -4.6880e-16],\n [ 1.1421e-16, -2.7194e-15, -1.6894e-16, ..., -1.8124e-15,\n 2.5647e-16, 7.8690e-16],\n [-1.0639e-16, 9.4489e-17, 5.5863e-17, ..., -1.4111e-17,\n 1.2146e-16, 8.8141e-17],\n ...,\n [ 4.5876e-16, 5.0587e-16, -7.6188e-16, ..., 1.2054e-16,\n 3.9063e-16, 7.6223e-16],\n [ 7.1520e-17, -8.3519e-17, -2.5764e-17, ..., -1.9320e-17,\n -1.4919e-17, 6.9205e-17],\n [-3.1008e-16, 2.0368e-15, 5.5618e-16, ..., 3.4773e-15,\n -7.0701e-17, -1.0923e-15]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.2549e-22, 7.8975e-22, 7.7352e-22, ..., 1.5348e-22, 8.2849e-22,\n 1.4465e-21],\n [3.0065e-24, 1.2791e-22, 1.2741e-22, ..., 1.2810e-22, 1.8222e-23,\n 1.8144e-21],\n [2.1800e-22, 1.5268e-21, 1.2821e-21, ..., 5.3926e-22, 1.1104e-21,\n 4.2270e-21],\n ...,\n [5.1518e-22, 5.8186e-22, 1.8223e-22, ..., 1.3017e-22, 2.4672e-21,\n 1.6456e-21],\n [1.2678e-22, 1.5016e-22, 8.1180e-22, ..., 1.6140e-22, 3.8412e-22,\n 1.2694e-21],\n [4.0049e-21, 7.3641e-21, 1.4426e-20, ..., 5.6496e-21, 1.1385e-20,\n 1.1202e-20]], device='cuda:0')"
},
"22": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-1.6669e-15, -7.0067e-15, 1.3812e-15, -1.6136e-15, -8.0487e-16,\n 8.9421e-16, -9.2331e-16, -1.1289e-15, 2.9140e-15, 7.8006e-15,\n -9.0284e-15, 2.5441e-15, 1.6195e-14, -7.0986e-15, 6.9488e-16,\n 1.3327e-15, -8.9223e-15, 8.0332e-15, -9.5367e-16, 1.3403e-14,\n 3.8197e-16, 8.8364e-15, -4.2514e-15, -7.9808e-17, 5.3953e-15,\n 1.8314e-14, 3.6263e-16, 7.6172e-17, 1.2324e-15, 1.5968e-15,\n -5.1455e-16, 4.2666e-15, 7.2334e-16, 1.4883e-15, -2.9970e-16,\n -7.6284e-16, -3.8646e-15, 1.6932e-15, -6.5212e-15, -2.5519e-15,\n 3.6584e-16, 2.3801e-16, -2.5152e-16, 3.1193e-15, -2.3348e-15,\n -1.1214e-15, -7.0075e-16, -4.9084e-16, -2.5272e-14, -2.4205e-14,\n 2.8232e-16, -1.3450e-14, 6.3877e-16, 1.7775e-15, -1.1382e-15,\n 2.9049e-14, -6.7403e-16, 7.9138e-16, 1.5819e-15, -1.5107e-14,\n 1.0951e-14, 2.1413e-15, -1.8092e-16, -7.5774e-15], device='cuda:0')",
"exp_avg_sq": "tensor([6.1616e-19, 2.2953e-19, 1.8168e-18, 9.2720e-18, 2.3327e-18, 2.1598e-17,\n 1.0982e-17, 5.5571e-18, 4.1836e-19, 3.2442e-18, 7.3480e-18, 1.7717e-21,\n 1.1738e-17, 1.9049e-19, 1.6481e-17, 3.3442e-19, 1.8815e-18, 3.0689e-18,\n 1.2188e-20, 1.1220e-17, 8.4969e-19, 2.9240e-19, 2.8743e-18, 3.1605e-18,\n 2.9783e-18, 7.3695e-19, 1.7811e-18, 2.4279e-18, 7.0468e-18, 1.4781e-18,\n 7.5772e-18, 3.4711e-19, 1.2630e-18, 2.1672e-19, 8.9944e-19, 1.6302e-18,\n 1.5254e-19, 1.0871e-18, 4.5668e-18, 2.6874e-17, 2.4664e-19, 4.1041e-19,\n 1.5550e-17, 8.8501e-18, 1.7558e-17, 9.8296e-19, 5.4514e-18, 4.3543e-18,\n 2.3417e-18, 1.0655e-18, 8.2299e-19, 2.8527e-17, 3.9005e-20, 9.2208e-19,\n 1.5548e-18, 1.7699e-18, 8.5458e-19, 1.6613e-17, 2.5626e-17, 1.4950e-18,\n 1.4803e-17, 9.1889e-19, 4.8419e-19, 1.1952e-17], device='cuda:0')"
},
"23": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 9.9404e-17, -2.8992e-15, 1.1846e-16, 1.3074e-14, -5.7809e-17,\n -4.6316e-16, -3.7614e-18, -1.5249e-16, -5.3491e-16, 2.3700e-14,\n 9.2041e-15, 7.1257e-16, 1.9854e-14, -5.1824e-16, -4.9871e-16,\n 1.0675e-16, 3.2229e-15, 9.5763e-15, -8.9317e-17, 1.6522e-14,\n -8.4119e-17, 1.8825e-14, 3.1269e-15, -1.3097e-16, 1.1300e-14,\n 1.6940e-14, 5.3255e-17, -5.3386e-16, -7.3817e-16, 8.7078e-15,\n -5.4286e-16, 5.1008e-15, -7.3831e-17, 6.4062e-15, 4.2876e-17,\n 1.0500e-14, -3.2949e-17, 1.5115e-14, -4.7462e-15, 1.6121e-16,\n 6.2718e-16, -7.9878e-17, -3.2194e-16, -8.9723e-16, 2.4058e-15,\n -1.8512e-17, -1.1836e-16, 9.4537e-17, -3.1746e-14, -1.1551e-14,\n 1.0044e-16, -7.0736e-15, -9.7349e-16, 6.3856e-15, 4.9359e-17,\n 3.4417e-14, 1.8654e-16, 1.4302e-14, -9.4660e-16, 5.6653e-15,\n 2.5902e-14, 5.0531e-15, 3.9732e-17, -1.1000e-16], device='cuda:0')",
"exp_avg_sq": "tensor([2.9337e-21, 2.6530e-21, 2.3332e-20, 9.0959e-20, 1.4373e-20, 2.8190e-19,\n 5.0874e-20, 3.2768e-20, 4.8238e-21, 3.0265e-20, 5.4062e-20, 4.7655e-23,\n 7.1359e-20, 1.9405e-21, 1.3092e-19, 1.5855e-21, 1.3646e-20, 2.4181e-20,\n 4.1533e-24, 9.1735e-20, 3.6143e-21, 7.2897e-21, 2.2984e-20, 2.9357e-20,\n 2.9764e-20, 8.4557e-21, 1.4602e-20, 9.2841e-21, 3.8497e-20, 1.4168e-20,\n 3.1842e-20, 3.4443e-21, 5.0105e-21, 2.1797e-21, 3.9463e-21, 1.5735e-20,\n 4.5010e-22, 8.0354e-21, 3.9961e-20, 3.7484e-19, 2.5259e-21, 1.7654e-21,\n 1.2491e-19, 4.6228e-20, 1.4883e-19, 3.5534e-21, 2.8446e-20, 2.5967e-20,\n 1.8745e-20, 1.5201e-20, 3.3769e-21, 2.6910e-19, 3.6966e-22, 8.7354e-21,\n 6.4558e-21, 1.3033e-20, 4.1661e-21, 1.7474e-19, 2.1522e-19, 1.5201e-20,\n 1.3384e-19, 7.5279e-21, 3.8265e-21, 6.9388e-20], device='cuda:0')"
},
"24": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-9.5914e-17, -4.6092e-15, -1.1196e-16, 9.4385e-15, 6.1621e-17,\n 4.4455e-16, 4.8361e-17, 2.0778e-16, 3.9516e-15, 1.9413e-14,\n 3.2884e-15, -5.9511e-16, 1.3175e-14, -2.4024e-15, 3.1880e-16,\n -9.2004e-17, 4.9551e-16, 8.4506e-15, 6.9094e-17, 1.3716e-14,\n 7.3327e-17, 1.6172e-14, 4.3872e-15, 1.1435e-16, 1.1395e-14,\n 1.9280e-14, -4.9728e-17, 4.9101e-16, 6.8411e-16, 8.0161e-15,\n 5.4583e-16, 8.3509e-15, 5.3182e-17, 8.1094e-15, -5.3385e-17,\n 7.6433e-15, 2.4761e-18, 1.1412e-14, -1.2281e-15, -8.1609e-17,\n 3.3663e-15, 7.9605e-17, 3.2696e-16, 7.7633e-16, 9.2997e-15,\n 2.3127e-17, -5.3938e-17, -1.9872e-16, -2.3926e-14, -1.2219e-14,\n -9.9284e-17, -5.3239e-15, 3.9208e-15, 8.6889e-15, -4.7684e-17,\n 3.0573e-14, -1.8308e-16, 1.1814e-14, 8.1012e-16, 4.0720e-15,\n 2.4313e-14, 7.8069e-15, -2.2397e-17, 1.7407e-15], device='cuda:0')",
"exp_avg_sq": "tensor([3.8942e-21, 3.9619e-21, 1.3632e-20, 9.9617e-20, 1.9914e-20, 2.0460e-19,\n 9.5510e-20, 4.9329e-20, 6.3510e-21, 3.8125e-20, 8.3728e-20, 9.8518e-23,\n 1.3329e-19, 3.2584e-21, 1.6341e-19, 2.3186e-21, 2.2985e-20, 3.6549e-20,\n 9.7830e-24, 1.2336e-19, 6.2432e-21, 6.9487e-21, 3.2921e-20, 2.9664e-20,\n 3.3992e-20, 1.1027e-20, 1.5472e-20, 2.1650e-20, 6.3114e-20, 1.8713e-20,\n 7.4236e-20, 5.2548e-21, 1.0696e-20, 4.0845e-21, 7.3997e-21, 2.0424e-20,\n 9.2965e-22, 1.4198e-20, 5.0818e-20, 2.5613e-19, 3.9586e-21, 2.8859e-21,\n 1.3802e-19, 8.0835e-20, 1.8465e-19, 7.6438e-21, 5.5110e-20, 4.3362e-20,\n 3.0810e-20, 1.6699e-20, 6.4824e-21, 2.9674e-19, 9.2284e-22, 1.1924e-20,\n 1.3299e-20, 2.3653e-20, 5.7524e-21, 1.7641e-19, 2.5053e-19, 2.2640e-20,\n 1.5827e-19, 1.2751e-20, 3.8270e-21, 1.2943e-19], device='cuda:0')"
},
"25": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 1.0134e-14, 3.4341e-14, 6.8883e-15, -4.1640e-14, 1.4925e-14,\n 1.4043e-15, 8.2142e-15, 9.0308e-15, -1.2426e-15, -1.0514e-13,\n -2.7821e-14, 2.3320e-15, -4.2873e-14, 3.7122e-15, -7.9295e-15,\n 6.7773e-15, -1.6367e-14, -4.2264e-14, 6.2946e-15, -5.5095e-14,\n -2.0884e-15, -1.5359e-14, -1.5333e-14, 3.5400e-15, -6.9153e-14,\n -7.5501e-14, 9.3217e-15, 1.0003e-14, 8.1937e-15, -5.3717e-14,\n 1.0809e-14, -4.0573e-14, 7.1952e-15, -3.8599e-14, -1.6190e-15,\n -6.8331e-14, 2.0912e-14, -8.4875e-14, 3.1818e-14, 1.7119e-14,\n -3.7052e-15, 5.5918e-15, 1.0831e-14, 1.9388e-15, 6.8243e-15,\n 1.9603e-14, -1.2700e-15, 1.0636e-14, 6.8121e-14, 3.7850e-14,\n 4.2162e-15, 3.2027e-14, 1.8906e-15, -3.4743e-14, 6.5388e-15,\n -7.3652e-14, 7.6641e-15, -4.6782e-14, 1.4912e-15, 2.5776e-15,\n -7.7148e-14, -2.1530e-14, 2.4671e-15, 3.8149e-15],\n [-1.3000e-14, -2.8774e-14, -9.5855e-15, 6.2307e-14, -1.7335e-14,\n -3.7164e-15, -1.0366e-14, -1.2108e-14, 7.7877e-15, 1.1727e-13,\n 5.0184e-14, -4.7539e-15, 6.4813e-14, 9.4355e-16, 5.6625e-15,\n -8.6294e-15, 3.2630e-14, 5.5211e-14, -8.9067e-15, 7.0802e-14,\n 4.1861e-16, 4.7903e-14, 2.5083e-14, -5.6486e-15, 8.4573e-14,\n 8.7897e-14, -1.0876e-14, -1.1321e-14, -1.0676e-14, 6.9370e-14,\n -1.3941e-14, 5.2474e-14, -9.6228e-15, 4.3614e-14, -1.1682e-15,\n 8.5949e-14, -2.2826e-14, 9.5963e-14, -2.0847e-14, -1.8489e-14,\n 8.1148e-15, -8.0633e-15, -1.2733e-14, -4.8031e-15, 1.3823e-14,\n -2.2151e-14, -6.4257e-16, -1.3278e-14, -5.1363e-14, -1.9941e-14,\n -6.7332e-15, -1.1824e-14, -3.1288e-15, 4.8783e-14, -9.8557e-15,\n 9.4950e-14, -1.0788e-14, 6.4480e-14, -3.6236e-15, 2.3952e-14,\n 1.0116e-13, 3.2713e-14, -5.0615e-15, 3.2523e-15]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.6352e-18, 1.4110e-18, 1.2928e-17, 1.3916e-18, 1.5505e-18, 9.7582e-18,\n 1.0370e-18, 1.0834e-18, 2.0819e-18, 1.0271e-18, 1.2756e-18, 1.8927e-20,\n 2.7200e-19, 9.5760e-19, 1.8463e-18, 3.2234e-19, 5.9597e-19, 8.7552e-19,\n 2.9116e-20, 1.5754e-18, 1.2981e-18, 3.6529e-21, 1.4351e-18, 2.0877e-18,\n 3.4919e-18, 1.8069e-19, 3.7037e-18, 1.8231e-19, 1.0156e-18, 2.3778e-18,\n 3.7168e-19, 1.0825e-18, 4.5288e-19, 7.9152e-20, 2.2363e-19, 1.6953e-18,\n 1.4941e-19, 5.7509e-19, 2.5887e-18, 9.8882e-18, 8.1088e-19, 1.2286e-19,\n 4.2581e-18, 8.0721e-19, 2.2394e-18, 3.5039e-19, 5.5191e-19, 9.8557e-19,\n 1.0128e-20, 2.0163e-20, 4.0237e-19, 4.0162e-18, 1.1908e-19, 1.0292e-18,\n 9.3748e-19, 2.3436e-20, 8.6035e-19, 4.5319e-18, 3.5540e-18, 1.0139e-20,\n 2.3100e-18, 7.4227e-19, 4.7760e-18, 9.4087e-19],\n [1.6352e-18, 1.4110e-18, 1.2928e-17, 1.3916e-18, 1.5505e-18, 9.7582e-18,\n 1.0370e-18, 1.0834e-18, 2.0819e-18, 1.0271e-18, 1.2756e-18, 1.8927e-20,\n 2.7200e-19, 9.5762e-19, 1.8463e-18, 3.2234e-19, 5.9597e-19, 8.7552e-19,\n 2.9116e-20, 1.5754e-18, 1.2981e-18, 3.6551e-21, 1.4351e-18, 2.0877e-18,\n 3.4919e-18, 1.8069e-19, 3.7037e-18, 1.8231e-19, 1.0156e-18, 2.3778e-18,\n 3.7168e-19, 1.0826e-18, 4.5288e-19, 7.9152e-20, 2.2363e-19, 1.6953e-18,\n 1.4941e-19, 5.7509e-19, 2.5887e-18, 9.8883e-18, 8.1089e-19, 1.2286e-19,\n 4.2581e-18, 8.0721e-19, 2.2394e-18, 3.5039e-19, 5.5192e-19, 9.8557e-19,\n 1.0129e-20, 2.0164e-20, 4.0237e-19, 4.0162e-18, 1.1908e-19, 1.0292e-18,\n 9.3748e-19, 2.3438e-20, 8.6035e-19, 4.5319e-18, 3.5540e-18, 1.0141e-20,\n 2.3100e-18, 7.4228e-19, 4.7760e-18, 9.4087e-19]], device='cuda:0')"
},
"26": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-4.5569e-14, 6.0843e-14], device='cuda:0')",
"exp_avg_sq": "tensor([1.4325e-17, 1.4325e-17], device='cuda:0')"
},
"27": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[-2.9444e-21, -6.2797e-22, -9.0332e-22, ..., 8.8293e-22,\n -2.9421e-21, 1.1472e-21],\n [-1.7229e-21, -1.2763e-21, -1.2679e-21, ..., -2.9333e-22,\n 6.1193e-23, -3.6429e-21],\n [-4.3403e-20, 2.9910e-20, -6.0623e-20, ..., 3.9583e-21,\n 4.9488e-21, -3.1914e-20],\n ...,\n [ 4.5815e-21, 2.8023e-21, 7.5205e-21, ..., -1.3808e-21,\n -3.2628e-22, 2.8032e-21],\n [ 6.0020e-21, -2.1555e-20, -4.8597e-20, ..., 9.4577e-21,\n 7.8720e-21, 9.8173e-21],\n [ 6.7695e-22, 7.8180e-21, -4.3357e-21, ..., 5.2346e-22,\n -1.1501e-22, -3.2678e-22]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.6773e-21, 3.0086e-21, 1.2590e-21, ..., 2.8269e-21, 2.2773e-20,\n 1.1128e-20],\n [6.1434e-22, 2.4705e-22, 2.7324e-22, ..., 2.7086e-23, 1.9800e-21,\n 4.6718e-22],\n [1.7557e-21, 1.4495e-21, 1.7675e-21, ..., 8.2424e-23, 1.1858e-20,\n 2.9136e-22],\n ...,\n [1.0224e-20, 1.3823e-20, 1.3295e-20, ..., 1.0659e-20, 1.1528e-20,\n 1.6218e-20],\n [5.3181e-21, 6.9500e-21, 8.7902e-21, ..., 5.0606e-21, 5.5745e-21,\n 7.1936e-21],\n [2.1215e-22, 5.2824e-23, 9.7549e-23, ..., 3.9045e-23, 6.5969e-23,\n 1.9825e-22]], device='cuda:0')"
},
"28": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-1.0734e-21, -1.8413e-20, -1.3506e-19, -9.6009e-21, -2.0896e-19,\n 6.3593e-20, 9.6970e-21, -9.9731e-20, -9.4810e-21, -1.1273e-20,\n -1.7944e-20, -1.6090e-20, -1.1680e-19, -9.0796e-21, 1.4264e-20,\n 2.6774e-20, 9.0488e-20, -1.3683e-20, -1.8723e-21, 2.1700e-20,\n 3.2824e-19, -1.4188e-20, -2.5144e-20, -2.4336e-21, -5.0675e-20,\n -2.9285e-20, -2.0165e-19, -1.0816e-20, 1.5349e-19, 4.6841e-20,\n -2.0538e-21, 1.9250e-20, -5.4964e-20, -1.3189e-20, -2.4156e-21,\n -1.7915e-20, -5.2083e-20, 6.7655e-21, 6.3603e-21, 1.2444e-20,\n 8.4507e-21, -5.6301e-20, 2.6941e-20, -7.0434e-21, 1.8445e-20,\n -1.0783e-20, 1.3228e-19, -3.6441e-20, 5.2785e-19, -3.5855e-20,\n -4.5492e-21, -9.7005e-20, -8.7130e-21, -5.0374e-21, -1.9109e-20,\n 1.0312e-20, 4.1869e-21, -3.9194e-20, -1.2140e-20, 3.2408e-21,\n -5.6071e-21, 1.9695e-20, -7.7575e-20, 9.9154e-21], device='cuda:0')",
"exp_avg_sq": "tensor([4.0323e-18, 1.7369e-19, 7.5087e-19, 3.1628e-19, 1.0179e-17, 1.1314e-17,\n 9.5417e-18, 5.6836e-18, 2.0866e-19, 3.8117e-20, 3.9290e-18, 5.2688e-19,\n 2.8022e-17, 6.8189e-20, 1.2093e-17, 4.0037e-18, 2.6953e-19, 4.8314e-19,\n 1.9296e-18, 9.3328e-19, 2.1817e-18, 5.6284e-19, 1.0038e-18, 3.0363e-18,\n 2.1339e-17, 2.0649e-22, 4.7569e-18, 9.7804e-18, 1.4605e-17, 1.6542e-17,\n 1.0801e-18, 1.2477e-17, 3.9784e-18, 7.3712e-18, 2.0023e-17, 4.4539e-19,\n 1.9769e-17, 5.0089e-18, 2.0922e-18, 7.7553e-20, 3.1345e-17, 8.0417e-18,\n 1.6106e-18, 7.5121e-18, 4.6761e-19, 1.4344e-17, 7.3322e-19, 4.0190e-19,\n 2.4939e-17, 4.9591e-18, 2.9544e-19, 1.7933e-18, 5.0411e-18, 5.4159e-20,\n 3.4702e-18, 1.0014e-18, 8.8478e-19, 3.9315e-19, 1.6683e-17, 8.6619e-18,\n 2.1452e-18, 1.0045e-17, 7.0164e-18, 9.7591e-20], device='cuda:0')"
},
"29": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 1.6810e-22, -4.2288e-21, -5.0920e-20, -2.6871e-21, -1.3291e-19,\n 4.8978e-20, -1.6962e-21, -1.4759e-19, -1.0495e-21, 5.6626e-20,\n 1.5054e-21, 2.9375e-21, -1.9896e-19, 1.1672e-22, -4.4243e-21,\n -2.9963e-21, 1.3551e-19, 7.8271e-22, 4.3406e-20, 2.0910e-20,\n 3.4505e-19, -4.7807e-22, -1.6922e-21, 1.0789e-21, 4.9892e-20,\n -1.1359e-21, -2.2854e-19, 7.5685e-22, 8.3705e-20, -3.8678e-20,\n -1.2977e-21, -3.0403e-22, -8.6405e-20, 6.2196e-21, -1.0747e-21,\n -3.3764e-21, -6.5518e-20, 2.7936e-20, 1.4391e-21, 5.9227e-21,\n -2.3484e-21, 1.5087e-20, 9.6184e-20, 9.6653e-22, 3.9302e-20,\n 1.9174e-21, 1.3906e-19, 2.4841e-20, 5.5586e-19, 3.4972e-20,\n -1.6105e-20, -4.7248e-20, 1.0647e-21, 1.1760e-21, -4.1516e-20,\n 1.9025e-21, -1.9365e-22, 1.5026e-20, 3.2614e-21, -1.1044e-21,\n 7.4595e-24, -6.8952e-21, -7.5994e-20, 3.0692e-21], device='cuda:0')",
"exp_avg_sq": "tensor([1.6774e-20, 4.9547e-23, 6.6546e-21, 2.6821e-22, 1.0806e-19, 1.3357e-19,\n 6.1828e-20, 9.1085e-20, 4.9200e-22, 1.6433e-21, 1.6208e-20, 5.6550e-21,\n 3.2910e-19, 1.0398e-23, 5.9856e-20, 1.3560e-20, 7.6565e-21, 1.0757e-21,\n 2.3234e-20, 8.6381e-21, 1.5003e-20, 1.8866e-21, 1.8946e-21, 1.4924e-20,\n 1.6166e-19, 3.1097e-22, 3.8344e-20, 5.5103e-20, 1.1601e-19, 2.0262e-19,\n 2.5876e-21, 7.6235e-20, 3.5516e-20, 2.3492e-20, 1.1119e-19, 3.8260e-22,\n 2.8256e-19, 4.0220e-20, 8.5913e-21, 3.5180e-23, 3.6316e-19, 1.1312e-19,\n 1.7285e-20, 3.7941e-20, 4.9217e-21, 7.8503e-20, 6.1316e-21, 6.8247e-21,\n 2.1355e-19, 6.2344e-20, 3.2624e-21, 3.0607e-20, 2.4428e-20, 1.0160e-23,\n 2.5036e-20, 2.7528e-21, 1.6130e-21, 4.7961e-21, 1.0759e-19, 5.0299e-20,\n 7.7030e-21, 5.6604e-20, 6.2344e-20, 4.1836e-23], device='cuda:0')"
},
"30": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-2.2418e-22, 3.5132e-21, -7.0808e-20, 2.5292e-21, -1.5127e-19,\n 6.1472e-20, 1.6742e-21, -1.0271e-19, 9.2692e-22, 3.3169e-20,\n -1.5014e-21, 4.0592e-21, -1.4170e-19, -1.9981e-22, 3.8717e-21,\n 2.8050e-21, 1.2664e-19, -8.5839e-22, 3.1568e-20, 4.4584e-20,\n 3.3451e-19, 3.2038e-22, 1.9662e-21, -8.2566e-22, -5.2558e-22,\n -7.6276e-21, -1.7448e-19, -3.6454e-22, 1.3312e-19, 3.5813e-20,\n 1.2215e-21, -4.8933e-23, -3.2118e-20, -5.5664e-21, 3.1693e-22,\n 2.9558e-21, -6.3457e-20, 2.2846e-20, -1.1072e-21, -5.7150e-21,\n 3.1762e-21, -3.1872e-20, 6.4872e-20, -1.3120e-21, 3.9796e-20,\n -1.7127e-21, 1.3908e-19, 2.0293e-21, 5.2115e-19, 2.1949e-20,\n 7.0536e-21, -5.5669e-20, -9.0840e-22, -1.1380e-21, -2.8882e-20,\n -1.8788e-21, 3.0981e-22, -1.3011e-20, -2.5456e-21, 9.9143e-22,\n -7.4342e-24, 6.3867e-21, -5.9958e-20, -2.7415e-21], device='cuda:0')",
"exp_avg_sq": "tensor([2.3852e-20, 9.2453e-23, 1.1318e-20, 5.4370e-22, 1.3265e-19, 1.5621e-19,\n 6.0687e-20, 8.7906e-20, 7.9348e-22, 2.9950e-21, 2.6261e-20, 1.0788e-20,\n 3.5449e-19, 2.3404e-23, 9.3287e-20, 2.4427e-20, 9.6371e-21, 2.0644e-21,\n 3.4123e-20, 1.3668e-20, 2.5452e-20, 2.8294e-21, 2.7186e-21, 2.2341e-20,\n 2.5076e-19, 7.1555e-22, 6.6829e-20, 7.2218e-20, 1.8556e-19, 2.1409e-19,\n 4.4424e-21, 8.9796e-20, 5.3662e-20, 5.6494e-20, 1.6443e-19, 5.2748e-22,\n 2.5560e-19, 7.2827e-20, 7.3602e-21, 6.7018e-23, 2.3127e-19, 1.1452e-19,\n 2.7467e-20, 5.2985e-20, 1.0514e-20, 1.1433e-19, 1.3225e-20, 1.0559e-20,\n 3.0810e-19, 7.2463e-20, 6.7512e-21, 3.3166e-20, 3.0217e-20, 1.7484e-23,\n 4.6926e-20, 3.9719e-21, 1.9432e-21, 9.2147e-21, 1.3010e-19, 5.5465e-20,\n 1.1932e-20, 8.5357e-20, 9.1702e-20, 7.6435e-23], device='cuda:0')"
},
"31": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 5.5613e-20, -2.9061e-20, -4.9338e-19, 2.0351e-20, -5.6039e-19,\n 2.1294e-19, -7.2306e-20, -8.1382e-19, 2.2269e-20, 5.6062e-19,\n -2.4928e-20, 2.2961e-20, -6.4078e-19, -6.8075e-20, -7.8363e-21,\n 4.8224e-20, 1.1902e-18, -1.2946e-19, 2.8421e-19, 9.3246e-20,\n 1.2343e-18, -3.3041e-20, -1.3255e-19, 5.1838e-21, 1.6686e-19,\n -9.6653e-21, -1.1053e-18, -6.8437e-20, 2.5183e-19, -1.7971e-19,\n 1.2553e-19, 2.3266e-19, -4.2654e-19, 8.4187e-20, -1.6328e-20,\n -2.4966e-20, -2.3694e-19, 1.3975e-19, -3.7609e-20, -1.1844e-20,\n 3.7862e-22, 1.0569e-19, 6.1163e-19, 9.3943e-21, 2.9614e-19,\n -3.2976e-20, 8.3540e-19, 1.9989e-19, 1.6524e-18, 1.9181e-19,\n -1.3109e-19, -3.0238e-19, 3.5202e-20, -9.9597e-21, -1.7301e-19,\n -1.0287e-19, 2.8211e-20, 1.2896e-19, -1.8185e-20, -4.6086e-20,\n 1.3879e-19, -6.4200e-20, -3.4559e-19, 3.9199e-20],\n [-5.5612e-20, 2.9052e-20, 4.9346e-19, -2.0369e-20, 5.6052e-19,\n -2.1278e-19, 7.2292e-20, 8.1389e-19, -2.2285e-20, -5.6056e-19,\n 2.4913e-20, -2.2860e-20, 6.4088e-19, 6.8066e-20, 7.8201e-21,\n -4.8237e-20, -1.1901e-18, 1.2944e-19, -2.8407e-19, -9.3136e-20,\n -1.2343e-18, 3.3027e-20, 1.3254e-19, -5.2001e-21, -1.6677e-19,\n 9.6548e-21, 1.1054e-18, 6.8425e-20, -2.5174e-19, 1.7974e-19,\n -1.2554e-19, -2.3269e-19, 4.2665e-19, -8.4207e-20, 1.6317e-20,\n 2.4955e-20, 2.3702e-19, -1.3963e-19, 3.7611e-20, 1.1830e-20,\n -3.9176e-22, -1.0557e-19, -6.1152e-19, -9.4028e-21, -2.9602e-19,\n 3.2955e-20, -8.3530e-19, -1.9981e-19, -1.6523e-18, -1.9171e-19,\n 1.3116e-19, 3.0244e-19, -3.5198e-20, 9.9446e-21, 1.7312e-19,\n 1.0286e-19, -2.8231e-20, -1.2882e-19, 1.8167e-20, 4.6065e-20,\n -1.3881e-19, 6.4192e-20, 3.4564e-19, -3.9217e-20]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.9596e-18, 4.6604e-19, 2.4580e-21, 7.1125e-19, 3.1299e-18, 3.9701e-18,\n 5.2200e-18, 5.5852e-18, 4.1568e-20, 6.1985e-19, 7.7735e-19, 4.5651e-19,\n 4.5440e-18, 1.0709e-19, 1.5299e-18, 8.6353e-19, 2.3399e-18, 1.0943e-19,\n 1.8213e-18, 2.7540e-20, 2.6558e-21, 1.1725e-19, 2.2842e-18, 1.9509e-20,\n 1.5645e-18, 1.2865e-19, 1.1455e-18, 2.8790e-18, 1.1751e-18, 4.2587e-18,\n 4.2632e-19, 4.7728e-18, 9.5325e-19, 1.7709e-19, 1.3779e-18, 1.8255e-18,\n 6.2553e-18, 1.2333e-18, 6.6859e-18, 5.6914e-19, 1.3317e-17, 4.8876e-18,\n 1.1862e-18, 2.2638e-18, 5.0063e-19, 2.0593e-18, 1.3202e-19, 8.0169e-19,\n 2.2164e-18, 3.7034e-18, 2.0706e-19, 3.4193e-18, 3.0493e-18, 1.2179e-19,\n 2.1161e-19, 1.2778e-18, 2.8046e-18, 4.2006e-19, 3.1486e-18, 5.1165e-18,\n 1.0350e-18, 9.4008e-19, 1.7075e-18, 1.1502e-19],\n [1.9596e-18, 4.6604e-19, 2.4580e-21, 7.1125e-19, 3.1299e-18, 3.9701e-18,\n 5.2200e-18, 5.5852e-18, 4.1568e-20, 6.1985e-19, 7.7735e-19, 4.5651e-19,\n 4.5440e-18, 1.0709e-19, 1.5299e-18, 8.6353e-19, 2.3399e-18, 1.0943e-19,\n 1.8213e-18, 2.7540e-20, 2.6558e-21, 1.1725e-19, 2.2842e-18, 1.9509e-20,\n 1.5645e-18, 1.2865e-19, 1.1455e-18, 2.8790e-18, 1.1751e-18, 4.2587e-18,\n 4.2632e-19, 4.7728e-18, 9.5325e-19, 1.7709e-19, 1.3779e-18, 1.8255e-18,\n 6.2553e-18, 1.2333e-18, 6.6859e-18, 5.6914e-19, 1.3317e-17, 4.8876e-18,\n 1.1862e-18, 2.2638e-18, 5.0063e-19, 2.0593e-18, 1.3202e-19, 8.0169e-19,\n 2.2164e-18, 3.7034e-18, 2.0706e-19, 3.4193e-18, 3.0493e-18, 1.2179e-19,\n 2.1161e-19, 1.2778e-18, 2.8046e-18, 4.2006e-19, 3.1486e-18, 5.1165e-18,\n 1.0350e-18, 9.4008e-19, 1.7075e-18, 1.1502e-19]], device='cuda:0')"
},
"32": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 5.0334e-20, -5.0278e-20], device='cuda:0')",
"exp_avg_sq": "tensor([1.7754e-17, 1.7754e-17], device='cuda:0')"
},
"33": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 1.3436e-21, -3.3585e-20, -9.7144e-21, ..., -4.8187e-20,\n 6.2348e-21, 3.9804e-20],\n [-1.6766e-21, 1.7358e-20, 3.3127e-21, ..., -3.5946e-21,\n -2.9470e-21, 1.8259e-21],\n [-6.7992e-21, 1.1671e-20, 3.2218e-21, ..., 3.8494e-20,\n -1.1814e-20, 1.6206e-20],\n ...,\n [-1.8725e-22, 3.2989e-20, 2.0565e-20, ..., 2.5094e-20,\n -1.4833e-20, -3.0357e-20],\n [ 3.1410e-22, 7.9056e-21, -8.6345e-22, ..., 2.5503e-21,\n 5.9475e-22, -9.9955e-22],\n [-1.3049e-21, -2.8084e-21, 1.6300e-21, ..., -1.2903e-20,\n -1.1771e-21, 2.7207e-22]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.0539e-23, 1.1441e-25, 3.2981e-24, ..., 9.3937e-24, 7.7489e-23,\n 2.6068e-25],\n [1.8826e-20, 1.2083e-20, 3.5599e-21, ..., 8.6652e-21, 2.9269e-20,\n 6.1839e-20],\n [7.8930e-22, 2.2509e-21, 1.2100e-21, ..., 1.3643e-21, 5.2361e-21,\n 1.1085e-20],\n ...,\n [7.0281e-21, 7.8867e-21, 1.1530e-20, ..., 2.6199e-20, 7.2989e-20,\n 6.9166e-20],\n [1.8964e-21, 5.3734e-22, 4.4041e-21, ..., 6.3778e-22, 2.0289e-21,\n 6.8981e-21],\n [8.1919e-23, 2.7600e-23, 6.3698e-24, ..., 2.1748e-23, 4.5626e-22,\n 1.7479e-23]], device='cuda:0')"
},
"34": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 1.2168e-19, -8.7129e-20, 2.9680e-20, 3.3900e-20, 6.1900e-19,\n 1.3744e-20, -4.7830e-19, 5.3105e-20, 4.2231e-20, -5.3957e-19,\n -3.2101e-20, 2.1424e-19, 2.9317e-19, 2.5496e-20, -5.8044e-19,\n 4.6661e-19, -1.9964e-20, 5.7051e-19, 1.3578e-20, -3.5845e-19,\n 1.3195e-19, -4.4375e-20, -3.3133e-20, -1.1083e-19, 6.2718e-19,\n -1.4565e-20, 2.0574e-19, -9.0477e-20, 9.7899e-21, -3.2728e-20,\n 2.5183e-20, 3.6816e-20, 1.6347e-20, 6.6867e-20, 3.7046e-20,\n -4.2181e-19, -3.1548e-19, 9.3125e-20, -4.6629e-21, 3.0279e-20,\n -4.6936e-20, -8.1536e-20, 9.0584e-20, -1.4061e-19, -4.7160e-19,\n 4.4422e-19, -9.5090e-20, 3.4285e-21, 1.9608e-19, 2.1664e-19,\n -3.0150e-19, 1.8886e-19, -1.0551e-20, 6.1044e-20, -5.7094e-20,\n 4.8420e-19, -2.4810e-19, 7.4040e-21, -2.0765e-20, 7.9786e-20,\n -4.3056e-19, -5.2265e-19, 1.2861e-20, 2.8659e-20], device='cuda:0')",
"exp_avg_sq": "tensor([3.6293e-22, 2.6350e-17, 3.0366e-18, 5.7353e-20, 1.3934e-18, 9.9384e-20,\n 1.4354e-17, 4.0021e-21, 1.5193e-18, 3.4095e-17, 6.5654e-19, 9.6164e-20,\n 1.1656e-19, 5.1789e-18, 1.7536e-17, 6.3565e-19, 6.2328e-18, 1.7541e-19,\n 2.7034e-18, 7.5774e-18, 1.1569e-18, 1.9030e-17, 8.2834e-19, 4.5269e-18,\n 5.1460e-17, 5.5910e-18, 6.1102e-19, 2.0294e-18, 4.7999e-18, 7.7627e-19,\n 6.5802e-20, 2.8365e-17, 7.8640e-19, 1.1848e-18, 3.3507e-19, 2.1797e-17,\n 6.5460e-18, 7.4564e-18, 2.1249e-17, 1.3235e-19, 3.2666e-18, 2.2947e-17,\n 1.1948e-17, 7.9952e-19, 2.2353e-17, 1.0542e-18, 9.4407e-18, 2.6306e-17,\n 1.8416e-19, 3.4412e-19, 1.1123e-17, 9.7368e-21, 7.8926e-18, 2.3156e-19,\n 1.2996e-17, 1.8070e-17, 1.0575e-17, 1.2663e-17, 5.1667e-18, 5.0205e-21,\n 1.3648e-17, 3.3918e-17, 3.9910e-18, 6.9632e-20], device='cuda:0')"
},
"35": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 2.9048e-21, 3.6972e-20, -1.9128e-19, -8.7807e-22, 1.5801e-19,\n -6.5693e-21, -5.7959e-19, -3.3078e-21, 1.5546e-21, -5.8317e-19,\n -3.1530e-21, -9.5864e-20, -7.4454e-20, 1.7706e-21, -6.9717e-19,\n 4.3570e-20, 1.0488e-20, 1.6108e-19, 3.4701e-22, -5.5226e-19,\n -2.4029e-19, 2.6347e-20, 4.7701e-21, -1.8959e-19, 1.2719e-19,\n 6.9702e-21, -2.3670e-19, -2.8378e-19, 6.6921e-21, 3.3837e-21,\n -8.2845e-21, 6.3320e-22, 1.3092e-21, -7.8082e-23, -2.4674e-19,\n -4.0324e-19, -5.7944e-19, -4.7212e-21, 1.6571e-20, -3.4819e-21,\n -4.1389e-19, 3.3925e-20, -9.3527e-21, -4.6409e-19, -5.9792e-19,\n 1.0816e-19, 2.9705e-20, 1.1342e-20, -1.6948e-19, -1.7015e-19,\n -3.0829e-19, -7.2938e-20, 7.4177e-21, -1.5895e-20, 2.3743e-20,\n 1.5635e-19, -5.0854e-19, 1.2079e-20, 1.0585e-20, -2.1386e-21,\n -2.5542e-19, -4.1509e-19, 1.5098e-21, -4.5835e-22], device='cuda:0')",
"exp_avg_sq": "tensor([5.4912e-22, 1.8370e-19, 1.2008e-20, 6.9626e-22, 8.0431e-21, 1.4955e-21,\n 7.9326e-20, 7.2887e-22, 1.2330e-20, 1.8859e-19, 4.8760e-21, 4.7002e-23,\n 1.4482e-22, 3.3243e-20, 7.6983e-20, 2.7186e-21, 4.0107e-20, 1.6012e-22,\n 2.2987e-20, 3.9551e-20, 6.9093e-21, 1.4200e-19, 6.1238e-21, 2.2373e-20,\n 7.6268e-19, 4.6344e-20, 2.3478e-21, 6.9662e-21, 3.4301e-20, 5.5732e-21,\n 1.9324e-21, 2.3759e-19, 1.3425e-20, 7.6116e-21, 8.6469e-22, 1.1348e-19,\n 3.5734e-20, 8.2823e-20, 1.6304e-19, 1.3153e-21, 1.2399e-20, 2.5130e-19,\n 1.4849e-19, 4.1150e-21, 1.4332e-19, 4.7436e-21, 9.4168e-20, 1.6369e-19,\n 2.4675e-22, 1.1937e-21, 6.6386e-20, 5.7067e-25, 5.1615e-20, 1.8714e-21,\n 9.7337e-20, 1.1724e-19, 7.2082e-20, 1.5110e-19, 3.3436e-20, 5.5982e-22,\n 1.0229e-19, 2.6305e-19, 2.5152e-20, 9.0749e-22], device='cuda:0')"
},
"36": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-6.4556e-20, -3.5784e-20, -1.7271e-19, 1.7355e-21, 1.7353e-19,\n 6.5249e-21, -5.4113e-19, 1.0888e-20, -1.7889e-21, -5.1606e-19,\n 2.7297e-21, -1.0249e-19, -6.5046e-20, 2.6525e-23, -7.5924e-19,\n 3.4660e-20, -9.8649e-21, 1.3374e-19, -1.0936e-21, -4.9510e-19,\n -2.0737e-19, -3.1047e-20, -4.2232e-21, -2.5642e-19, 1.8360e-19,\n -6.2860e-21, -1.7881e-19, -3.0028e-19, -9.6268e-21, -3.0315e-21,\n 8.4601e-21, -7.5282e-21, -1.1322e-21, 6.1664e-23, -2.5749e-19,\n -4.3943e-19, -5.3620e-19, 4.2251e-21, -1.7636e-20, 3.4247e-21,\n -2.9713e-19, -2.9551e-20, 9.6587e-21, -4.2036e-19, -4.9871e-19,\n 8.6908e-20, -2.1621e-20, -1.3807e-20, -1.5610e-19, -1.5081e-19,\n -3.6476e-19, -1.1319e-19, -5.8520e-21, 9.9467e-21, -2.0186e-20,\n 1.5355e-19, -4.8218e-19, -1.2184e-20, -9.4495e-21, 6.2504e-21,\n -4.5705e-19, -4.7563e-19, -2.8045e-21, 6.5728e-22], device='cuda:0')",
"exp_avg_sq": "tensor([6.5546e-22, 2.8221e-19, 2.4188e-20, 1.2100e-21, 9.2228e-21, 2.7229e-21,\n 1.3644e-19, 9.9940e-22, 2.0095e-20, 3.2075e-19, 9.7980e-21, 6.5227e-23,\n 2.5503e-22, 5.8663e-20, 1.5484e-19, 2.9857e-21, 6.9997e-20, 1.9530e-22,\n 3.5643e-20, 6.0278e-20, 8.7599e-21, 2.0059e-19, 1.1578e-20, 3.5425e-20,\n 4.5525e-19, 6.5277e-20, 2.6129e-21, 1.4871e-20, 5.4305e-20, 1.0076e-20,\n 2.7103e-21, 3.0722e-19, 1.3275e-20, 1.6099e-20, 1.5901e-21, 1.8847e-19,\n 5.3361e-20, 9.0999e-20, 2.3193e-19, 2.3820e-21, 2.6066e-20, 2.4612e-19,\n 1.3914e-19, 6.1001e-21, 2.0616e-19, 5.3453e-21, 1.1599e-19, 2.7606e-19,\n 3.5334e-22, 1.3186e-21, 9.6161e-20, 1.0352e-24, 8.9977e-20, 3.2106e-21,\n 1.3723e-19, 1.6909e-19, 9.6955e-20, 1.4340e-19, 6.4003e-20, 8.0179e-22,\n 1.2104e-19, 2.9081e-19, 4.7927e-20, 1.3820e-21], device='cuda:0')"
},
"37": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[-7.7567e-20, -1.9011e-19, 1.0936e-18, -2.2719e-19, -1.4698e-18,\n -5.7903e-19, 2.2921e-18, -4.5590e-19, -5.9050e-19, 1.7523e-18,\n -3.7292e-19, 1.3785e-18, 9.8575e-19, 2.6507e-19, 2.5231e-18,\n -4.9040e-19, -2.4055e-19, -2.2956e-18, -2.7934e-19, 2.1647e-18,\n 2.6295e-18, -4.3822e-19, -3.2685e-19, 8.5305e-19, -4.3540e-19,\n -3.0719e-19, 2.7654e-18, 1.9104e-18, -3.7566e-19, -2.9071e-19,\n -4.2355e-19, -3.6085e-19, 2.0464e-19, -3.6627e-19, 2.3973e-18,\n 1.3159e-18, 3.2555e-18, -4.2682e-19, -3.0835e-19, -4.3175e-19,\n 2.3734e-18, -2.7129e-19, -2.4658e-19, 3.0770e-18, 1.9857e-18,\n -1.1899e-18, -2.3932e-19, -3.4404e-19, 2.2916e-18, 2.1387e-18,\n 1.2916e-18, 1.1217e-18, -1.9245e-19, -2.7729e-19, -1.8755e-19,\n -6.3933e-19, 2.2786e-18, -4.8423e-19, -2.3682e-19, -2.6770e-19,\n 9.6884e-19, 1.4631e-18, -2.2117e-19, 1.1197e-20],\n [ 7.7384e-20, 1.9016e-19, -1.0933e-18, 2.2719e-19, 1.4696e-18,\n 5.7916e-19, -2.2923e-18, 4.5598e-19, 5.9047e-19, -1.7523e-18,\n 3.7282e-19, -1.3788e-18, -9.8572e-19, -2.6502e-19, -2.5234e-18,\n 4.8960e-19, 2.4052e-19, 2.2959e-18, 2.7945e-19, -2.1651e-18,\n -2.6300e-18, 4.3825e-19, 3.2688e-19, -8.5358e-19, 4.3530e-19,\n 3.0724e-19, -2.7649e-18, -1.9108e-18, 3.7570e-19, 2.9075e-19,\n 4.2341e-19, 3.6085e-19, -2.0474e-19, 3.6632e-19, -2.3974e-18,\n -1.3160e-18, -3.2561e-18, 4.2693e-19, 3.0845e-19, 4.3177e-19,\n -2.3738e-18, 2.7129e-19, 2.4663e-19, -3.0764e-18, -1.9858e-18,\n 1.1896e-18, 2.3934e-19, 3.4421e-19, -2.2911e-18, -2.1391e-18,\n -1.2917e-18, -1.1219e-18, 1.9247e-19, 2.7728e-19, 1.8755e-19,\n 6.3870e-19, -2.2786e-18, 4.8422e-19, 2.3686e-19, 2.6772e-19,\n -9.6819e-19, -1.4632e-18, 2.2118e-19, -1.1290e-20]], device='cuda:0')",
"exp_avg_sq": "tensor([[3.8293e-18, 2.2553e-18, 3.2741e-19, 6.7939e-20, 4.1612e-18, 3.9126e-19,\n 1.4612e-18, 3.2529e-18, 1.0989e-18, 1.7093e-18, 8.6053e-19, 1.8868e-18,\n 7.6194e-19, 1.1359e-18, 9.6366e-19, 3.8664e-18, 1.0523e-18, 5.1948e-18,\n 1.5917e-18, 3.1240e-19, 1.6434e-20, 1.3552e-18, 1.9832e-19, 2.0438e-19,\n 2.0634e-17, 3.8123e-18, 3.9771e-18, 3.6519e-19, 1.8462e-19, 2.2150e-19,\n 3.0320e-18, 3.3613e-18, 6.2760e-18, 9.2344e-19, 9.7217e-20, 1.1866e-18,\n 1.8629e-18, 6.1561e-18, 2.8760e-18, 2.5595e-19, 2.7440e-19, 7.4742e-18,\n 8.3641e-18, 3.2034e-20, 1.5301e-18, 6.0618e-18, 3.3969e-18, 1.5247e-18,\n 2.7053e-18, 5.3850e-18, 1.3953e-18, 6.4717e-20, 1.2910e-18, 1.5172e-20,\n 2.5704e-18, 2.7593e-18, 1.9865e-18, 7.5372e-18, 9.8422e-19, 2.5110e-18,\n 2.0026e-18, 4.7235e-18, 1.2753e-18, 1.6571e-19],\n [3.8293e-18, 2.2553e-18, 3.2741e-19, 6.7939e-20, 4.1612e-18, 3.9126e-19,\n 1.4612e-18, 3.2529e-18, 1.0989e-18, 1.7093e-18, 8.6053e-19, 1.8868e-18,\n 7.6194e-19, 1.1359e-18, 9.6366e-19, 3.8664e-18, 1.0523e-18, 5.1948e-18,\n 1.5917e-18, 3.1240e-19, 1.6434e-20, 1.3552e-18, 1.9832e-19, 2.0438e-19,\n 2.0634e-17, 3.8123e-18, 3.9771e-18, 3.6519e-19, 1.8462e-19, 2.2150e-19,\n 3.0320e-18, 3.3613e-18, 6.2760e-18, 9.2344e-19, 9.7217e-20, 1.1866e-18,\n 1.8629e-18, 6.1561e-18, 2.8760e-18, 2.5595e-19, 2.7440e-19, 7.4742e-18,\n 8.3641e-18, 3.2034e-20, 1.5301e-18, 6.0618e-18, 3.3969e-18, 1.5247e-18,\n 2.7053e-18, 5.3850e-18, 1.3953e-18, 6.4717e-20, 1.2910e-18, 1.5172e-20,\n 2.5704e-18, 2.7593e-18, 1.9865e-18, 7.5372e-18, 9.8422e-19, 2.5110e-18,\n 2.0026e-18, 4.7235e-18, 1.2753e-18, 1.6571e-19]], device='cuda:0')"
},
"38": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 1.9216e-18, -1.9221e-18], device='cuda:0')",
"exp_avg_sq": "tensor([2.2217e-17, 2.2217e-17], device='cuda:0')"
},
"39": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 2.4640e-15, 7.9853e-16, 8.5385e-16, ..., -8.2011e-16,\n 2.6351e-15, 2.0394e-15],\n [ 2.6622e-16, 4.2554e-17, 3.2939e-16, ..., -1.3876e-16,\n -1.7885e-16, 4.2421e-16],\n [ 2.3618e-15, -6.0021e-16, -8.6574e-16, ..., -1.7308e-15,\n -2.1437e-15, -2.8089e-16],\n ...,\n [ 1.7058e-15, 8.8605e-16, 7.8316e-16, ..., 6.2783e-16,\n -1.6440e-15, -2.6271e-15],\n [-1.6037e-16, 7.9788e-17, 1.4750e-16, ..., 4.6835e-17,\n -8.7352e-17, -1.1714e-16],\n [ 2.3723e-16, 2.4168e-18, 8.9337e-18, ..., 8.4873e-17,\n -1.1862e-16, -4.0871e-16]], device='cuda:0')",
"exp_avg_sq": "tensor([[9.4747e-21, 1.5286e-20, 1.2218e-20, ..., 3.5609e-21, 1.6840e-20,\n 2.7512e-20],\n [7.6721e-21, 3.0718e-20, 1.3900e-20, ..., 4.7044e-21, 5.2449e-21,\n 6.5927e-20],\n [3.3871e-21, 4.3302e-21, 9.6113e-22, ..., 4.0952e-22, 5.4199e-21,\n 4.7972e-21],\n ...,\n [5.1390e-22, 3.0285e-21, 2.2963e-21, ..., 1.9400e-22, 1.0327e-21,\n 3.5667e-21],\n [2.3271e-22, 1.2768e-21, 1.2925e-21, ..., 1.6999e-22, 1.6563e-23,\n 2.5504e-21],\n [7.5109e-22, 1.6092e-21, 2.6752e-21, ..., 1.7246e-22, 1.1630e-21,\n 2.1374e-21]], device='cuda:0')"
},
"40": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 9.5174e-15, -1.1483e-15, -1.6976e-14, -2.2103e-16, 8.9060e-15,\n 1.6582e-14, -2.0104e-15, -1.4290e-15, -2.9216e-15, 1.1093e-15,\n -2.4016e-15, 4.9777e-15, -2.5678e-15, 2.0898e-14, 1.0038e-14,\n -7.5252e-15, -1.3085e-15, 1.7841e-15, -9.3484e-16, -1.0902e-16,\n -3.3417e-15, 1.8797e-15, -7.6492e-16, 1.3852e-15, 2.7164e-15,\n -1.2505e-15, -2.4905e-14, -9.4077e-15, 2.4857e-16, 1.1387e-14,\n -6.5046e-15, -1.6947e-15, 2.2688e-14, -1.5619e-15, -1.0801e-15,\n -8.4696e-16, -2.6625e-15, -9.0631e-15, -1.5777e-15, -6.0489e-15,\n -1.6168e-15, 1.0113e-14, 5.8221e-15, -5.0811e-16, -4.1496e-16,\n -3.2343e-16, -1.5624e-15, -1.1227e-15, 1.1863e-15, -2.6102e-14,\n 6.6809e-15, -8.1063e-15, 1.2051e-14, 1.6618e-16, 4.7141e-17,\n 1.0121e-15, -1.5560e-14, 4.3900e-15, 3.8314e-15, -1.5168e-15,\n 6.5859e-15, -1.2654e-15, 8.7661e-16, 1.4826e-15], device='cuda:0')",
"exp_avg_sq": "tensor([1.8046e-17, 2.9593e-17, 4.1877e-18, 2.0591e-18, 2.2153e-19, 2.2416e-18,\n 1.2951e-18, 2.7077e-20, 1.6599e-18, 6.3951e-19, 6.9122e-18, 8.3435e-20,\n 8.5028e-19, 3.0463e-18, 5.2909e-18, 1.4536e-18, 3.2758e-18, 4.4712e-18,\n 4.2434e-18, 2.3891e-18, 7.6945e-18, 2.0722e-19, 3.6450e-19, 4.4229e-20,\n 1.2040e-17, 5.4575e-19, 9.2351e-18, 1.1042e-18, 3.5923e-19, 8.4696e-19,\n 2.7764e-18, 1.5313e-17, 1.2443e-18, 2.1789e-20, 1.2524e-17, 1.5373e-19,\n 1.1585e-18, 1.0896e-17, 6.4220e-18, 6.4742e-19, 3.0011e-18, 5.4919e-19,\n 3.0644e-19, 1.4347e-19, 3.0906e-19, 5.4212e-18, 1.3749e-18, 1.0988e-18,\n 7.3267e-19, 2.0940e-18, 1.6751e-19, 3.9774e-19, 4.7847e-18, 2.0206e-18,\n 1.1509e-20, 1.8512e-20, 1.3669e-17, 4.9638e-20, 6.8319e-19, 8.8829e-18,\n 1.8401e-18, 1.9270e-18, 1.2329e-18, 1.4251e-18], device='cuda:0')"
},
"41": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-4.8706e-14, 1.3587e-15, -7.7343e-14, 4.0114e-16, -5.9997e-15,\n -4.0899e-14, 9.4001e-16, -5.8964e-16, -5.6393e-14, 1.9983e-16,\n 2.1145e-15, 1.0814e-15, -3.0803e-14, -4.3904e-14, -7.4577e-14,\n -6.4302e-14, 4.3665e-16, -5.0540e-14, 1.4265e-15, -1.4287e-17,\n 1.4483e-15, 2.0332e-16, 1.8074e-16, -1.0481e-16, -6.0192e-14,\n -2.7595e-14, -1.0331e-13, -5.5067e-14, -4.2747e-14, -1.4873e-14,\n -8.1180e-14, 1.4544e-15, -4.8623e-14, -8.5811e-16, 1.5334e-15,\n -4.3686e-16, 2.3839e-16, -8.4419e-14, 9.3670e-16, -4.3068e-14,\n 5.7588e-16, -7.7289e-15, -1.9831e-14, -2.0990e-16, -1.9871e-16,\n 4.0248e-16, 1.5011e-16, 5.0179e-16, 6.1264e-16, -8.6514e-14,\n 7.9652e-16, -3.4532e-14, -4.2900e-14, 5.0305e-16, -4.7301e-16,\n -9.8873e-16, -7.0272e-14, 3.4239e-16, -2.2965e-14, 1.5244e-15,\n -4.4897e-14, -5.0971e-14, 6.1411e-17, 2.4409e-17], device='cuda:0')",
"exp_avg_sq": "tensor([2.9573e-19, 3.5692e-19, 3.5346e-20, 1.0832e-20, 2.6507e-21, 2.4895e-20,\n 5.3442e-21, 8.4004e-23, 1.9695e-20, 3.0824e-21, 3.1914e-20, 2.1270e-22,\n 1.1986e-20, 2.8710e-20, 6.3585e-20, 1.5860e-20, 2.5609e-20, 5.2511e-20,\n 2.0318e-20, 2.3570e-20, 5.5879e-20, 1.0761e-21, 1.5661e-21, 1.6396e-22,\n 1.5422e-19, 6.9543e-21, 8.2982e-20, 1.4102e-20, 5.9324e-21, 1.2348e-20,\n 2.7700e-20, 1.1816e-19, 1.6315e-20, 3.5845e-24, 7.8338e-20, 7.6943e-22,\n 7.4432e-21, 1.2321e-19, 4.5461e-20, 8.5304e-21, 2.2193e-20, 9.3724e-21,\n 3.0673e-21, 6.4501e-22, 1.4488e-21, 4.9278e-20, 8.1772e-21, 5.7349e-21,\n 3.1860e-21, 2.1390e-20, 1.3830e-21, 5.2841e-21, 5.2374e-20, 9.9076e-21,\n 1.1287e-24, 5.0556e-24, 1.7766e-19, 1.8041e-22, 9.3628e-21, 5.7114e-20,\n 2.1158e-20, 2.0607e-20, 8.4479e-21, 1.5518e-20], device='cuda:0')"
},
"42": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-3.9136e-14, -1.3808e-15, -5.6022e-14, -4.1418e-16, -1.9161e-14,\n -3.6114e-14, -9.5362e-16, 6.0546e-16, -4.6303e-14, -2.2036e-16,\n -2.3504e-15, -1.9858e-15, -4.0389e-14, -3.0029e-14, -4.4880e-14,\n -5.5612e-14, -4.6821e-16, -4.3581e-14, -1.5060e-15, 1.0884e-17,\n -1.5488e-15, -1.9521e-16, -1.9274e-16, 1.0884e-16, -4.6926e-14,\n -3.7765e-14, -7.3623e-14, -5.0269e-14, -3.7138e-14, -2.5071e-14,\n -5.9057e-14, -1.5616e-15, -3.3742e-14, 8.2514e-16, -1.7150e-15,\n 4.5187e-16, -2.3145e-16, -6.7798e-14, -9.5473e-16, -4.6573e-14,\n -5.7799e-16, -2.1101e-14, -2.9538e-14, 2.2115e-16, 1.9932e-16,\n -4.2476e-16, -1.5236e-16, -5.1413e-16, -6.3838e-16, -6.6623e-14,\n -1.5785e-14, -4.3637e-14, -3.4799e-14, -5.7622e-16, 4.8241e-16,\n 1.0779e-15, -6.0897e-14, -7.3314e-16, -3.2239e-14, -1.5794e-15,\n -4.1756e-14, -4.7208e-14, -6.7461e-17, -1.3456e-17], device='cuda:0')",
"exp_avg_sq": "tensor([2.0562e-19, 2.8201e-19, 6.3107e-20, 1.9046e-20, 4.5414e-21, 3.2534e-20,\n 1.2953e-20, 1.4211e-22, 2.7906e-20, 5.8039e-21, 6.8368e-20, 4.1476e-22,\n 1.3718e-20, 4.7489e-20, 8.5116e-20, 2.4547e-20, 3.0007e-20, 5.8279e-20,\n 3.9757e-20, 2.1724e-20, 7.5728e-20, 2.1934e-21, 3.2269e-21, 3.4170e-22,\n 1.4394e-19, 1.0508e-20, 1.2260e-19, 1.9783e-20, 9.8738e-21, 1.1146e-20,\n 4.5444e-20, 1.4768e-19, 2.7105e-20, 1.5370e-23, 1.2346e-19, 1.1259e-21,\n 1.0577e-20, 1.2852e-19, 5.9216e-20, 1.2310e-20, 2.8260e-20, 6.7857e-21,\n 6.0280e-21, 1.1390e-21, 2.4323e-21, 5.1279e-20, 1.2241e-20, 1.0605e-20,\n 7.3814e-21, 3.5021e-20, 2.2333e-21, 8.0502e-21, 6.5770e-20, 1.9390e-20,\n 3.9761e-24, 1.3533e-23, 1.6436e-19, 3.3029e-22, 1.0702e-20, 8.5944e-20,\n 2.6331e-20, 2.9890e-20, 1.0589e-20, 1.2869e-20], device='cuda:0')"
},
"43": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 2.0737e-13, -2.7518e-14, 2.5259e-13, -4.1049e-14, 6.1301e-14,\n 2.1283e-13, -3.2529e-14, -3.0444e-14, 2.4652e-13, -4.4539e-14,\n -4.0994e-14, -3.7801e-14, 1.9460e-13, 1.7426e-13, 2.1850e-13,\n 2.7492e-13, -4.2181e-14, 2.4009e-13, -4.1060e-14, -3.5528e-14,\n -3.8487e-14, -4.6599e-14, -4.3728e-14, -4.9205e-14, 2.5097e-13,\n 1.6714e-13, 3.1196e-13, 2.6472e-13, 1.7926e-13, 1.1594e-13,\n 2.9075e-13, -3.8516e-14, 1.5785e-13, -3.0142e-14, -3.6949e-14,\n -3.5076e-14, -4.1792e-14, 3.5640e-13, -3.4921e-14, 2.4369e-13,\n -3.4947e-14, 7.6999e-14, 1.3227e-13, -3.6466e-14, -4.0753e-14,\n -4.2049e-14, -3.6424e-14, -4.0557e-14, -4.6018e-14, 3.2150e-13,\n 4.2436e-15, 2.0706e-13, 1.8282e-13, -3.3038e-14, -3.0611e-14,\n -3.8277e-14, 2.8393e-13, -4.0626e-14, 1.7008e-13, -3.5160e-14,\n 2.4546e-13, 2.5617e-13, -4.0172e-14, -3.8822e-14],\n [-1.8293e-13, 2.5355e-14, -2.3327e-13, 3.9498e-14, -5.4445e-14,\n -1.9650e-13, 2.9535e-14, 2.7974e-14, -2.2302e-13, 4.1854e-14,\n 3.8723e-14, 3.6201e-14, -1.8368e-13, -1.4548e-13, -1.8036e-13,\n -2.7094e-13, 3.9176e-14, -2.2595e-13, 3.8802e-14, 3.2984e-14,\n 3.7322e-14, 4.4035e-14, 4.1198e-14, 4.5600e-14, -2.3273e-13,\n -1.5768e-13, -3.0080e-13, -2.4802e-13, -1.6667e-13, -1.0974e-13,\n -2.7175e-13, 3.4675e-14, -1.2359e-13, 2.7922e-14, 3.6014e-14,\n 3.2174e-14, 3.9546e-14, -3.2287e-13, 3.1967e-14, -2.3172e-13,\n 3.2531e-14, -7.1415e-14, -1.3131e-13, 3.4956e-14, 3.9814e-14,\n 4.0872e-14, 3.3579e-14, 3.7665e-14, 4.2905e-14, -2.9577e-13,\n -3.9860e-15, -2.0040e-13, -1.5715e-13, 3.2093e-14, 2.9220e-14,\n 3.5454e-14, -2.6298e-13, 3.7829e-14, -1.6349e-13, 3.2544e-14,\n -2.1837e-13, -2.3283e-13, 3.8571e-14, 3.6923e-14]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1374e-17, 6.5250e-18, 4.1155e-19, 1.2343e-18, 8.0069e-19, 2.2902e-18,\n 1.5424e-19, 1.1324e-18, 7.1815e-19, 4.4341e-19, 2.8514e-19, 2.4412e-19,\n 2.4865e-18, 5.2345e-19, 3.1097e-19, 6.6615e-19, 2.5653e-18, 2.9164e-18,\n 3.4520e-19, 5.5868e-18, 1.6247e-18, 1.1241e-19, 4.0519e-19, 1.9053e-19,\n 5.2267e-18, 9.9059e-19, 8.8339e-19, 7.4934e-19, 1.4127e-19, 6.4080e-18,\n 4.4563e-19, 2.4684e-18, 1.0866e-19, 1.7517e-19, 6.8454e-19, 1.7580e-18,\n 1.4102e-18, 4.5840e-18, 1.9380e-18, 9.5626e-19, 1.7137e-18, 9.1779e-18,\n 6.9906e-19, 9.7595e-19, 9.5891e-19, 4.3539e-18, 1.2032e-18, 3.8754e-19,\n 9.7899e-20, 3.9691e-19, 1.1540e-18, 7.0335e-19, 1.5538e-18, 4.6316e-19,\n 1.6245e-19, 7.6118e-19, 5.7974e-18, 1.1380e-19, 3.4158e-18, 6.1533e-19,\n 1.7693e-18, 1.3039e-18, 1.9637e-18, 5.5064e-18],\n [1.1374e-17, 6.5250e-18, 4.1161e-19, 1.2343e-18, 8.0070e-19, 2.2903e-18,\n 1.5424e-19, 1.1324e-18, 7.1819e-19, 4.4341e-19, 2.8514e-19, 2.4412e-19,\n 2.4865e-18, 5.2350e-19, 3.1102e-19, 6.6620e-19, 2.5653e-18, 2.9164e-18,\n 3.4520e-19, 5.5867e-18, 1.6247e-18, 1.1240e-19, 4.0520e-19, 1.9053e-19,\n 5.2267e-18, 9.9060e-19, 8.8344e-19, 7.4937e-19, 1.4130e-19, 6.4080e-18,\n 4.4569e-19, 2.4684e-18, 1.0872e-19, 1.7517e-19, 6.8454e-19, 1.7580e-18,\n 1.4102e-18, 4.5840e-18, 1.9380e-18, 9.5628e-19, 1.7137e-18, 9.1779e-18,\n 6.9907e-19, 9.7595e-19, 9.5891e-19, 4.3539e-18, 1.2032e-18, 3.8754e-19,\n 9.7901e-20, 3.9696e-19, 1.1540e-18, 7.0337e-19, 1.5538e-18, 4.6316e-19,\n 1.6245e-19, 7.6118e-19, 5.7975e-18, 1.1380e-19, 3.4158e-18, 6.1533e-19,\n 1.7693e-18, 1.3039e-18, 1.9637e-18, 5.5063e-18]], device='cuda:0')"
},
"44": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 2.3357e-13, -2.1911e-13], device='cuda:0')",
"exp_avg_sq": "tensor([1.4169e-17, 1.4169e-17], device='cuda:0')"
},
"45": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[-1.2087e-19, 7.7197e-20, -5.2032e-20, ..., 4.1495e-20,\n 6.3647e-20, -7.0846e-20],\n [ 1.5451e-20, -4.4445e-20, 1.8186e-20, ..., 1.4315e-20,\n -2.6069e-20, 2.4949e-19],\n [-8.0645e-21, 3.5192e-21, 2.7792e-21, ..., 2.8414e-21,\n -5.5898e-21, -1.0339e-20],\n ...,\n [ 2.9208e-20, -3.6133e-20, 3.4774e-20, ..., -4.0234e-21,\n -6.9248e-20, 1.9957e-20],\n [-1.0749e-20, 2.5311e-20, 4.2268e-21, ..., 5.2960e-21,\n 5.0834e-21, -4.3375e-20],\n [ 1.0808e-21, 7.2374e-21, -2.6561e-21, ..., -5.0162e-21,\n -6.0132e-21, 9.9615e-21]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.9370e-21, 7.7071e-22, 3.4488e-22, ..., 6.6231e-22, 1.7060e-21,\n 3.3389e-21],\n [1.0532e-22, 2.0882e-22, 9.6357e-23, ..., 1.9747e-22, 4.1781e-23,\n 1.5726e-21],\n [3.2848e-22, 3.3543e-22, 1.3397e-22, ..., 1.5770e-23, 1.2869e-21,\n 5.6093e-22],\n ...,\n [3.0841e-25, 1.6153e-26, 2.8130e-26, ..., 7.8004e-25, 3.3623e-23,\n 7.0267e-25],\n [9.5268e-22, 7.9708e-22, 4.3194e-22, ..., 7.2207e-22, 5.2504e-21,\n 2.4130e-21],\n [4.6479e-23, 1.3022e-23, 1.4096e-23, ..., 8.0222e-28, 2.6432e-23,\n 6.3166e-23]], device='cuda:0')"
},
"46": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-5.7171e-19, 8.2334e-19, -1.0422e-19, -4.4322e-19, 4.1251e-20,\n -3.8168e-20, 6.4235e-20, 8.2527e-20, -2.0066e-20, -3.1549e-20,\n -8.8461e-19, 4.1505e-19, 7.3547e-19, -1.6913e-18, 2.5454e-19,\n -5.2266e-20, 1.1057e-19, 1.3998e-19, 5.9282e-20, -8.2944e-20,\n -1.2243e-19, -6.3238e-20, 5.8096e-20, 1.1759e-19, -2.4597e-19,\n 5.2167e-20, -2.1044e-19, 1.1052e-19, 1.1300e-18, 9.7405e-19,\n 1.2638e-20, 3.3975e-19, 1.3732e-19, -2.6801e-20, 1.9340e-19,\n 4.6370e-20, -6.4437e-19, -1.1967e-19, -8.8517e-19, 2.2928e-19,\n -1.6528e-19, -5.4158e-19, 5.8228e-20, 5.4173e-19, 4.5836e-19,\n 1.1040e-19, 4.7636e-20, -1.5901e-19, 7.7626e-20, -1.3893e-20,\n 5.4548e-19, 1.7451e-19, 1.1336e-19, -3.1335e-21, 1.1412e-20,\n 4.3558e-20, -7.1239e-19, -2.0623e-19, -4.3310e-19, -1.5654e-20,\n -5.9238e-21, 2.0880e-19, -1.1898e-19, 9.4834e-20], device='cuda:0')",
"exp_avg_sq": "tensor([1.0083e-18, 2.2015e-19, 1.4626e-19, 1.7467e-18, 5.8027e-21, 3.4274e-19,\n 2.7549e-19, 3.7146e-20, 3.6976e-18, 7.3128e-19, 2.4004e-18, 2.3442e-19,\n 2.8441e-18, 3.5880e-18, 1.3655e-19, 7.1145e-19, 3.4163e-21, 8.4493e-20,\n 1.6586e-19, 2.7760e-20, 3.2042e-18, 4.5413e-19, 1.7794e-18, 5.4780e-20,\n 4.4365e-19, 2.0068e-20, 9.4860e-19, 7.5331e-20, 3.4157e-19, 6.5391e-20,\n 1.3023e-19, 1.1291e-19, 2.4843e-19, 2.5150e-19, 9.7352e-19, 1.9245e-19,\n 5.6771e-19, 4.0706e-19, 4.2693e-18, 4.1060e-19, 1.2249e-18, 3.4289e-19,\n 1.3465e-18, 3.6064e-21, 6.8396e-20, 6.1032e-21, 2.3348e-20, 1.4317e-18,\n 4.2335e-21, 6.8881e-20, 5.0190e-19, 1.5743e-18, 1.0620e-19, 1.4959e-20,\n 5.7134e-21, 7.1836e-20, 1.6646e-18, 3.1812e-18, 7.9381e-19, 1.3293e-19,\n 1.6109e-19, 2.5081e-23, 1.3623e-18, 1.4274e-20], device='cuda:0')"
},
"47": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-1.9895e-18, -2.7196e-19, 2.1903e-20, -1.5149e-18, -2.3118e-21,\n 2.7873e-20, -1.8127e-20, -6.8792e-19, 2.1386e-20, 1.2474e-20,\n -2.0400e-18, -6.8514e-19, -3.3218e-20, -2.7466e-18, -9.1215e-19,\n 2.0895e-20, 9.0339e-21, -3.4129e-20, -7.7532e-23, -1.3035e-20,\n 5.7626e-20, 1.4327e-20, 2.8585e-21, 5.5853e-21, -1.4645e-18,\n -7.3634e-21, -1.2776e-18, -7.6446e-19, 1.8058e-19, -2.0048e-19,\n -4.0977e-21, -9.1644e-19, 7.7187e-22, 1.1100e-20, -1.0979e-18,\n -2.9401e-22, -1.8807e-18, 4.0320e-20, -2.1812e-18, -6.5297e-19,\n 7.1130e-20, -1.7477e-18, -5.5953e-19, -3.0826e-19, -6.6844e-19,\n 3.7142e-21, -1.5914e-21, 5.7690e-20, -2.6363e-21, -6.9877e-21,\n -5.1608e-19, -4.5741e-20, 3.8468e-21, -7.8082e-21, -9.4759e-21,\n -1.2740e-21, -1.7678e-18, 1.1559e-19, -1.6682e-18, -1.4703e-21,\n -4.1760e-21, 1.6274e-20, 3.1866e-20, 8.9428e-21], device='cuda:0')",
"exp_avg_sq": "tensor([3.8596e-21, 1.1236e-21, 1.2826e-21, 1.2491e-20, 6.0700e-23, 1.8907e-21,\n 2.1964e-21, 8.5643e-23, 3.1857e-20, 7.4766e-21, 1.6142e-20, 1.4458e-21,\n 2.3242e-20, 3.1202e-20, 4.2259e-22, 3.8681e-21, 9.6038e-23, 8.2944e-22,\n 2.1633e-21, 2.9961e-22, 2.5348e-20, 3.0746e-21, 1.4763e-20, 6.0588e-22,\n 2.4846e-21, 3.8189e-22, 7.0249e-21, 6.9420e-22, 1.8883e-21, 5.1035e-22,\n 1.0752e-21, 5.1303e-22, 2.2559e-21, 1.7863e-21, 9.0468e-21, 2.1432e-21,\n 3.4640e-21, 2.5587e-21, 2.4626e-20, 2.0781e-21, 9.7566e-21, 2.1111e-21,\n 8.6270e-21, 4.1407e-24, 2.4400e-22, 3.9860e-22, 2.8939e-22, 1.1729e-20,\n 1.4548e-22, 5.3107e-22, 4.7375e-21, 9.9866e-21, 1.2257e-21, 1.5822e-22,\n 1.1976e-22, 8.6528e-22, 1.3184e-20, 2.0940e-20, 4.2128e-21, 1.2759e-21,\n 1.7389e-21, 1.3921e-22, 1.2053e-20, 3.5305e-22], device='cuda:0')"
},
"48": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-1.5035e-18, -2.0631e-19, -2.2828e-20, -1.2850e-18, 1.2196e-21,\n -2.3338e-20, 1.5358e-20, -7.5357e-19, -3.1649e-20, -1.3584e-20,\n -1.5659e-18, -5.8637e-19, -5.8347e-20, -2.3790e-18, -7.6745e-19,\n -2.4848e-20, -1.5703e-20, 2.2355e-20, -2.6936e-22, 1.3032e-20,\n -6.0159e-20, -1.4166e-20, -3.4774e-21, -7.5091e-21, -1.1702e-18,\n 7.0633e-21, -1.0904e-18, -7.6155e-19, 1.5504e-19, -1.1054e-19,\n 3.7278e-21, -7.3414e-19, -1.0102e-21, -1.1643e-20, -8.1867e-19,\n 4.4790e-22, -1.5557e-18, -4.3658e-20, -1.6726e-18, -6.4882e-19,\n -7.0048e-20, -1.4770e-18, -5.7802e-19, -3.4575e-19, -5.3912e-19,\n -1.4058e-20, 1.9284e-21, -5.8341e-20, 1.0149e-20, 7.0999e-21,\n -4.3073e-19, 5.6772e-20, -5.5506e-21, 7.3554e-21, 1.4571e-20,\n 9.8928e-22, -1.5233e-18, -1.2500e-19, -1.3117e-18, 1.5997e-21,\n 4.3866e-21, -1.7115e-19, -3.1104e-20, -1.0108e-20], device='cuda:0')",
"exp_avg_sq": "tensor([8.8884e-21, 1.6504e-21, 2.1897e-21, 1.5230e-20, 1.5617e-22, 4.3933e-21,\n 3.9426e-21, 2.0412e-22, 4.1788e-20, 9.1059e-21, 2.3166e-20, 1.6476e-21,\n 2.6750e-20, 3.4998e-20, 9.3231e-22, 8.7083e-21, 1.6407e-22, 1.0850e-21,\n 2.6153e-21, 6.2748e-22, 3.6156e-20, 5.5337e-21, 2.0085e-20, 9.7798e-22,\n 3.4893e-21, 5.0012e-22, 8.0177e-21, 6.6817e-22, 2.7406e-21, 5.5517e-22,\n 1.9692e-21, 6.1715e-22, 3.4799e-21, 3.0936e-21, 8.0898e-21, 2.6506e-21,\n 4.8205e-21, 5.0857e-21, 4.0982e-20, 3.1788e-21, 1.4386e-20, 3.2222e-21,\n 1.1900e-20, 4.1524e-24, 3.9873e-22, 3.4233e-22, 5.0766e-22, 1.7322e-20,\n 2.0977e-22, 1.0801e-21, 4.0697e-21, 1.9819e-20, 1.6540e-21, 2.6228e-22,\n 1.9260e-22, 1.3344e-21, 1.5374e-20, 3.3958e-20, 6.9759e-21, 2.0004e-21,\n 2.2150e-21, 1.1801e-22, 1.5583e-20, 4.4953e-22], device='cuda:0')"
},
"49": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 9.6594e-18, 2.3385e-18, -1.3912e-18, 7.1445e-18, -1.1170e-18,\n -1.5227e-18, -6.5555e-19, 6.7826e-18, -4.7854e-19, -9.9962e-19,\n 8.5734e-18, 6.3547e-18, 1.2373e-19, 9.9417e-18, 7.9186e-18,\n -1.6567e-18, -1.6617e-18, -1.3001e-18, -1.1957e-18, -1.3521e-18,\n -1.0610e-18, -1.0440e-18, -7.4740e-19, -1.1629e-18, 1.0469e-17,\n -1.6341e-18, 7.7183e-18, 5.0075e-18, -1.2427e-18, 3.4008e-18,\n -1.5775e-18, 1.0228e-17, -1.2806e-18, -1.3975e-18, 7.4239e-18,\n -1.1623e-18, 1.4078e-17, -1.6557e-18, 7.5353e-18, 4.3943e-18,\n -1.2455e-18, 1.0884e-17, 2.0267e-18, 4.3322e-18, 6.4549e-18,\n -9.7926e-19, -1.3201e-18, -5.4742e-19, -5.6577e-19, -1.4674e-18,\n 4.3009e-18, -1.2590e-18, -1.0738e-18, -1.1733e-18, -1.3454e-18,\n -8.7532e-19, 8.9371e-18, -9.0198e-19, 1.0507e-17, -1.4188e-18,\n -1.2466e-18, -2.5441e-19, -1.0541e-18, -9.9749e-19],\n [-9.6596e-18, -2.3387e-18, 1.3911e-18, -7.1443e-18, 1.1170e-18,\n 1.5227e-18, 6.5557e-19, -6.7827e-18, 4.7852e-19, 9.9961e-19,\n -8.5733e-18, -6.3547e-18, -1.2384e-19, -9.9421e-18, -7.9187e-18,\n 1.6567e-18, 1.6618e-18, 1.3002e-18, 1.1957e-18, 1.3521e-18,\n 1.0610e-18, 1.0440e-18, 7.4749e-19, 1.1629e-18, -1.0469e-17,\n 1.6341e-18, -7.7182e-18, -5.0076e-18, 1.2421e-18, -3.4006e-18,\n 1.5775e-18, -1.0228e-17, 1.2807e-18, 1.3975e-18, -7.4239e-18,\n 1.1623e-18, -1.4078e-17, 1.6557e-18, -7.5360e-18, -4.3944e-18,\n 1.2455e-18, -1.0884e-17, -2.0264e-18, -4.3320e-18, -6.4548e-18,\n 9.7927e-19, 1.3201e-18, 5.4743e-19, 5.6579e-19, 1.4674e-18,\n -4.3003e-18, 1.2590e-18, 1.0738e-18, 1.1733e-18, 1.3454e-18,\n 8.7536e-19, -8.9368e-18, 9.0200e-19, -1.0507e-17, 1.4188e-18,\n 1.2466e-18, 2.5433e-19, 1.0541e-18, 9.9750e-19]], device='cuda:0')",
"exp_avg_sq": "tensor([[7.9368e-20, 2.6712e-19, 3.3303e-20, 3.8444e-19, 1.4124e-20, 2.7169e-20,\n 5.3113e-23, 1.6997e-20, 5.7216e-19, 5.9842e-19, 3.7033e-19, 6.0587e-19,\n 6.4582e-19, 5.1837e-19, 6.9289e-20, 1.2935e-19, 6.2161e-20, 8.6408e-22,\n 6.6079e-19, 6.3510e-20, 4.7528e-19, 1.8273e-19, 5.5586e-19, 1.9772e-19,\n 2.6536e-19, 4.8041e-19, 5.0520e-19, 7.8044e-21, 1.5689e-19, 4.9123e-22,\n 2.6490e-19, 6.5837e-19, 5.0290e-19, 5.9237e-20, 1.0673e-18, 4.3002e-19,\n 5.8420e-19, 2.1379e-20, 3.4194e-19, 1.9043e-19, 7.6443e-20, 9.1488e-20,\n 6.9215e-20, 7.5361e-19, 5.7714e-20, 1.3533e-18, 1.9612e-19, 7.1102e-20,\n 4.5048e-19, 1.0512e-19, 1.4202e-18, 7.0852e-20, 5.4128e-19, 1.3637e-20,\n 1.7726e-19, 3.8380e-19, 5.9221e-19, 1.6082e-19, 3.3012e-19, 2.1304e-19,\n 5.2369e-19, 1.8305e-18, 5.3161e-19, 4.9794e-19],\n [7.9368e-20, 2.6712e-19, 3.3303e-20, 3.8444e-19, 1.4124e-20, 2.7169e-20,\n 5.3113e-23, 1.6997e-20, 5.7216e-19, 5.9842e-19, 3.7033e-19, 6.0587e-19,\n 6.4582e-19, 5.1837e-19, 6.9289e-20, 1.2935e-19, 6.2161e-20, 8.6408e-22,\n 6.6079e-19, 6.3510e-20, 4.7528e-19, 1.8273e-19, 5.5586e-19, 1.9772e-19,\n 2.6536e-19, 4.8041e-19, 5.0520e-19, 7.8044e-21, 1.5689e-19, 4.9123e-22,\n 2.6490e-19, 6.5837e-19, 5.0290e-19, 5.9237e-20, 1.0673e-18, 4.3002e-19,\n 5.8420e-19, 2.1379e-20, 3.4194e-19, 1.9043e-19, 7.6443e-20, 9.1488e-20,\n 6.9215e-20, 7.5361e-19, 5.7714e-20, 1.3533e-18, 1.9612e-19, 7.1102e-20,\n 4.5048e-19, 1.0512e-19, 1.4202e-18, 7.0852e-20, 5.4128e-19, 1.3637e-20,\n 1.7726e-19, 3.8380e-19, 5.9221e-19, 1.6082e-19, 3.3012e-19, 2.1304e-19,\n 5.2369e-19, 1.8305e-18, 5.3161e-19, 4.9794e-19]], device='cuda:0')"
},
"50": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 6.6066e-18, -6.6064e-18], device='cuda:0')",
"exp_avg_sq": "tensor([3.3029e-18, 3.3029e-18], device='cuda:0')"
},
"51": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 4.2330e-24, -9.4750e-24, -5.6691e-24, ..., -6.4907e-23,\n -9.6682e-24, 2.7431e-22],\n [-1.1670e-23, -3.2350e-23, 1.1471e-23, ..., -2.6564e-22,\n -1.4838e-23, 2.2094e-23],\n [ 1.2046e-23, 1.7843e-23, 2.5962e-24, ..., 1.0568e-22,\n -5.7511e-24, -2.1749e-22],\n ...,\n [-4.6835e-23, 1.1472e-22, -7.7535e-23, ..., 1.6723e-22,\n 1.4611e-23, 5.0074e-22],\n [ 1.5765e-24, -7.9808e-23, 9.6745e-24, ..., -5.1780e-23,\n -1.6316e-23, -2.3468e-23],\n [-2.2568e-23, -8.9667e-24, -2.5096e-23, ..., 1.0918e-22,\n -1.9917e-23, 1.7048e-22]], device='cuda:0')",
"exp_avg_sq": "tensor([[3.0870e-25, 6.5218e-26, 6.0780e-25, ..., 4.9950e-24, 6.6457e-24,\n 1.4543e-25],\n [1.0859e-26, 2.1674e-27, 5.6365e-27, ..., 1.6046e-25, 1.2914e-25,\n 6.2211e-29],\n [9.2503e-26, 1.5266e-27, 9.1416e-27, ..., 9.4826e-26, 3.3277e-25,\n 3.2117e-29],\n ...,\n [3.8968e-25, 1.3764e-26, 5.6351e-26, ..., 9.7545e-27, 9.1043e-25,\n 2.8192e-29],\n [6.5618e-25, 1.2992e-26, 3.9267e-24, ..., 5.8111e-28, 1.1567e-24,\n 2.4599e-25],\n [2.0754e-26, 1.3114e-26, 1.3633e-27, ..., 2.5741e-26, 4.5374e-28,\n 9.6435e-27]], device='cuda:0')"
},
"52": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 2.3349e-22, -3.8238e-22, -1.1369e-22, 4.0109e-22, -4.6622e-23,\n -1.8551e-22, 1.0738e-22, -1.5686e-22, 7.8570e-23, 5.4644e-22,\n -3.1730e-22, -2.2134e-22, -4.6807e-23, -3.7996e-22, -1.6516e-22,\n -6.4459e-23, -1.5474e-21, -1.8463e-22, 4.0580e-23, -1.8213e-22,\n -5.6054e-23, 1.4862e-22, 1.2416e-21, 8.5435e-22, -1.5131e-22,\n 8.6350e-22, -1.7714e-22, 6.1780e-22, 1.0150e-22, 6.0852e-22,\n -3.9715e-22, 3.2133e-22, -2.2817e-22, 1.5701e-22, -1.7168e-22,\n -2.0150e-22, 1.0752e-21, 8.3747e-23, -3.0835e-23, -1.8266e-22,\n -5.3840e-22, -2.3664e-22, -2.6914e-21, -1.3616e-22, -1.7029e-22,\n 5.3897e-22, 1.3810e-22, -3.6296e-22, -1.8117e-22, 5.6454e-22,\n -8.6051e-23, 6.1933e-23, -2.4249e-23, -1.8660e-22, 6.9040e-22,\n -2.9205e-22, -1.5889e-22, -4.0490e-22, -7.7505e-24, 7.7475e-23,\n -1.2114e-22, 1.5511e-21, -1.3895e-22, 2.2509e-22], device='cuda:0')",
"exp_avg_sq": "tensor([1.1679e-21, 1.4888e-23, 1.7453e-23, 3.1884e-24, 1.7101e-24, 9.3710e-25,\n 2.0381e-22, 1.3401e-23, 9.0428e-23, 1.7086e-22, 1.4809e-22, 7.3250e-24,\n 1.6368e-24, 6.1266e-23, 2.3351e-22, 2.3001e-22, 1.3280e-23, 1.6798e-23,\n 1.2478e-24, 2.7181e-23, 1.9809e-22, 2.8755e-23, 8.0175e-22, 7.3910e-23,\n 4.9435e-22, 1.8618e-22, 1.0305e-23, 3.7861e-22, 1.4194e-25, 2.4935e-23,\n 1.3409e-22, 2.7718e-23, 5.8478e-24, 6.7249e-23, 1.6834e-23, 3.6103e-23,\n 4.9892e-23, 2.4278e-23, 8.5674e-24, 8.3580e-24, 4.0590e-22, 4.5939e-26,\n 2.0520e-25, 1.9031e-22, 2.3384e-25, 1.8278e-22, 6.6599e-23, 3.3260e-23,\n 4.5145e-23, 8.4398e-23, 2.1113e-24, 8.1986e-24, 7.8317e-23, 6.7847e-22,\n 3.5470e-23, 7.9924e-26, 6.4382e-24, 1.5502e-22, 2.3393e-23, 2.5550e-22,\n 5.9485e-24, 5.7070e-23, 3.6253e-23, 1.9296e-24], device='cuda:0')"
},
"53": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 3.8531e-23, 2.2704e-24, 8.3005e-23, 1.9403e-22, 1.0721e-22,\n 2.2858e-23, -2.0057e-22, -3.4430e-23, 2.6040e-22, -3.9105e-25,\n 5.1918e-23, 3.4107e-23, -7.6508e-24, 6.9471e-23, 1.6502e-23,\n -2.4007e-23, -3.5705e-22, -6.0801e-25, 7.9301e-23, 3.7491e-22,\n 3.5449e-23, 3.2165e-22, 4.5239e-22, 5.0696e-22, -4.7206e-23,\n 2.4785e-22, 3.9643e-23, 4.1044e-23, -2.2972e-24, 2.1558e-22,\n -7.5233e-24, 8.1711e-24, 4.7348e-25, 6.3228e-26, 6.7896e-23,\n 3.2257e-24, 1.1851e-22, 3.3181e-23, 1.1917e-23, 9.6954e-24,\n -3.9635e-23, -9.1981e-23, -6.6746e-22, 4.0025e-23, -1.2310e-22,\n 4.8319e-22, 2.9703e-24, -1.9264e-22, -6.1094e-25, 6.4630e-22,\n 3.3346e-23, 2.9512e-22, -2.2287e-25, 2.8547e-23, 3.9779e-22,\n -6.8324e-23, 8.3617e-23, -6.7795e-24, 1.0916e-22, -1.1589e-23,\n 2.5539e-23, 2.1432e-22, -5.0908e-23, -3.1239e-23], device='cuda:0')",
"exp_avg_sq": "tensor([1.7193e-21, 2.1745e-23, 4.3035e-24, 5.6374e-24, 6.7722e-24, 8.0280e-26,\n 1.0402e-22, 2.3184e-24, 6.7725e-23, 2.7304e-22, 2.9270e-22, 1.2234e-23,\n 4.8697e-25, 2.6223e-24, 1.1169e-22, 3.0386e-23, 5.4580e-24, 2.8751e-23,\n 9.3277e-24, 1.6937e-23, 3.7804e-23, 1.8290e-23, 3.5056e-22, 1.4313e-23,\n 6.8086e-22, 7.7726e-23, 6.6954e-24, 3.1498e-22, 3.0029e-26, 2.7999e-23,\n 1.1655e-22, 2.8336e-23, 3.6876e-24, 1.0142e-22, 3.5894e-24, 1.9637e-23,\n 3.3339e-23, 3.8182e-23, 1.5726e-24, 5.6430e-24, 2.9217e-22, 3.1638e-27,\n 1.2751e-23, 2.3822e-22, 2.6556e-25, 1.3157e-22, 3.3235e-23, 2.9323e-23,\n 3.9479e-24, 8.8307e-23, 2.9059e-24, 1.7468e-23, 8.6010e-23, 2.5078e-22,\n 5.8494e-24, 1.3648e-25, 1.0914e-23, 1.4884e-22, 2.1138e-23, 2.0857e-22,\n 1.8093e-24, 1.2414e-22, 1.6543e-23, 8.5317e-28], device='cuda:0')"
},
"54": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 5.6522e-23, 2.4258e-24, 8.1975e-23, 2.4304e-22, 1.2797e-22,\n 7.6767e-23, 7.8604e-23, -1.3003e-23, 1.9717e-22, 1.7349e-22,\n 1.2076e-22, 8.9562e-23, -5.7236e-23, 7.3062e-23, 9.1275e-24,\n 9.9860e-23, -1.7599e-22, 1.5960e-23, 1.2140e-22, 1.5020e-22,\n 2.4497e-23, 1.7164e-22, 4.4488e-22, 3.5936e-22, 1.9981e-23,\n 3.1083e-22, 7.2108e-23, 2.7861e-22, -2.8219e-24, 2.7677e-22,\n -3.2521e-23, 1.3607e-22, 2.8635e-25, -1.0047e-25, 5.2974e-23,\n -1.4159e-23, 4.0172e-22, 1.6000e-22, 8.2243e-23, -5.2683e-25,\n -9.6708e-23, 3.3743e-23, -4.0700e-22, 1.2164e-23, 1.4521e-23,\n 2.8862e-22, -1.7617e-24, -1.8774e-23, -2.2304e-24, 3.5525e-22,\n 1.4983e-22, 2.4616e-22, 9.7118e-25, -1.8761e-23, 3.6386e-22,\n -1.6801e-23, 1.1896e-22, -8.4708e-24, 8.5992e-23, 6.9567e-24,\n 3.1802e-23, 4.8725e-22, 5.2580e-23, 3.6230e-23], device='cuda:0')",
"exp_avg_sq": "tensor([9.5108e-22, 1.3844e-23, 1.0164e-23, 4.4095e-24, 5.4318e-24, 2.7629e-25,\n 1.4041e-22, 8.4084e-24, 1.0019e-22, 1.9154e-22, 1.3929e-22, 7.9912e-24,\n 1.1865e-24, 5.6202e-23, 2.0098e-22, 2.7317e-22, 9.3848e-24, 1.3562e-23,\n 5.4319e-24, 2.4641e-23, 1.2897e-22, 3.8468e-23, 7.5346e-22, 6.9343e-23,\n 5.6151e-22, 1.7651e-22, 9.9730e-24, 2.7150e-22, 7.4473e-25, 1.2471e-23,\n 1.0503e-22, 3.6993e-23, 6.7580e-24, 6.4292e-23, 1.4337e-23, 2.7246e-23,\n 4.8643e-23, 3.8026e-23, 9.9661e-24, 4.9712e-24, 3.0962e-22, 8.1152e-27,\n 2.6677e-24, 1.8668e-22, 3.3742e-25, 1.9156e-22, 3.2717e-23, 3.2038e-23,\n 3.2197e-23, 9.4554e-23, 4.9879e-25, 1.2429e-23, 5.9143e-23, 5.7345e-22,\n 3.9438e-23, 9.9058e-26, 1.2624e-23, 1.1228e-22, 2.5887e-23, 2.8330e-22,\n 3.3523e-24, 6.4273e-23, 6.4358e-23, 8.2451e-27], device='cuda:0')"
},
"55": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 4.5948e-22, -4.3097e-22, -8.6945e-22, -2.3674e-21, -1.4554e-21,\n -4.7385e-23, 9.8264e-22, 2.0966e-21, -1.7849e-21, -3.9087e-23,\n -2.1868e-22, -5.9216e-22, 6.8707e-22, -2.4324e-21, 5.6737e-22,\n 5.5751e-24, -2.1729e-21, 3.0205e-22, -3.7623e-22, -3.2378e-21,\n 3.5054e-22, -2.4931e-21, -1.7865e-21, -2.6391e-21, 7.7660e-23,\n -1.3169e-21, -1.1051e-21, -2.5033e-22, 2.7076e-22, -9.8720e-22,\n -1.3038e-22, 2.4167e-22, 1.1188e-21, 5.2127e-22, -3.8647e-22,\n 4.2284e-22, -6.1849e-22, -9.5116e-23, -1.4431e-22, 1.1112e-22,\n -3.8176e-22, 1.2596e-21, -5.2972e-21, 1.1307e-21, 1.8225e-21,\n -3.1563e-21, 1.7552e-22, 1.4561e-21, 4.0846e-22, -5.0789e-21,\n 1.5827e-22, -2.9503e-21, 4.1472e-22, 1.4503e-22, -2.7858e-21,\n 1.0271e-21, -6.6672e-22, 2.7472e-22, -7.5855e-22, 1.4807e-22,\n -2.7558e-22, -1.2558e-21, 5.6868e-22, 1.8983e-22],\n [-4.5948e-22, 4.3096e-22, 8.6944e-22, 2.3674e-21, 1.4556e-21,\n 4.7354e-23, -9.8251e-22, -2.0966e-21, 1.7850e-21, 3.9265e-23,\n 2.1869e-22, 5.9221e-22, -6.8709e-22, 2.4324e-21, -5.6737e-22,\n -5.4280e-24, 2.1729e-21, -3.0205e-22, 3.7627e-22, 3.2378e-21,\n -3.5054e-22, 2.4931e-21, 1.7864e-21, 2.6391e-21, -7.7653e-23,\n 1.3170e-21, 1.1051e-21, 2.5033e-22, -2.7066e-22, 9.8718e-22,\n 1.3036e-22, -2.4154e-22, -1.1189e-21, -5.2126e-22, 3.8650e-22,\n -4.2284e-22, 6.1858e-22, 9.4956e-23, 1.4424e-22, -1.1113e-22,\n 3.8176e-22, -1.2596e-21, 5.2972e-21, -1.1307e-21, -1.8226e-21,\n 3.1564e-21, -1.7553e-22, -1.4561e-21, -4.0846e-22, 5.0791e-21,\n -1.5826e-22, 2.9503e-21, -4.1472e-22, -1.4503e-22, 2.7859e-21,\n -1.0271e-21, 6.6661e-22, -2.7473e-22, 7.5871e-22, -1.4808e-22,\n 2.7560e-22, 1.2558e-21, -5.6860e-22, -1.8984e-22]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.2194e-22, 2.0019e-23, 9.9247e-24, 1.9532e-22, 3.1080e-22, 5.7745e-25,\n 8.6185e-25, 2.2734e-24, 6.2915e-23, 2.5428e-22, 2.0980e-22, 2.5588e-23,\n 7.4124e-24, 1.3087e-26, 7.8084e-23, 2.9862e-26, 1.8999e-23, 5.4108e-22,\n 2.7754e-23, 2.5069e-23, 1.1481e-24, 2.8080e-23, 6.3002e-23, 1.8185e-24,\n 9.0524e-23, 1.3687e-23, 3.9072e-23, 3.6606e-23, 4.9982e-24, 1.8775e-25,\n 4.6093e-23, 5.9311e-24, 1.1142e-24, 2.1312e-22, 6.3684e-25, 8.4316e-23,\n 1.0287e-24, 1.3561e-22, 2.9913e-24, 3.2233e-23, 8.4447e-23, 2.0786e-23,\n 5.4175e-23, 2.0708e-22, 9.5808e-23, 1.0969e-22, 8.7395e-23, 1.9429e-23,\n 5.3301e-24, 1.2514e-22, 1.0268e-23, 5.5957e-23, 1.6699e-22, 4.6768e-23,\n 4.1652e-25, 7.6252e-23, 2.4163e-23, 5.8992e-23, 3.4509e-23, 1.3997e-22,\n 5.2249e-23, 3.8398e-23, 6.4453e-27, 4.5721e-24],\n [5.2194e-22, 2.0019e-23, 9.9247e-24, 1.9532e-22, 3.1080e-22, 5.7745e-25,\n 8.6185e-25, 2.2734e-24, 6.2915e-23, 2.5428e-22, 2.0980e-22, 2.5588e-23,\n 7.4124e-24, 1.3087e-26, 7.8084e-23, 2.9862e-26, 1.8999e-23, 5.4108e-22,\n 2.7754e-23, 2.5069e-23, 1.1481e-24, 2.8080e-23, 6.3002e-23, 1.8185e-24,\n 9.0524e-23, 1.3687e-23, 3.9072e-23, 3.6606e-23, 4.9982e-24, 1.8775e-25,\n 4.6093e-23, 5.9311e-24, 1.1142e-24, 2.1312e-22, 6.3684e-25, 8.4316e-23,\n 1.0287e-24, 1.3561e-22, 2.9913e-24, 3.2233e-23, 8.4447e-23, 2.0786e-23,\n 5.4175e-23, 2.0708e-22, 9.5808e-23, 1.0969e-22, 8.7395e-23, 1.9429e-23,\n 5.3301e-24, 1.2514e-22, 1.0268e-23, 5.5957e-23, 1.6699e-22, 4.6768e-23,\n 4.1652e-25, 7.6252e-23, 2.4163e-23, 5.8992e-23, 3.4509e-23, 1.3997e-22,\n 5.2249e-23, 3.8398e-23, 6.4453e-27, 4.5721e-24]], device='cuda:0')"
},
"56": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-1.8349e-21, 1.8350e-21], device='cuda:0')",
"exp_avg_sq": "tensor([4.7846e-22, 4.7846e-22], device='cuda:0')"
},
"57": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[-1.7952e-18, 8.8004e-15, -3.9723e-14, -1.9221e-07, 2.4790e-17,\n 3.3688e-14, -8.2073e-21, 1.6665e-19],\n [ 9.1558e-21, -4.5102e-17, 2.0273e-16, 9.7875e-10, -1.2699e-19,\n -1.7304e-16, 4.1782e-23, -9.0046e-22],\n [ 5.5169e-23, -4.2552e-19, 1.3767e-18, 5.7411e-12, -1.0639e-21,\n -1.6074e-18, 2.6091e-25, -2.1605e-23],\n [ 1.3617e-22, -7.0531e-19, 3.0461e-18, 1.4573e-11, -1.9438e-21,\n -2.6701e-18, 6.2366e-25, -1.6027e-23],\n [-1.7522e-18, 8.5745e-15, -3.8744e-14, -1.8756e-07, 2.4179e-17,\n 3.2861e-14, -8.0018e-21, 1.6350e-19],\n [ 1.4666e-22, -6.5625e-19, 3.1806e-18, 1.5781e-11, -1.9035e-21,\n -2.5171e-18, 6.6637e-25, -6.8633e-24],\n [-1.4096e-18, 6.8976e-15, -3.1159e-14, -1.5083e-07, 1.9449e-17,\n 2.6426e-14, -6.4380e-21, 1.3165e-19],\n [ 5.0365e-23, -4.8739e-19, 1.3567e-18, 5.1419e-12, -1.1455e-21,\n -1.7971e-18, 2.4653e-25, -2.8931e-23]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.4055e-16, 1.0288e-16, 1.7944e-16, 9.1475e-12, 7.7715e-16, 2.5291e-16,\n 3.9550e-17, 1.7848e-16],\n [2.5429e-17, 1.3505e-17, 2.5692e-17, 5.1260e-14, 1.0712e-16, 2.8743e-17,\n 6.3374e-18, 2.9119e-17],\n [1.1387e-16, 4.7348e-17, 8.4747e-17, 1.4484e-13, 4.1767e-16, 1.1317e-16,\n 2.3533e-17, 1.0744e-16],\n [9.8089e-18, 4.3876e-18, 6.7201e-18, 3.7917e-14, 3.2694e-17, 9.6858e-18,\n 1.4927e-18, 7.0079e-18],\n [1.2844e-16, 6.3665e-17, 1.1460e-16, 6.4956e-12, 5.2064e-16, 1.4024e-16,\n 3.0753e-17, 1.3849e-16],\n [4.9064e-17, 2.1508e-17, 3.6055e-17, 4.2379e-14, 1.4565e-16, 4.4274e-17,\n 9.1389e-18, 3.8621e-17],\n [3.5644e-21, 5.3653e-21, 1.3403e-20, 3.9272e-12, 2.1139e-20, 5.4293e-20,\n 2.6582e-21, 4.4839e-21],\n [1.5370e-17, 7.7873e-18, 1.3716e-17, 8.9685e-14, 7.3484e-17, 1.9562e-17,\n 4.0582e-18, 1.8671e-17]], device='cuda:0')"
},
"58": {
"step": "tensor(20024.)",
"exp_avg": "tensor([-1.9218e-07, 9.7873e-10, 5.7413e-12, 1.4573e-11, -1.8755e-07,\n 1.5781e-11, -1.5083e-07, 5.1418e-12], device='cuda:0')",
"exp_avg_sq": "tensor([9.1644e-12, 5.3280e-14, 1.5212e-13, 3.8476e-14, 6.5055e-12, 4.5413e-14,\n 3.9272e-12, 9.0969e-14], device='cuda:0')"
},
"59": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 1.2051e-07, -1.1498e-08, -1.1515e-08, -1.1515e-08, 1.3077e-07,\n -1.1515e-08, 1.4639e-07, -1.1515e-08],\n [-1.1986e-07, 1.1436e-08, 1.1453e-08, 1.1453e-08, -1.3006e-07,\n 1.1453e-08, -1.4560e-07, 1.1453e-08],\n [-9.0193e-10, 8.6059e-11, 8.6184e-11, 8.6184e-11, -9.7870e-10,\n 8.6184e-11, -1.0956e-09, 8.6183e-11],\n [-2.9146e-10, 2.7810e-11, 2.7850e-11, 2.7850e-11, -3.1627e-10,\n 2.7850e-11, -3.5403e-10, 2.7850e-11]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.9735e-12, 1.3900e-13, 9.4071e-13, 7.6858e-13, 2.1551e-12, 1.2592e-12,\n 4.4293e-12, 8.4299e-13],\n [4.2537e-13, 1.3680e-14, 5.1495e-14, 4.1965e-14, 4.8792e-13, 8.0313e-14,\n 6.9383e-13, 4.4148e-14],\n [3.8283e-13, 2.4961e-14, 1.7673e-13, 1.4651e-13, 4.1319e-13, 2.2204e-13,\n 8.7801e-13, 1.6143e-13],\n [3.3884e-13, 2.2065e-14, 1.4956e-13, 1.2459e-13, 3.6578e-13, 1.8726e-13,\n 7.6874e-13, 1.3683e-13]], device='cuda:0')"
},
"60": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 6.7765e-08, -6.7400e-08, -5.0718e-10, -1.6390e-10], device='cuda:0')",
"exp_avg_sq": "tensor([4.6314e-11, 3.1856e-12, 8.0081e-12, 6.7517e-12], device='cuda:0')"
},
"61": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 3.2808e-15, 3.9050e-11, 1.4238e-15, ..., 1.3233e-14,\n -5.9125e-15, 6.1298e-13],\n [-8.1378e-16, 2.8944e-11, 4.0552e-16, ..., 4.1906e-15,\n -4.5414e-15, 4.7543e-13],\n [-4.0119e-16, -2.3920e-12, 1.0523e-16, ..., 1.4617e-15,\n -1.6984e-16, -5.3436e-14],\n ...,\n [-1.1327e-17, -2.3169e-12, 1.2466e-16, ..., 1.1819e-15,\n -1.9268e-16, -3.2656e-14],\n [ 2.4445e-15, 2.6836e-11, 1.1302e-15, ..., 1.0999e-14,\n -3.9139e-15, 4.2138e-13],\n [-4.5918e-17, -2.2686e-12, 7.1813e-17, ..., 5.8811e-16,\n -2.6917e-16, -2.8549e-14]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.3302e-17, 6.9200e-18, 8.4830e-18, ..., 4.7486e-18, 1.1956e-17,\n 1.4365e-17],\n [4.7211e-18, 2.8568e-18, 3.7005e-18, ..., 1.8264e-18, 4.2017e-18,\n 5.5563e-18],\n [1.6704e-18, 1.1435e-18, 1.4349e-18, ..., 5.6021e-19, 2.2029e-18,\n 2.2678e-18],\n ...,\n [4.8430e-19, 3.4330e-19, 3.1214e-19, ..., 1.7595e-19, 6.8210e-19,\n 7.4448e-19],\n [7.6141e-19, 4.5294e-19, 6.6218e-19, ..., 3.5486e-19, 5.3198e-19,\n 9.0104e-19],\n [1.4221e-18, 9.9551e-19, 1.0036e-18, ..., 3.9503e-19, 1.6128e-18,\n 1.4019e-18]], device='cuda:0')"
},
"62": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 8.5001e-11, 6.3782e-11, -4.9398e-12, -1.8730e-12, -3.8365e-12,\n 7.8217e-11, -2.9908e-12, -5.7523e-12, 5.4127e-11, 5.5474e-11,\n 6.1257e-11, 5.0154e-11, 5.2447e-11, 4.7092e-11, -5.7779e-13,\n 9.4609e-11, 3.9550e-11, -3.4823e-12, 7.3301e-11, 3.7824e-11,\n 5.0355e-11, 8.1024e-11, 6.4799e-11, -2.6720e-12, -5.9669e-12,\n -2.2712e-12, 9.1437e-11, -3.2942e-12, -6.6223e-12, 4.3245e-11,\n -3.0242e-12, 7.2622e-11, 5.5916e-11, 6.4619e-11, 1.7686e-11,\n -7.7613e-13, -3.7454e-12, 7.8798e-11, -2.1607e-12, -3.6132e-12,\n -2.2647e-12, -1.9337e-12, -4.3421e-12, -3.4553e-12, -3.5468e-12,\n -2.9815e-12, -2.4920e-12, 4.1324e-11, 4.7110e-11, -2.4449e-12,\n 8.5202e-11, -2.8066e-12, -1.6942e-12, 5.6289e-11, -2.2610e-12,\n -6.8233e-12, 4.7783e-11, -1.3323e-12, 3.7920e-11, 6.0879e-11,\n -1.4832e-12, 5.1430e-11, -3.2353e-12, 6.5411e-11, 8.8638e-11,\n -2.8046e-12, -2.1022e-12, -4.2108e-12, 7.5330e-11, 5.8693e-11,\n 6.7329e-11, 7.2586e-11, 5.1870e-11, -3.0694e-12, -1.6863e-12,\n 5.9468e-11, -3.5975e-12, 5.5854e-11, 4.2308e-11, 4.2327e-11,\n 7.3231e-11, -6.6199e-13, -3.6983e-12, 7.9090e-11, -1.6957e-12,\n 8.9546e-11, 4.7229e-11, -5.2355e-12, 7.3387e-11, -2.2543e-12,\n -4.4841e-12, -3.9513e-12, 7.2991e-11, 8.5001e-11, -4.7344e-12,\n 6.6901e-11, 4.9212e-11, 6.7188e-11, -2.8954e-12, 5.1593e-11,\n -2.5283e-12, -3.2527e-12, 7.2360e-11, -2.4799e-12, 5.3567e-11,\n -1.7627e-12, 5.5150e-11, 5.1260e-11, -2.7720e-12, -2.0517e-12,\n -2.3932e-12, 4.7827e-11, -3.5582e-12, 5.7026e-11, 6.2555e-11,\n 5.3050e-11, -4.4584e-12, -4.7935e-12, -2.3614e-12, -5.1369e-12,\n 4.6255e-11, -2.0599e-12, -3.7854e-12, -3.5129e-12, 3.1821e-11,\n 6.3096e-11, -4.4063e-12, -1.2622e-12, -9.3045e-13, -4.2599e-12,\n 6.4377e-12, -5.3996e-12, 5.3225e-11, -3.1092e-12, 3.3956e-11,\n 8.4286e-11, 5.6659e-11, 6.0293e-11, 6.1464e-11, -2.7821e-12,\n -7.8213e-12, -1.6463e-12, -3.7609e-12, 6.1569e-11, 6.1416e-11,\n -2.6959e-12, 4.7865e-11, 5.5767e-11, 5.7000e-11, -2.1128e-12,\n 6.3492e-11, 5.0212e-11, 8.4368e-11, -2.2252e-12, 7.8277e-11,\n -1.9803e-12, -5.7849e-12, -5.3406e-12, 5.9105e-11, 4.0791e-11,\n 7.2859e-11, -2.2881e-12, 5.0438e-11, 3.9873e-11, 3.3282e-11,\n 5.1530e-11, 5.9180e-11, 7.8146e-11, 4.5404e-11, 2.2587e-12,\n -1.8463e-12, -1.7639e-12, 4.0614e-11, -1.7805e-12, -2.7190e-12,\n -3.1030e-12, 6.7951e-11, 4.9580e-11, -3.1618e-12, 5.9040e-11,\n 5.9331e-11, -6.4630e-12, -7.4889e-12, 6.5539e-11, 6.6302e-11,\n 7.2566e-11, 6.3582e-11, 5.0280e-11, -1.2011e-12, -4.5160e-12,\n 5.8402e-11, -4.4801e-12], device='cuda:0')",
"exp_avg_sq": "tensor([8.6782e-15, 3.3544e-15, 1.3908e-15, 1.2954e-16, 2.2706e-15, 6.2250e-15,\n 1.3362e-15, 1.6341e-15, 8.0888e-16, 4.9014e-15, 1.2153e-15, 1.8648e-16,\n 4.6879e-15, 3.5090e-15, 4.9416e-17, 7.6272e-15, 4.4647e-16, 1.1203e-15,\n 3.4811e-15, 5.5480e-16, 1.0416e-15, 4.3531e-15, 7.0014e-15, 5.9740e-17,\n 5.3236e-16, 7.9831e-17, 1.3101e-14, 6.1172e-17, 9.2190e-16, 4.7370e-16,\n 7.3693e-16, 2.1293e-15, 5.3864e-16, 1.8977e-15, 1.0165e-17, 5.5520e-17,\n 3.9255e-16, 1.1220e-14, 4.3224e-17, 2.3241e-15, 1.5802e-16, 1.4677e-16,\n 4.5546e-17, 3.4975e-16, 4.2926e-17, 4.0385e-17, 1.6214e-16, 2.7587e-15,\n 7.8561e-16, 3.4029e-16, 7.7462e-15, 2.3573e-17, 3.6476e-17, 3.2921e-15,\n 1.6913e-15, 1.8830e-15, 8.5058e-16, 1.2400e-16, 3.6769e-16, 6.8688e-15,\n 2.2737e-16, 2.3370e-15, 2.0007e-16, 1.0626e-15, 1.3203e-14, 3.3204e-16,\n 1.1825e-16, 2.2061e-16, 9.6982e-15, 4.4848e-16, 2.7460e-15, 3.9821e-15,\n 2.7316e-15, 3.0207e-16, 1.8495e-16, 7.0377e-16, 1.8445e-16, 1.1979e-16,\n 2.0130e-15, 2.2129e-15, 2.2181e-15, 5.1560e-17, 1.8761e-15, 6.7308e-15,\n 3.3554e-17, 1.6236e-14, 7.2051e-16, 6.3309e-16, 5.0102e-15, 8.2179e-17,\n 2.2601e-16, 2.4878e-16, 2.9950e-15, 6.2712e-15, 8.0838e-16, 5.5012e-15,\n 3.7699e-15, 6.0435e-15, 7.1934e-17, 2.3902e-16, 3.5695e-16, 4.4844e-16,\n 5.1623e-15, 8.9035e-17, 2.8065e-15, 3.2825e-16, 2.3883e-15, 4.2138e-16,\n 4.5047e-17, 9.8729e-17, 4.7919e-17, 8.5872e-16, 1.9823e-15, 3.6095e-15,\n 1.3599e-16, 2.0308e-15, 2.1831e-15, 1.3146e-15, 3.2388e-16, 6.6909e-16,\n 1.7595e-15, 1.0820e-16, 2.0085e-16, 1.0757e-16, 6.7534e-16, 2.5949e-16,\n 2.5863e-16, 8.2464e-17, 3.3102e-16, 4.8332e-16, 9.4772e-18, 4.1584e-16,\n 3.4566e-15, 2.4404e-16, 3.9169e-16, 3.7907e-15, 6.3115e-17, 1.8376e-16,\n 9.3339e-16, 7.7212e-18, 1.1466e-17, 2.4480e-16, 2.6015e-16, 9.1460e-15,\n 1.4739e-15, 2.1002e-16, 8.4711e-16, 1.8832e-15, 2.8208e-15, 3.3331e-17,\n 1.5891e-15, 4.9021e-15, 6.5492e-15, 4.8711e-17, 7.4792e-15, 8.0933e-17,\n 8.3506e-16, 2.6232e-15, 3.8787e-15, 1.9456e-17, 5.7133e-15, 5.7151e-16,\n 8.8387e-16, 6.3143e-16, 2.7682e-16, 4.2513e-16, 8.4343e-16, 8.5154e-15,\n 7.8627e-16, 8.6261e-18, 2.1439e-17, 2.2220e-17, 3.3641e-17, 3.2230e-16,\n 4.6698e-16, 2.0072e-16, 1.4259e-15, 7.0963e-16, 8.7702e-16, 7.4903e-15,\n 3.2162e-15, 1.7420e-15, 1.2831e-17, 1.2394e-15, 7.8871e-16, 4.4909e-15,\n 4.1187e-15, 1.2021e-15, 2.1184e-16, 3.8576e-16, 5.6569e-16, 1.0635e-15],\n device='cuda:0')"
},
"63": {
"step": "tensor(20024.)",
"exp_avg": "tensor([[ 2.1148e-10, 2.2317e-10, -6.1971e-11, -6.2647e-11, -6.2781e-11,\n 2.0487e-10, -6.2981e-11, -6.1515e-11, 1.9760e-10, 2.0919e-10,\n 2.0279e-10, 2.2690e-10, 2.0933e-10, 2.0274e-10, -6.3646e-11,\n 2.1576e-10, 2.7886e-10, -6.2225e-11, 1.9877e-10, 2.4138e-10,\n 2.4965e-10, 2.3491e-10, 2.0654e-10, -6.2592e-11, -6.0137e-11,\n -6.2645e-11, 2.0601e-10, -6.1931e-11, -6.1567e-11, 2.4337e-10,\n -6.3063e-11, 2.3153e-10, 2.4176e-10, 1.8403e-10, 1.0128e-10,\n -6.3627e-11, -6.2032e-11, 2.1549e-10, -6.2547e-11, -6.2447e-11,\n -6.2871e-11, -6.2941e-11, -6.1242e-11, -6.2714e-11, -6.2058e-11,\n -6.2435e-11, -6.3038e-11, 2.0675e-10, 2.3316e-10, -6.2117e-11,\n 2.3944e-10, -6.2652e-11, -6.3297e-11, 2.0833e-10, -6.3017e-11,\n -6.0276e-11, 2.1425e-10, -6.3135e-11, 2.2239e-10, 2.1383e-10,\n -6.3167e-11, 2.2300e-10, -6.0152e-11, 2.1390e-10, 2.1146e-10,\n -6.2748e-11, -6.2790e-11, -6.1420e-11, 2.0911e-10, 2.5411e-10,\n 2.0312e-10, 2.1856e-10, 2.3412e-10, -6.2201e-11, -6.3041e-11,\n 2.1185e-10, -6.2077e-11, 2.3907e-10, 2.1468e-10, 2.3689e-10,\n 2.4059e-10, -6.3593e-11, -6.2993e-11, 2.2644e-10, -6.3175e-11,\n 2.0498e-10, 2.4508e-10, -6.1504e-11, 2.2341e-10, -6.2555e-11,\n -6.0583e-11, -6.1783e-11, 2.0331e-10, 2.3366e-10, -6.1439e-11,\n 2.0958e-10, 2.2297e-10, 2.1690e-10, -6.2807e-11, 2.5537e-10,\n -6.2999e-11, -6.2142e-11, 2.2813e-10, -6.2193e-11, 2.2709e-10,\n -6.3284e-11, 2.1895e-10, 2.2001e-10, -5.9064e-11, -6.3028e-11,\n -6.2641e-11, 2.2235e-10, -6.3173e-11, 1.9954e-10, 2.4419e-10,\n 2.1161e-10, -6.2476e-11, -6.2258e-11, -6.2125e-11, -6.1369e-11,\n 2.3685e-10, -6.1221e-11, -6.1419e-11, -6.0018e-11, 2.2581e-10,\n 2.3950e-10, -6.1973e-11, -6.3518e-11, -6.3650e-11, -6.2527e-11,\n 4.7670e-11, -6.1777e-11, 2.2927e-10, -6.2671e-11, 2.4658e-10,\n 2.0507e-10, 1.9828e-10, 2.4488e-10, 2.2045e-10, -6.1423e-11,\n -5.3801e-11, -6.2688e-11, -6.2419e-11, 2.2474e-10, 2.3959e-10,\n -6.2937e-11, 2.5695e-10, 2.2839e-10, 2.2562e-10, -6.2511e-11,\n 2.1523e-10, 1.8740e-10, 2.1755e-10, -6.1419e-11, 2.2727e-10,\n -6.2489e-11, -6.1278e-11, -6.1762e-11, 2.1136e-10, 2.0676e-10,\n 2.3674e-10, -6.2680e-11, 2.4638e-10, 2.1820e-10, 2.4480e-10,\n 2.3186e-10, 2.2464e-10, 2.0053e-10, 2.2792e-10, 7.2581e-11,\n -6.3343e-11, -6.1195e-11, 2.9204e-10, -6.3262e-11, -6.2756e-11,\n -6.2466e-11, 2.3079e-10, 2.2809e-10, -6.2653e-11, 1.8876e-10,\n 2.0792e-10, -5.9944e-11, -5.6167e-11, 2.1818e-10, 2.0633e-10,\n 2.0463e-10, 2.0053e-10, 2.2757e-10, -6.3575e-11, -6.0592e-11,\n 2.5406e-10, -6.2155e-11],\n [-3.2511e-11, -3.3555e-11, 8.7781e-12, 8.8326e-12, 8.8195e-12,\n -3.0962e-11, 8.8734e-12, 8.7489e-12, -3.1735e-11, -3.2294e-11,\n -3.1370e-11, -3.4624e-11, -3.1792e-11, -3.2000e-11, 8.9459e-12,\n -3.2972e-11, -3.9335e-11, 8.7912e-12, -3.1258e-11, -3.5957e-11,\n -3.6813e-11, -3.4783e-11, -3.1079e-11, 8.8203e-12, 8.5564e-12,\n 8.8232e-12, -3.1008e-11, 8.7747e-12, 8.7199e-12, -3.5968e-11,\n 8.9149e-12, -3.4525e-11, -3.5451e-11, -2.9364e-11, -1.4917e-11,\n 8.9447e-12, 8.8019e-12, -3.2604e-11, 8.8505e-12, 8.8439e-12,\n 8.8752e-12, 8.8831e-12, 8.7361e-12, 8.8273e-12, 8.8076e-12,\n 8.8265e-12, 8.8866e-12, -3.1861e-11, -3.4637e-11, 8.7909e-12,\n -3.4853e-11, 8.8652e-12, 8.8968e-12, -3.0967e-11, 8.8994e-12,\n 8.5977e-12, -3.3802e-11, 8.9255e-12, -3.3851e-11, -3.1841e-11,\n 8.8920e-12, -3.4016e-11, 8.6069e-12, -3.2641e-11, -3.1308e-11,\n 8.8714e-12, 8.8762e-12, 8.7208e-12, -3.0857e-11, -3.6758e-11,\n -3.0915e-11, -3.3807e-11, -3.3664e-11, 8.7965e-12, 8.8713e-12,\n -3.2608e-11, 8.7477e-12, -3.5397e-11, -3.2297e-11, -3.5021e-11,\n -3.5803e-11, 8.9331e-12, 8.9059e-12, -3.3038e-11, 8.8917e-12,\n -3.1672e-11, -3.6672e-11, 8.7590e-12, -3.4379e-11, 8.8177e-12,\n 8.6502e-12, 8.7521e-12, -3.0960e-11, -3.4096e-11, 8.7369e-12,\n -3.1616e-11, -3.4150e-11, -3.2204e-11, 8.8379e-12, -3.8228e-11,\n 8.8547e-12, 8.7881e-12, -3.3610e-11, 8.8240e-12, -3.4329e-11,\n 8.8773e-12, -3.3217e-11, -3.3580e-11, 8.4441e-12, 8.8807e-12,\n 8.8496e-12, -3.2977e-11, 8.9080e-12, -3.1145e-11, -3.7110e-11,\n -3.2342e-11, 8.8064e-12, 8.8353e-12, 8.8177e-12, 8.7646e-12,\n -3.5372e-11, 8.6973e-12, 8.7345e-12, 8.5829e-12, -3.4304e-11,\n -3.4848e-11, 8.7985e-12, 8.9658e-12, 8.9704e-12, 8.8544e-12,\n -6.2747e-12, 8.7843e-12, -3.3370e-11, 8.8197e-12, -3.6701e-11,\n -3.1255e-11, -3.1003e-11, -3.6702e-11, -3.2620e-11, 8.7196e-12,\n 7.8329e-12, 8.8741e-12, 8.8215e-12, -3.3606e-11, -3.5822e-11,\n 8.8607e-12, -3.7768e-11, -3.3895e-11, -3.4630e-11, 8.8262e-12,\n -3.2694e-11, -2.8659e-11, -3.3503e-11, 8.7317e-12, -3.3552e-11,\n 8.8356e-12, 8.7465e-12, 8.7699e-12, -3.1714e-11, -3.0996e-11,\n -3.4388e-11, 8.8418e-12, -3.6691e-11, -3.3366e-11, -3.6765e-11,\n -3.5375e-11, -3.3335e-11, -3.0243e-11, -3.4232e-11, -1.0886e-11,\n 8.9110e-12, 8.7241e-12, -4.1811e-11, 8.8835e-12, 8.8392e-12,\n 8.8409e-12, -3.3859e-11, -3.4776e-11, 8.8065e-12, -2.8741e-11,\n -3.0334e-11, 8.6172e-12, 8.1360e-12, -3.2991e-11, -3.1512e-11,\n -3.1384e-11, -3.0397e-11, -3.5201e-11, 8.9474e-12, 8.6525e-12,\n -3.6668e-11, 8.8124e-12],\n [-1.7654e-10, -1.8710e-10, 5.2699e-11, 5.3328e-11, 5.3486e-11,\n -1.7182e-10, 5.3641e-11, 5.2257e-11, -1.6294e-10, -1.7473e-10,\n -1.6900e-10, -1.8956e-10, -1.7511e-10, -1.6826e-10, 5.4230e-11,\n -1.8007e-10, -2.3803e-10, 5.2946e-11, -1.6512e-10, -2.0304e-10,\n -2.1058e-10, -1.9796e-10, -1.7371e-10, 5.3272e-11, 5.1083e-11,\n 5.3339e-11, -1.7309e-10, 5.2668e-11, 5.2359e-11, -2.0520e-10,\n 5.3652e-11, -1.9445e-10, -2.0415e-10, -1.5249e-10, -8.5190e-11,\n 5.4195e-11, 5.2733e-11, -1.8023e-10, 5.3199e-11, 5.3110e-11,\n 5.3503e-11, 5.3569e-11, 5.1983e-11, 5.3416e-11, 5.2754e-11,\n 5.3106e-11, 5.3651e-11, -1.7271e-10, -1.9625e-10, 5.2842e-11,\n -2.0241e-10, 5.3286e-11, 5.3914e-11, -1.7558e-10, 5.3620e-11,\n 5.1166e-11, -1.7767e-10, 5.3714e-11, -1.8579e-10, -1.8014e-10,\n 5.3786e-11, -1.8605e-10, 5.1031e-11, -1.7868e-10, -1.7814e-10,\n 5.3384e-11, 5.3428e-11, 5.2208e-11, -1.7638e-10, -2.1505e-10,\n -1.7006e-10, -1.8261e-10, -1.9854e-10, 5.2905e-11, 5.3685e-11,\n -1.7702e-10, 5.2837e-11, -2.0165e-10, -1.8040e-10, -1.9952e-10,\n -2.0239e-10, 5.4176e-11, 5.3596e-11, -1.9164e-10, 5.3814e-11,\n -1.7061e-10, -2.0582e-10, 5.2223e-11, -1.8616e-10, 5.3249e-11,\n 5.1456e-11, 5.2543e-11, -1.7038e-10, -1.9775e-10, 5.2200e-11,\n -1.7563e-10, -1.8596e-10, -1.8264e-10, 5.3490e-11, -2.1415e-10,\n 5.3670e-11, 5.2876e-11, -1.9255e-10, 5.2883e-11, -1.9091e-10,\n 5.3973e-11, -1.8357e-10, -1.8457e-10, 5.0111e-11, 5.3667e-11,\n 5.3313e-11, -1.8651e-10, 5.3766e-11, -1.6608e-10, -2.0418e-10,\n -1.7684e-10, 5.3190e-11, 5.2916e-11, 5.2800e-11, 5.2091e-11,\n -1.9908e-10, 5.2026e-11, 5.2175e-11, 5.0921e-11, -1.8878e-10,\n -2.0261e-10, 5.2683e-11, 5.4065e-11, 5.4191e-11, 5.3178e-11,\n -4.1271e-11, 5.2480e-11, -1.9397e-10, 5.3374e-11, -2.0715e-10,\n -1.7195e-10, -1.6458e-10, -2.0594e-10, -1.8537e-10, 5.2210e-11,\n 4.5435e-11, 5.3312e-11, 5.3107e-11, -1.8903e-10, -2.0151e-10,\n 5.3610e-11, -2.1676e-10, -1.9212e-10, -1.8851e-10, 5.3192e-11,\n -1.8016e-10, -1.5660e-10, -1.8169e-10, 5.2196e-11, -1.9142e-10,\n 5.3153e-11, 5.2017e-11, 5.2490e-11, -1.7767e-10, -1.7306e-10,\n -2.0000e-10, 5.3344e-11, -2.0735e-10, -1.8226e-10, -2.0525e-10,\n -1.9375e-10, -1.8911e-10, -1.6824e-10, -1.9160e-10, -6.1035e-11,\n 5.3959e-11, 5.1970e-11, -2.4794e-10, 5.3898e-11, 5.3449e-11,\n 5.3129e-11, -1.9495e-10, -1.9065e-10, 5.3376e-11, -1.5771e-10,\n -1.7627e-10, 5.0804e-11, 4.7550e-11, -1.8282e-10, -1.7248e-10,\n -1.7086e-10, -1.6785e-10, -1.8975e-10, 5.4140e-11, 5.1439e-11,\n -2.1550e-10, 5.2837e-11],\n [-5.9308e-12, -6.0633e-12, 1.0639e-12, 1.0601e-12, 1.0437e-12,\n -5.2253e-12, 1.0380e-12, 1.0753e-12, -6.3110e-12, -5.5548e-12,\n -5.8196e-12, -6.1686e-12, -5.6547e-12, -5.8939e-12, 1.0264e-12,\n -5.8875e-12, -4.9828e-12, 1.0696e-12, -5.6240e-12, -5.9589e-12,\n -5.7647e-12, -5.6271e-12, -4.7891e-12, 1.0501e-12, 1.0721e-12,\n 1.0534e-12, -5.1340e-12, 1.0601e-12, 1.0673e-12, -5.6159e-12,\n 1.0657e-12, -5.8840e-12, -5.6177e-12, -5.6052e-12, -2.3819e-12,\n 1.0428e-12, 1.0645e-12, -5.8407e-12, 1.0759e-12, 1.0703e-12,\n 1.0563e-12, 1.0638e-12, 1.0936e-12, 1.0394e-12, 1.0699e-12,\n 1.0660e-12, 1.0572e-12, -5.5073e-12, -5.5994e-12, 1.0657e-12,\n -5.5185e-12, 1.0702e-12, 1.0317e-12, -4.9519e-12, 1.0700e-12,\n 1.0878e-12, -6.4119e-12, 1.0727e-12, -6.1068e-12, -5.0632e-12,\n 1.0440e-12, -6.3615e-12, 1.0942e-12, -5.7915e-12, -5.1779e-12,\n 1.0594e-12, 1.0628e-12, 1.0800e-12, -4.9839e-12, -5.6923e-12,\n -5.3228e-12, -5.7289e-12, -5.1321e-12, 1.0541e-12, 1.0549e-12,\n -5.6446e-12, 1.0608e-12, -5.5309e-12, -5.3367e-12, -5.6263e-12,\n -5.8888e-12, 1.0390e-12, 1.0707e-12, -4.9820e-12, 1.0243e-12,\n -6.0785e-12, -6.0615e-12, 1.0952e-12, -6.3623e-12, 1.0563e-12,\n 1.0600e-12, 1.0676e-12, -5.1997e-12, -5.1417e-12, 1.0844e-12,\n -5.4786e-12, -6.2733e-12, -5.3695e-12, 1.0321e-12, -6.6532e-12,\n 1.0274e-12, 1.0601e-12, -5.2202e-12, 1.0722e-12, -5.1561e-12,\n 9.9876e-13, -5.5205e-12, -5.3341e-12, 1.0966e-12, 1.0412e-12,\n 1.0544e-12, -5.9222e-12, 1.0658e-12, -5.6258e-12, -6.7137e-12,\n -5.7705e-12, 1.0582e-12, 1.0873e-12, 1.0867e-12, 1.0970e-12,\n -5.8734e-12, 1.0767e-12, 1.0806e-12, 1.0864e-12, -6.1539e-12,\n -5.3294e-12, 1.0692e-12, 1.0632e-12, 1.0596e-12, 1.0679e-12,\n -4.6884e-13, 1.0728e-12, -5.0256e-12, 1.0207e-12, -6.1489e-12,\n -5.0349e-12, -5.6795e-12, -5.9098e-12, -5.4701e-12, 1.0878e-12,\n 1.1205e-12, 1.0675e-12, 1.0630e-12, -5.2833e-12, -5.7520e-12,\n 1.0280e-12, -5.9073e-12, -5.6861e-12, -6.0279e-12, 1.0720e-12,\n -5.5684e-12, -5.1824e-12, -5.8321e-12, 1.0727e-12, -5.6046e-12,\n 1.0720e-12, 1.0901e-12, 1.0875e-12, -5.3050e-12, -5.2387e-12,\n -5.5573e-12, 1.0511e-12, -5.8223e-12, -6.0652e-12, -6.1863e-12,\n -6.3835e-12, -5.4911e-12, -5.1265e-12, -5.4963e-12, -1.6938e-12,\n 1.0315e-12, 1.0864e-12, -5.7919e-12, 1.0433e-12, 1.0327e-12,\n 1.0724e-12, -5.1862e-12, -6.2257e-12, 1.0543e-12, -5.3604e-12,\n -4.3457e-12, 1.1106e-12, 1.0774e-12, -5.7431e-12, -5.7626e-12,\n -5.6590e-12, -5.3895e-12, -6.0804e-12, 1.0456e-12, 1.0771e-12,\n -5.2960e-12, 1.0724e-12]], device='cuda:0')",
"exp_avg_sq": "tensor([[9.2829e-14, 7.8202e-14, 2.1864e-14, 1.3214e-14, 1.8867e-14, 5.9375e-14,\n 2.0691e-14, 1.8262e-14, 4.3571e-14, 6.7024e-14, 4.9708e-14, 4.6506e-14,\n 7.6067e-14, 6.7643e-14, 1.8630e-14, 6.4890e-14, 5.9613e-14, 1.4318e-14,\n 5.4014e-14, 5.0778e-14, 6.2535e-14, 6.7959e-14, 5.4581e-14, 1.7133e-14,\n 1.3424e-14, 1.5202e-14, 5.5395e-14, 1.3507e-14, 1.5563e-14, 3.0710e-14,\n 2.1691e-14, 5.1725e-14, 6.5361e-14, 5.8554e-14, 3.0387e-15, 1.5881e-14,\n 1.8329e-14, 8.2953e-14, 1.2195e-14, 1.7860e-14, 1.8107e-14, 1.4251e-14,\n 1.3835e-14, 1.8972e-14, 1.3829e-14, 1.3026e-14, 1.8739e-14, 4.9449e-14,\n 5.0517e-14, 1.4879e-14, 9.0569e-14, 1.3256e-14, 1.7652e-14, 5.2543e-14,\n 1.7252e-14, 1.2666e-14, 5.1942e-14, 1.5030e-14, 5.7914e-14, 8.6755e-14,\n 2.1188e-14, 5.9545e-14, 1.6188e-14, 7.2273e-14, 5.3881e-14, 1.7757e-14,\n 1.6896e-14, 1.3220e-14, 4.6541e-14, 4.9760e-14, 5.1429e-14, 7.3588e-14,\n 5.8607e-14, 2.1939e-14, 1.6384e-14, 4.0083e-14, 1.3255e-14, 3.1314e-14,\n 5.7861e-14, 5.9836e-14, 6.8991e-14, 1.6911e-14, 1.6515e-14, 6.2142e-14,\n 2.4314e-14, 9.3014e-14, 4.8399e-14, 1.5421e-14, 6.8866e-14, 1.5280e-14,\n 1.5483e-14, 1.1435e-14, 6.3610e-14, 6.9572e-14, 1.3418e-14, 4.5958e-14,\n 6.3763e-14, 6.9517e-14, 1.8398e-14, 4.6760e-14, 2.2222e-14, 1.4375e-14,\n 4.5161e-14, 1.5786e-14, 4.0784e-14, 1.9191e-14, 5.2918e-14, 5.5580e-14,\n 9.0981e-15, 1.9641e-14, 1.3676e-14, 3.7997e-14, 1.9521e-14, 4.2936e-14,\n 4.0021e-14, 4.5495e-14, 1.8625e-14, 1.7478e-14, 1.7154e-14, 2.1585e-14,\n 7.6764e-14, 1.4952e-14, 1.4979e-14, 1.4628e-14, 6.8616e-14, 3.8200e-14,\n 2.3155e-14, 1.7427e-14, 1.8505e-14, 2.0923e-14, 2.2571e-15, 1.8518e-14,\n 7.8710e-14, 2.2448e-14, 5.0976e-14, 5.6408e-14, 1.2509e-14, 4.0234e-14,\n 3.4011e-14, 5.0736e-15, 2.4513e-15, 1.8856e-14, 1.7439e-14, 5.6303e-14,\n 7.7942e-14, 2.1435e-14, 9.0851e-14, 4.4123e-14, 5.8864e-14, 1.3068e-14,\n 4.4462e-14, 5.5311e-14, 8.6309e-14, 1.2677e-14, 7.0041e-14, 1.3173e-14,\n 1.3802e-14, 1.6277e-14, 5.0290e-14, 3.8532e-15, 5.1707e-14, 1.9963e-14,\n 6.1072e-14, 5.2762e-14, 4.0548e-14, 5.1477e-14, 6.1255e-14, 4.2333e-14,\n 5.8465e-14, 1.8009e-15, 1.5992e-14, 1.2192e-14, 4.5537e-15, 1.8886e-14,\n 1.8268e-14, 1.4286e-14, 4.0633e-14, 8.1881e-14, 1.6591e-14, 4.7868e-14,\n 4.5101e-14, 1.2198e-14, 2.8243e-15, 5.4066e-14, 3.9946e-14, 4.8657e-14,\n 6.7912e-14, 6.5932e-14, 2.4967e-14, 1.6436e-14, 6.1730e-14, 2.1729e-14],\n [1.0481e-14, 8.7955e-15, 2.4229e-15, 1.4848e-15, 2.0919e-15, 6.6795e-15,\n 2.2933e-15, 2.0264e-15, 4.9393e-15, 7.5136e-15, 5.6430e-15, 5.2431e-15,\n 8.5979e-15, 7.5915e-15, 2.0628e-15, 7.3280e-15, 6.7088e-15, 1.5949e-15,\n 6.1076e-15, 5.7383e-15, 7.0248e-15, 7.6684e-15, 6.1808e-15, 1.9123e-15,\n 1.5044e-15, 1.6931e-15, 6.2429e-15, 1.5130e-15, 1.7356e-15, 3.5060e-15,\n 2.4080e-15, 5.8590e-15, 7.3590e-15, 6.6145e-15, 3.3257e-16, 1.7682e-15,\n 2.0300e-15, 9.3356e-15, 1.3755e-15, 1.9817e-15, 2.0189e-15, 1.5923e-15,\n 1.5470e-15, 2.1090e-15, 1.5393e-15, 1.4635e-15, 2.0807e-15, 5.5860e-15,\n 5.7243e-15, 1.6622e-15, 1.0240e-14, 1.4714e-15, 1.9618e-15, 5.9718e-15,\n 1.9273e-15, 1.4253e-15, 5.8832e-15, 1.6866e-15, 6.4820e-15, 9.7407e-15,\n 2.3524e-15, 6.6853e-15, 1.7972e-15, 8.1267e-15, 6.0854e-15, 1.9713e-15,\n 1.8836e-15, 1.4848e-15, 5.2568e-15, 5.6625e-15, 5.8153e-15, 8.2784e-15,\n 6.6042e-15, 2.4340e-15, 1.8228e-15, 4.5541e-15, 1.4845e-15, 3.5419e-15,\n 6.5412e-15, 6.7496e-15, 7.7723e-15, 1.8756e-15, 1.8523e-15, 7.0184e-15,\n 2.6937e-15, 1.0411e-14, 5.4967e-15, 1.7304e-15, 7.7724e-15, 1.7067e-15,\n 1.7295e-15, 1.2883e-15, 7.1870e-15, 7.8161e-15, 1.5104e-15, 5.1848e-15,\n 7.1762e-15, 7.8276e-15, 2.0442e-15, 5.2965e-15, 2.4653e-15, 1.6109e-15,\n 5.1162e-15, 1.7616e-15, 4.5986e-15, 2.1313e-15, 5.9583e-15, 6.2282e-15,\n 1.0353e-15, 2.1774e-15, 1.5243e-15, 4.3033e-15, 2.1680e-15, 4.8597e-15,\n 4.5305e-15, 5.1779e-15, 2.0737e-15, 1.9438e-15, 1.9112e-15, 2.3877e-15,\n 8.6230e-15, 1.6640e-15, 1.6744e-15, 1.6298e-15, 7.7012e-15, 4.3381e-15,\n 2.5630e-15, 1.9352e-15, 2.0538e-15, 2.3191e-15, 2.4036e-16, 2.0521e-15,\n 8.8247e-15, 2.4828e-15, 5.7350e-15, 6.3545e-15, 1.4071e-15, 4.5717e-15,\n 3.8694e-15, 5.5512e-16, 2.6229e-16, 2.0982e-15, 1.9445e-15, 6.3402e-15,\n 8.7682e-15, 2.3769e-15, 1.0174e-14, 4.9836e-15, 6.6488e-15, 1.4590e-15,\n 5.0652e-15, 6.2354e-15, 9.6822e-15, 1.4181e-15, 7.8539e-15, 1.4783e-15,\n 1.5491e-15, 1.8143e-15, 5.6986e-15, 4.4165e-16, 5.8589e-15, 2.2124e-15,\n 6.8602e-15, 5.9538e-15, 4.5883e-15, 5.7994e-15, 6.8847e-15, 4.8155e-15,\n 6.5916e-15, 2.0403e-16, 1.7755e-15, 1.3507e-15, 5.4008e-16, 2.0970e-15,\n 2.0328e-15, 1.6077e-15, 4.6383e-15, 9.1904e-15, 1.8479e-15, 5.4230e-15,\n 5.1251e-15, 1.3774e-15, 3.0426e-16, 6.1025e-15, 4.5571e-15, 5.5135e-15,\n 7.6346e-15, 7.3989e-15, 2.7632e-15, 1.8307e-15, 6.9494e-15, 2.4060e-15],\n [1.2020e-14, 1.0179e-14, 2.8128e-15, 1.7069e-15, 2.4367e-15, 7.7549e-15,\n 2.6679e-15, 2.3629e-15, 5.7054e-15, 8.7730e-15, 6.4836e-15, 6.0789e-15,\n 9.8943e-15, 8.8509e-15, 2.4061e-15, 8.4318e-15, 7.7768e-15, 1.8593e-15,\n 7.0568e-15, 6.6556e-15, 8.1762e-15, 8.8130e-15, 7.1165e-15, 2.2087e-15,\n 1.7358e-15, 1.9637e-15, 7.2304e-15, 1.7487e-15, 2.0108e-15, 4.0232e-15,\n 2.7852e-15, 6.7429e-15, 8.4971e-15, 7.6149e-15, 3.9402e-16, 2.0541e-15,\n 2.3714e-15, 1.0796e-14, 1.5715e-15, 2.3113e-15, 2.3367e-15, 1.8444e-15,\n 1.7884e-15, 2.4464e-15, 1.7915e-15, 1.6789e-15, 2.4190e-15, 6.4750e-15,\n 6.5898e-15, 1.9246e-15, 1.1718e-14, 1.7157e-15, 2.2802e-15, 6.8481e-15,\n 2.2289e-15, 1.6364e-15, 6.7941e-15, 1.9403e-15, 7.5757e-15, 1.1299e-14,\n 2.7284e-15, 7.7956e-15, 2.0974e-15, 9.3892e-15, 7.0400e-15, 2.2976e-15,\n 2.1805e-15, 1.7096e-15, 6.0780e-15, 6.4723e-15, 6.7131e-15, 9.5829e-15,\n 7.6774e-15, 2.8245e-15, 2.1192e-15, 5.2386e-15, 1.7144e-15, 4.1077e-15,\n 7.5793e-15, 7.8249e-15, 8.9528e-15, 2.1877e-15, 2.1275e-15, 8.0734e-15,\n 3.1179e-15, 1.2093e-14, 6.3373e-15, 1.9861e-15, 8.9617e-15, 1.9761e-15,\n 1.9996e-15, 1.4770e-15, 8.2575e-15, 9.0588e-15, 1.7317e-15, 6.0282e-15,\n 8.3297e-15, 9.0451e-15, 2.3718e-15, 6.0937e-15, 2.8569e-15, 1.8595e-15,\n 5.9106e-15, 2.0372e-15, 5.3567e-15, 2.4732e-15, 6.9361e-15, 7.2426e-15,\n 1.1699e-15, 2.5359e-15, 1.7719e-15, 4.9813e-15, 2.5125e-15, 5.6247e-15,\n 5.2173e-15, 5.9425e-15, 2.3981e-15, 2.2578e-15, 2.2160e-15, 2.7792e-15,\n 1.0021e-14, 1.9379e-15, 1.9343e-15, 1.8953e-15, 8.9873e-15, 4.9909e-15,\n 2.9765e-15, 2.2504e-15, 2.3876e-15, 2.6949e-15, 2.8279e-16, 2.3933e-15,\n 1.0261e-14, 2.8904e-15, 6.6965e-15, 7.3382e-15, 1.6047e-15, 5.2443e-15,\n 4.4536e-15, 6.5574e-16, 3.1240e-16, 2.4324e-15, 2.2494e-15, 7.3723e-15,\n 1.0131e-14, 2.7600e-15, 1.1824e-14, 5.7761e-15, 7.7009e-15, 1.6919e-15,\n 5.7951e-15, 7.2461e-15, 1.1205e-14, 1.6366e-15, 9.1235e-15, 1.7025e-15,\n 1.7825e-15, 2.1051e-15, 6.5672e-15, 5.0849e-16, 6.7321e-15, 2.5780e-15,\n 7.9698e-15, 6.8959e-15, 5.2893e-15, 6.7038e-15, 7.9689e-15, 5.5351e-15,\n 7.6223e-15, 2.2576e-16, 2.0672e-15, 1.5825e-15, 6.0335e-16, 2.4396e-15,\n 2.3615e-15, 1.8417e-15, 5.2887e-15, 1.0638e-14, 2.1434e-15, 6.2516e-15,\n 5.8771e-15, 1.5715e-15, 3.5581e-16, 7.0539e-15, 5.2032e-15, 6.3487e-15,\n 8.8751e-15, 8.5988e-15, 3.2016e-15, 2.1261e-15, 8.0234e-15, 2.7977e-15],\n [8.6043e-15, 7.2353e-15, 2.0836e-15, 1.2345e-15, 1.7891e-15, 5.4704e-15,\n 1.9663e-15, 1.7261e-15, 3.9684e-15, 6.1823e-15, 4.5412e-15, 4.2728e-15,\n 7.0034e-15, 6.2343e-15, 1.7695e-15, 5.9887e-15, 5.4997e-15, 1.3422e-15,\n 4.9460e-15, 4.6358e-15, 5.7641e-15, 6.2932e-15, 5.0015e-15, 1.6163e-15,\n 1.2565e-15, 1.4344e-15, 5.0973e-15, 1.2630e-15, 1.4659e-15, 2.7749e-15,\n 2.0680e-15, 4.7397e-15, 6.0503e-15, 5.3971e-15, 2.9244e-16, 1.4968e-15,\n 1.7368e-15, 7.6706e-15, 1.1379e-15, 1.6882e-15, 1.7080e-15, 1.3367e-15,\n 1.2984e-15, 1.7973e-15, 1.3014e-15, 1.2205e-15, 1.7753e-15, 4.5212e-15,\n 4.6240e-15, 1.3967e-15, 8.3922e-15, 1.2529e-15, 1.6693e-15, 4.7970e-15,\n 1.6215e-15, 1.1814e-15, 4.7419e-15, 1.4072e-15, 5.3570e-15, 8.0369e-15,\n 2.0133e-15, 5.4833e-15, 1.5273e-15, 6.7043e-15, 4.9393e-15, 1.6777e-15,\n 1.5937e-15, 1.2343e-15, 4.2694e-15, 4.5492e-15, 4.7148e-15, 6.8029e-15,\n 5.3711e-15, 2.0869e-15, 1.5452e-15, 3.6501e-15, 1.2414e-15, 2.8553e-15,\n 5.2837e-15, 5.4872e-15, 6.3959e-15, 1.6000e-15, 1.5508e-15, 5.7363e-15,\n 2.3271e-15, 8.6644e-15, 4.3996e-15, 1.4476e-15, 6.3492e-15, 1.4351e-15,\n 1.4561e-15, 1.0656e-15, 5.8747e-15, 6.4428e-15, 1.2523e-15, 4.2001e-15,\n 5.8698e-15, 6.4255e-15, 1.7444e-15, 4.2888e-15, 2.1169e-15, 1.3446e-15,\n 4.1181e-15, 1.4879e-15, 3.7233e-15, 1.8212e-15, 4.8503e-15, 5.1592e-15,\n 8.4378e-16, 1.8633e-15, 1.2847e-15, 3.4606e-15, 1.8551e-15, 3.9148e-15,\n 3.6726e-15, 4.1364e-15, 1.7643e-15, 1.6512e-15, 1.6176e-15, 2.0594e-15,\n 7.0888e-15, 1.4061e-15, 1.4080e-15, 1.3747e-15, 6.3165e-15, 3.4832e-15,\n 2.2119e-15, 1.6500e-15, 1.7551e-15, 1.9909e-15, 2.3118e-16, 1.7558e-15,\n 7.2944e-15, 2.1418e-15, 4.6659e-15, 5.2136e-15, 1.1815e-15, 3.6774e-15,\n 3.0864e-15, 4.8853e-16, 2.4546e-16, 1.7835e-15, 1.6457e-15, 5.1660e-15,\n 7.2228e-15, 2.0398e-15, 8.4494e-15, 4.0363e-15, 5.3886e-15, 1.2265e-15,\n 4.0489e-15, 5.0651e-15, 8.0344e-15, 1.1914e-15, 6.4977e-15, 1.2315e-15,\n 1.2914e-15, 1.5320e-15, 4.5966e-15, 3.4811e-16, 4.7438e-15, 1.8944e-15,\n 5.6437e-15, 4.8414e-15, 3.7210e-15, 4.7530e-15, 5.6769e-15, 3.8470e-15,\n 5.3861e-15, 1.7415e-16, 1.5131e-15, 1.1510e-15, 3.9514e-16, 1.7876e-15,\n 1.7233e-15, 1.3355e-15, 3.6984e-15, 7.6103e-15, 1.5648e-15, 4.3755e-15,\n 4.1193e-15, 1.1372e-15, 2.8466e-16, 4.9704e-15, 3.6358e-15, 4.4521e-15,\n 6.2570e-15, 6.1025e-15, 2.3925e-15, 1.5475e-15, 5.7173e-15, 2.0711e-15]],\n device='cuda:0')"
},
"64": {
"step": "tensor(20024.)",
"exp_avg": "tensor([ 3.7611e-10, -5.3218e-11, -3.2007e-10, -6.3778e-12], device='cuda:0')",
"exp_avg_sq": "tensor([2.0563e-12, 2.2825e-13, 2.5721e-13, 2.0226e-13], device='cuda:0')"
},
"8": {
"step": "tensor(12515.)",
"exp_avg": "tensor([[-3.5103e-06, 5.2735e-08, 1.3877e-06, ..., -1.2441e-06,\n 2.9776e-06, 2.7382e-05],\n [-1.6968e-06, 6.0594e-07, 5.3879e-07, ..., -4.4697e-06,\n -3.6992e-06, 5.4548e-06],\n [-4.1679e-06, 8.0031e-06, 1.5055e-06, ..., -3.4458e-06,\n -5.2231e-08, 7.1695e-07],\n ...,\n [ 3.1509e-06, -6.2440e-06, 2.0962e-05, ..., -3.8914e-07,\n 6.7541e-07, -1.2398e-05],\n [-3.2034e-06, 1.9077e-07, -3.6124e-07, ..., -1.5298e-07,\n 1.7659e-08, 4.7645e-06],\n [-1.1605e-06, -2.6278e-06, 9.0848e-07, ..., -1.0259e-05,\n -1.5571e-06, 3.9765e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.0165e-09, 1.3045e-10, 2.2678e-10, ..., 5.0492e-10, 1.0917e-09,\n 3.6878e-09],\n [5.8970e-10, 1.9331e-10, 2.1875e-09, ..., 1.5926e-10, 7.4360e-10,\n 3.1936e-09],\n [1.5169e-09, 2.2833e-09, 2.1359e-10, ..., 4.3851e-10, 3.5657e-11,\n 3.3030e-10],\n ...,\n [1.2685e-09, 9.7480e-10, 1.1291e-08, ..., 8.3496e-11, 7.7228e-10,\n 1.1403e-09],\n [1.5445e-09, 9.9650e-11, 8.7604e-10, ..., 2.1436e-10, 2.5279e-11,\n 2.2054e-10],\n [1.2783e-09, 4.8129e-11, 1.4046e-10, ..., 2.0561e-09, 2.2837e-10,\n 4.6700e-09]], device='cuda:0')"
},
"9": {
"step": "tensor(12515.)",
"exp_avg": "tensor([ 3.6460e-05, 3.5118e-05, -3.3795e-05, ..., 3.6109e-05,\n -1.6276e-05, 3.3168e-06], device='cuda:0')",
"exp_avg_sq": "tensor([4.0352e-08, 2.6323e-08, 2.3462e-08, ..., 2.9875e-08, 2.8448e-08,\n 4.5149e-08], device='cuda:0')"
},
"10": {
"step": "tensor(12515.)",
"exp_avg": "tensor([[ 1.2346e-06, 2.2285e-06, 8.3167e-08, ..., -1.4891e-06,\n 2.7970e-07, -8.3853e-07],\n [ 3.3974e-06, -1.9452e-07, 6.3252e-07, ..., 2.1271e-06,\n 4.1392e-06, -3.3585e-06],\n [-1.6336e-06, 2.5375e-08, 1.3283e-06, ..., -2.4285e-06,\n 1.9636e-06, -5.0890e-06],\n ...,\n [ 8.4677e-09, 2.1570e-07, -6.5440e-07, ..., -2.6549e-06,\n 3.5920e-06, 1.1137e-06],\n [ 7.2295e-08, 4.5700e-06, 1.1015e-06, ..., -2.3379e-06,\n 1.7457e-06, -5.1525e-07],\n [ 3.1682e-06, 7.0902e-07, -3.5217e-07, ..., -1.7236e-07,\n 1.8265e-07, -1.5897e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[7.2273e-11, 8.1468e-11, 1.5562e-11, ..., 4.2671e-11, 2.4373e-11,\n 7.5395e-11],\n [1.4353e-10, 9.4496e-11, 2.3739e-11, ..., 4.1552e-11, 6.1320e-11,\n 1.3573e-10],\n [1.5750e-10, 1.0513e-10, 2.5366e-11, ..., 6.8646e-11, 6.8341e-11,\n 3.2136e-10],\n ...,\n [8.0130e-11, 1.0670e-10, 4.0950e-11, ..., 7.4995e-11, 6.5449e-11,\n 1.2270e-10],\n [8.7400e-11, 1.4039e-10, 3.2618e-11, ..., 5.9332e-11, 3.8988e-11,\n 1.5562e-10],\n [7.5614e-11, 1.1078e-10, 5.1203e-11, ..., 4.7128e-11, 2.7307e-11,\n 1.4653e-10]], device='cuda:0')"
},
"11": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-3.0513e-05, 9.5094e-06, -6.7728e-06, ..., -1.0721e-05,\n -2.1337e-05, -1.2558e-05],\n [-1.1267e-05, -1.3693e-05, 1.1334e-06, ..., -4.1298e-05,\n -1.4261e-06, 4.8277e-05],\n [-1.5066e-06, -1.8512e-06, -1.4820e-05, ..., -2.5729e-06,\n 5.0603e-06, -8.1810e-06],\n ...,\n [ 1.1510e-05, -3.8799e-06, -5.3953e-06, ..., 2.0350e-05,\n 2.9903e-05, -1.0249e-05],\n [ 1.2588e-05, -6.8158e-06, 5.5155e-06, ..., -1.2404e-05,\n -3.3554e-06, -1.4175e-05],\n [ 9.0948e-06, -4.0794e-05, -4.4192e-05, ..., -5.2379e-05,\n 2.7073e-05, 1.9501e-05]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.0774e-08, 6.4734e-09, 2.0988e-08, ..., 4.4338e-09, 4.7933e-09,\n 1.4980e-08],\n [7.0445e-09, 3.0141e-09, 9.1201e-08, ..., 4.9668e-08, 4.2176e-09,\n 5.9210e-08],\n [3.3018e-09, 1.6201e-09, 2.3871e-09, ..., 3.4391e-09, 8.8701e-10,\n 3.7341e-08],\n ...,\n [1.3800e-08, 4.7957e-09, 1.7290e-08, ..., 1.9646e-08, 6.7315e-09,\n 3.2527e-09],\n [1.2049e-08, 6.1212e-09, 6.9265e-09, ..., 9.3354e-09, 4.1005e-09,\n 1.5952e-08],\n [3.3500e-09, 6.4674e-09, 1.6640e-08, ..., 7.4177e-09, 6.1166e-09,\n 2.7867e-08]], device='cuda:0')"
},
"12": {
"step": "tensor(5006.)",
"exp_avg": "tensor([-2.4611e-04, 9.7250e-05, -7.5870e-05, ..., -6.0308e-05,\n -6.5723e-05, 8.4106e-05], device='cuda:0')",
"exp_avg_sq": "tensor([3.5958e-07, 3.8444e-07, 3.0579e-07, ..., 2.8393e-07, 4.1452e-07,\n 2.4829e-07], device='cuda:0')"
},
"13": {
"step": "tensor(5006.)",
"exp_avg": "tensor([[-1.7851e-07, -3.2742e-06, -1.4269e-05, ..., -7.1558e-06,\n 2.1575e-06, -2.1967e-06],\n [ 1.8410e-06, -5.0829e-06, -5.2866e-06, ..., -1.2025e-06,\n 8.2634e-07, -4.9790e-06],\n [ 6.0166e-06, -2.4669e-06, 5.2935e-06, ..., 9.0185e-06,\n 2.1133e-06, -2.4020e-05],\n ...,\n [-4.1430e-06, -1.3917e-05, -1.0711e-05, ..., 8.0037e-06,\n -5.6615e-06, 8.7018e-06],\n [-8.7173e-07, 7.6848e-06, -6.1957e-06, ..., -7.1281e-06,\n 7.2486e-06, -2.8154e-06],\n [-4.1459e-06, -1.1582e-05, 1.2702e-05, ..., -4.2357e-06,\n -2.9169e-06, -2.9699e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[6.3012e-10, 5.6786e-10, 7.1870e-10, ..., 6.9587e-10, 7.6093e-10,\n 6.6298e-10],\n [1.1836e-09, 6.0614e-10, 1.2327e-09, ..., 5.5848e-10, 1.4980e-09,\n 1.7352e-09],\n [1.8092e-09, 9.8718e-10, 1.5222e-09, ..., 7.0473e-10, 1.6798e-09,\n 9.9963e-10],\n ...,\n [1.1665e-09, 1.1853e-09, 1.8152e-09, ..., 8.4230e-10, 1.5491e-09,\n 1.3053e-09],\n [1.3943e-09, 1.3370e-09, 1.9493e-09, ..., 2.6855e-09, 2.0361e-09,\n 1.0020e-09],\n [1.9557e-09, 1.2749e-09, 1.6607e-09, ..., 1.0465e-09, 1.4934e-09,\n 2.4238e-09]], device='cuda:0')"
}
},
"param_groups": [
{
"lr": 0.01,
"name": "shared",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
0,
1
]
},
{
"lr": 0.01,
"name": "scale_256",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
2,
3,
4
]
},
{
"lr": 0.01,
"name": "scale_512",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
5,
6,
7
]
},
{
"lr": 0.01,
"name": "scale_768",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
8,
9,
10
]
},
{
"lr": 0.01,
"name": "scale_1024",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
11,
12,
13
]
},
{
"lr": 0.005,
"name": "fusion",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.005,
"params": [
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64
]
}
]
},
"scheduler_state_dict": {
"T_0": 10,
"T_i": 20,
"T_mult": 2,
"eta_min": 1e-06,
"T_cur": 0,
"base_lrs": [
0.01,
0.01,
0.01,
0.01,
0.01,
0.005
],
"last_epoch": 10,
"_step_count": 0,
"_is_initial": false,
"_get_lr_called_within_step": false,
"_last_lr": [
0.01,
0.01,
0.01,
0.01,
0.01,
0.005
]
},
"metrics": {
"best_val_acc": 75.74,
"best_epoch": 9,
"scale_accuracies": {
"256": 74.18,
"512": 75.73,
"768": 76.046,
"1024": 76.08
},
"training_history": {
"epochs": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10
],
"train_loss": [
3.078377773987499,
1.970284724826104,
2.077278166926007,
1.7731811365888825,
1.6536811950727792,
1.927997774473915,
1.7512225106007282,
1.6524894679492823,
1.915095723680244,
1.7705897627189262
],
"train_acc": [
58.631622575355124,
71.63597719891318,
75.18789509876542,
77.50847469533636,
78.97818941636804,
80.19610245971056,
81.37947668024543,
82.42379799042591,
83.27489702747573,
83.8206104278365
],
"val_acc": [
69.481,
71.403,
73.618,
74.733,
74.972,
75.311,
75.621,
75.594,
75.672,
75.74
],
"scale_accs": {
"256": [
69.481,
71.403,
72.534,
72.935,
73.218,
73.621,
73.86,
73.983,
74.112,
74.18
],
"512": [
73.041,
74.481,
74.8,
75.211,
75.56,
75.537,
75.643,
75.73
],
"768": [
74.388,
75.318,
75.558,
75.918,
76.046
],
"1024": [
75.106,
76.08
]
},
"lr": [
0.00975530705321762,
0.00904518046337755,
0.00793913236883622,
0.00654543046337755,
0.005000500000000001,
0.0034555695366224513,
0.0020618676311637816,
0.0009558195366224509,
0.00024569294678237997,
0.01
]
}
},
"train_config": {
"name": "david_training",
"run_id": "20251012_194945",
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
"model_variant": [
"clip_vit_b16",
"clip_vit_laion_b32"
],
"num_classes": 1000,
"preset": "balanced",
"custom_config_path": null,
"num_classes_override": null,
"use_belly_override": null,
"belly_expand_override": null,
"progressive_training_override": true,
"scale_warmup_epochs_override": {
"256": 0,
"512": 2,
"768": 5,
"1024": 8
},
"num_epochs": 10,
"batch_size": 1024,
"learning_rate": 0.01,
"weight_decay": 1e-05,
"warmup_epochs": 3,
"use_rose_loss": true,
"rose_initial_weight": 0.2,
"rose_max_weight": 0.8,
"rose_weight_schedule": "adaptive",
"use_cayley_loss": true,
"cayley_weight": 0.01,
"scale_loss_balance": null,
"use_mixed_precision": false,
"gradient_clip": 10.0,
"scheduler_type": "cosine_restarts",
"min_lr": 1e-06,
"freeze_strategy": "never",
"freeze_threshold": 90.0,
"unfreeze_on_plateau": true,
"patience": 10,
"track_gradients": true,
"gradient_scale_threshold": 1e-05,
"gradient_scale_multiplier": 10.0,
"log_interval": 50,
"val_interval": 1,
"save_interval": 5,
"log_fusion_weights": true,
"log_loss_components": true,
"save_format": "safetensors",
"hf_repo": "AbstractPhil/david-shared-space",
"upload_to_hub": true,
"base_dir": "./david_training",
"num_workers": 10,
"pin_memory": true,
"prefetch_factor": 4,
"persistent_workers": true
}
}