gated-david / weights /checkpoint_epoch_10_metadata.json
AbstractPhil's picture
Upload weights and configs - Run 20251012_041353
0311dd6 verified
raw
history blame
172 kB
{
"epoch": 9,
"optimizer_state_dict": {
"state": {
"0": {
"step": "tensor(12520.)",
"exp_avg": "tensor([[ 1.6739e-05, -4.5606e-05, 6.5077e-06, ..., 2.0072e-05,\n 1.0489e-05, 1.5309e-05],\n [-5.8033e-05, -4.5931e-06, 1.6543e-05, ..., -3.2093e-06,\n 7.0874e-06, -1.1865e-05],\n [-3.9455e-06, -2.1452e-07, -1.0664e-06, ..., -1.2511e-06,\n 3.0180e-07, 4.5739e-07],\n ...,\n [ 2.0947e-05, -2.4873e-05, 3.2363e-05, ..., 4.5308e-06,\n -2.4054e-05, 4.9465e-06],\n [-4.7665e-06, -1.1796e-05, -4.0522e-05, ..., 3.3716e-05,\n -1.2486e-05, 8.4833e-06],\n [ 4.7208e-06, -2.1184e-05, 1.8751e-05, ..., -7.1920e-08,\n 1.4588e-05, -4.2959e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1258e-08, 1.2121e-08, 5.4884e-09, ..., 6.9905e-09, 5.4585e-09,\n 4.2942e-09],\n [9.2535e-09, 8.9640e-09, 1.0811e-08, ..., 7.6101e-09, 5.1284e-09,\n 4.8838e-09],\n [3.1808e-11, 2.8556e-11, 3.5860e-11, ..., 1.6663e-11, 2.3441e-11,\n 1.9822e-11],\n ...,\n [1.0021e-08, 7.3923e-09, 7.7857e-09, ..., 5.8398e-09, 5.7656e-09,\n 4.0426e-09],\n [1.2363e-08, 9.2526e-09, 8.9886e-09, ..., 8.5243e-09, 7.1149e-09,\n 5.4051e-09],\n [2.6955e-09, 3.7269e-09, 2.8274e-09, ..., 1.8437e-09, 1.9974e-09,\n 1.6057e-09]], device='cuda:0')"
},
"1": {
"step": "tensor(12520.)",
"exp_avg": "tensor([ 6.4066e-04, -5.0628e-04, -9.4757e-05, ..., -1.3069e-04,\n 8.1707e-04, -5.6003e-04], device='cuda:0')",
"exp_avg_sq": "tensor([1.3089e-05, 1.2634e-05, 4.7040e-08, ..., 1.1755e-05, 1.4330e-05,\n 4.6614e-06], device='cuda:0')"
},
"2": {
"step": "tensor(12520.)",
"exp_avg": "tensor([[-7.2368e-06, -1.1073e-07, -4.4978e-09, ..., -8.5510e-06,\n 1.4223e-06, 1.2094e-05],\n [ 3.7071e-06, 9.5835e-08, -1.7412e-08, ..., -5.0400e-07,\n 5.4481e-06, -3.7365e-06],\n [-2.8565e-06, -6.0232e-07, -2.4080e-33, ..., -7.4501e-07,\n 4.5857e-06, -3.0325e-08],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 1.5191e-05, 6.0268e-07, -2.5183e-08, ..., -9.7206e-07,\n -4.0751e-06, 4.1295e-06],\n [-2.4974e-06, -3.3868e-06, 3.0848e-08, ..., 4.4032e-06,\n -8.5272e-06, 9.1030e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[9.6016e-10, 1.8050e-10, 3.9481e-12, ..., 1.6111e-09, 7.7856e-10,\n 4.3555e-10],\n [6.5044e-10, 3.1379e-11, 2.9507e-13, ..., 2.3564e-09, 1.2419e-09,\n 6.2676e-11],\n [4.6684e-10, 1.2257e-09, 1.5144e-14, ..., 1.0586e-09, 2.6087e-09,\n 3.8964e-11],\n ...,\n [0.0000e+00, 1.9922e-21, 0.0000e+00, ..., 5.8361e-23, 6.9702e-23,\n 0.0000e+00],\n [5.0624e-09, 6.0870e-10, 1.3764e-11, ..., 1.5517e-09, 6.7616e-10,\n 5.6453e-10],\n [1.0861e-09, 1.6294e-09, 4.6904e-12, ..., 8.3689e-10, 5.6864e-09,\n 2.6142e-10]], device='cuda:0')"
},
"3": {
"step": "tensor(12520.)",
"exp_avg": "tensor([ 3.8338e-05, 1.5834e-05, 1.8492e-04, -6.5094e-05, -1.9471e-04,\n 5.6052e-45, -6.7887e-05, -2.9346e-05, 3.3579e-06, -2.4709e-04,\n -1.0341e-04, 2.5436e-06, 1.3127e-04, 3.9198e-05, -2.8213e-05,\n 1.0745e-04, -4.4677e-04, 1.3987e-05, 5.2284e-05, 9.3746e-06,\n -3.6078e-05, 1.2300e-05, 1.0411e-04, -9.9970e-05, -3.1225e-04,\n -1.8167e-05, 4.0361e-05, 5.6052e-45, 1.3360e-05, 2.9073e-05,\n 6.3603e-05, 3.4925e-05, -4.7478e-06, -1.5589e-04, -3.5119e-05,\n -1.3531e-05, 9.9754e-05, 5.4368e-05, 1.3799e-04, -5.4927e-05,\n -1.5107e-04, 5.4087e-05, 3.1507e-06, 7.3043e-05, 9.3905e-05,\n -1.5327e-04, 1.9220e-04, -1.1672e-04, -2.3976e-04, 8.6001e-05,\n 5.6052e-45, -4.8713e-06, -4.6724e-05, -1.0336e-04, 1.5337e-04,\n -6.6736e-05, 5.5120e-05, -1.4183e-04, 1.1549e-04, 1.0652e-04,\n 4.1970e-05, -1.3084e-05, 5.1940e-05, 1.4866e-05, 4.0846e-05,\n -6.2122e-05, 6.7902e-05, 2.3593e-05, 9.0107e-05, 4.6437e-05,\n 5.4268e-05, -5.6052e-45, -2.2738e-05, -1.1862e-04, -1.7789e-05,\n 1.3623e-04, 8.2502e-05, 1.7110e-05, 2.9336e-05, 6.2896e-05,\n 1.3278e-04, -5.7928e-05, 6.6922e-05, -5.6052e-45, 2.5500e-06,\n -9.9526e-05, 3.0418e-04, 1.6249e-04, -8.4341e-05, 2.7578e-05,\n 1.8996e-04, -3.5378e-06, 2.0701e-05, -1.7009e-04, -8.6813e-07,\n -1.2712e-04, 3.1232e-05, -2.0433e-05, 1.7426e-05, 1.5201e-05,\n -7.8730e-05, -3.4225e-05, -7.0286e-05, -4.7577e-05, -3.7810e-05,\n -5.0647e-05, -9.1227e-05, 7.8203e-05, 7.1191e-05, 9.1648e-05,\n 3.5480e-04, -1.2691e-04, 8.9211e-05, -2.0269e-05, -4.0072e-05,\n 4.1760e-06, -1.2247e-04, 5.3114e-05, -2.8885e-05, 5.6052e-45,\n 5.4588e-05, -2.2759e-04, -1.1325e-04, -5.6052e-45, 5.3218e-05,\n 1.6035e-04, -1.1660e-04, -2.3849e-06, 4.5767e-05, 1.3015e-04,\n -3.3889e-04, 3.2962e-06, 5.6052e-45, 1.6555e-04, -1.7252e-06,\n 2.7753e-05, -7.2577e-17, 2.9808e-05, 4.0751e-05, 6.1913e-05,\n 5.6052e-45, 5.6052e-45, -1.1975e-05, -2.0122e-04, 5.6052e-45,\n 5.6052e-45, -5.2588e-05, 1.0905e-05, 5.6052e-45, -6.1229e-05,\n 3.3126e-05, -8.8374e-05, 5.9280e-05, 2.3443e-04, 9.3245e-06,\n -4.3880e-05, 4.7527e-05, -1.2535e-04, 5.6052e-45, 2.1243e-05,\n -9.5787e-05, -7.3071e-06, -6.1816e-06, 1.3621e-04, 2.7513e-05,\n -1.3907e-04, 5.7287e-05, 5.6052e-45, 2.0030e-04, 4.2312e-04,\n 3.2210e-05, -1.9548e-04, 1.6744e-05, -1.4295e-04, -1.0301e-05,\n 4.8333e-05, 1.7491e-04, 4.7614e-05, -1.0770e-04, 2.0719e-04,\n -2.0302e-04, -1.1586e-05, 2.9632e-04, -1.3544e-04, 5.6052e-45,\n 5.6052e-45, 1.8066e-04, 2.1504e-05, 6.2024e-05, -4.6624e-05,\n -4.9966e-05, 1.3313e-04, 6.7837e-05, -1.3627e-04, -6.9981e-05,\n 7.0191e-05, 2.7019e-04, 2.3466e-04, 3.1086e-05, 5.6052e-45,\n -1.1904e-04, -8.0481e-05, -4.7839e-06, -3.6322e-05, -4.4722e-05,\n 1.5362e-04, 2.0214e-04, 1.0044e-04, -6.4273e-05, 5.6052e-45,\n -2.9045e-04, -4.4491e-17, -4.7793e-05, 4.7031e-05, 1.3973e-04,\n 3.1927e-05, -9.8028e-05, 8.3025e-05, -2.5517e-05, 7.0862e-05,\n -6.4046e-05, 4.5396e-05, -1.1648e-04, -2.0059e-05, 8.7001e-05,\n -5.6052e-45, 2.2055e-05, 1.8401e-04, 1.5473e-04, -1.0084e-05,\n -4.4574e-05, 8.5887e-05, 5.4844e-05, 5.6215e-05, 5.6052e-45,\n -2.7584e-04, 1.5147e-05, 5.6052e-45, 9.3455e-05, -1.1566e-06,\n -9.7674e-06, -1.3733e-43, 2.0191e-04, -8.9031e-05, 3.4434e-05,\n 9.4515e-06, 1.0362e-04, 1.9286e-04, 4.0914e-05, 3.8426e-34,\n 6.9085e-05, -7.6557e-05, -1.0770e-04, 3.8685e-05, 5.6416e-05,\n 6.8888e-05, 5.6052e-45, 3.9085e-05, -6.9866e-05, -7.2529e-05,\n 5.6052e-45, 1.4369e-04, -8.0630e-05, 3.3343e-05, -5.3500e-05,\n 6.6456e-05, 9.9716e-05, 1.1840e-04, 3.1793e-05, 1.1513e-04,\n 4.8116e-05, 5.1183e-05, 1.0271e-04, -1.4210e-04, 1.0144e-04,\n 7.2953e-05, -2.2398e-04, -1.1546e-07, 9.9781e-05, 3.4274e-05,\n 5.6052e-45, 1.8358e-04, 1.9391e-05, -7.3410e-04, -1.2678e-04,\n 4.5389e-05, 4.7737e-05, 5.5905e-05, 6.2708e-05, 3.1626e-05,\n -5.9282e-05, -3.2450e-04, 2.9844e-05, 4.1126e-05, -1.7190e-04,\n -3.3179e-05, -4.1625e-05, -6.6495e-05, -4.7444e-05, 6.0293e-05,\n 5.9508e-05, 1.4603e-04, 2.1163e-05, 2.5824e-04, -7.6519e-05,\n -2.4071e-05, 3.2787e-05, -1.6573e-05, -8.7245e-05, -8.7377e-05,\n -1.9887e-04, 5.6052e-45, 3.1261e-05, -6.0945e-05, 2.6664e-04,\n 5.6052e-45, 1.7670e-05, -1.1690e-05, 7.0645e-05, -3.6721e-05,\n 2.4997e-05, -2.6209e-04, -5.4700e-05, -1.5361e-04, 6.1355e-05,\n 5.6052e-45, 1.0565e-04, -8.9283e-05, -2.1914e-04, 1.4611e-05,\n -1.3683e-04, 8.2620e-06, 1.8445e-04, -2.6210e-04, -7.0572e-05,\n 7.4014e-05, -3.1608e-04, 1.0134e-04, 5.6052e-45, 2.8914e-05,\n 8.4659e-05, 7.4354e-05, -2.1309e-05, -4.2984e-05, 4.8063e-04,\n -1.2969e-04, -6.9257e-05, 1.1480e-04, 2.9987e-05, -1.7517e-05,\n -4.6342e-04, -1.9304e-05, 1.3261e-05, 5.6052e-45, 8.3010e-06,\n -1.8000e-05, 3.4677e-05, 1.9226e-05, 6.3529e-05, -2.0970e-05,\n 1.1591e-04, -2.7765e-04, -5.6052e-45, 4.1092e-05, -8.4599e-05,\n 3.5604e-05, 4.7008e-05, 1.2554e-04, -1.0503e-04, 1.1108e-05,\n 9.1951e-05, -1.7860e-04, 6.0412e-05, 1.0916e-04, -1.1216e-06,\n -1.0128e-04, -8.4402e-11, 4.9091e-05, 5.6052e-45, -3.3904e-06,\n -1.4325e-05, 2.9898e-05, 1.5733e-04, 6.0061e-05, 5.6052e-45,\n 8.4418e-05, 1.0916e-04, -2.9743e-05, 7.3438e-05, -8.2105e-06,\n -3.1672e-05, -1.4579e-04, 3.0832e-05, 1.7042e-05, -5.2231e-05,\n 1.5488e-04, -6.4987e-05, -6.4436e-08, 5.6052e-45, -6.3597e-05,\n 3.6016e-05, -3.4403e-04, 1.5039e-04, -3.9174e-04, 1.4728e-04,\n -3.7639e-04, 2.3566e-05, 8.7664e-06, 4.7571e-05, -1.0533e-04,\n -2.2126e-05, 5.6052e-45, -2.0378e-05, 1.0171e-04, -7.1242e-05,\n 1.7051e-05, -2.4669e-06, 1.5573e-08, 7.8447e-05, 1.1643e-04,\n 2.4580e-04, -1.7523e-05, 9.8476e-07, 5.6052e-45, 5.6052e-45,\n 1.2230e-04, -4.3845e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 2.4881e-05, -1.7767e-04, -3.8357e-05, 9.2008e-05, -1.7059e-04,\n -5.6169e-05, -2.3600e-04, 7.0385e-05, -2.4619e-05, -5.6052e-45,\n -1.0185e-04, -6.7543e-05, 2.1006e-05, 5.6052e-45, -9.0692e-05,\n 6.5383e-05, -9.0332e-05, 2.4078e-05, 4.3493e-05, 4.5471e-05,\n 1.2336e-04, -4.6968e-06, -3.2414e-05, -9.3189e-05, -1.5567e-06,\n -3.5074e-05, -1.1660e-04, 9.1233e-05, 5.6052e-45, 6.5415e-05,\n -2.2935e-04, 3.0779e-05, 5.6052e-45, -6.7117e-05, 5.3318e-05,\n 7.1927e-05, 5.8392e-05, -1.1579e-05, 3.4361e-05, -4.6428e-06,\n -1.3955e-06, 5.6052e-45, -1.7393e-04, -2.5234e-04, 5.3000e-06,\n -9.8991e-06, 1.9618e-04, 7.6443e-05, 7.3790e-05, -1.5924e-04,\n 4.1227e-05, -8.3633e-06, -5.5199e-05, -6.4029e-06, 1.6722e-04,\n -1.3585e-04, 7.9882e-05, -8.2658e-05, 1.7762e-04, 1.1994e-04,\n 7.7944e-05, 1.0208e-06, -1.4618e-04, 3.1729e-05, -9.3161e-05,\n -9.3007e-06, -1.0199e-04, -3.0214e-04, -5.0228e-05, 1.7326e-05,\n 5.6052e-45, -1.0791e-04, 6.9524e-06, 6.8538e-05, 4.8785e-05,\n -5.9602e-05, 1.0645e-04, 9.5111e-05, 9.3567e-05, -9.1263e-05,\n 2.6586e-05, -1.1018e-04, -1.3177e-04, 5.5136e-05, -1.5823e-05,\n 1.6781e-06, -4.4973e-05, -3.1025e-05, 5.6052e-45, 1.4700e-04,\n 7.3040e-05, -6.5949e-05, -1.0560e-04, 1.5101e-04, 1.0070e-04,\n -1.7089e-04, 5.6052e-45, -4.0446e-05, 6.8111e-05, 3.2299e-05,\n -1.3718e-05, 1.0884e-04, 5.8561e-05, 1.4967e-04, -1.7478e-04,\n 5.5226e-05, 3.9817e-05, 5.6052e-45, -3.4492e-05, -1.7767e-04,\n 9.1644e-05, -2.8791e-05, -7.3023e-05, -1.3001e-05, 1.3151e-04,\n 5.6052e-45, -1.0105e-04, -1.2606e-04, 2.2201e-05, 7.5053e-05,\n -1.3393e-04, 2.2534e-04, -1.2907e-05, 1.5013e-04, 3.4749e-05,\n 4.0745e-05, -5.3109e-05, -4.3454e-07, -4.7733e-05, 3.9174e-05,\n 5.6052e-45, -1.8733e-05, 1.0224e-05, -2.5377e-05, -1.4553e-04,\n -7.4754e-05, 7.3736e-05, 5.6052e-45, 6.1199e-05, 5.6052e-45,\n 3.5704e-04, -1.8321e-05, 8.8511e-05, -7.7434e-05, 8.3819e-05,\n -4.7208e-05, 2.4364e-05, 4.1879e-06, -9.5273e-05, -8.7779e-05,\n 1.3414e-04, -5.6052e-45, -1.0957e-04, -3.2330e-05, 5.6052e-45,\n 1.9042e-04, 1.5884e-04, 1.2635e-05, -5.5440e-05, -6.6424e-05,\n -2.0537e-05, 5.6052e-45, -8.2701e-05, 8.2325e-05, 1.6928e-04,\n 9.8139e-05, 2.1447e-04, -4.1205e-05, 1.7929e-05, 3.8252e-05,\n 3.1297e-06, -6.9015e-05, -1.2699e-04, -6.4814e-05, -3.5941e-04,\n -2.7355e-05, 2.1245e-06, 1.4619e-04, 2.1627e-05, 1.1990e-04,\n 1.0346e-04, 4.6594e-05, -1.4110e-04, -1.2765e-05, -5.9671e-05,\n 5.6052e-45, -2.1466e-04, 5.6052e-45, 6.2670e-05, 5.4257e-05,\n 4.1252e-06, 2.0069e-07, 1.2483e-05, -4.7608e-05, -1.3693e-05,\n 2.1601e-05, 5.6052e-45, 1.5222e-04, -1.4990e-04, -6.2353e-05,\n -1.1900e-05, 4.3843e-05, 5.6052e-45, 5.6052e-45, 8.6596e-06,\n -2.0068e-05, 6.5634e-10, 1.6175e-04, -9.2979e-05, -4.5357e-06,\n -7.3964e-05, 5.6052e-45, 2.4775e-05, -2.5325e-05, -1.1181e-04,\n -5.8519e-05, -5.6319e-06, 1.3475e-04, -4.5591e-05, -3.5382e-05,\n -6.6902e-05, 4.7747e-05, -1.7127e-04, 8.5852e-05, 6.7748e-05,\n 2.0767e-04, 2.6949e-06, -2.6934e-06, 4.6227e-05, 2.4897e-05,\n 2.0819e-04, -6.8990e-06, 9.0135e-06, -1.8648e-04, 8.0874e-05,\n 2.2404e-05, -5.8739e-05, -2.7193e-05, -1.2047e-05, 3.8849e-05,\n 1.1425e-04, 5.6052e-45, -9.5623e-06, 7.1133e-05, -2.5090e-05,\n 1.3318e-04, -2.5409e-04, -4.0115e-06, 1.8578e-06, 5.6052e-45,\n 6.8821e-06, 1.0957e-04, 9.0231e-05, 5.6052e-45, -1.6217e-04,\n 5.6052e-45, 3.1255e-05, 6.1926e-05, 2.9526e-05, -3.6155e-04,\n 5.8803e-05, 8.3979e-05, -2.3119e-08, -3.2408e-05, 5.4701e-05,\n 1.2841e-04, 4.6234e-05, 1.5196e-04, -4.5652e-05, -2.7551e-05,\n -8.2365e-05, -8.3169e-05, 4.9639e-05, -8.8392e-06, 1.3806e-04,\n 7.9803e-05, 5.6052e-45, -1.4632e-04, 1.3225e-05, 5.6052e-45,\n -8.0408e-06, -1.9889e-05, 5.7814e-06, -1.0916e-04, -4.1572e-05,\n -3.9116e-05, 7.4170e-05, 1.6995e-04, 9.8269e-05, 2.3556e-04,\n -8.8282e-44, 6.2668e-05, 1.8452e-05, -2.2965e-04, 1.0121e-04,\n -2.0938e-05, 9.3765e-05, -2.8954e-05, -9.6139e-05, 1.5137e-05,\n 5.8620e-05, -5.0483e-05, 2.6956e-05, 8.5260e-05, 5.6052e-45,\n 8.6723e-05, -9.7520e-06, -2.1795e-05, 5.6677e-05, 7.3113e-05,\n -6.0831e-05, -1.3910e-04, 1.7094e-05, 1.2432e-05, -3.1050e-06,\n 8.4306e-05, 1.9496e-04, -2.4183e-15, -9.6555e-05, -4.5010e-32,\n -5.3339e-05, -8.8532e-05, 2.4145e-05, 1.2026e-04, 1.7821e-04,\n 1.1394e-05, -1.7625e-05, -2.6625e-04, -3.3426e-06, 2.0769e-05,\n -4.1537e-05, 9.1242e-05, -8.4406e-05, 2.3981e-05, 5.6052e-45,\n 5.6052e-45, 1.4304e-04, 6.1875e-05], device='cuda:0')",
"exp_avg_sq": "tensor([1.2831e-07, 7.8835e-08, 1.1975e-07, 1.1083e-07, 2.7089e-07, 9.7589e-11,\n 2.7010e-07, 1.1604e-07, 8.4197e-08, 2.4342e-07, 1.1561e-07, 4.1218e-08,\n 1.3980e-07, 4.8864e-08, 1.3590e-07, 1.5475e-07, 1.8933e-07, 9.7735e-08,\n 1.1497e-07, 1.0996e-07, 4.2841e-08, 4.6955e-08, 1.0673e-07, 1.6627e-07,\n 2.1611e-07, 2.4904e-07, 1.0364e-07, 2.2161e-13, 1.6223e-07, 1.5843e-07,\n 5.4412e-08, 1.8164e-07, 1.2861e-07, 1.9029e-07, 1.1467e-07, 1.1823e-07,\n 5.0594e-08, 1.2410e-07, 1.3814e-07, 1.4044e-07, 2.1348e-07, 1.3688e-07,\n 2.2986e-07, 2.4630e-07, 1.7245e-07, 9.6979e-08, 1.4059e-07, 1.5480e-07,\n 3.3408e-07, 2.3735e-07, 1.9008e-10, 3.3312e-07, 1.2550e-07, 1.3018e-07,\n 1.3802e-07, 7.5437e-08, 1.7158e-07, 9.5840e-08, 6.6703e-08, 1.6074e-07,\n 7.3181e-08, 1.2519e-07, 3.1346e-07, 1.1711e-07, 5.4631e-08, 1.4526e-07,\n 2.4939e-07, 1.3793e-07, 6.6989e-08, 6.1171e-08, 1.5723e-07, 8.6475e-11,\n 9.4480e-08, 4.1507e-07, 2.1404e-07, 1.8806e-07, 7.2521e-08, 3.2092e-07,\n 7.8551e-08, 1.5137e-07, 9.5632e-08, 1.6666e-07, 2.2959e-07, 1.6572e-12,\n 9.5166e-08, 1.7150e-07, 1.6618e-07, 2.5501e-07, 1.0166e-07, 3.4299e-07,\n 1.7882e-07, 6.9997e-08, 1.6664e-07, 1.7929e-07, 9.3558e-08, 1.9799e-07,\n 1.4364e-07, 8.3178e-08, 1.3263e-07, 1.9286e-07, 1.6292e-07, 1.6182e-07,\n 3.0819e-07, 9.4559e-08, 1.6943e-07, 1.4908e-07, 1.2463e-07, 1.2261e-07,\n 1.2437e-07, 7.8194e-08, 1.8852e-07, 1.6853e-07, 1.3706e-07, 6.4679e-08,\n 1.9734e-07, 1.7178e-07, 1.9068e-07, 1.1823e-07, 7.6092e-08, 4.5368e-11,\n 1.4799e-07, 2.0619e-07, 1.2484e-07, 1.1369e-10, 1.4194e-07, 1.8260e-07,\n 1.4154e-07, 9.1256e-08, 2.1497e-07, 9.4471e-08, 1.2992e-07, 1.0975e-07,\n 4.9661e-12, 2.4187e-07, 1.9821e-07, 1.4403e-07, 2.0179e-14, 1.4593e-07,\n 1.0896e-07, 9.3644e-08, 5.0069e-11, 2.7533e-11, 1.0684e-07, 8.2629e-08,\n 1.0667e-13, 1.4189e-10, 2.1111e-07, 1.7993e-07, 3.8122e-10, 9.2490e-08,\n 1.2195e-07, 1.7369e-07, 1.2866e-07, 1.7901e-07, 4.6329e-08, 1.7994e-07,\n 1.4216e-07, 1.5305e-07, 1.5406e-10, 8.8328e-08, 2.0359e-07, 1.8017e-07,\n 1.9046e-07, 2.9282e-07, 1.1386e-07, 1.5698e-07, 2.7590e-08, 2.0057e-16,\n 1.4570e-07, 1.5285e-07, 2.1889e-07, 2.1374e-07, 1.5585e-07, 1.7286e-07,\n 1.7271e-07, 1.4085e-07, 1.1321e-07, 1.9387e-07, 1.7832e-07, 2.6723e-07,\n 2.0759e-07, 1.6794e-07, 1.5630e-07, 1.2358e-07, 8.4960e-15, 1.1719e-19,\n 2.5460e-07, 1.7721e-07, 8.2262e-08, 9.1652e-08, 9.4354e-08, 1.0355e-07,\n 1.9285e-07, 2.3172e-07, 1.9008e-07, 9.8755e-08, 2.4754e-07, 1.8396e-07,\n 1.5351e-07, 1.2836e-21, 4.3904e-08, 9.5908e-08, 1.7584e-07, 1.6028e-07,\n 2.5037e-07, 1.3867e-07, 1.9192e-07, 1.1454e-07, 5.2882e-08, 9.6215e-13,\n 2.1426e-07, 1.3499e-12, 9.3998e-08, 2.7865e-07, 1.7437e-07, 1.2379e-07,\n 1.5386e-07, 1.0282e-07, 1.7268e-07, 1.4373e-07, 5.2500e-08, 2.4251e-07,\n 2.2427e-07, 1.0942e-07, 6.4549e-08, 1.9005e-10, 1.7753e-07, 2.9705e-07,\n 2.7250e-07, 2.9689e-08, 1.4526e-07, 1.2964e-07, 1.6821e-07, 3.4815e-07,\n 1.2026e-10, 2.1915e-07, 1.8790e-07, 2.2820e-14, 9.8907e-08, 1.4693e-07,\n 2.9290e-08, 6.2556e-11, 2.5916e-07, 1.5958e-07, 1.1549e-07, 1.6191e-07,\n 9.2879e-08, 1.6551e-07, 1.6365e-07, 7.9796e-11, 2.0054e-07, 3.0862e-07,\n 1.0217e-07, 2.0116e-07, 1.5177e-07, 2.4505e-07, 2.4397e-14, 8.5326e-08,\n 1.1957e-07, 3.1166e-07, 1.4366e-19, 1.3975e-07, 1.6399e-07, 1.0319e-07,\n 1.8390e-07, 9.3911e-08, 9.0408e-08, 1.0758e-07, 1.8804e-07, 2.3285e-07,\n 1.7552e-07, 2.0642e-07, 1.1103e-07, 1.5153e-07, 2.8263e-07, 1.4024e-07,\n 2.3541e-07, 3.2482e-13, 8.4294e-08, 2.6203e-07, 3.5467e-10, 2.9966e-07,\n 7.3008e-08, 2.4355e-07, 1.8710e-07, 1.2610e-07, 2.8211e-07, 1.9532e-07,\n 1.1553e-07, 1.8804e-07, 1.3760e-07, 1.2104e-07, 6.1363e-08, 9.1622e-08,\n 2.1649e-07, 8.9807e-08, 1.2477e-07, 1.9518e-07, 1.2794e-07, 5.9230e-08,\n 1.6406e-07, 1.4811e-07, 5.7825e-08, 1.0195e-07, 1.5871e-07, 3.5050e-08,\n 4.2095e-08, 8.2458e-08, 1.8160e-07, 1.7110e-07, 1.8040e-07, 2.0593e-14,\n 1.7658e-07, 1.3343e-07, 2.0231e-07, 4.0905e-14, 5.4089e-08, 3.0430e-07,\n 7.3112e-08, 1.1244e-07, 1.6697e-07, 4.4856e-08, 8.9548e-08, 1.8273e-07,\n 1.2095e-07, 1.4840e-10, 1.3726e-07, 1.5653e-07, 1.4608e-07, 2.0512e-07,\n 1.6992e-07, 6.4597e-08, 1.4089e-07, 1.2128e-07, 7.1303e-08, 1.8041e-07,\n 1.5103e-07, 1.3927e-07, 1.1146e-19, 7.2460e-09, 2.6282e-07, 1.6153e-07,\n 1.7683e-07, 7.9917e-08, 2.4417e-07, 1.6417e-07, 2.2003e-07, 5.5896e-08,\n 1.8729e-07, 1.8227e-07, 2.1909e-07, 1.5788e-07, 9.2848e-08, 3.0344e-11,\n 1.4666e-07, 3.5292e-07, 6.7597e-08, 8.0603e-08, 1.5194e-07, 1.0514e-07,\n 1.7822e-07, 2.0553e-07, 1.2584e-10, 1.8304e-07, 1.3752e-07, 5.8689e-08,\n 9.7881e-08, 2.1554e-07, 1.1277e-07, 7.1661e-08, 8.8973e-08, 5.8726e-08,\n 8.8394e-08, 1.2571e-07, 1.2477e-07, 1.0719e-07, 2.1908e-11, 7.9464e-08,\n 5.9748e-11, 9.1225e-08, 3.7134e-07, 1.6494e-07, 9.4660e-08, 1.6226e-07,\n 1.2486e-10, 2.5789e-07, 9.2184e-08, 2.2079e-07, 2.6215e-07, 9.3403e-08,\n 1.3190e-07, 2.6655e-07, 2.2141e-07, 4.3144e-08, 2.1729e-07, 2.3754e-07,\n 5.2850e-08, 5.2293e-11, 4.2943e-10, 5.9558e-08, 1.3728e-07, 2.5096e-07,\n 2.2236e-07, 2.9772e-07, 1.6785e-07, 2.5167e-07, 2.2849e-08, 1.9994e-07,\n 1.7409e-07, 2.2715e-07, 2.2258e-07, 7.7301e-16, 8.5321e-08, 1.7055e-07,\n 9.9993e-08, 1.6192e-07, 4.2466e-08, 2.3894e-07, 1.2571e-07, 1.2247e-07,\n 1.5328e-07, 2.2692e-07, 2.5414e-07, 5.2282e-14, 3.4294e-21, 1.2066e-07,\n 3.5301e-08, 2.8841e-11, 7.9289e-14, 1.6582e-13, 1.8588e-07, 1.6123e-07,\n 1.4180e-07, 1.5382e-07, 2.2172e-07, 2.4935e-08, 2.4100e-07, 1.8557e-07,\n 1.9297e-07, 4.3236e-13, 1.0109e-07, 1.3145e-07, 1.0630e-07, 5.2429e-13,\n 1.2806e-07, 2.9403e-07, 1.0930e-07, 1.2879e-07, 9.5214e-08, 7.3850e-08,\n 3.1836e-07, 8.4224e-08, 1.5709e-07, 1.9552e-07, 2.1044e-07, 1.6637e-07,\n 9.4918e-08, 3.0636e-07, 1.6249e-11, 1.3069e-07, 9.4134e-08, 4.3312e-08,\n 3.9587e-15, 1.4510e-07, 1.7645e-07, 2.3014e-07, 8.1495e-08, 1.0928e-07,\n 1.7427e-07, 9.9783e-08, 9.4182e-08, 1.6025e-12, 5.7380e-08, 2.4504e-07,\n 7.6571e-08, 2.6336e-08, 1.4169e-07, 1.2426e-07, 5.2072e-08, 2.2054e-07,\n 1.6611e-07, 1.7326e-07, 1.7315e-07, 1.8136e-07, 1.8374e-07, 1.7230e-07,\n 2.3466e-07, 2.1170e-07, 1.5341e-07, 1.4902e-07, 1.4816e-07, 1.0382e-07,\n 2.3125e-07, 6.8401e-08, 2.1317e-07, 6.1400e-08, 1.7588e-07, 1.9143e-07,\n 1.9782e-07, 5.7935e-08, 8.5786e-10, 3.7417e-07, 1.4126e-07, 1.3105e-07,\n 1.5596e-07, 8.6862e-08, 2.0110e-07, 1.7027e-07, 1.4499e-07, 2.0062e-07,\n 1.1743e-07, 2.2115e-07, 3.3995e-07, 2.4315e-07, 1.1944e-07, 1.4273e-07,\n 1.2757e-07, 8.3071e-08, 1.1882e-10, 1.6534e-07, 1.3527e-07, 9.0788e-08,\n 1.4204e-07, 1.3316e-07, 1.4192e-07, 2.1360e-07, 3.4048e-15, 2.7393e-07,\n 1.0938e-07, 1.1590e-07, 1.8752e-07, 1.8352e-07, 1.5113e-07, 1.8544e-07,\n 1.5928e-07, 8.8225e-08, 1.5824e-07, 3.9677e-10, 7.2909e-08, 1.7175e-07,\n 2.9230e-07, 1.9404e-07, 2.5022e-07, 3.6183e-07, 1.7020e-07, 1.4298e-12,\n 1.8184e-07, 1.9170e-07, 2.4780e-07, 1.1442e-07, 2.2179e-07, 2.0976e-07,\n 1.5138e-07, 1.4580e-07, 1.4132e-07, 1.8396e-07, 1.4203e-07, 1.7264e-07,\n 1.6327e-07, 1.3491e-07, 2.7022e-14, 1.9990e-07, 1.9329e-07, 9.3029e-08,\n 2.0107e-07, 1.6634e-07, 1.2807e-07, 5.6572e-13, 1.6130e-07, 2.7593e-11,\n 1.8473e-07, 9.4365e-08, 1.7907e-07, 1.1016e-07, 7.6176e-08, 1.1599e-07,\n 1.9837e-07, 6.5648e-08, 1.3194e-07, 1.1039e-07, 1.8114e-07, 7.1754e-14,\n 7.9211e-08, 3.5429e-08, 9.5254e-18, 2.5400e-07, 1.8547e-07, 1.3168e-07,\n 1.0945e-07, 8.1519e-08, 1.6609e-07, 1.1073e-14, 1.1400e-07, 1.4864e-07,\n 1.0909e-07, 1.1116e-07, 7.3128e-08, 1.2005e-07, 1.1564e-07, 7.4782e-08,\n 2.4570e-07, 1.0353e-07, 1.8088e-07, 1.6858e-07, 1.9503e-07, 2.0353e-07,\n 2.8291e-07, 6.0443e-08, 1.7968e-07, 1.5253e-07, 1.6400e-07, 9.1780e-08,\n 1.8151e-07, 3.2965e-08, 1.5789e-07, 5.0069e-14, 2.4271e-07, 5.8095e-11,\n 1.8866e-07, 2.9964e-07, 2.8999e-07, 2.3843e-07, 2.1427e-07, 2.6388e-07,\n 9.8598e-08, 2.0798e-07, 4.9677e-15, 1.9585e-07, 1.9750e-07, 6.1221e-08,\n 3.7398e-08, 3.8916e-08, 1.3042e-12, 4.0648e-14, 1.7173e-07, 1.8718e-07,\n 1.5488e-16, 1.3757e-07, 2.0822e-07, 2.4024e-07, 2.7724e-07, 1.6496e-14,\n 1.6027e-07, 8.3112e-08, 8.7305e-08, 1.1929e-07, 2.0394e-07, 1.7410e-07,\n 2.1293e-07, 7.2864e-08, 1.7607e-07, 2.0699e-07, 2.5212e-07, 1.9218e-07,\n 1.1574e-07, 1.4342e-07, 2.6811e-07, 6.7538e-08, 1.3887e-07, 8.7046e-08,\n 3.1437e-07, 8.2063e-08, 2.2938e-07, 1.2597e-07, 1.0912e-07, 2.8958e-07,\n 2.5934e-07, 5.7909e-08, 1.2146e-07, 8.0593e-08, 1.4381e-07, 1.4497e-15,\n 7.3044e-08, 1.5426e-07, 1.3671e-07, 1.7924e-07, 3.0557e-07, 1.3344e-07,\n 8.1144e-11, 1.5608e-13, 2.7785e-07, 1.4119e-07, 1.6066e-07, 3.6924e-10,\n 1.7762e-07, 6.8190e-11, 2.1135e-07, 1.1818e-07, 1.3039e-07, 2.2939e-07,\n 1.0824e-07, 1.1487e-07, 5.6391e-10, 6.2336e-08, 1.1870e-07, 1.0706e-07,\n 2.2749e-07, 1.4476e-07, 3.6668e-08, 8.5882e-08, 1.7310e-07, 1.9516e-07,\n 1.5165e-07, 2.1458e-07, 2.1055e-07, 1.3111e-07, 3.5269e-13, 4.4671e-07,\n 2.8273e-07, 2.7797e-15, 2.5016e-07, 1.0818e-07, 5.8329e-08, 1.7170e-07,\n 1.8592e-07, 5.4891e-08, 1.7747e-07, 1.6642e-07, 1.0317e-07, 2.1918e-07,\n 2.4807e-10, 3.3138e-07, 1.5726e-07, 2.2528e-07, 1.3155e-07, 7.6904e-08,\n 1.8493e-07, 2.7469e-07, 4.6596e-07, 2.2559e-07, 8.2020e-08, 1.5538e-07,\n 1.5293e-07, 2.4268e-07, 3.3305e-10, 7.1665e-08, 1.7651e-07, 1.5316e-07,\n 1.1554e-07, 1.0802e-07, 1.5200e-07, 1.7261e-07, 5.2591e-08, 1.1501e-07,\n 1.1969e-07, 1.6211e-07, 1.4236e-07, 2.2191e-10, 1.4357e-07, 6.6674e-10,\n 1.4838e-07, 1.0033e-07, 1.2919e-07, 1.3396e-07, 1.9060e-07, 2.6000e-07,\n 4.6359e-08, 1.0982e-07, 1.8557e-07, 1.8185e-07, 6.8053e-08, 1.1152e-07,\n 1.9576e-07, 2.1048e-08, 3.5304e-13, 5.3363e-20, 1.9559e-07, 1.2772e-07],\n device='cuda:0')"
},
"4": {
"step": "tensor(12520.)",
"exp_avg": "tensor([[ 3.7913e-06, 2.3616e-06, 2.6045e-06, ..., 5.6052e-45,\n 2.3472e-06, 5.2559e-06],\n [ 4.5047e-07, -3.1322e-06, -3.4756e-07, ..., -5.6052e-45,\n 1.6621e-05, 2.3197e-06],\n [ 1.2454e-05, -1.6514e-06, 2.9350e-06, ..., -5.6052e-45,\n -8.7886e-06, 8.9616e-06],\n ...,\n [-5.3137e-06, 1.4171e-06, -4.4351e-06, ..., -5.6052e-45,\n -1.2921e-05, -4.1211e-06],\n [ 4.1929e-07, 9.0702e-07, 2.7188e-06, ..., -5.6052e-45,\n 2.0623e-06, 1.0666e-05],\n [ 9.3573e-06, -3.4758e-06, 1.0540e-05, ..., -5.6052e-45,\n -8.1107e-06, 5.2951e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[6.1060e-10, 1.2806e-10, 2.2165e-10, ..., 1.4957e-22, 7.5602e-10,\n 2.4200e-10],\n [1.3156e-09, 2.0569e-10, 4.8627e-10, ..., 7.6442e-21, 1.5856e-09,\n 1.2756e-09],\n [2.0091e-09, 2.0868e-10, 1.5649e-09, ..., 3.3338e-21, 1.4527e-09,\n 1.1337e-09],\n ...,\n [1.8264e-09, 3.1459e-10, 5.5927e-10, ..., 2.9167e-21, 7.8048e-10,\n 7.1168e-10],\n [2.1710e-09, 2.3202e-10, 9.5616e-10, ..., 4.3469e-23, 1.3147e-09,\n 1.4398e-09],\n [1.9604e-09, 2.8769e-10, 1.5251e-09, ..., 2.7858e-21, 1.0443e-09,\n 1.1364e-09]], device='cuda:0')"
},
"5": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[ 1.1599e-07, 2.2857e-07, -3.4151e-11, ..., -3.1298e-08,\n 2.0240e-07, 2.0271e-08],\n [-5.3855e-07, 3.7336e-06, 2.1577e-09, ..., -2.6119e-06,\n 1.2509e-06, 3.5843e-09],\n [-6.0029e-07, -2.2290e-07, -3.1713e-09, ..., 5.7409e-07,\n 8.5152e-07, 4.8019e-07],\n ...,\n [ 2.3804e-06, -2.5157e-07, -5.1364e-09, ..., 1.6934e-08,\n -1.8594e-09, 3.6059e-06],\n [-4.2838e-10, -9.1303e-06, 1.7475e-25, ..., 1.0324e-06,\n -2.1559e-06, 1.9967e-08],\n [-7.0799e-06, 8.2117e-07, 7.1301e-08, ..., 2.0003e-06,\n -1.6733e-07, 6.6642e-06]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.2898e-12, 3.3457e-12, 1.9259e-13, ..., 2.5661e-11, 3.3699e-12,\n 1.1815e-13],\n [9.6188e-11, 7.7773e-11, 1.1753e-13, ..., 1.4002e-10, 4.5675e-11,\n 4.5412e-12],\n [3.7172e-12, 7.0033e-12, 1.5784e-14, ..., 2.1418e-12, 5.2960e-11,\n 2.5451e-12],\n ...,\n [1.9349e-10, 5.0861e-11, 4.3159e-15, ..., 7.7778e-12, 3.3042e-11,\n 8.4707e-10],\n [2.1370e-12, 3.5325e-11, 6.6160e-17, ..., 1.2003e-10, 9.3455e-11,\n 1.2041e-12],\n [3.2000e-11, 8.0542e-12, 4.6934e-12, ..., 1.0697e-10, 2.7268e-11,\n 2.2689e-10]], device='cuda:0')"
},
"6": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-1.5622e-06, 2.7535e-05, 1.4905e-05, ..., 1.4736e-05,\n -7.9414e-05, -1.2797e-05], device='cuda:0')",
"exp_avg_sq": "tensor([1.2895e-09, 5.8635e-09, 3.0205e-09, ..., 4.9408e-09, 4.8330e-09,\n 3.4006e-09], device='cuda:0')"
},
"7": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-4.0175e-07, 2.6153e-07, -4.4164e-07, ..., 1.5109e-08,\n -3.6510e-07, -2.9767e-08],\n [ 3.6301e-07, -1.6075e-07, -1.7987e-07, ..., -2.7110e-07,\n 1.6054e-07, 4.4112e-08],\n [ 4.3648e-07, 1.2500e-07, 4.6372e-07, ..., 3.8124e-07,\n 4.4902e-07, 1.5755e-07],\n ...,\n [-1.0879e-07, 4.6580e-07, 2.0011e-07, ..., -6.7723e-08,\n -4.2241e-07, -1.6531e-07],\n [-1.9719e-08, -5.7121e-07, 9.1336e-07, ..., -1.8679e-07,\n 3.8208e-06, -3.0580e-07],\n [ 8.3522e-08, 1.4345e-06, 2.5744e-06, ..., -8.0709e-08,\n -3.2101e-06, 3.6498e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.4865e-12, 2.8154e-12, 1.4179e-11, ..., 2.5227e-12, 2.0677e-12,\n 2.4300e-12],\n [2.4543e-12, 8.7770e-12, 1.0210e-11, ..., 3.8514e-12, 4.0809e-12,\n 3.1522e-12],\n [2.9186e-12, 6.6856e-12, 5.1441e-12, ..., 3.7565e-12, 2.6595e-12,\n 5.2788e-12],\n ...,\n [1.5313e-12, 5.9537e-12, 6.3315e-12, ..., 5.8116e-12, 2.9812e-12,\n 5.9506e-12],\n [2.2939e-12, 8.7969e-12, 2.5779e-11, ..., 5.3767e-12, 3.7568e-12,\n 5.0467e-12],\n [2.9056e-12, 4.8691e-12, 5.6937e-11, ..., 4.2881e-12, 5.2000e-12,\n 3.3737e-12]], device='cuda:0')"
},
"14": {
"step": "tensor(11268.)",
"exp_avg": "tensor([5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([3.3510e-10], device='cuda:0')"
},
"15": {
"step": "tensor(11268.)",
"exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([4.3230e-13, 2.4472e-11, 1.8399e-11], device='cuda:0')"
},
"16": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.6032e-08, 2.6823e-09, 2.9023e-09, 3.1003e-09], device='cuda:0')"
},
"18": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.7754e-14, 1.7326e-14, 0.0000e+00, ..., 1.1432e-13, 4.1116e-14,\n 1.3197e-15],\n [5.9214e-15, 1.5222e-14, 0.0000e+00, ..., 1.3994e-14, 4.9209e-14,\n 1.3868e-14],\n [1.7796e-15, 6.2805e-15, 0.0000e+00, ..., 2.0989e-15, 1.3607e-14,\n 4.4564e-15],\n ...,\n [2.2550e-16, 1.2128e-15, 0.0000e+00, ..., 1.7296e-15, 2.3952e-14,\n 1.3494e-16],\n [4.3435e-14, 4.5663e-14, 0.0000e+00, ..., 5.5269e-14, 2.3166e-13,\n 1.3902e-14],\n [1.3567e-16, 1.0544e-16, 0.0000e+00, ..., 2.0869e-16, 2.0700e-15,\n 4.9030e-16]], device='cuda:0')"
},
"19": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([3.9263e-11, 1.7902e-11, 4.0904e-12, 7.4258e-12, 2.7016e-12, 2.6180e-14,\n 4.6482e-13, 7.3727e-13, 1.8842e-12, 1.0666e-13, 5.9756e-12, 3.0389e-11,\n 1.2522e-11, 1.5169e-11, 2.2901e-12, 1.4282e-11, 3.4040e-11, 2.8216e-12,\n 1.8873e-11, 3.0916e-11, 3.6904e-13, 1.2989e-11, 2.8081e-12, 2.7765e-11,\n 5.6171e-12, 1.1368e-11, 5.0664e-12, 3.1516e-12, 8.5820e-13, 5.3166e-12,\n 1.5905e-11, 7.9710e-12, 2.8963e-13, 7.3115e-11, 3.7909e-11, 1.8431e-11,\n 1.9111e-12, 2.1076e-13, 1.4486e-11, 1.4118e-11, 6.9264e-11, 1.7133e-12,\n 3.0573e-13, 9.6821e-13, 2.4442e-12, 1.7758e-11, 7.6355e-12, 4.9773e-12,\n 4.8433e-12, 5.6777e-12, 2.3226e-13, 8.1755e-12, 2.0765e-11, 1.2668e-11,\n 2.6864e-12, 1.4009e-12, 9.5468e-12, 1.2161e-13, 3.6722e-12, 1.3203e-12,\n 4.1125e-12, 9.0858e-12, 5.8595e-12, 3.3960e-12, 3.3284e-13, 2.1675e-12,\n 7.8816e-12, 4.6363e-13, 9.9344e-12, 9.3506e-12, 8.9432e-14, 3.0187e-12,\n 7.9856e-12, 3.6302e-13, 3.2038e-12, 2.7449e-14, 2.1242e-12, 5.4243e-11,\n 1.0785e-12, 9.9677e-14, 1.6628e-11, 2.2256e-11, 3.6474e-15, 1.2411e-13,\n 3.9648e-12, 1.3771e-11, 4.1435e-13, 7.0479e-12, 9.0351e-13, 1.5671e-11,\n 2.4885e-12, 2.2729e-11, 2.8972e-11, 2.0501e-12, 2.8702e-12, 1.1426e-11,\n 7.5324e-13, 4.4918e-11, 1.4983e-11, 5.3667e-11, 1.5317e-11, 1.0613e-12,\n 2.1178e-13, 6.4812e-12, 5.3865e-13, 5.3503e-11, 1.3430e-12, 3.8052e-12,\n 8.5742e-12, 6.6151e-14, 6.9027e-11, 1.2787e-12, 2.0530e-11, 8.7188e-12,\n 2.9576e-12, 4.0686e-11, 1.0137e-12, 4.9319e-12, 1.3617e-11, 6.5153e-12,\n 7.1071e-12, 1.3151e-11, 1.3059e-12, 2.1952e-13, 8.5604e-14, 4.7821e-12,\n 1.1405e-10, 8.8927e-13, 7.4857e-12, 1.4846e-12, 1.9978e-13, 3.6675e-11,\n 3.0824e-12, 5.1624e-12, 2.0893e-11, 1.4919e-11, 1.2609e-11, 3.8658e-11,\n 1.6669e-11, 3.6672e-15, 1.5205e-13, 2.4671e-13, 1.3425e-11, 5.5091e-11,\n 3.0856e-11, 2.0272e-12, 2.0734e-12, 2.6766e-12, 1.6512e-11, 3.7393e-14,\n 8.5571e-13, 1.7190e-11, 5.9344e-12, 6.6091e-12, 1.4785e-11, 5.0866e-13,\n 4.6443e-12, 1.2202e-11, 2.3706e-12, 3.9524e-11, 1.8740e-11, 3.0752e-12,\n 7.2945e-12, 1.9805e-11, 1.2520e-12, 8.9577e-12, 8.8182e-11, 1.3398e-11,\n 9.2578e-12, 4.2689e-12, 6.1745e-13, 1.4729e-11, 6.0208e-11, 1.5144e-11,\n 1.9901e-11, 1.4497e-13, 2.0066e-12, 6.9286e-12, 7.8958e-12, 8.4651e-12,\n 1.0389e-11, 4.8507e-12, 9.7399e-13, 5.8485e-12, 1.1132e-11, 3.0891e-14,\n 2.9158e-12, 1.1975e-11, 3.0375e-12, 4.8272e-11, 3.6084e-11, 3.7422e-12,\n 2.4773e-12, 1.0730e-12, 7.1612e-15, 1.7292e-10, 2.0264e-12, 3.4890e-12,\n 3.8521e-12, 8.4457e-11, 8.4143e-16, 7.8870e-11, 1.4836e-11, 2.2572e-12,\n 1.4411e-11, 1.7707e-10, 1.6280e-11, 5.0807e-12, 7.3738e-14, 2.2854e-11,\n 1.8452e-12, 3.3353e-11, 5.1931e-12, 1.0581e-11, 8.0999e-14, 1.3474e-11,\n 1.0361e-10, 5.8654e-14, 1.1420e-12, 3.3494e-12, 2.4659e-12, 6.8929e-11,\n 8.6808e-12, 4.9563e-11, 6.8710e-12, 8.3435e-15, 1.7702e-13, 1.0141e-12,\n 3.7587e-13, 1.4471e-11, 1.2372e-13, 1.3224e-13, 2.5119e-11, 9.8235e-12,\n 7.6614e-12, 1.7070e-12, 2.0433e-12, 1.5466e-11, 1.9364e-12, 1.7015e-12,\n 1.9739e-11, 8.0333e-14, 4.3457e-12, 2.6647e-11, 4.6451e-12, 1.3307e-13,\n 2.4868e-13, 1.1182e-11, 1.2625e-12, 4.0219e-11, 2.0968e-11, 2.1068e-12,\n 1.8112e-11, 2.1385e-12, 6.0488e-11, 1.0696e-12], device='cuda:0')"
},
"20": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([9.6867e-14, 8.8582e-14, 5.9571e-15, 3.8874e-14, 4.5606e-15, 3.7613e-16,\n 8.2706e-16, 3.8406e-15, 2.2806e-15, 1.3397e-16, 1.1007e-14, 5.5119e-14,\n 3.1735e-14, 7.1388e-14, 7.3486e-15, 3.3640e-14, 1.1033e-13, 4.8938e-15,\n 6.1049e-14, 8.7960e-14, 1.8228e-16, 5.4636e-14, 6.1776e-15, 1.1331e-13,\n 2.2025e-14, 2.2063e-14, 1.2813e-14, 1.0472e-14, 3.1144e-15, 9.4101e-15,\n 4.8574e-14, 2.3649e-14, 1.8446e-15, 1.9684e-13, 1.4688e-13, 4.5965e-14,\n 3.7942e-15, 3.8399e-18, 3.0101e-14, 2.5521e-14, 2.9298e-13, 4.0870e-15,\n 1.3007e-15, 4.2477e-15, 4.8831e-15, 3.9947e-14, 1.6632e-14, 1.0500e-14,\n 7.4072e-15, 8.7724e-15, 1.8198e-16, 2.2338e-14, 4.3112e-14, 2.7616e-14,\n 3.6799e-15, 6.6063e-15, 3.0886e-14, 2.5444e-16, 1.2120e-14, 3.7396e-15,\n 7.4253e-15, 6.7780e-14, 1.6435e-14, 8.1567e-15, 5.0368e-18, 3.4627e-15,\n 1.7209e-14, 2.5172e-16, 1.8343e-14, 3.0274e-14, 2.1045e-15, 6.9360e-15,\n 8.8664e-15, 1.2188e-15, 5.8595e-15, 1.7740e-15, 1.2386e-14, 2.7541e-13,\n 9.4394e-16, 2.8168e-19, 6.9647e-14, 5.7702e-14, 5.3028e-16, 6.1310e-16,\n 7.7989e-15, 3.0768e-14, 1.6512e-15, 3.2994e-14, 2.6480e-15, 2.9221e-14,\n 7.5376e-15, 5.1937e-14, 1.0532e-13, 2.5993e-15, 7.9409e-15, 4.0271e-14,\n 1.6310e-15, 9.5674e-14, 5.8310e-14, 1.5090e-13, 4.5905e-14, 4.9408e-15,\n 2.5898e-15, 1.1487e-14, 2.2469e-16, 1.2573e-13, 1.4039e-15, 8.9709e-15,\n 1.7853e-14, 9.3489e-17, 1.7618e-13, 3.9861e-15, 5.8181e-14, 1.8307e-14,\n 5.3969e-15, 1.6422e-13, 3.9937e-15, 1.0516e-14, 4.1058e-14, 1.5805e-14,\n 1.3077e-14, 2.7389e-14, 1.0975e-15, 2.1182e-17, 8.7359e-16, 7.3591e-15,\n 5.1570e-13, 3.3724e-15, 2.3909e-14, 2.2643e-15, 7.3207e-17, 1.0938e-13,\n 7.5428e-15, 1.5359e-14, 4.0106e-14, 3.3692e-14, 3.5416e-14, 9.9776e-14,\n 4.5602e-14, 6.1868e-16, 3.8992e-15, 1.4137e-15, 2.9497e-14, 1.6193e-13,\n 1.9389e-13, 8.0249e-15, 8.3292e-15, 6.2655e-15, 3.4205e-14, 9.4836e-18,\n 8.5657e-15, 3.7694e-14, 1.9160e-14, 7.5240e-15, 3.4686e-14, 3.0816e-15,\n 6.8540e-15, 2.0789e-14, 3.8997e-15, 1.5632e-13, 3.6544e-14, 3.4802e-15,\n 1.6994e-14, 6.6021e-14, 3.6383e-15, 1.1641e-14, 3.0312e-13, 2.5411e-14,\n 2.8329e-14, 1.0879e-14, 3.1642e-16, 5.8747e-14, 1.4503e-13, 5.6519e-14,\n 3.7462e-14, 5.5766e-16, 3.1358e-15, 3.0992e-14, 2.4186e-14, 2.2108e-14,\n 2.6828e-14, 9.3872e-15, 1.1828e-15, 8.7811e-15, 2.7868e-14, 1.6659e-20,\n 4.4918e-15, 2.6640e-14, 2.6751e-15, 1.2087e-13, 9.4181e-14, 4.7135e-15,\n 7.3672e-15, 5.8709e-15, 6.7503e-16, 6.7030e-13, 2.5798e-15, 5.5868e-15,\n 1.1658e-14, 2.3212e-13, 1.9997e-16, 2.1191e-13, 2.6771e-14, 3.0833e-15,\n 4.7935e-14, 4.5128e-13, 3.5263e-14, 1.2821e-14, 2.4171e-17, 3.5713e-14,\n 2.5275e-15, 8.3138e-14, 3.5262e-14, 3.2291e-14, 4.8407e-17, 2.3399e-14,\n 4.9294e-13, 2.0110e-17, 4.2875e-15, 4.0122e-15, 3.1496e-15, 3.0320e-13,\n 4.8304e-14, 1.2061e-13, 2.0313e-14, 1.7752e-16, 8.6044e-18, 3.0069e-15,\n 3.7228e-16, 4.0689e-14, 1.0000e-15, 9.3130e-17, 1.4817e-13, 4.3378e-14,\n 2.3187e-14, 3.9763e-15, 2.6135e-15, 4.4984e-14, 5.3146e-15, 1.6989e-15,\n 4.8875e-14, 1.8714e-17, 1.3039e-14, 6.8632e-14, 7.0438e-15, 2.2812e-17,\n 5.8573e-17, 2.1390e-14, 1.9913e-15, 1.0854e-13, 4.2606e-14, 2.9612e-15,\n 4.0569e-14, 4.2322e-15, 1.9371e-13, 5.6323e-15], device='cuda:0')"
},
"21": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.6614e-13, 8.4806e-14, 1.1676e-14, 3.8164e-14, 6.0014e-15, 2.5494e-16,\n 1.1563e-15, 6.2805e-15, 3.8334e-15, 7.5124e-17, 1.8245e-14, 1.0298e-13,\n 3.6319e-14, 7.0773e-14, 1.2368e-14, 6.5240e-14, 1.4754e-13, 7.8009e-15,\n 5.4221e-14, 1.2802e-13, 2.0608e-16, 6.1174e-14, 1.4159e-14, 1.1953e-13,\n 2.9426e-14, 3.4074e-14, 2.3595e-14, 1.6556e-14, 6.4134e-15, 1.3718e-14,\n 7.1327e-14, 3.7111e-14, 2.8844e-15, 2.5164e-13, 1.6213e-13, 7.9180e-14,\n 2.7419e-15, 7.9699e-19, 6.3934e-14, 4.8558e-14, 2.2532e-13, 9.5070e-15,\n 3.1372e-15, 7.5036e-15, 8.3721e-15, 5.7086e-14, 3.5260e-14, 2.4576e-14,\n 1.2170e-14, 1.8318e-14, 2.9740e-16, 3.6146e-14, 6.7688e-14, 3.9705e-14,\n 6.6988e-15, 9.0625e-15, 4.4125e-14, 2.2942e-16, 2.0462e-14, 7.8585e-15,\n 9.8411e-15, 4.7735e-14, 2.6806e-14, 1.5535e-14, 8.2755e-17, 5.4171e-15,\n 3.7262e-14, 6.8019e-16, 4.4168e-14, 4.5315e-14, 2.3135e-15, 1.5323e-14,\n 2.7757e-14, 2.6303e-15, 7.6182e-15, 1.6258e-15, 1.3191e-14, 2.3970e-13,\n 1.4225e-15, 2.2605e-19, 8.0323e-14, 1.0022e-13, 5.8418e-16, 1.3264e-15,\n 2.0211e-14, 6.4128e-14, 2.9160e-15, 3.7315e-14, 2.9300e-15, 6.8963e-14,\n 1.3027e-14, 9.7827e-14, 9.3301e-14, 5.4370e-15, 1.2916e-14, 3.0860e-14,\n 2.1391e-15, 1.5338e-13, 7.0255e-14, 2.2477e-13, 6.7519e-14, 7.3750e-15,\n 2.9535e-15, 1.7667e-14, 3.3190e-16, 1.7659e-13, 2.3918e-15, 1.9913e-14,\n 2.4128e-14, 6.2038e-17, 2.3180e-13, 7.7812e-15, 9.2346e-14, 3.2123e-14,\n 7.1040e-15, 1.2852e-13, 5.4902e-15, 1.3299e-14, 6.2287e-14, 3.1498e-14,\n 2.0139e-14, 3.8792e-14, 2.0192e-15, 5.8413e-17, 1.6268e-15, 1.3718e-14,\n 4.5847e-13, 4.9907e-15, 3.7795e-14, 4.1805e-15, 8.4516e-17, 1.5883e-13,\n 1.5734e-14, 2.4457e-14, 6.5860e-14, 4.8689e-14, 5.5854e-14, 1.3037e-13,\n 7.4210e-14, 7.5454e-16, 2.9484e-15, 2.3123e-15, 4.2967e-14, 1.8847e-13,\n 1.4339e-13, 1.2015e-14, 1.1893e-14, 1.3989e-14, 5.4140e-14, 2.1173e-17,\n 7.8547e-15, 5.1718e-14, 3.0082e-14, 1.8958e-14, 4.1052e-14, 4.6853e-15,\n 1.1625e-14, 4.4238e-14, 6.2869e-15, 1.6752e-13, 6.1538e-14, 8.5564e-15,\n 1.7159e-14, 8.9069e-14, 6.7279e-15, 2.5584e-14, 3.1487e-13, 4.2708e-14,\n 4.2439e-14, 2.0854e-14, 8.6969e-16, 3.8355e-14, 2.0652e-13, 7.1779e-14,\n 5.9846e-14, 1.1393e-15, 4.6647e-15, 3.7040e-14, 1.8065e-14, 4.0471e-14,\n 2.7438e-14, 1.2120e-14, 1.9724e-15, 1.5978e-14, 4.6672e-14, 4.7891e-18,\n 8.9490e-15, 3.4172e-14, 8.1649e-15, 1.9816e-13, 1.2043e-13, 9.8808e-15,\n 1.3480e-14, 7.0822e-15, 8.8664e-16, 7.0952e-13, 4.0994e-15, 7.5332e-15,\n 1.8704e-14, 2.9761e-13, 5.0013e-16, 3.2837e-13, 4.7737e-14, 5.0112e-15,\n 6.7591e-14, 6.1222e-13, 4.8790e-14, 1.1942e-14, 6.1053e-17, 7.9454e-14,\n 3.0576e-15, 1.4384e-13, 2.9601e-14, 5.1117e-14, 5.7872e-17, 4.4943e-14,\n 3.4793e-13, 1.3557e-17, 7.2163e-15, 7.4746e-15, 4.5802e-15, 2.8905e-13,\n 4.5029e-14, 2.1087e-13, 3.5307e-14, 3.2684e-16, 5.8982e-18, 6.8323e-15,\n 9.4974e-16, 6.2199e-14, 2.0006e-15, 2.0115e-16, 1.1234e-13, 4.6439e-14,\n 3.5346e-14, 8.5473e-15, 5.2912e-15, 6.9384e-14, 1.1026e-14, 4.1249e-15,\n 8.9082e-14, 1.7685e-17, 2.1152e-14, 8.3378e-14, 1.3221e-14, 4.4099e-17,\n 5.4105e-17, 3.5619e-14, 1.9885e-15, 1.7317e-13, 6.6904e-14, 4.9079e-15,\n 5.4377e-14, 7.1806e-15, 2.0058e-13, 7.8484e-15], device='cuda:0')"
},
"22": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1739e-14, 4.4097e-14, 0.0000e+00, ..., 6.4209e-14, 1.0500e-13,\n 1.6065e-14],\n [5.3846e-15, 6.0095e-15, 0.0000e+00, ..., 9.9170e-15, 3.0590e-14,\n 1.9445e-17],\n [7.2292e-15, 3.9271e-15, 0.0000e+00, ..., 6.6041e-15, 1.4110e-14,\n 1.1364e-15],\n ...,\n [1.2348e-14, 1.5224e-14, 0.0000e+00, ..., 1.8804e-14, 3.5582e-14,\n 1.1319e-14],\n [3.7596e-14, 1.1937e-14, 0.0000e+00, ..., 1.1866e-14, 7.5295e-14,\n 7.1681e-15],\n [1.8402e-16, 2.5771e-16, 0.0000e+00, ..., 2.7789e-15, 4.4290e-15,\n 9.0399e-16]], device='cuda:0')"
},
"23": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([4.5859e-11, 3.5298e-12, 7.7962e-12, 5.8672e-12, 2.0007e-12, 1.0732e-12,\n 4.6161e-12, 2.7465e-15, 1.8407e-12, 3.1221e-13, 3.4660e-11, 6.8629e-12,\n 9.2335e-12, 5.9910e-12, 1.7532e-12, 4.3267e-12, 1.8132e-11, 1.1861e-12,\n 3.4328e-12, 3.6565e-11, 1.4433e-13, 1.3882e-11, 6.0429e-12, 3.4911e-12,\n 4.6718e-13, 1.4560e-11, 4.5994e-12, 7.3127e-13, 5.1133e-13, 4.0727e-12,\n 2.0117e-11, 4.1887e-12, 5.1384e-13, 1.4719e-10, 1.4539e-11, 1.0187e-11,\n 1.7446e-12, 1.2643e-16, 1.3594e-11, 6.8561e-11, 8.6226e-12, 5.2634e-12,\n 9.3043e-13, 5.3526e-12, 3.1255e-11, 2.3190e-11, 1.3114e-11, 7.6734e-12,\n 1.9333e-12, 3.3303e-11, 3.7344e-13, 6.0195e-11, 4.1723e-11, 9.5154e-12,\n 9.6556e-13, 1.7445e-13, 2.2890e-11, 7.2778e-13, 1.0452e-11, 2.1074e-12,\n 4.9184e-12, 5.6401e-12, 3.0007e-11, 1.5927e-11, 7.2672e-13, 1.8803e-12,\n 1.1223e-11, 2.0015e-12, 1.5715e-11, 3.7784e-13, 5.5506e-16, 6.4480e-12,\n 1.1859e-11, 2.0831e-13, 5.0294e-12, 6.1464e-13, 1.7430e-12, 3.3452e-11,\n 4.9180e-13, 2.1023e-13, 2.6876e-12, 2.3570e-11, 6.7220e-16, 1.7468e-12,\n 4.8944e-13, 1.7755e-11, 7.1450e-12, 4.0036e-12, 7.0637e-12, 1.0346e-10,\n 1.3860e-12, 2.3328e-11, 2.9751e-11, 4.4995e-12, 5.6453e-11, 4.2181e-12,\n 4.9947e-12, 3.1711e-11, 1.0657e-11, 1.2427e-11, 2.7605e-11, 1.5541e-12,\n 2.9936e-13, 1.1153e-11, 4.1331e-13, 5.9530e-11, 3.3535e-12, 6.7878e-12,\n 2.0734e-12, 9.5880e-15, 4.8682e-11, 3.4834e-12, 4.8349e-11, 1.4759e-11,\n 1.6148e-12, 2.6859e-12, 3.2001e-12, 2.7195e-12, 2.0520e-12, 4.9871e-12,\n 1.4928e-11, 8.7300e-12, 1.4832e-12, 1.1831e-15, 8.7792e-15, 5.3721e-12,\n 2.9889e-11, 1.9380e-11, 1.5682e-12, 2.6039e-11, 2.0431e-14, 1.2868e-10,\n 5.8543e-12, 1.0883e-11, 1.8670e-11, 5.0126e-11, 1.5903e-11, 5.0533e-11,\n 1.3784e-11, 6.2882e-15, 1.8031e-13, 9.4148e-13, 6.0515e-12, 5.4490e-11,\n 9.5011e-12, 2.5728e-12, 2.2768e-12, 8.8244e-13, 5.4165e-11, 1.1631e-12,\n 9.4988e-13, 4.5247e-12, 4.9440e-12, 1.9678e-11, 6.7082e-12, 8.9844e-13,\n 4.7362e-12, 5.0301e-11, 1.4221e-12, 6.9256e-11, 1.7602e-11, 2.5589e-11,\n 2.6713e-12, 2.3285e-11, 1.7811e-11, 1.4676e-11, 1.4140e-10, 1.9892e-11,\n 2.1877e-11, 2.8293e-12, 9.0249e-13, 3.8055e-12, 2.6226e-11, 8.0679e-12,\n 1.5047e-11, 1.6116e-13, 1.3256e-12, 8.8420e-13, 1.7950e-12, 6.3191e-12,\n 9.2826e-13, 5.1205e-12, 3.3793e-12, 4.5292e-12, 3.4943e-11, 1.1877e-13,\n 3.5394e-12, 1.0837e-11, 2.0812e-12, 5.6176e-11, 5.9231e-12, 8.4627e-12,\n 9.1395e-12, 7.2221e-13, 3.0725e-15, 1.6072e-10, 1.9107e-12, 1.9807e-12,\n 1.1824e-11, 3.0788e-11, 1.2483e-15, 8.7402e-12, 3.3121e-11, 3.9188e-13,\n 1.9907e-11, 6.6256e-11, 1.0441e-11, 2.1446e-12, 8.0105e-15, 9.0554e-11,\n 1.8615e-12, 3.1055e-11, 9.5425e-13, 4.2669e-12, 7.2201e-13, 8.6128e-12,\n 1.1386e-11, 3.9154e-14, 8.6644e-13, 1.1215e-12, 3.7456e-12, 1.4553e-11,\n 2.6961e-12, 5.1593e-11, 9.0939e-12, 3.7150e-13, 1.1480e-13, 5.5451e-13,\n 2.0609e-12, 4.7226e-12, 1.4330e-13, 5.0325e-13, 8.2279e-12, 6.6630e-12,\n 2.6766e-11, 8.3860e-12, 1.9602e-12, 1.4948e-11, 2.2359e-12, 3.9218e-12,\n 1.4336e-11, 2.9780e-14, 1.8749e-11, 2.3263e-11, 8.0784e-12, 1.8374e-14,\n 6.4159e-16, 8.0161e-11, 3.8701e-13, 2.9458e-11, 1.0106e-11, 1.7380e-12,\n 1.3270e-11, 1.3883e-11, 3.2225e-11, 9.0876e-13], device='cuda:0')"
},
"24": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.2854e-13, 1.0860e-14, 1.2315e-14, 2.6649e-14, 3.1170e-15, 4.2087e-15,\n 1.0821e-14, 3.6218e-17, 2.2144e-15, 2.1244e-16, 1.2360e-13, 1.6859e-14,\n 1.9205e-14, 1.4293e-14, 4.9111e-15, 1.1782e-14, 4.3670e-14, 2.4134e-15,\n 7.5322e-15, 8.1294e-14, 1.8358e-18, 4.7634e-14, 1.8503e-14, 9.6343e-15,\n 1.9690e-15, 2.6768e-14, 1.0265e-14, 2.5550e-15, 2.0060e-15, 1.0057e-14,\n 7.6999e-14, 7.7304e-15, 1.2707e-15, 5.8276e-13, 3.3482e-14, 2.1460e-14,\n 4.2414e-15, 3.7183e-17, 3.7433e-14, 1.5231e-13, 1.6718e-14, 1.7605e-14,\n 2.1244e-15, 4.4303e-14, 9.3291e-14, 4.5603e-14, 5.9680e-14, 1.9615e-14,\n 3.0849e-15, 1.2764e-13, 3.2825e-16, 1.8131e-13, 1.0383e-13, 1.6166e-14,\n 8.2482e-16, 5.8211e-16, 7.5136e-14, 4.7070e-16, 2.9158e-14, 5.2548e-15,\n 1.1036e-14, 1.6242e-14, 1.2934e-13, 4.3737e-14, 5.1413e-16, 2.6377e-15,\n 2.9932e-14, 2.2076e-15, 3.2373e-14, 1.8827e-15, 6.5784e-16, 1.3039e-14,\n 2.4182e-14, 1.2445e-15, 9.0177e-15, 4.2134e-15, 5.5755e-15, 8.6200e-14,\n 7.8833e-16, 2.2400e-16, 6.3897e-15, 6.2026e-14, 5.9717e-17, 1.3954e-14,\n 2.2586e-15, 4.3151e-14, 3.2110e-14, 9.1629e-15, 1.2801e-14, 3.6737e-13,\n 3.7987e-15, 5.5658e-14, 1.0410e-13, 6.6684e-15, 3.6365e-13, 8.0989e-15,\n 6.4447e-15, 6.3716e-14, 2.7720e-14, 3.1964e-14, 1.1694e-13, 1.1719e-14,\n 1.5013e-15, 2.8983e-14, 1.7292e-16, 1.1061e-13, 6.5554e-15, 1.7042e-14,\n 3.2502e-15, 4.1294e-16, 1.0594e-13, 1.3226e-14, 3.0235e-13, 3.0144e-14,\n 1.4017e-15, 4.7266e-15, 8.8722e-15, 4.1306e-15, 7.3992e-15, 1.6700e-14,\n 4.2870e-14, 2.2215e-14, 1.6544e-15, 5.2720e-18, 2.0681e-16, 9.0086e-15,\n 6.5767e-14, 1.1511e-13, 3.4997e-15, 8.7621e-14, 2.6036e-17, 4.1693e-13,\n 1.4609e-14, 2.5379e-14, 2.7695e-14, 1.8208e-13, 3.6096e-14, 1.4671e-13,\n 3.2830e-14, 4.6769e-17, 1.8511e-15, 3.4620e-15, 1.0268e-14, 1.7011e-13,\n 2.0039e-14, 8.7198e-15, 7.0728e-15, 2.5143e-15, 1.8878e-13, 1.6618e-15,\n 4.1060e-15, 7.5846e-15, 1.4410e-14, 6.7585e-14, 1.1089e-14, 4.3932e-15,\n 7.0255e-15, 1.4948e-13, 3.1911e-15, 2.1243e-13, 3.6980e-14, 9.8402e-14,\n 3.7507e-15, 7.8857e-14, 1.0831e-13, 3.2835e-14, 4.8477e-13, 3.7704e-14,\n 9.4665e-14, 1.0554e-14, 6.6949e-16, 5.4511e-15, 4.9425e-14, 2.4610e-14,\n 2.7759e-14, 5.3062e-16, 1.5243e-15, 2.1011e-15, 3.1993e-15, 1.0938e-14,\n 1.6861e-15, 1.1973e-14, 5.7216e-15, 7.7797e-15, 7.4844e-14, 7.6297e-19,\n 6.4106e-15, 2.6364e-14, 2.7464e-15, 1.5261e-13, 7.4028e-15, 2.1852e-14,\n 4.1924e-14, 2.6114e-15, 7.5946e-17, 6.3469e-13, 3.4962e-15, 3.0640e-15,\n 4.7055e-14, 4.4240e-14, 2.0859e-16, 1.4827e-14, 1.1315e-13, 4.6509e-16,\n 6.1012e-14, 1.2438e-13, 1.7398e-14, 3.3746e-15, 2.6090e-18, 2.8827e-13,\n 3.8872e-15, 6.9610e-14, 4.5288e-15, 9.3483e-15, 8.4023e-16, 1.6557e-14,\n 2.1331e-14, 1.5921e-17, 2.6621e-15, 1.3714e-15, 9.1188e-15, 4.1993e-14,\n 1.2298e-14, 1.6589e-13, 3.1582e-14, 1.4530e-15, 5.2239e-17, 5.8138e-15,\n 3.2562e-15, 1.3576e-14, 7.4440e-16, 8.1867e-16, 1.4101e-14, 2.6788e-14,\n 6.7488e-14, 2.7367e-14, 3.0716e-15, 3.2806e-14, 6.8812e-15, 5.8805e-15,\n 3.6413e-14, 1.2630e-17, 1.5852e-13, 5.6780e-14, 1.3227e-14, 1.2510e-17,\n 2.2784e-16, 3.7817e-13, 4.0881e-16, 9.1271e-14, 1.9284e-14, 2.6040e-15,\n 3.0391e-14, 2.8784e-14, 7.0270e-14, 3.4083e-15], device='cuda:0')"
},
"25": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.8657e-13, 1.6450e-14, 2.2196e-14, 3.0104e-14, 4.6704e-15, 6.6862e-15,\n 1.3537e-14, 4.2514e-17, 4.0055e-15, 3.7026e-16, 1.1057e-13, 2.4871e-14,\n 2.7783e-14, 2.7758e-14, 8.8854e-15, 2.1638e-14, 7.9055e-14, 3.3601e-15,\n 1.0988e-14, 1.5804e-13, 2.8826e-17, 6.3277e-14, 3.0131e-14, 1.6331e-14,\n 2.4791e-15, 4.7539e-14, 2.1815e-14, 4.4880e-15, 3.5907e-15, 1.2059e-14,\n 9.1154e-14, 1.8642e-14, 3.1821e-15, 5.1885e-13, 6.3600e-14, 4.4339e-14,\n 3.2992e-15, 2.8117e-17, 6.1023e-14, 2.4103e-13, 3.0196e-14, 2.5435e-14,\n 5.2963e-15, 2.9453e-14, 1.0220e-13, 8.1077e-14, 6.0271e-14, 3.7048e-14,\n 5.0085e-15, 1.0731e-13, 3.3235e-16, 2.4580e-13, 1.3554e-13, 3.0277e-14,\n 1.7414e-15, 1.4189e-15, 1.0209e-13, 1.3024e-15, 4.4905e-14, 1.1745e-14,\n 1.2673e-14, 2.6459e-14, 1.2894e-13, 6.9184e-14, 8.9229e-16, 5.0048e-15,\n 4.9891e-14, 4.6559e-15, 6.6793e-14, 2.3253e-15, 5.4358e-16, 3.0764e-14,\n 3.8134e-14, 1.3141e-15, 1.4390e-14, 4.7519e-15, 1.0240e-14, 1.3866e-13,\n 1.2930e-15, 1.9855e-16, 1.4231e-14, 1.0560e-13, 1.5940e-16, 1.1815e-14,\n 2.9626e-15, 7.5522e-14, 3.5880e-14, 1.8102e-14, 2.2005e-14, 4.1579e-13,\n 6.8461e-15, 1.0109e-13, 9.4744e-14, 1.3988e-14, 2.4193e-13, 1.3003e-14,\n 1.5775e-14, 1.1208e-13, 4.7499e-14, 5.8096e-14, 1.1882e-13, 1.0444e-14,\n 2.8456e-15, 3.2425e-14, 5.0105e-16, 2.0337e-13, 8.1982e-15, 3.2858e-14,\n 5.9488e-15, 7.3689e-16, 1.6715e-13, 1.7900e-14, 2.0633e-13, 5.0843e-14,\n 4.2654e-15, 8.7277e-15, 1.6491e-14, 8.2345e-15, 1.0020e-14, 2.5787e-14,\n 4.4145e-14, 2.6596e-14, 3.3294e-15, 3.4521e-18, 2.1885e-16, 1.6420e-14,\n 1.2742e-13, 8.8181e-14, 7.9302e-15, 7.9304e-14, 3.4555e-17, 5.2179e-13,\n 2.6532e-14, 4.6748e-14, 5.9527e-14, 1.6792e-13, 7.0830e-14, 1.7317e-13,\n 5.9343e-14, 1.0178e-16, 2.3479e-15, 6.3108e-15, 1.8836e-14, 1.8336e-13,\n 4.0316e-14, 1.3899e-14, 1.1452e-14, 4.6218e-15, 1.8361e-13, 1.8791e-15,\n 6.6780e-15, 1.4987e-14, 2.3939e-14, 5.8277e-14, 1.9339e-14, 6.3966e-15,\n 1.2735e-14, 1.7041e-13, 4.2343e-15, 2.8235e-13, 5.8503e-14, 7.7071e-14,\n 6.3270e-15, 1.0404e-13, 8.2673e-14, 4.6109e-14, 4.9356e-13, 6.7543e-14,\n 9.3054e-14, 1.3832e-14, 1.7268e-15, 1.0803e-14, 9.0837e-14, 3.7867e-14,\n 4.8886e-14, 1.2456e-15, 2.9454e-15, 4.2999e-15, 4.1415e-15, 2.8985e-14,\n 2.4732e-15, 1.2711e-14, 9.2592e-15, 1.4020e-14, 1.4601e-13, 1.2852e-17,\n 1.1496e-14, 3.2121e-14, 5.4655e-15, 2.3345e-13, 1.9893e-14, 2.4606e-14,\n 4.3401e-14, 4.1492e-15, 2.0005e-16, 6.4480e-13, 4.5575e-15, 5.2491e-15,\n 5.4972e-14, 1.0563e-13, 5.0468e-16, 3.8819e-14, 1.0420e-13, 8.7908e-16,\n 8.2428e-14, 2.3262e-13, 3.4346e-14, 5.5300e-15, 4.6276e-18, 3.2642e-13,\n 3.6231e-15, 1.3042e-13, 6.7414e-15, 2.0798e-14, 1.1351e-15, 2.8880e-14,\n 3.7977e-14, 1.5631e-16, 5.2759e-15, 2.5425e-15, 7.6225e-15, 6.3641e-14,\n 1.4393e-14, 2.1611e-13, 4.3016e-14, 2.9859e-15, 4.1440e-17, 5.1302e-15,\n 4.3234e-15, 2.2142e-14, 1.5911e-15, 8.8182e-16, 3.6999e-14, 3.3950e-14,\n 1.1500e-13, 3.9530e-14, 4.5540e-15, 6.3214e-14, 1.3009e-14, 1.1149e-14,\n 6.3583e-14, 2.7714e-17, 8.8254e-14, 7.7374e-14, 2.3637e-14, 9.4120e-17,\n 4.7122e-16, 2.7276e-13, 6.5713e-16, 1.1955e-13, 3.0876e-14, 4.0090e-15,\n 4.1039e-14, 4.1474e-14, 1.0480e-13, 6.1555e-15], device='cuda:0')"
},
"26": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[8.9964e-15, 1.2436e-14, 0.0000e+00, ..., 2.6322e-14, 4.2445e-14,\n 5.1848e-15],\n [8.3171e-16, 1.1563e-15, 0.0000e+00, ..., 3.2315e-17, 1.0417e-15,\n 3.3267e-17],\n [6.7224e-15, 5.2533e-15, 0.0000e+00, ..., 5.4073e-15, 5.8400e-15,\n 3.7985e-15],\n ...,\n [3.3475e-15, 2.7510e-15, 0.0000e+00, ..., 1.9406e-15, 1.8953e-14,\n 5.4700e-16],\n [1.9644e-14, 6.0793e-15, 0.0000e+00, ..., 5.2391e-15, 4.2938e-14,\n 9.5091e-15],\n [3.9726e-15, 6.9843e-16, 0.0000e+00, ..., 9.6362e-16, 8.2776e-15,\n 8.7026e-17]], device='cuda:0')"
},
"27": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.5247e-11, 6.8379e-13, 5.4273e-12, 7.0522e-12, 2.1475e-12, 5.4130e-13,\n 7.1233e-12, 1.7336e-13, 3.1968e-12, 4.8788e-13, 1.6207e-11, 5.6094e-11,\n 9.5496e-12, 1.1230e-11, 1.1717e-11, 4.8500e-11, 5.4356e-12, 5.3887e-12,\n 1.8189e-11, 2.1148e-11, 6.7816e-14, 2.2721e-12, 4.5827e-12, 5.6818e-12,\n 5.2623e-12, 9.7554e-12, 7.7554e-12, 1.5233e-12, 2.2259e-13, 2.5354e-12,\n 1.3473e-11, 8.9015e-12, 1.3958e-12, 6.8463e-11, 2.4798e-11, 1.5199e-11,\n 4.5214e-13, 8.5694e-15, 6.3756e-12, 2.0587e-10, 3.0712e-11, 4.3178e-12,\n 1.3535e-12, 8.4712e-13, 3.5508e-11, 6.2530e-11, 9.7401e-12, 2.1578e-11,\n 2.6881e-12, 1.7274e-11, 3.3312e-13, 3.6776e-11, 9.3766e-12, 8.4474e-12,\n 1.3425e-12, 1.1710e-13, 1.6014e-11, 5.9427e-13, 1.0643e-11, 9.3140e-13,\n 5.4694e-12, 1.0471e-11, 2.6396e-11, 1.4068e-11, 3.9677e-13, 9.8171e-13,\n 1.0554e-11, 6.8709e-13, 6.6926e-12, 1.2716e-12, 2.3215e-14, 6.4858e-12,\n 2.7412e-11, 2.0133e-12, 5.5499e-12, 3.7100e-14, 3.6041e-13, 2.2390e-11,\n 1.4449e-12, 4.7712e-16, 1.3253e-11, 1.5949e-11, 2.2839e-14, 1.3538e-12,\n 9.4492e-12, 4.0361e-11, 3.2444e-12, 1.3936e-11, 2.7606e-12, 2.3020e-11,\n 1.6302e-11, 1.0027e-11, 2.5487e-11, 4.6504e-12, 8.2121e-12, 1.2587e-12,\n 4.2814e-12, 2.9312e-11, 7.1121e-12, 5.9196e-11, 5.2439e-11, 1.2831e-12,\n 2.2806e-14, 7.0428e-12, 3.9581e-13, 4.6438e-11, 3.7218e-12, 3.3990e-12,\n 1.0128e-11, 6.3158e-14, 5.0203e-12, 3.2760e-14, 3.2719e-11, 3.2050e-11,\n 2.5265e-12, 2.8018e-11, 2.3924e-12, 4.5695e-12, 6.7701e-12, 6.9537e-12,\n 2.4234e-12, 5.5138e-13, 9.9429e-13, 1.3190e-15, 1.2920e-13, 6.2232e-12,\n 5.2887e-11, 8.7350e-12, 2.2088e-11, 1.0043e-11, 5.5464e-14, 6.3733e-11,\n 1.0275e-11, 9.4976e-12, 2.3194e-11, 1.3533e-11, 1.5082e-11, 2.0238e-11,\n 1.3747e-11, 5.2964e-14, 4.0117e-13, 7.3915e-13, 5.4424e-12, 6.9256e-11,\n 1.3232e-11, 8.0834e-13, 1.2479e-11, 1.5075e-12, 1.2741e-11, 3.1518e-13,\n 6.1401e-13, 2.1590e-11, 1.5751e-11, 1.9711e-11, 4.1855e-12, 7.0563e-13,\n 1.2551e-11, 3.7155e-11, 6.5183e-12, 8.2133e-12, 1.0147e-11, 1.1650e-11,\n 6.3249e-12, 8.4453e-12, 7.9071e-12, 7.4425e-12, 5.3425e-11, 4.1148e-11,\n 6.0065e-12, 2.2663e-11, 2.2563e-12, 1.6282e-12, 3.3625e-11, 3.1992e-12,\n 2.4014e-11, 1.7861e-12, 6.8724e-13, 2.1683e-12, 2.3188e-12, 2.5784e-12,\n 3.0392e-12, 2.6159e-12, 9.5288e-13, 6.7880e-12, 1.0445e-10, 1.5956e-14,\n 5.1784e-12, 4.3423e-12, 3.0655e-12, 3.1329e-11, 9.2098e-12, 1.1295e-12,\n 4.6577e-12, 6.4217e-12, 4.8775e-14, 5.6899e-11, 2.9939e-12, 4.5595e-13,\n 1.4220e-11, 6.3100e-11, 5.2840e-17, 6.1705e-12, 3.0627e-11, 6.8161e-12,\n 5.6916e-12, 4.4435e-11, 1.0990e-11, 2.3677e-12, 2.3600e-15, 5.3291e-11,\n 1.0825e-12, 5.3399e-11, 3.2808e-14, 1.1841e-11, 1.6995e-13, 1.4227e-11,\n 2.0026e-11, 3.4044e-16, 1.6929e-12, 2.3540e-12, 2.0038e-12, 6.5169e-11,\n 3.4452e-12, 1.3669e-11, 3.0776e-12, 1.2663e-13, 7.0499e-14, 2.5115e-13,\n 2.3026e-12, 4.5880e-12, 3.8464e-13, 5.6439e-14, 2.4006e-11, 1.7135e-12,\n 1.2596e-12, 8.7835e-12, 7.4715e-12, 1.0994e-11, 4.5467e-12, 3.5279e-12,\n 1.1922e-12, 2.7007e-15, 9.0518e-12, 5.8695e-12, 2.6259e-12, 8.3881e-14,\n 5.4195e-14, 4.4840e-11, 2.0835e-12, 5.1698e-11, 1.6434e-11, 1.7940e-12,\n 2.4676e-11, 6.9742e-12, 1.4569e-11, 2.7690e-12], device='cuda:0')"
},
"28": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.7281e-14, 3.3842e-15, 9.9367e-15, 2.3043e-14, 4.1104e-15, 2.5398e-15,\n 1.4664e-14, 9.7165e-16, 4.9708e-15, 6.0484e-16, 3.0398e-14, 1.4829e-13,\n 2.0846e-14, 2.8092e-14, 4.5059e-14, 2.0108e-13, 1.6401e-14, 1.3248e-14,\n 6.1324e-14, 4.1933e-14, 7.4162e-18, 6.0168e-15, 1.7056e-14, 1.1564e-14,\n 1.8764e-14, 2.3529e-14, 1.6144e-14, 4.8055e-15, 5.7192e-16, 4.4383e-15,\n 3.6471e-14, 2.1180e-14, 3.2850e-15, 1.7179e-13, 6.4869e-14, 4.5882e-14,\n 9.6862e-16, 3.6595e-18, 1.5468e-14, 1.2586e-12, 6.8915e-14, 1.3813e-14,\n 4.5200e-15, 2.5277e-15, 1.2020e-13, 2.7592e-13, 3.0071e-14, 9.8290e-14,\n 2.9193e-15, 3.5827e-14, 3.3192e-16, 9.2884e-14, 1.7997e-14, 1.8666e-14,\n 1.8999e-15, 4.4382e-16, 5.1319e-14, 5.0513e-16, 4.3481e-14, 3.2350e-15,\n 1.2669e-14, 7.5181e-14, 1.2585e-13, 4.9270e-14, 1.8564e-16, 1.1359e-15,\n 2.8576e-14, 6.0666e-16, 1.0957e-14, 4.1259e-15, 9.0982e-16, 1.9561e-14,\n 6.3739e-14, 4.7507e-15, 1.3305e-14, 3.8936e-16, 1.2954e-15, 6.4215e-14,\n 2.0492e-15, 3.6490e-17, 4.9248e-14, 3.4320e-14, 3.9863e-17, 6.5771e-15,\n 3.7444e-14, 1.1744e-13, 9.3946e-15, 5.9470e-14, 6.7560e-15, 5.2749e-14,\n 9.2383e-14, 2.5115e-14, 6.2969e-14, 6.8099e-15, 1.7763e-14, 1.2299e-15,\n 8.6431e-15, 5.2064e-14, 1.6738e-14, 1.6566e-13, 3.4333e-13, 5.8542e-15,\n 1.3095e-16, 1.6056e-14, 2.0979e-16, 1.0270e-13, 1.1348e-14, 9.2587e-15,\n 3.0305e-14, 2.6187e-19, 1.3196e-14, 6.6232e-16, 1.0011e-13, 6.3167e-14,\n 3.5290e-15, 7.3768e-14, 8.0041e-15, 8.7656e-15, 1.6982e-14, 1.8294e-14,\n 3.1793e-15, 2.2631e-15, 1.8406e-15, 7.8048e-18, 5.1456e-16, 1.1733e-14,\n 1.3469e-13, 2.7127e-14, 1.1240e-13, 2.2766e-14, 1.1967e-17, 1.6643e-13,\n 4.7684e-14, 1.8458e-14, 7.2304e-14, 2.4563e-14, 2.5810e-14, 3.4379e-14,\n 2.9623e-14, 2.9111e-18, 1.9999e-15, 4.5015e-15, 6.4739e-15, 1.9779e-13,\n 3.3972e-14, 2.3903e-15, 5.6602e-14, 3.9656e-15, 2.2534e-14, 6.3833e-17,\n 1.6805e-15, 5.6099e-14, 9.7122e-14, 7.9871e-14, 4.2153e-15, 6.0548e-15,\n 5.9443e-14, 9.9064e-14, 1.4330e-14, 1.7663e-14, 1.3818e-14, 1.9478e-14,\n 3.1019e-14, 1.5571e-14, 2.4598e-14, 1.7418e-14, 9.6770e-14, 1.3572e-13,\n 1.2358e-14, 5.3369e-14, 4.1593e-15, 2.3697e-15, 7.3788e-14, 6.2942e-15,\n 7.0227e-14, 1.0750e-14, 1.4899e-15, 5.0379e-15, 4.5225e-15, 9.0607e-15,\n 4.0943e-15, 5.7055e-15, 1.0743e-15, 1.2615e-14, 3.2120e-13, 1.0858e-16,\n 8.2249e-15, 7.8925e-15, 3.4143e-15, 6.8724e-14, 1.3606e-14, 1.8775e-15,\n 1.6367e-14, 2.9539e-14, 2.0413e-16, 1.2637e-13, 1.0124e-14, 7.0020e-16,\n 6.6009e-14, 1.5632e-13, 2.0867e-17, 1.4057e-14, 1.1170e-13, 2.2429e-14,\n 2.1048e-14, 8.8363e-14, 2.1390e-14, 6.1483e-15, 8.6269e-18, 1.1394e-13,\n 1.7061e-15, 1.4490e-13, 3.7800e-16, 3.1674e-14, 1.7786e-16, 3.2102e-14,\n 2.8220e-14, 1.5317e-16, 5.6751e-15, 4.1434e-15, 5.0669e-15, 2.9515e-13,\n 1.2494e-14, 3.5775e-14, 8.5487e-15, 1.3712e-15, 1.9482e-18, 1.4405e-15,\n 3.1706e-15, 1.0471e-14, 1.4812e-15, 1.0755e-18, 8.9453e-14, 5.7871e-15,\n 4.5769e-15, 3.1735e-14, 2.7153e-14, 2.2729e-14, 2.0491e-14, 7.9068e-15,\n 6.1535e-15, 5.5133e-17, 3.0382e-14, 9.8261e-15, 4.7894e-15, 8.4312e-17,\n 9.7652e-17, 1.4908e-13, 4.7709e-15, 1.6880e-13, 2.7977e-14, 3.0928e-15,\n 1.0042e-13, 1.2646e-14, 2.6281e-14, 1.7900e-14], device='cuda:0')"
},
"29": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([6.0847e-14, 3.3436e-15, 1.8174e-14, 3.0951e-14, 4.9777e-15, 3.4575e-15,\n 2.1957e-14, 1.5730e-15, 9.0649e-15, 9.9095e-16, 5.3492e-14, 2.0471e-13,\n 3.1888e-14, 4.5705e-14, 5.3298e-14, 2.0833e-13, 2.7999e-14, 1.8185e-14,\n 5.9803e-14, 9.1248e-14, 1.0282e-18, 1.0718e-14, 2.2205e-14, 2.6256e-14,\n 2.5624e-14, 3.2386e-14, 3.4628e-14, 8.1598e-15, 1.4103e-15, 7.9083e-15,\n 6.0913e-14, 3.8288e-14, 7.3504e-15, 2.4852e-13, 1.0725e-13, 6.8785e-14,\n 7.7714e-16, 1.6814e-17, 2.8362e-14, 7.4912e-13, 1.0717e-13, 2.1774e-14,\n 7.3486e-15, 4.9894e-15, 1.2352e-13, 2.1928e-13, 4.4757e-14, 9.7544e-14,\n 7.2911e-15, 6.0279e-14, 5.7950e-16, 1.5634e-13, 3.0852e-14, 2.8983e-14,\n 3.4585e-15, 1.1417e-15, 6.9715e-14, 1.5466e-15, 5.0136e-14, 5.2514e-15,\n 1.6009e-14, 5.1402e-14, 1.1438e-13, 6.2630e-14, 6.5428e-16, 2.8200e-15,\n 4.7681e-14, 1.4885e-15, 2.9484e-14, 6.2790e-15, 5.8753e-16, 3.1072e-14,\n 9.1566e-14, 1.0910e-14, 1.7046e-14, 3.2715e-16, 2.0935e-15, 9.7719e-14,\n 3.0267e-15, 4.2455e-17, 5.8674e-14, 7.1384e-14, 1.5306e-16, 7.6612e-15,\n 4.5091e-14, 1.7467e-13, 1.4729e-14, 6.0740e-14, 9.4947e-15, 1.0151e-13,\n 7.5237e-14, 4.4700e-14, 8.9299e-14, 1.5069e-14, 3.6023e-14, 3.6815e-15,\n 1.4014e-14, 1.0803e-13, 3.3918e-14, 2.3850e-13, 2.2641e-13, 7.7021e-15,\n 2.2492e-16, 2.1692e-14, 3.1347e-16, 1.6966e-13, 9.5463e-15, 1.6368e-14,\n 3.2297e-14, 6.3450e-18, 1.7882e-14, 1.8033e-16, 1.3655e-13, 1.1431e-13,\n 8.0626e-15, 9.7751e-14, 1.1638e-14, 1.4456e-14, 3.0759e-14, 2.9360e-14,\n 7.7058e-15, 1.8306e-15, 2.1208e-15, 2.0353e-17, 1.2141e-15, 1.9675e-14,\n 2.2439e-13, 4.0356e-14, 9.7228e-14, 3.2544e-14, 7.2775e-19, 2.5658e-13,\n 4.8711e-14, 4.2744e-14, 7.9006e-14, 4.6404e-14, 6.5672e-14, 7.4527e-14,\n 6.0308e-14, 6.2631e-18, 2.9806e-15, 5.2637e-15, 1.8728e-14, 2.4254e-13,\n 5.5706e-14, 4.5204e-15, 5.7017e-14, 7.9422e-15, 4.2947e-14, 5.2062e-16,\n 3.6794e-15, 7.2950e-14, 7.1550e-14, 6.5430e-14, 1.2776e-14, 5.1294e-15,\n 3.8419e-14, 1.3569e-13, 2.0153e-14, 3.7886e-14, 3.6004e-14, 3.9525e-14,\n 1.7129e-14, 3.6125e-14, 3.5594e-14, 2.4265e-14, 1.9629e-13, 1.4246e-13,\n 2.9048e-14, 1.0120e-13, 5.6229e-15, 5.5447e-15, 1.2014e-13, 1.4799e-14,\n 8.0496e-14, 1.0994e-14, 1.9813e-15, 1.0926e-14, 5.9589e-15, 1.3013e-14,\n 9.0296e-15, 7.6178e-15, 2.5445e-15, 2.1111e-14, 4.0969e-13, 2.0355e-16,\n 1.7311e-14, 1.3622e-14, 8.8594e-15, 1.2930e-13, 3.3751e-14, 3.4999e-15,\n 2.1956e-14, 3.2275e-14, 7.4865e-16, 2.4637e-13, 8.0979e-15, 1.2001e-15,\n 6.6234e-14, 2.3585e-13, 1.9153e-16, 2.8274e-14, 1.0566e-13, 1.8747e-14,\n 2.7902e-14, 1.7326e-13, 3.6953e-14, 7.2275e-15, 7.3518e-17, 1.9414e-13,\n 2.3672e-15, 2.1498e-13, 3.5647e-16, 5.2072e-14, 1.4092e-16, 5.2708e-14,\n 6.9201e-14, 1.8003e-16, 9.3974e-15, 7.5157e-15, 4.2214e-15, 2.7041e-13,\n 1.7121e-14, 6.3262e-14, 1.6086e-14, 1.8027e-15, 3.0352e-18, 2.3401e-15,\n 6.0554e-15, 2.0424e-14, 2.6446e-15, 3.2020e-17, 1.0588e-13, 8.5362e-15,\n 6.1324e-15, 4.0665e-14, 2.1752e-14, 4.9786e-14, 2.2764e-14, 1.0385e-14,\n 4.8449e-15, 1.2928e-16, 4.1602e-14, 2.0418e-14, 8.7709e-15, 1.0257e-16,\n 4.2603e-17, 1.5535e-13, 4.9534e-15, 2.1347e-13, 5.5387e-14, 4.8690e-15,\n 8.1136e-14, 2.2445e-14, 5.5484e-14, 1.5380e-14], device='cuda:0')"
},
"30": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.6028e-15, 1.8922e-14, 0.0000e+00, ..., 6.5091e-14, 3.7039e-14,\n 3.9596e-14],\n [8.9504e-15, 3.3431e-15, 0.0000e+00, ..., 2.7960e-15, 3.2386e-14,\n 1.4604e-15],\n [7.2898e-15, 1.2864e-14, 0.0000e+00, ..., 8.9481e-15, 6.0702e-14,\n 3.5889e-15],\n ...,\n [1.7624e-15, 4.7981e-15, 0.0000e+00, ..., 6.1026e-15, 9.0703e-15,\n 5.5995e-17],\n [4.0241e-14, 3.5772e-14, 0.0000e+00, ..., 5.2018e-14, 2.2189e-13,\n 4.5234e-14],\n [3.2486e-16, 4.3437e-16, 0.0000e+00, ..., 1.6754e-15, 3.9182e-15,\n 4.0773e-17]], device='cuda:0')"
},
"31": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([3.4719e-11, 1.0792e-11, 1.3723e-11, 4.2706e-13, 3.8803e-12, 4.1044e-13,\n 4.2098e-12, 3.6002e-13, 2.4599e-12, 1.5766e-12, 3.2389e-11, 3.1925e-11,\n 4.3816e-12, 3.1632e-12, 6.1892e-12, 8.3730e-13, 1.5719e-11, 4.6759e-12,\n 1.5701e-11, 4.3716e-11, 7.1908e-14, 1.2532e-11, 2.7613e-12, 2.5654e-11,\n 1.8922e-12, 4.1552e-12, 2.7475e-11, 4.4416e-13, 1.5600e-12, 4.7632e-12,\n 8.2629e-12, 2.7199e-11, 3.5945e-14, 4.8952e-11, 2.8165e-11, 3.3436e-11,\n 1.2682e-12, 4.5202e-14, 1.8169e-11, 1.1997e-10, 5.0420e-11, 1.4199e-12,\n 1.2904e-12, 1.3007e-12, 9.1687e-12, 2.0929e-11, 1.2756e-11, 5.8952e-12,\n 1.2840e-12, 1.7735e-11, 4.7534e-13, 4.4411e-11, 2.2259e-11, 1.4830e-11,\n 6.5648e-13, 1.3634e-12, 9.9812e-12, 1.1677e-12, 7.3999e-12, 2.3397e-12,\n 2.8590e-12, 7.3995e-12, 1.2180e-11, 2.9960e-11, 3.1387e-13, 7.4675e-12,\n 7.1923e-12, 1.7388e-12, 1.4327e-11, 2.0801e-12, 1.4588e-14, 5.7965e-12,\n 2.3789e-11, 2.3185e-12, 1.4197e-12, 1.8377e-14, 3.7318e-12, 4.1752e-11,\n 1.6336e-12, 1.1724e-14, 1.7012e-11, 5.4330e-11, 2.4282e-14, 3.9961e-13,\n 6.7475e-12, 4.0221e-11, 1.8160e-12, 8.2494e-12, 6.0349e-12, 1.5448e-12,\n 8.5460e-12, 3.0814e-11, 9.3865e-12, 1.0890e-11, 3.4924e-11, 8.7048e-12,\n 1.5435e-11, 5.2368e-11, 8.1629e-12, 6.3347e-11, 5.9010e-12, 1.2085e-12,\n 2.3544e-15, 6.2484e-12, 6.8590e-14, 7.5510e-11, 2.1614e-12, 8.5421e-12,\n 6.4952e-12, 4.5150e-14, 5.7254e-11, 2.5326e-12, 1.7785e-11, 2.6784e-11,\n 4.6797e-12, 3.7291e-11, 8.0381e-13, 2.9611e-12, 4.6327e-12, 2.4863e-11,\n 2.5928e-12, 1.9853e-12, 1.1053e-12, 1.8561e-14, 1.8374e-13, 1.3359e-11,\n 2.2355e-11, 3.2935e-12, 1.1078e-11, 2.2704e-11, 2.4789e-13, 5.0905e-11,\n 1.2432e-11, 1.8611e-12, 2.1549e-11, 8.1525e-12, 4.0793e-11, 7.9048e-12,\n 6.2293e-12, 4.4817e-15, 1.5885e-14, 1.5355e-13, 2.7937e-11, 1.0358e-10,\n 1.8137e-11, 3.5707e-12, 4.3817e-12, 6.9322e-12, 6.1217e-12, 5.2312e-13,\n 1.6016e-12, 1.4188e-11, 1.0289e-11, 2.5182e-12, 3.0819e-12, 1.8229e-12,\n 4.9023e-12, 5.5175e-11, 2.4414e-12, 2.6448e-12, 8.7403e-12, 2.7362e-12,\n 2.6054e-12, 7.8057e-12, 2.4774e-12, 1.1625e-11, 5.3307e-11, 3.8566e-12,\n 1.8206e-11, 3.3010e-12, 1.6733e-13, 6.0455e-12, 1.1173e-11, 1.7478e-12,\n 2.2615e-11, 3.8445e-12, 2.6695e-12, 2.5498e-12, 2.8422e-12, 7.6846e-12,\n 9.2791e-12, 2.7685e-12, 4.1421e-12, 6.8790e-12, 6.0781e-11, 3.5520e-14,\n 4.7656e-12, 1.7914e-11, 3.1811e-12, 5.8374e-11, 1.0141e-11, 5.9710e-12,\n 3.4330e-12, 1.1971e-12, 5.2521e-15, 1.6188e-10, 2.2618e-12, 4.8284e-13,\n 4.7448e-12, 2.2198e-11, 2.5477e-14, 6.5852e-11, 4.5392e-12, 5.6645e-12,\n 5.5651e-12, 1.5292e-11, 2.7762e-11, 5.6534e-13, 1.2930e-14, 1.0598e-11,\n 3.0614e-12, 7.7826e-12, 2.9004e-13, 1.9369e-11, 3.8356e-13, 4.2177e-11,\n 4.8938e-11, 1.9263e-13, 4.8194e-13, 4.9028e-12, 1.9970e-12, 2.7325e-11,\n 2.5411e-12, 3.3084e-11, 4.4623e-12, 6.1255e-16, 5.6744e-15, 4.3750e-13,\n 6.7903e-13, 7.1505e-12, 1.8821e-13, 6.3489e-13, 7.3872e-12, 4.2855e-12,\n 2.9245e-11, 2.2510e-13, 2.0553e-12, 1.3692e-11, 3.8252e-12, 4.4809e-12,\n 8.1256e-12, 3.3136e-14, 3.2174e-12, 1.1479e-11, 4.7170e-12, 1.1381e-13,\n 2.6524e-13, 2.1925e-11, 1.5196e-12, 4.9066e-12, 3.9460e-11, 6.0445e-13,\n 2.4610e-11, 2.1760e-12, 5.8590e-11, 1.4014e-12], device='cuda:0')"
},
"32": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([7.4833e-14, 3.4591e-14, 3.8331e-14, 2.2686e-15, 8.5952e-15, 1.2184e-15,\n 6.1402e-15, 2.9299e-15, 3.3565e-15, 3.7844e-15, 5.9629e-14, 5.2491e-14,\n 9.0689e-15, 8.0156e-15, 2.0436e-14, 6.0798e-15, 3.7511e-14, 8.1373e-15,\n 3.8418e-14, 1.0395e-13, 3.4598e-17, 4.3993e-14, 1.3161e-14, 5.4036e-14,\n 6.3513e-15, 4.8086e-15, 1.3120e-13, 1.3675e-15, 5.4254e-15, 9.1422e-15,\n 1.2502e-14, 1.9811e-13, 3.3802e-16, 8.3697e-14, 8.3762e-14, 1.2734e-13,\n 1.5882e-15, 5.1210e-17, 5.3006e-14, 3.0366e-13, 1.4912e-13, 2.3614e-15,\n 3.4383e-15, 5.9213e-15, 1.1886e-14, 3.7538e-14, 5.2619e-14, 1.4196e-14,\n 1.6389e-15, 4.1185e-14, 5.1914e-16, 1.3606e-13, 4.6260e-14, 3.2304e-14,\n 7.6817e-16, 9.7224e-15, 2.1504e-14, 1.2922e-15, 3.7463e-14, 8.0673e-15,\n 6.5750e-15, 3.0348e-14, 3.8669e-14, 1.3454e-13, 3.7840e-16, 2.1731e-14,\n 1.9597e-14, 2.7882e-15, 3.3278e-14, 4.5096e-15, 4.2255e-18, 2.1140e-14,\n 6.0811e-14, 7.6662e-15, 2.1463e-15, 6.9492e-17, 2.6528e-14, 1.2137e-13,\n 3.4017e-15, 6.2794e-19, 9.9310e-14, 2.9902e-13, 1.4746e-16, 2.2445e-15,\n 3.0362e-14, 1.1745e-13, 6.9536e-15, 3.2190e-14, 1.2854e-14, 8.1125e-15,\n 2.3257e-14, 7.3838e-14, 1.3592e-14, 2.8760e-14, 1.1938e-13, 1.7368e-14,\n 4.1283e-14, 1.1986e-13, 2.0807e-14, 1.8348e-13, 1.3047e-14, 7.3233e-15,\n 3.3594e-16, 1.1969e-14, 3.4816e-17, 2.3248e-13, 4.2945e-15, 4.3286e-14,\n 1.0439e-14, 1.6608e-20, 1.7535e-13, 7.4572e-15, 4.4817e-14, 4.6614e-14,\n 8.3324e-15, 1.1715e-13, 2.5156e-15, 4.8449e-15, 9.2287e-15, 1.0746e-13,\n 3.1597e-15, 2.8445e-15, 1.1728e-15, 8.6840e-16, 1.7054e-15, 4.4225e-14,\n 4.0029e-14, 8.2864e-15, 3.4095e-14, 8.5597e-14, 1.2918e-17, 9.5537e-14,\n 4.7969e-14, 5.6970e-15, 3.5717e-14, 1.1732e-14, 1.3922e-13, 1.7833e-14,\n 1.5778e-14, 6.9011e-19, 6.5274e-18, 7.5573e-16, 6.7137e-14, 4.2867e-13,\n 6.2858e-14, 1.3575e-14, 1.0753e-14, 4.1396e-14, 7.2456e-15, 5.2136e-16,\n 1.0500e-14, 2.8969e-14, 4.0715e-14, 4.1078e-15, 4.0637e-15, 1.4568e-14,\n 8.6754e-15, 1.4018e-13, 3.6885e-15, 1.1513e-14, 1.9370e-14, 4.0681e-15,\n 3.1531e-15, 2.6602e-14, 5.9418e-15, 2.5242e-14, 9.9902e-14, 7.3898e-15,\n 8.7661e-14, 1.0873e-14, 5.2717e-17, 1.1346e-14, 1.8154e-14, 5.3349e-15,\n 6.4190e-14, 2.6652e-14, 3.9324e-15, 8.2297e-15, 5.9376e-15, 1.6430e-14,\n 2.1019e-14, 4.2527e-15, 8.8365e-15, 1.1272e-14, 1.4112e-13, 2.2919e-18,\n 8.0325e-15, 6.5344e-14, 3.6390e-15, 1.2736e-13, 1.7069e-14, 1.4700e-14,\n 1.3283e-14, 3.7626e-15, 1.3549e-17, 6.6320e-13, 2.4298e-15, 3.5779e-16,\n 9.9010e-15, 4.2544e-14, 8.7788e-16, 2.8918e-13, 7.9642e-15, 1.5635e-14,\n 1.1098e-14, 3.4133e-14, 7.1615e-14, 9.9347e-16, 9.3283e-19, 1.8443e-14,\n 6.3142e-15, 1.7632e-14, 9.1535e-16, 6.7831e-14, 2.7697e-16, 1.2225e-13,\n 1.1531e-13, 2.3340e-17, 1.3415e-15, 1.0509e-14, 3.8912e-15, 6.6426e-14,\n 8.5898e-15, 7.0356e-14, 1.2228e-14, 5.5549e-17, 4.1190e-17, 1.6006e-15,\n 9.6471e-16, 2.1588e-14, 5.6355e-16, 3.6485e-16, 2.5565e-14, 1.4992e-14,\n 9.2185e-14, 1.5268e-15, 3.0369e-15, 2.9244e-14, 1.6344e-14, 8.2069e-15,\n 1.5961e-14, 1.8039e-17, 9.1347e-15, 3.2724e-14, 7.7134e-15, 8.9883e-18,\n 1.1399e-16, 3.5872e-14, 2.4012e-15, 1.6764e-14, 1.1604e-13, 5.6502e-16,\n 8.8243e-14, 4.1007e-15, 1.5625e-13, 4.4170e-15], device='cuda:0')"
},
"33": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.4353e-13, 5.0604e-14, 4.0212e-14, 2.6127e-15, 8.3924e-15, 3.2630e-15,\n 1.1555e-14, 3.7839e-15, 5.6599e-15, 2.6326e-15, 1.0575e-13, 1.1361e-13,\n 1.3641e-14, 1.5119e-14, 3.2148e-14, 3.4800e-15, 7.0444e-14, 1.1984e-14,\n 5.0493e-14, 1.8227e-13, 2.8518e-17, 5.8116e-14, 1.6246e-14, 1.0780e-13,\n 1.0526e-14, 1.3136e-14, 1.2332e-13, 2.6041e-15, 9.4469e-15, 1.2488e-14,\n 3.9232e-14, 1.2734e-13, 3.8015e-16, 1.6764e-13, 1.1943e-13, 1.4785e-13,\n 2.6692e-15, 9.1919e-17, 8.0975e-14, 4.2012e-13, 1.6880e-13, 8.3617e-15,\n 7.6502e-15, 8.1109e-15, 3.0919e-14, 6.9858e-14, 6.1588e-14, 2.8771e-14,\n 3.4059e-15, 6.0040e-14, 4.4701e-16, 1.8884e-13, 7.5654e-14, 4.4989e-14,\n 1.3060e-15, 1.0423e-14, 4.4319e-14, 2.8697e-15, 3.8222e-14, 1.2821e-14,\n 6.3638e-15, 3.7796e-14, 5.8054e-14, 1.3335e-13, 5.2203e-16, 1.9537e-14,\n 3.5766e-14, 3.7266e-15, 6.1958e-14, 1.0597e-14, 1.0571e-19, 2.9044e-14,\n 7.6751e-14, 1.3170e-14, 3.9400e-15, 3.1940e-16, 2.1622e-14, 1.7257e-13,\n 2.4916e-15, 1.7113e-17, 8.0572e-14, 2.3608e-13, 2.0135e-16, 3.6016e-15,\n 3.4258e-14, 1.6808e-13, 1.0385e-14, 4.0846e-14, 1.9664e-14, 6.8092e-15,\n 3.9316e-14, 1.3614e-13, 3.0555e-14, 3.4724e-14, 1.5235e-13, 2.5063e-14,\n 4.7186e-14, 1.7891e-13, 4.0776e-14, 2.6614e-13, 2.8453e-14, 7.9216e-15,\n 7.1194e-16, 1.7463e-14, 2.1098e-17, 2.5480e-13, 4.4303e-15, 4.2527e-14,\n 1.8998e-14, 2.2000e-17, 1.9274e-13, 1.3769e-14, 8.0118e-14, 9.2088e-14,\n 1.1635e-14, 1.2375e-13, 4.2968e-15, 8.3673e-15, 2.0920e-14, 1.1432e-13,\n 7.8812e-15, 5.6791e-15, 2.0822e-15, 4.5962e-16, 2.1193e-15, 3.9274e-14,\n 9.4061e-14, 1.6596e-14, 5.1296e-14, 6.8422e-14, 6.9723e-17, 2.0857e-13,\n 5.8397e-14, 9.7562e-15, 7.0740e-14, 2.6349e-14, 1.7257e-13, 2.7270e-14,\n 2.9279e-14, 2.8104e-17, 6.0851e-17, 1.3346e-15, 8.5739e-14, 3.4909e-13,\n 7.9683e-14, 1.8883e-14, 2.0884e-14, 3.7110e-14, 2.0331e-14, 5.5469e-16,\n 1.0804e-14, 4.5113e-14, 5.0053e-14, 7.6131e-15, 9.4500e-15, 1.3186e-14,\n 1.3776e-14, 1.8845e-13, 6.9650e-15, 1.1866e-14, 2.9203e-14, 8.0261e-15,\n 6.4061e-15, 3.5872e-14, 1.2737e-14, 3.6418e-14, 1.8771e-13, 1.2753e-14,\n 8.6499e-14, 1.5941e-14, 2.9096e-16, 1.7240e-14, 3.9413e-14, 9.5440e-15,\n 7.3192e-14, 2.3495e-14, 6.7182e-15, 1.4533e-14, 5.9758e-15, 3.4915e-14,\n 2.5422e-14, 7.2883e-15, 9.9275e-15, 1.9765e-14, 2.5671e-13, 1.8965e-17,\n 1.4995e-14, 5.1633e-14, 8.7483e-15, 2.3672e-13, 3.4187e-14, 1.7333e-14,\n 1.7627e-14, 7.6629e-15, 1.2857e-17, 6.5393e-13, 5.3199e-15, 8.8767e-16,\n 2.3515e-14, 7.9614e-14, 1.5418e-15, 2.8350e-13, 1.6090e-14, 1.4290e-14,\n 2.6447e-14, 5.7677e-14, 8.5336e-14, 1.6158e-15, 4.4219e-17, 3.7993e-14,\n 6.6344e-15, 3.2600e-14, 2.1983e-15, 8.5271e-14, 3.1094e-16, 1.3918e-13,\n 1.6565e-13, 2.8558e-17, 2.7345e-15, 1.2254e-14, 3.0577e-15, 1.1917e-13,\n 1.4302e-14, 1.4171e-13, 2.1391e-14, 1.4722e-16, 3.3725e-17, 3.3793e-15,\n 1.6136e-15, 3.4601e-14, 1.2516e-15, 7.4273e-16, 3.3547e-14, 2.2136e-14,\n 1.2701e-13, 1.1537e-15, 4.9456e-15, 6.1441e-14, 2.0919e-14, 1.1863e-14,\n 3.6515e-14, 1.8836e-17, 1.7182e-14, 4.0525e-14, 1.4038e-14, 4.0817e-19,\n 1.5847e-16, 7.1199e-14, 3.3886e-15, 2.2562e-14, 1.3264e-13, 1.2295e-15,\n 7.3504e-14, 6.7546e-15, 2.0368e-13, 8.6268e-15], device='cuda:0')"
},
"34": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[5.1158e-16, 1.8473e-15, 1.1269e-15, ..., 2.5506e-16, 1.3965e-15,\n 8.3917e-16],\n [3.6412e-17, 5.4473e-18, 5.9371e-17, ..., 1.5844e-16, 1.8419e-16,\n 6.5646e-18],\n [1.7748e-16, 7.8058e-16, 2.4613e-16, ..., 1.2594e-16, 7.5843e-16,\n 1.4512e-16],\n ...,\n [4.4412e-15, 1.0535e-14, 1.0917e-14, ..., 1.9399e-15, 1.6380e-14,\n 2.3706e-14],\n [2.9656e-14, 4.6907e-14, 7.2397e-14, ..., 1.6077e-14, 7.0837e-14,\n 1.3006e-13],\n [4.0689e-13, 9.0251e-13, 1.0785e-12, ..., 2.2959e-13, 1.3584e-12,\n 2.1312e-12]], device='cuda:0')"
},
"35": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.0314e-14, 3.3148e-16, 2.9592e-15, 1.9474e-16, 1.3039e-15, 1.9277e-15,\n 2.8465e-16, 2.4624e-16, 1.1996e-17, 2.0097e-15, 1.6301e-15, 2.8193e-15,\n 7.0024e-17, 5.9280e-17, 7.2126e-16, 4.7305e-16, 1.5605e-16, 6.1762e-15,\n 2.1077e-15, 2.6996e-17, 1.0628e-16, 8.6245e-16, 1.9914e-15, 7.6815e-17,\n 3.7204e-15, 4.3633e-16, 6.1780e-16, 6.2993e-17, 1.4358e-16, 2.9370e-16,\n 2.9313e-16, 4.8755e-15, 7.0656e-16, 3.7821e-17, 1.4267e-18, 1.4576e-15,\n 5.1870e-16, 2.0897e-16, 6.2485e-18, 2.7861e-19, 8.4653e-16, 1.0593e-16,\n 5.1726e-16, 2.4424e-16, 4.1781e-17, 4.7565e-16, 6.0228e-16, 6.1107e-16,\n 3.2265e-16, 1.3710e-15, 5.3205e-16, 1.5133e-16, 1.9746e-16, 1.1628e-16,\n 4.3620e-18, 5.9307e-18, 1.3574e-16, 1.0895e-17, 4.3675e-16, 1.8574e-17,\n 1.5425e-15, 5.1444e-16, 1.2140e-18, 9.3588e-16, 1.8100e-15, 6.1647e-16,\n 9.2429e-16, 1.8302e-16, 2.3439e-17, 1.4448e-17, 9.4702e-16, 1.0004e-16,\n 7.9599e-18, 3.9108e-16, 2.2787e-16, 6.1424e-16, 4.5178e-16, 1.1700e-16,\n 1.9377e-21, 4.2429e-16, 3.2312e-16, 2.8441e-15, 4.3521e-17, 4.0209e-16,\n 5.4699e-17, 2.7599e-16, 1.6088e-16, 4.4518e-19, 2.7374e-16, 3.5529e-17,\n 1.2708e-18, 2.0618e-16, 1.8437e-16, 5.2543e-15, 1.7312e-15, 1.2039e-15,\n 1.3116e-16, 6.1889e-15, 4.2750e-16, 7.9281e-16, 7.3686e-17, 8.4764e-17,\n 4.1748e-17, 4.5342e-16, 5.5114e-18, 1.5812e-15, 2.5966e-15, 2.8862e-16,\n 3.2772e-16, 4.9268e-17, 4.9838e-16, 5.6595e-15, 4.2812e-17, 6.4838e-15,\n 8.9936e-16, 4.0066e-15, 3.9317e-22, 3.6752e-15, 2.6772e-15, 1.3250e-15,\n 5.0707e-15, 2.2685e-17, 2.6473e-15, 9.9822e-18, 1.4448e-15, 2.4065e-15,\n 2.1809e-15, 1.5796e-15, 2.9158e-15, 4.4125e-15, 5.0745e-16, 9.8972e-16,\n 2.9806e-17, 1.7008e-15, 4.8863e-16, 4.4693e-16, 1.0334e-15, 1.3992e-15,\n 1.1757e-18, 2.7299e-16, 3.4322e-15, 1.2631e-17, 7.2189e-16, 2.6635e-16,\n 1.6673e-15, 6.9357e-15, 1.3791e-15, 1.7712e-17, 4.3742e-15, 6.6333e-17,\n 1.4108e-16, 1.1461e-15, 4.7587e-15, 3.9113e-16, 1.6805e-15, 2.5211e-15,\n 1.8565e-17, 4.6817e-18, 1.7161e-16, 8.4369e-16, 8.0491e-16, 2.2323e-16,\n 3.3605e-15, 1.7382e-15, 1.9356e-16, 1.8677e-18, 3.7297e-17, 5.2840e-18,\n 1.0551e-16, 4.7513e-16, 9.6075e-17, 5.8284e-17, 4.0236e-16, 4.8742e-16,\n 4.4367e-17, 1.2448e-17, 4.6642e-16, 1.0769e-16, 6.6878e-17, 9.5316e-18,\n 7.7713e-16, 2.2605e-18, 1.6808e-16, 7.2560e-16, 2.0657e-16, 9.6323e-17,\n 7.1981e-16, 2.4165e-16, 2.4248e-15, 7.9167e-17, 6.7617e-16, 5.5733e-17,\n 6.1244e-16, 6.0815e-15, 8.5835e-16, 1.8291e-15, 9.3560e-16, 2.7348e-15,\n 3.1280e-19, 3.3211e-17, 1.5908e-16, 2.3619e-16, 4.0719e-16, 3.3544e-15,\n 8.4212e-17, 2.5214e-16, 1.8855e-17, 2.8399e-16, 3.2712e-17, 7.5963e-16,\n 1.2999e-19, 5.1081e-16, 3.4512e-17, 3.7624e-16, 3.4617e-15, 1.7549e-15,\n 2.5780e-15, 6.9720e-16, 8.8226e-16, 2.7447e-15, 6.9701e-16, 1.3310e-17,\n 3.5289e-16, 3.1390e-15, 2.0079e-17, 4.1441e-16, 2.5094e-15, 8.9219e-16,\n 7.8705e-16, 7.6145e-16, 6.5903e-16, 1.9622e-17, 2.8943e-16, 1.5289e-16,\n 7.0928e-16, 1.9467e-16, 4.9021e-18, 4.4970e-17, 1.8160e-16, 1.9125e-17,\n 1.7592e-15, 4.8522e-16, 7.4282e-17, 5.8803e-16, 1.8011e-17, 6.7251e-16,\n 1.8063e-15, 2.3177e-16, 7.6796e-16, 2.3940e-16, 1.8263e-16, 1.8782e-15,\n 7.2947e-16, 2.0783e-15, 1.9442e-15, 4.2143e-16, 6.6860e-32, 2.9556e-33,\n 1.2058e-30, 8.4780e-32, 1.3500e-30, 5.3059e-32, 3.4804e-32, 7.3728e-31,\n 4.8315e-31, 7.0326e-31, 2.1231e-32, 2.6297e-32, 7.3749e-31, 6.6280e-31,\n 3.6539e-32, 8.0516e-32, 2.9661e-31, 1.8897e-34, 1.8751e-30, 2.9679e-31,\n 9.7664e-32, 6.6711e-31, 2.9669e-31, 3.0458e-31, 2.2270e-31, 7.1131e-32,\n 1.7476e-31, 2.0001e-32, 1.5112e-30, 3.1060e-31, 2.6981e-31, 2.4031e-31,\n 1.3007e-30, 3.9401e-31, 4.2503e-32, 1.5061e-31, 2.3934e-31, 3.4151e-33,\n 3.2874e-32, 1.0312e-32, 7.0995e-34, 3.1730e-32, 1.5712e-31, 2.2556e-33,\n 2.5770e-31, 7.5760e-32, 4.0034e-31, 2.0685e-33, 3.2743e-31, 2.8207e-33,\n 8.6317e-32, 9.8595e-36, 1.2122e-31, 5.7705e-31, 1.6744e-31, 3.2136e-32,\n 1.2536e-30, 1.6128e-31, 2.3295e-30, 1.8744e-31, 5.3851e-33, 2.9954e-32,\n 2.7193e-31, 1.6123e-31, 4.1539e-32, 2.1649e-31, 7.9232e-31, 1.0382e-30,\n 5.5290e-31, 2.2720e-32, 1.9344e-30, 1.4101e-30, 1.4819e-30, 2.4733e-30,\n 5.1169e-30, 1.6090e-31, 3.1887e-30, 6.7814e-32, 2.3486e-31, 4.6835e-32,\n 6.9021e-31, 2.7339e-30, 1.6335e-30, 1.0643e-30, 2.8318e-31, 5.1218e-31,\n 2.4925e-30, 5.6058e-30, 3.1394e-31, 8.5984e-31, 4.9495e-31, 2.5918e-30,\n 2.7195e-31, 1.5891e-32, 1.8033e-31, 1.7217e-31, 1.6011e-30, 5.4435e-33,\n 3.2645e-31, 2.3607e-33, 1.8561e-31, 8.7641e-31, 1.2885e-30, 1.1043e-31,\n 2.9130e-32, 1.2725e-31, 2.2018e-31, 1.2627e-32, 7.7560e-33, 3.6255e-32,\n 4.1911e-32, 7.1666e-33, 1.6588e-31, 2.0733e-31, 2.9327e-31, 2.6599e-31,\n 1.8916e-31, 2.4980e-33, 8.4374e-35, 4.8609e-33, 3.4428e-33, 3.5764e-31,\n 4.9587e-31, 3.6294e-31, 1.7137e-31, 4.6496e-31, 2.3046e-33, 7.7919e-31,\n 8.2957e-33, 9.5284e-32, 3.8812e-32, 1.2368e-30, 4.9435e-31, 7.7294e-31,\n 6.4305e-31, 1.3620e-32, 1.2115e-32, 1.4301e-31, 2.8631e-31, 8.8615e-31,\n 9.3671e-32, 4.4439e-32, 2.5498e-32, 3.8622e-33, 1.4537e-30, 1.3673e-31,\n 4.1095e-31, 3.0963e-32, 4.8686e-32, 3.1490e-30, 5.3531e-31, 6.0867e-33,\n 4.3445e-31, 5.1571e-31, 1.4968e-30, 8.8979e-31, 1.1397e-30, 5.4522e-31,\n 8.2280e-33, 6.4161e-30, 1.4537e-30, 1.7828e-32, 1.7584e-30, 4.4670e-31,\n 3.8878e-31, 2.2894e-32, 2.6394e-31, 1.2581e-31, 1.3106e-31, 5.1510e-31,\n 4.1196e-31, 4.9024e-35, 3.8223e-31, 3.8634e-32, 5.9482e-33, 9.0908e-32,\n 9.3085e-31, 6.5872e-31, 3.4059e-33, 5.1883e-31, 1.5061e-31, 1.0817e-31,\n 1.8127e-30, 1.8156e-30, 1.9217e-31, 9.8759e-33, 1.8552e-30, 1.7189e-30,\n 8.0153e-31, 1.0195e-31, 1.0403e-31, 4.1315e-31, 4.8450e-31, 5.4260e-31,\n 2.6700e-30, 1.8794e-31, 5.8873e-32, 9.2952e-31, 1.7942e-31, 8.9672e-31,\n 1.4152e-31, 1.5340e-31, 1.4548e-31, 6.9298e-31, 1.3521e-30, 9.7505e-31,\n 6.4951e-33, 2.4628e-32, 3.1648e-31, 1.3076e-34, 9.5980e-32, 6.0221e-31,\n 6.4276e-32, 1.8651e-31, 2.7517e-31, 5.4922e-31, 1.7160e-31, 2.7302e-31,\n 5.6508e-31, 3.0924e-31, 2.6703e-32, 6.1914e-32, 1.9724e-32, 3.5845e-32,\n 2.4363e-31, 1.6156e-31, 4.5570e-31, 1.5651e-30, 9.8681e-31, 6.7719e-31,\n 4.8466e-31, 5.7883e-31, 2.0906e-32, 1.1991e-30, 3.6289e-32, 5.4685e-32,\n 2.5796e-31, 1.4958e-31, 2.7254e-31, 5.5055e-32, 4.2238e-32, 8.4782e-36,\n 2.5585e-32, 2.1172e-32, 3.9383e-31, 7.0014e-31, 5.4466e-32, 4.7057e-31,\n 2.0704e-31, 6.1879e-31, 5.1131e-33, 1.7376e-31, 4.9524e-32, 7.3310e-31,\n 1.1369e-31, 6.5556e-32, 1.3605e-12, 2.1297e-11, 3.7488e-12, 3.5751e-12,\n 2.8287e-11, 2.5133e-11, 3.4284e-13, 1.8274e-13, 4.5813e-13, 4.5853e-12,\n 6.0733e-15, 1.2224e-12, 2.2190e-11, 7.4642e-13, 1.2282e-12, 5.6237e-12,\n 8.2560e-12, 1.6959e-12, 1.5116e-11, 5.7033e-12, 6.3257e-15, 1.7765e-14,\n 3.0679e-12, 2.0211e-11, 1.7027e-11, 5.9319e-14, 7.6696e-12, 4.1250e-13,\n 2.1580e-12, 4.3781e-13, 9.5440e-12, 7.9694e-12, 3.0889e-11, 3.3157e-13,\n 6.3194e-12, 9.0715e-12, 5.3514e-12, 5.4162e-12, 4.7768e-12, 1.8880e-13,\n 2.9253e-11, 9.5797e-14, 4.7323e-13, 8.3320e-13, 1.4167e-11, 1.2115e-12,\n 2.6492e-12, 1.1055e-12, 2.1264e-13, 2.0248e-12, 4.0587e-12, 7.2498e-13,\n 1.1838e-11, 1.1173e-11, 1.2041e-12, 8.4383e-13, 1.3057e-12, 9.4636e-14,\n 7.6454e-12, 2.9003e-12, 6.1819e-12, 6.1182e-12, 6.4417e-13, 1.6681e-13,\n 3.4439e-16, 1.6790e-14, 5.1860e-14, 1.5510e-13, 2.2368e-12, 1.5940e-13,\n 1.9918e-12, 5.1944e-12, 7.4757e-12, 1.1916e-12, 2.7004e-12, 3.4097e-14,\n 1.5229e-12, 4.5095e-11, 2.3661e-13, 2.2733e-12, 3.0521e-12, 2.0873e-11,\n 7.6607e-15, 5.9203e-12, 1.7058e-12, 8.2389e-12, 5.2601e-12, 6.1081e-12,\n 4.9910e-16, 1.9483e-11, 1.3097e-11, 1.1923e-14, 5.8656e-13, 1.4109e-11,\n 6.5532e-13, 2.7371e-12, 3.8685e-14, 9.7216e-12, 1.8718e-12, 9.0977e-12,\n 4.7419e-12, 1.6444e-11, 2.0710e-13, 3.9760e-12, 8.1519e-12, 2.9706e-11,\n 1.3136e-11, 2.3185e-14, 4.9807e-13, 4.9876e-12, 3.1175e-12, 5.7570e-13,\n 8.7833e-13, 1.0383e-12, 3.0794e-12, 4.5799e-13, 3.8802e-12, 7.7007e-14,\n 1.1918e-11, 1.5063e-13, 1.1677e-11, 1.3378e-11, 4.1219e-12, 1.2147e-13,\n 1.4087e-12, 1.6491e-13, 3.8565e-12, 9.1402e-12, 9.1266e-13, 2.3676e-13,\n 9.5032e-12, 4.7154e-13, 5.5501e-12, 3.1273e-12, 1.6111e-12, 1.0973e-14,\n 2.2388e-13, 2.3713e-11, 3.0558e-13, 1.6451e-11, 2.8406e-15, 8.1430e-12,\n 3.0854e-12, 3.0821e-12, 9.0966e-13, 1.1083e-11, 4.1693e-12, 1.6192e-12,\n 6.7212e-12, 2.0977e-14, 1.5276e-11, 9.6218e-13, 1.1846e-13, 1.5884e-12,\n 3.1406e-12, 1.2318e-11, 2.3737e-12, 5.6421e-12, 8.7515e-14, 1.4191e-11,\n 4.1491e-12, 4.2403e-12, 3.3275e-12, 2.6024e-11, 2.3701e-14, 7.3628e-13,\n 2.2582e-11, 1.7625e-12, 2.0821e-12, 1.0832e-13, 3.6080e-12, 9.3345e-12,\n 8.1110e-12, 1.2335e-11, 2.9931e-13, 3.6713e-12, 2.1424e-12, 1.9171e-12,\n 1.9594e-12, 1.2343e-12, 3.1232e-12, 7.8353e-12, 6.0178e-13, 1.3361e-11,\n 1.4921e-11, 3.6257e-12, 3.5609e-12, 3.3892e-12, 2.4513e-14, 7.2524e-13,\n 7.3862e-13, 1.5929e-12, 7.2061e-12, 9.1273e-13, 3.9714e-13, 5.4934e-12,\n 1.5159e-15, 1.3355e-11, 1.1293e-13, 1.4633e-11, 6.2254e-12, 1.7499e-11,\n 4.5170e-13, 2.9932e-12, 1.1754e-11, 4.4227e-12, 4.2940e-13, 1.1345e-11,\n 8.6434e-13, 5.4989e-12, 2.7934e-14, 8.4723e-13, 2.4746e-11, 5.3986e-12,\n 2.4836e-11, 2.7945e-12, 3.9900e-12, 9.8829e-15, 1.3801e-12, 3.4802e-12,\n 3.2002e-14, 2.3427e-13, 3.0064e-12, 9.6260e-13, 1.3831e-12, 7.8780e-13,\n 8.9259e-12, 8.9410e-13, 2.2579e-13, 7.4531e-14, 2.4679e-14, 4.2981e-14,\n 5.3855e-13, 7.3816e-12, 5.1498e-13, 1.0530e-12, 1.3161e-13, 1.2567e-11,\n 6.1626e-12, 1.1950e-11, 2.7190e-11, 5.6315e-13, 1.4717e-11, 6.0062e-12,\n 1.3337e-12, 4.9955e-11, 1.0780e-12, 5.1407e-12, 1.7236e-12, 1.2368e-12,\n 6.4342e-13, 5.5270e-12, 2.9774e-12, 1.4381e-13, 7.4478e-13, 1.2798e-11],\n device='cuda:0')"
},
"36": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[4.5570e-13, 2.5447e-16, 2.5447e-12, ..., 6.5901e-15, 6.6343e-14,\n 1.8339e-13],\n [6.1769e-13, 6.4375e-17, 3.6094e-12, ..., 1.0330e-14, 9.8033e-14,\n 2.5890e-13],\n [3.4880e-13, 4.4368e-17, 2.0400e-12, ..., 5.3900e-15, 5.8059e-14,\n 1.3191e-13],\n ...,\n [2.6393e-13, 3.7672e-17, 1.6007e-12, ..., 2.4757e-15, 3.9936e-14,\n 1.0617e-13],\n [3.4674e-14, 9.2735e-17, 1.9099e-13, ..., 6.6429e-16, 3.7543e-15,\n 1.4394e-14],\n [1.8346e-13, 2.2107e-16, 1.0765e-12, ..., 3.1331e-15, 2.5072e-14,\n 7.3672e-14]], device='cuda:0')"
},
"37": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([1.6486e-11, 2.2935e-11, 1.2265e-11, 6.5120e-11, 1.9763e-13, 1.8888e-12,\n 3.4589e-11, 6.4949e-11, 5.2533e-11, 2.6160e-11, 1.1339e-12, 5.4622e-11,\n 2.6985e-12, 1.3457e-13, 7.1031e-12, 6.7866e-14, 2.9668e-11, 5.6184e-11,\n 1.5713e-11, 3.3568e-12, 6.1177e-12, 4.5027e-12, 2.1434e-11, 2.7477e-13,\n 7.1422e-13, 6.0605e-12, 6.2491e-13, 9.1365e-12, 4.3298e-12, 1.3593e-11,\n 2.9724e-14, 7.2944e-12, 7.3810e-12, 2.5707e-12, 3.3096e-13, 1.4471e-11,\n 4.9551e-13, 8.3210e-11, 1.3954e-11, 9.9308e-13, 3.1348e-11, 2.0420e-12,\n 6.0001e-12, 5.4256e-13, 1.0948e-11, 1.8777e-11, 2.3578e-13, 2.4529e-14,\n 1.5403e-11, 1.3995e-11, 1.9992e-11, 8.3323e-13, 8.6888e-12, 3.2567e-11,\n 4.1604e-13, 3.6390e-12, 3.2918e-11, 1.3060e-13, 1.5106e-12, 2.7120e-12,\n 1.1384e-11, 8.9107e-12, 1.8331e-12, 1.7418e-11, 1.0950e-13, 2.7455e-11,\n 3.7394e-12, 7.1491e-12, 8.0502e-12, 2.9477e-12, 1.3848e-12, 2.7131e-11,\n 3.3242e-11, 6.9644e-11, 4.4943e-13, 5.4375e-14, 2.7425e-12, 2.3416e-11,\n 3.5482e-13, 6.7341e-14, 5.5043e-12, 3.4902e-13, 1.3955e-11, 2.3972e-12,\n 2.6378e-12, 1.3635e-12, 2.6643e-12, 3.8306e-12, 3.9489e-12, 4.6070e-11,\n 4.0148e-14, 2.7389e-11, 1.1358e-11, 3.0518e-11, 7.4374e-11, 4.7404e-11,\n 4.8139e-12, 6.4900e-11, 2.3330e-12, 8.2026e-13, 2.8887e-11, 9.9306e-15,\n 1.2593e-11, 7.6079e-12, 7.1720e-13, 2.0128e-11, 1.2097e-14, 1.1854e-10,\n 2.4559e-11, 2.5478e-11, 9.0040e-14, 7.3697e-13, 2.2822e-12, 2.2767e-11,\n 7.1768e-12, 1.9132e-11, 4.0457e-14, 2.3647e-11, 2.9735e-11, 1.1921e-11,\n 3.9755e-11, 1.5705e-11, 1.2275e-11, 1.7592e-12, 8.3169e-11, 4.3614e-11,\n 1.1085e-11, 4.5811e-12, 9.4973e-12, 3.3803e-14, 9.8900e-11, 9.6118e-12,\n 1.2486e-13, 3.6872e-14, 6.4239e-11, 2.4780e-11, 3.7312e-12, 1.6396e-12,\n 1.5162e-13, 4.9645e-14, 4.0954e-11, 8.5182e-14, 6.1449e-11, 1.1957e-12,\n 1.3872e-12, 2.9407e-12, 2.4470e-11, 2.0524e-11, 5.9608e-12, 7.1781e-14,\n 1.1818e-12, 3.7711e-13, 4.4751e-14, 4.0461e-11, 3.5801e-12, 5.3081e-11,\n 1.2736e-12, 2.9756e-11, 4.0260e-12, 4.0324e-11, 1.0541e-11, 1.7158e-12,\n 1.8102e-11, 1.5748e-11, 2.1849e-11, 4.8154e-11, 1.0117e-10, 4.7801e-14,\n 1.8983e-13, 9.2062e-12, 5.3236e-12, 8.1050e-14, 3.2647e-11, 4.2022e-12,\n 4.7199e-11, 5.8859e-14, 2.2057e-11, 2.4613e-12, 1.0619e-11, 1.0657e-11,\n 1.1362e-10, 1.1313e-11, 7.3053e-12, 1.0861e-14, 2.1285e-12, 3.2992e-11,\n 1.3476e-13, 1.2053e-11, 3.0708e-11, 2.7361e-12, 3.1374e-11, 2.7281e-12,\n 7.9092e-12, 7.2395e-12, 4.7837e-11, 2.5599e-11, 3.0120e-11, 2.2110e-13,\n 9.5950e-12, 6.1451e-11, 6.5702e-14, 5.6998e-12, 2.9381e-12, 4.3085e-11,\n 4.2094e-11, 6.0207e-12, 1.9915e-11, 4.1082e-12, 2.9427e-13, 1.1225e-13,\n 1.3331e-13, 6.7038e-12, 3.4542e-11, 3.0744e-11, 1.1094e-11, 3.8593e-11,\n 4.6876e-12, 2.3188e-11, 1.6892e-13, 6.0844e-13, 1.1619e-14, 3.2749e-12,\n 3.6326e-11, 2.7071e-12, 9.1296e-13, 1.8697e-11, 5.2632e-12, 1.6435e-11,\n 1.1687e-12, 2.3182e-11, 3.2444e-13, 1.2686e-11, 3.9431e-11, 1.6632e-13,\n 3.2833e-11, 7.3674e-13, 7.7961e-12, 2.9264e-14, 6.1904e-11, 1.2477e-12,\n 5.6744e-12, 1.2810e-12, 1.4987e-12, 8.7782e-12, 1.2919e-11, 8.9522e-12,\n 3.2361e-11, 1.2306e-11, 5.2388e-11, 5.0338e-12, 2.7247e-11, 5.9067e-16,\n 2.1476e-12, 1.0119e-11, 1.3080e-12, 6.9621e-12], device='cuda:0')"
},
"38": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1535e-10, 3.0533e-11, 4.3705e-11, ..., 5.5667e-12, 3.3978e-11,\n 6.2744e-11],\n [1.1910e-11, 3.0958e-12, 4.6347e-12, ..., 6.1720e-13, 3.4306e-12,\n 6.2202e-12],\n [1.2974e-11, 3.3450e-12, 4.8291e-12, ..., 5.9429e-13, 3.8511e-12,\n 7.1941e-12],\n [1.3593e-11, 3.7530e-12, 5.1103e-12, ..., 6.4459e-13, 4.0581e-12,\n 7.5345e-12]], device='cuda:0')"
},
"39": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.8925e-09, 2.9804e-10, 3.2248e-10, 3.4448e-10], device='cuda:0')"
},
"40": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1535e-10, 3.0533e-11, 4.3705e-11, ..., 5.5667e-12, 3.3978e-11,\n 6.2744e-11],\n [1.1910e-11, 3.0958e-12, 4.6347e-12, ..., 6.1720e-13, 3.4306e-12,\n 6.2202e-12],\n [1.2974e-11, 3.3450e-12, 4.8291e-12, ..., 5.9429e-13, 3.8511e-12,\n 7.1941e-12],\n [1.3593e-11, 3.7530e-12, 5.1103e-12, ..., 6.4459e-13, 4.0581e-12,\n 7.5345e-12]], device='cuda:0')"
},
"41": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.8925e-09, 2.9804e-10, 3.2248e-10, 3.4448e-10], device='cuda:0')"
},
"42": {
"step": "tensor(11268.)",
"exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.1535e-10, 3.0533e-11, 4.3705e-11, ..., 5.5667e-12, 3.3978e-11,\n 6.2744e-11],\n [1.1910e-11, 3.0958e-12, 4.6347e-12, ..., 6.1720e-13, 3.4306e-12,\n 6.2202e-12],\n [1.2974e-11, 3.3450e-12, 4.8291e-12, ..., 5.9429e-13, 3.8511e-12,\n 7.1941e-12],\n [1.3593e-11, 3.7530e-12, 5.1103e-12, ..., 6.4459e-13, 4.0581e-12,\n 7.5345e-12]], device='cuda:0')"
},
"43": {
"step": "tensor(11268.)",
"exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')",
"exp_avg_sq": "tensor([2.8925e-09, 2.9804e-10, 3.2248e-10, 3.4448e-10], device='cuda:0')"
},
"8": {
"step": "tensor(10016.)",
"exp_avg": "tensor([[ 3.5830e-07, -4.7272e-07, 6.6890e-14, ..., -5.9283e-07,\n -7.0022e-07, 1.6392e-07],\n [ 5.5262e-08, 1.1677e-07, -4.7579e-16, ..., -2.6206e-07,\n -6.7251e-06, -2.1945e-07],\n [ 1.6351e-07, -6.5632e-07, 4.2770e-11, ..., 2.4396e-07,\n -2.4826e-06, -1.8223e-08],\n ...,\n [-2.7947e-07, -1.5997e-07, 1.3550e-10, ..., 6.8615e-07,\n -9.7374e-07, -1.7039e-09],\n [-8.7589e-07, 5.9438e-07, 4.6189e-12, ..., 1.4602e-06,\n -1.6519e-08, 4.9783e-08],\n [ 1.6671e-06, -1.7836e-07, -8.5272e-11, ..., -4.7100e-07,\n 5.3849e-08, -1.7018e-11]], device='cuda:0')",
"exp_avg_sq": "tensor([[2.7138e-11, 3.7846e-12, 1.8628e-15, ..., 1.6709e-11, 3.3457e-11,\n 4.4967e-11],\n [3.3173e-11, 1.7046e-11, 3.4975e-15, ..., 7.7845e-12, 4.2268e-10,\n 2.0913e-11],\n [7.2414e-11, 9.6392e-12, 4.7727e-14, ..., 4.4724e-12, 6.8948e-11,\n 1.8952e-11],\n ...,\n [2.8560e-11, 6.7778e-11, 7.7726e-14, ..., 6.2342e-12, 1.4878e-10,\n 5.1638e-13],\n [4.8135e-11, 1.4569e-11, 1.1268e-13, ..., 2.2046e-11, 5.6775e-11,\n 5.9974e-12],\n [8.4850e-11, 1.4482e-11, 1.5858e-14, ..., 3.3600e-11, 1.0312e-11,\n 2.8457e-13]], device='cuda:0')"
},
"9": {
"step": "tensor(10016.)",
"exp_avg": "tensor([-3.4355e-06, -1.5383e-05, -3.4650e-06, ..., -1.2359e-05,\n 3.8409e-07, 1.3553e-05], device='cuda:0')",
"exp_avg_sq": "tensor([3.3921e-09, 6.1093e-09, 2.8624e-09, ..., 5.2694e-09, 3.1605e-09,\n 4.1730e-09], device='cuda:0')"
},
"10": {
"step": "tensor(10016.)",
"exp_avg": "tensor([[ 1.4383e-07, -1.3417e-06, -4.8123e-08, ..., -9.2854e-08,\n 2.5828e-07, -2.5039e-07],\n [ 8.9377e-08, 1.2158e-06, 1.3673e-07, ..., 1.8003e-07,\n 2.9341e-07, 2.8339e-07],\n [-9.1742e-09, -5.8510e-07, 2.3210e-08, ..., 2.5089e-08,\n -3.6084e-07, -1.2081e-07],\n ...,\n [ 9.4840e-08, -1.2304e-06, -1.4321e-07, ..., 1.0502e-07,\n 6.8465e-07, -1.8084e-07],\n [ 1.8060e-07, -1.2089e-06, -4.3010e-07, ..., 2.1643e-07,\n 1.7319e-07, -7.5282e-07],\n [ 1.3763e-07, -2.2046e-07, -5.3914e-08, ..., 3.7226e-08,\n 2.6425e-07, -2.1010e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.0251e-12, 1.7401e-12, 2.7347e-12, ..., 1.6625e-12, 1.7281e-12,\n 2.6461e-12],\n [1.8418e-12, 4.9405e-12, 2.6799e-12, ..., 4.2973e-12, 2.3866e-12,\n 2.6414e-12],\n [3.0638e-12, 6.5872e-12, 2.8719e-12, ..., 3.9160e-12, 3.1680e-12,\n 4.7629e-12],\n ...,\n [3.5742e-12, 6.6160e-12, 2.8266e-12, ..., 4.3170e-12, 2.8925e-12,\n 5.3263e-12],\n [2.5809e-12, 6.1012e-12, 2.4197e-12, ..., 5.6444e-12, 1.7958e-12,\n 2.8557e-12],\n [1.7516e-12, 7.0823e-12, 3.8597e-12, ..., 9.2378e-12, 2.9795e-12,\n 2.2880e-12]], device='cuda:0')"
},
"11": {
"step": "tensor(8764.)",
"exp_avg": "tensor([[ 6.0021e-08, -3.2553e-07, 4.9713e-09, ..., 2.2780e-06,\n 8.6043e-06, 3.6332e-05],\n [ 1.4480e-07, -2.4843e-07, -8.7837e-09, ..., 7.5468e-09,\n -3.0002e-07, 1.3550e-07],\n [ 4.7906e-07, 1.9650e-07, -3.2210e-32, ..., 3.9265e-07,\n -1.2447e-06, 1.5269e-09],\n ...,\n [-1.7850e-07, 6.4171e-08, -6.0749e-15, ..., -3.1413e-08,\n 4.4758e-07, 9.7309e-07],\n [ 2.4173e-08, -2.8099e-08, -9.6631e-15, ..., -1.3745e-06,\n 1.3046e-06, 1.3819e-09],\n [-1.1925e-07, -3.4211e-09, 6.4164e-09, ..., 4.0321e-06,\n -2.1851e-08, -3.2193e-08]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.9185e-11, 2.3644e-11, 4.5448e-14, ..., 1.3217e-10, 3.1798e-11,\n 1.6684e-10],\n [3.8224e-11, 1.0157e-12, 4.6828e-14, ..., 4.7451e-12, 6.5152e-11,\n 6.1060e-11],\n [1.2827e-11, 7.4580e-12, 2.1721e-18, ..., 2.8518e-12, 1.0346e-10,\n 5.8547e-13],\n ...,\n [1.6589e-10, 1.7890e-12, 3.8632e-17, ..., 7.1532e-12, 1.4612e-11,\n 1.1697e-11],\n [1.2481e-11, 1.8018e-11, 4.8229e-18, ..., 8.4913e-11, 6.2744e-10,\n 6.5968e-13],\n [2.0206e-11, 3.5634e-11, 8.5991e-13, ..., 2.2366e-10, 5.5520e-11,\n 1.6105e-12]], device='cuda:0')"
},
"12": {
"step": "tensor(8764.)",
"exp_avg": "tensor([ 4.9924e-05, 9.5775e-06, -1.5585e-05, ..., 2.1194e-05,\n 6.8780e-06, 1.9381e-05], device='cuda:0')",
"exp_avg_sq": "tensor([3.5543e-09, 1.9272e-09, 3.4568e-09, ..., 3.8770e-09, 4.4961e-09,\n 3.5167e-09], device='cuda:0')"
},
"13": {
"step": "tensor(8764.)",
"exp_avg": "tensor([[ 2.1761e-07, 8.9785e-10, 3.9884e-07, ..., 1.8240e-07,\n -1.3000e-07, 2.1703e-08],\n [ 3.5875e-07, -1.1533e-07, -7.1832e-07, ..., 3.2054e-07,\n 9.5066e-08, 4.1268e-08],\n [ 1.8207e-07, -1.3665e-07, 7.6200e-07, ..., -6.0928e-07,\n -2.0485e-07, -6.8308e-08],\n ...,\n [-3.0917e-07, -1.2670e-07, 3.6646e-07, ..., 4.5317e-07,\n -2.3019e-07, -2.4820e-07],\n [ 7.5420e-08, 4.3427e-07, 5.6661e-07, ..., -2.4217e-07,\n -3.0063e-07, -1.0919e-07],\n [ 9.4264e-08, -6.9451e-08, 3.5249e-07, ..., -3.6601e-08,\n -1.2295e-07, -1.8545e-07]], device='cuda:0')",
"exp_avg_sq": "tensor([[1.2304e-12, 5.8640e-13, 9.8039e-12, ..., 1.0803e-12, 7.0289e-13,\n 6.5312e-13],\n [1.5815e-12, 1.0095e-12, 1.5282e-11, ..., 2.8355e-12, 7.6226e-13,\n 8.8480e-13],\n [2.5201e-12, 9.2520e-13, 1.4950e-11, ..., 8.4340e-12, 1.4941e-12,\n 1.2202e-12],\n ...,\n [2.9555e-12, 1.2850e-12, 3.6760e-12, ..., 1.8609e-12, 1.5176e-12,\n 1.2512e-12],\n [2.8887e-12, 2.2663e-12, 2.9999e-12, ..., 6.7842e-12, 1.1652e-12,\n 1.1676e-12],\n [2.3223e-12, 1.5954e-12, 1.6244e-12, ..., 2.7359e-12, 1.0949e-12,\n 1.1456e-12]], device='cuda:0')"
}
},
"param_groups": [
{
"lr": 0.01,
"name": "shared",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
0,
1
]
},
{
"lr": 0.01,
"name": "scale_384",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
2,
3,
4
]
},
{
"lr": 0.01,
"name": "scale_768",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
5,
6,
7
]
},
{
"lr": 0.01,
"name": "scale_1024",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
8,
9,
10
]
},
{
"lr": 0.01,
"name": "scale_1280",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.01,
"params": [
11,
12,
13
]
},
{
"lr": 0.005,
"name": "fusion",
"betas": [
0.9,
0.999
],
"eps": 1e-08,
"weight_decay": 1e-05,
"amsgrad": false,
"maximize": false,
"foreach": null,
"capturable": false,
"differentiable": false,
"fused": null,
"decoupled_weight_decay": true,
"initial_lr": 0.005,
"params": [
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43
]
}
]
},
"scheduler_state_dict": {
"T_0": 10,
"T_i": 20,
"T_mult": 2,
"eta_min": 1e-06,
"T_cur": 0,
"base_lrs": [
0.01,
0.01,
0.01,
0.01,
0.01,
0.005
],
"last_epoch": 10,
"_step_count": 0,
"_is_initial": false,
"_get_lr_called_within_step": false,
"_last_lr": [
0.01,
0.01,
0.01,
0.01,
0.01,
0.005
]
},
"metrics": {
"val_acc": 82.316
},
"train_config": {
"name": "david_training",
"run_id": "20251012_041353",
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
"model_variant": "clip_vit_l14",
"num_classes": 1000,
"preset": "clip_vit_l14",
"custom_config_path": null,
"num_classes_override": null,
"use_belly_override": null,
"belly_expand_override": null,
"progressive_training_override": true,
"scale_warmup_epochs_override": {
"384": 0,
"768": 1,
"1024": 2,
"1280": 3
},
"num_epochs": 20,
"batch_size": 1024,
"learning_rate": 0.01,
"weight_decay": 1e-05,
"warmup_epochs": 3,
"use_rose_loss": true,
"rose_initial_weight": 0.1,
"rose_max_weight": 0.5,
"rose_weight_schedule": "adaptive",
"use_cayley_loss": false,
"cayley_weight": 0.001,
"scale_loss_balance": null,
"use_mixed_precision": false,
"gradient_clip": 5.0,
"scheduler_type": "cosine_restarts",
"min_lr": 1e-06,
"freeze_strategy": "never",
"freeze_threshold": 90.0,
"unfreeze_on_plateau": true,
"patience": 10,
"track_gradients": true,
"gradient_scale_threshold": 1e-07,
"gradient_scale_multiplier": 5.0,
"log_interval": 50,
"val_interval": 1,
"save_interval": 5,
"log_fusion_weights": true,
"log_loss_components": true,
"save_format": "safetensors",
"hf_repo": "AbstractPhil/gated-david",
"upload_to_hub": true,
"base_dir": "./david_training",
"num_workers": 10,
"pin_memory": true,
"prefetch_factor": 4,
"persistent_workers": true
}
}