diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,36 +1,196 @@ { - "epoch": 0, + "epoch": 1, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-5.1115e-05, 2.0107e-05, 3.4546e-05, ..., -3.3245e-05,\n 9.1718e-06, 8.0877e-06],\n [-3.6630e-05, -1.6314e-05, -5.6690e-06, ..., -3.2203e-05,\n 2.1494e-05, -9.5532e-06],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [-5.7992e-07, 1.7772e-05, -2.0679e-05, ..., 5.6017e-06,\n 3.1833e-05, 2.3251e-05],\n [-1.5739e-05, -2.9994e-05, -6.0391e-06, ..., 1.8479e-05,\n -2.8892e-05, 4.3773e-06],\n [-5.3456e-06, 4.4878e-05, -2.1462e-05, ..., 2.3223e-05,\n 6.6821e-06, 2.3600e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2885e-08, 1.3700e-08, 6.3741e-09, ..., 8.7725e-09, 7.6021e-09,\n 4.8925e-09],\n [7.9402e-09, 9.3185e-09, 6.0706e-09, ..., 5.7460e-09, 5.5121e-09,\n 3.8883e-09],\n [8.2648e-18, 9.9391e-18, 1.1785e-17, ..., 6.7162e-18, 3.4203e-18,\n 8.1850e-18],\n ...,\n [1.4760e-08, 1.4014e-08, 9.2830e-09, ..., 1.0177e-08, 7.3684e-09,\n 6.2340e-09],\n [1.8348e-08, 1.5192e-08, 1.2791e-08, ..., 1.1322e-08, 9.8771e-09,\n 8.2061e-09],\n [1.9906e-09, 3.1307e-09, 2.1364e-09, ..., 3.3339e-09, 1.6830e-09,\n 1.6688e-09]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-2.5660e-05, 8.6654e-06, 6.0734e-05, ..., -6.8806e-05,\n 1.7010e-05, -1.2918e-05],\n [ 3.4550e-05, -2.6272e-05, -7.1295e-05, ..., 3.2710e-05,\n 4.9773e-05, -4.5199e-06],\n [-5.4501e-24, -1.1627e-23, -1.2960e-24, ..., -3.1867e-24,\n -3.0199e-24, 7.0640e-24],\n ...,\n [-1.1757e-05, -3.3489e-05, -5.5402e-05, ..., 1.1567e-05,\n -1.0570e-05, -2.4651e-05],\n [-2.2741e-05, -4.6035e-06, 2.8693e-05, ..., -8.8789e-06,\n 9.3655e-06, 9.3527e-06],\n [ 9.8680e-06, 2.4155e-06, -1.8717e-05, ..., -1.3777e-07,\n -1.6496e-06, 5.8280e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3867e-08, 1.5095e-08, 6.9419e-09, ..., 8.9740e-09, 7.5087e-09,\n 5.2236e-09],\n [9.8261e-09, 1.0136e-08, 8.8865e-09, ..., 6.9002e-09, 5.5357e-09,\n 4.8151e-09],\n [1.7317e-11, 2.0972e-11, 2.0908e-11, ..., 3.0171e-11, 5.8204e-12,\n 1.3405e-11],\n ...,\n [1.3815e-08, 1.2816e-08, 9.8299e-09, ..., 9.5702e-09, 7.5078e-09,\n 6.0434e-09],\n [1.6530e-08, 1.3552e-08, 1.1877e-08, ..., 1.0545e-08, 9.1210e-09,\n 7.8570e-09],\n [2.7099e-09, 4.2244e-09, 2.9066e-09, ..., 2.3599e-09, 1.9022e-09,\n 1.9200e-09]], device='cuda:0')" }, "1": { - "step": "tensor(1252.)", - "exp_avg": "tensor([-1.4937e-03, -1.0415e-03, 5.6052e-45, ..., -6.1226e-04,\n 1.0764e-03, 9.4429e-04], device='cuda:0')", - "exp_avg_sq": "tensor([1.6521e-05, 1.2791e-05, 3.4756e-14, ..., 2.0229e-05, 2.3449e-05,\n 3.8690e-06], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([-1.9185e-03, 3.0139e-03, 3.1894e-22, ..., 9.6413e-04,\n -7.8702e-04, 3.0819e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.6176e-05, 1.4221e-05, 3.6212e-08, ..., 1.8365e-05, 2.1177e-05,\n 4.3719e-06], device='cuda:0')" }, "2": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 3.8469e-06, 4.4727e-06, 5.6052e-45, ..., 6.8052e-05,\n -1.6977e-08, -1.1068e-06],\n [ 5.6052e-45, 3.4778e-23, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 2.6602e-12, 6.5419e-14, 0.0000e+00, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 1.7557e-06, -4.9403e-07, 0.0000e+00, ..., 1.2390e-06,\n 1.6308e-05, -2.5592e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.7774e-09, 6.2782e-09, 5.4596e-17, ..., 9.7538e-09, 6.3006e-08,\n 1.4746e-09],\n [3.5497e-11, 6.6977e-10, 0.0000e+00, ..., 1.2565e-10, 4.6488e-14,\n 4.6279e-10],\n [9.2665e-10, 3.2794e-09, 0.0000e+00, ..., 1.8004e-09, 1.5863e-14,\n 3.6890e-09],\n ...,\n [0.0000e+00, 1.5680e-16, 0.0000e+00, ..., 4.5935e-18, 5.4860e-18,\n 0.0000e+00],\n [1.6481e-09, 1.6852e-08, 0.0000e+00, ..., 3.6060e-09, 2.4500e-10,\n 2.2296e-08],\n [2.6776e-10, 3.7610e-10, 0.0000e+00, ..., 2.7872e-10, 4.5433e-09,\n 8.8203e-11]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-1.7566e-06, -3.6239e-06, 3.7904e-41, ..., 2.9396e-06,\n -2.2079e-06, 2.0373e-06],\n [ 1.1574e-29, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [-6.0374e-06, 4.9641e-06, -5.2032e-41, ..., 6.7283e-06,\n 3.1242e-06, 5.2013e-06],\n [-7.6419e-06, 2.0507e-06, -7.4114e-40, ..., 6.3524e-06,\n 1.0472e-05, -6.2212e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.4391e-09, 2.5623e-09, 9.7713e-12, ..., 6.4433e-09, 1.9342e-08,\n 8.6120e-10],\n [1.0450e-11, 1.9139e-10, 0.0000e+00, ..., 3.5906e-11, 1.3284e-14,\n 1.3224e-10],\n [2.6480e-10, 9.3711e-10, 0.0000e+00, ..., 5.1448e-10, 4.5330e-15,\n 1.0542e-09],\n ...,\n [0.0000e+00, 4.4806e-17, 0.0000e+00, ..., 1.3126e-18, 1.5677e-18,\n 0.0000e+00],\n [2.7058e-09, 5.1170e-09, 1.4081e-12, ..., 2.1932e-09, 2.4470e-10,\n 6.7471e-09],\n [5.3852e-10, 6.1069e-10, 7.5062e-12, ..., 1.2926e-09, 1.0978e-08,\n 2.9124e-10]], device='cuda:0')" }, "3": { - "step": "tensor(1252.)", - "exp_avg": "tensor([ 3.8218e-04, 8.0004e-22, 5.6052e-45, -1.2478e-04, 7.0635e-05,\n 2.0413e-20, 1.1707e-04, 1.9848e-04, 3.1622e-05, 4.7034e-04,\n 1.0529e-04, 4.7842e-19, 4.8117e-04, 6.6726e-07, -2.6610e-04,\n 6.0888e-20, -9.4428e-05, 5.6052e-45, 5.6052e-45, -3.7158e-04,\n 5.6052e-45, -9.8299e-05, 5.6052e-45, 2.5381e-04, 6.2474e-04,\n -1.8862e-06, 2.7348e-08, 5.6052e-45, 2.7322e-04, 1.9617e-04,\n -2.3872e-04, 2.4501e-06, -1.8965e-05, 4.9968e-04, -1.3027e-04,\n 8.1243e-05, 1.5436e-28, 2.1093e-04, 3.3742e-04, 5.0860e-05,\n 5.2040e-04, 9.5792e-05, 5.7385e-04, 6.3446e-04, 2.6995e-04,\n 1.0579e-10, -4.3391e-05, 3.0988e-04, 5.5316e-04, 4.2428e-04,\n 4.5718e-31, 2.1099e-06, 1.0349e-04, 3.7559e-04, -1.1469e-07,\n 1.7873e-23, 1.4911e-27, -6.1441e-05, 5.6052e-45, 3.0057e-04,\n -1.1182e-07, -2.2101e-05, -4.2033e-04, 5.4079e-04, 7.5203e-15,\n 5.5865e-18, 2.6620e-04, -4.5977e-06, 5.6052e-45, 5.6052e-45,\n -6.1323e-04, 1.2223e-39, -2.3448e-05, -2.2909e-04, -3.9513e-04,\n 4.2284e-04, 5.6052e-45, 3.7224e-05, -3.5990e-05, 1.3120e-04,\n 5.6052e-45, 6.2860e-05, 9.5806e-04, 5.6052e-45, 7.9613e-05,\n 3.2063e-04, 7.8807e-05, -1.4610e-04, -7.5393e-05, 5.7598e-04,\n 2.8434e-04, -2.9455e-21, 9.5098e-08, 4.7277e-04, 5.6382e-05,\n 5.8133e-04, 3.8539e-04, -3.8778e-07, -5.0567e-05, 2.9547e-04,\n -7.1920e-04, 1.5677e-04, -1.3764e-04, -1.0793e-03, 2.9480e-04,\n -5.5674e-04, -8.5346e-05, -1.5459e-04, -9.3293e-05, 5.6052e-45,\n -1.1488e-04, 3.4394e-04, 6.1917e-04, 1.1114e-38, 1.7663e-04,\n -4.3614e-04, 1.2581e-04, 4.0967e-05, 3.7442e-07, 2.4266e-34,\n -3.3006e-04, -5.9274e-04, 5.4158e-06, 5.6052e-45, -2.8571e-04,\n 4.2498e-05, 2.4431e-06, 1.7019e-41, 5.1506e-05, 5.6052e-45,\n 2.1816e-04, 1.8041e-04, 5.6052e-45, 7.9613e-05, 1.0230e-20,\n -2.3387e-04, 2.6979e-21, 2.7262e-04, 1.9061e-04, 5.7635e-31,\n 5.5506e-23, 2.5028e-18, 5.2738e-05, 1.2882e-10, 5.6052e-45,\n 3.0278e-19, -1.9402e-04, -3.5185e-04, 3.0248e-18, -4.9454e-10,\n 5.6052e-45, -2.1463e-04, 5.6052e-45, 8.2809e-04, 3.2103e-16,\n 7.3538e-05, 5.6052e-45, 2.0477e-06, 5.1427e-27, 1.3080e-04,\n -1.1485e-06, 1.5549e-16, -3.6691e-04, 1.7712e-04, -1.2041e-04,\n -3.3719e-04, -4.9399e-22, 5.6052e-45, 3.4673e-04, 4.4995e-04,\n -7.1545e-05, 1.5384e-04, 1.7358e-04, -1.2248e-04, 4.7999e-05,\n 4.1299e-05, 7.5115e-07, -1.7030e-04, -9.2796e-05, 6.8892e-05,\n -1.1835e-03, -2.8181e-04, 1.1863e-04, 1.4258e-33, 1.2956e-32,\n 5.6052e-45, -8.1853e-04, -2.0531e-04, -3.9707e-10, 7.2812e-10,\n -1.9252e-04, -4.5288e-04, 2.9232e-04, -4.5380e-04, -1.8649e-04,\n -1.7415e-04, -3.9736e-04, 8.5544e-04, 1.5977e-04, 5.6052e-45,\n 1.0916e-09, 2.0393e-04, -2.6196e-04, -1.6734e-04, -6.9888e-04,\n -1.5094e-07, -1.4927e-04, -4.3640e-04, 7.9703e-16, 3.7150e-11,\n 3.1619e-05, 9.1752e-36, 9.4894e-05, -7.7122e-05, -7.4456e-05,\n -4.7453e-04, 2.1760e-04, -6.6162e-17, 4.2643e-04, 1.6761e-04,\n 3.2113e-07, -9.2353e-04, -2.2324e-04, -4.9814e-05, -1.2032e-06,\n 5.1401e-11, 5.6052e-45, 3.1504e-04, 3.6809e-04, 5.6052e-45,\n -9.6823e-05, -1.7097e-04, -2.7672e-05, 2.0413e-04, 5.6052e-45,\n -6.7278e-04, 8.5375e-04, 5.6052e-45, -2.5825e-04, -7.9492e-04,\n 2.8845e-15, 6.3364e-31, 1.1674e-04, -2.8837e-04, 6.1486e-05,\n 3.6381e-04, 9.9847e-05, 1.2208e-04, 2.2044e-21, 5.6052e-45,\n 8.9531e-05, 1.2577e-04, -1.1164e-04, -5.1391e-04, 5.1978e-05,\n 7.7511e-04, 5.6052e-45, 4.0989e-05, 5.6052e-45, 2.4154e-04,\n 5.6052e-45, 2.6517e-04, 2.6230e-04, -1.6418e-04, 4.0567e-04,\n 1.0574e-14, -5.9253e-05, 1.0241e-04, -7.9978e-05, 8.9149e-27,\n -2.4600e-04, -4.8269e-06, -3.3168e-04, -1.6313e-04, 1.2319e-03,\n -8.4572e-05, 1.8478e-04, 2.0782e-09, -2.8268e-06, -7.9179e-05,\n 5.0319e-10, -1.1923e-04, 3.9236e-44, 4.0023e-05, 3.0151e-04,\n -1.2087e-04, 2.6693e-05, -1.1285e-04, 5.6052e-45, 2.9637e-04,\n 2.1271e-04, 2.8520e-04, 2.4720e-41, 5.1247e-14, 3.3199e-04,\n -1.7947e-06, 5.2672e-04, -4.8191e-04, -1.7399e-04, -6.5861e-43,\n -1.9457e-04, -6.4591e-04, 3.3081e-09, 5.6052e-45, 4.1406e-20,\n 8.5171e-10, 5.6052e-45, 1.5754e-04, -4.1803e-06, -4.1639e-04,\n -8.5921e-05, 5.6052e-45, 5.1273e-04, -1.0320e-05, -1.7667e-04,\n 5.6052e-45, 5.6052e-45, -7.7146e-04, 4.0002e-38, -2.0809e-04,\n -1.3197e-04, 8.0053e-05, 3.8074e-20, -4.5425e-04, 2.1859e-04,\n 8.1467e-32, 2.7410e-04, -6.8712e-05, 6.9257e-05, 6.5898e-04,\n 3.3839e-04, -2.5711e-04, 1.1826e-03, 2.3023e-04, 7.7894e-40,\n -1.4262e-04, -6.9575e-05, 1.1507e-07, 5.6052e-45, 5.6052e-45,\n 4.4987e-05, -4.5090e-05, -3.4941e-05, 2.7201e-05, -6.0723e-05,\n 1.0621e-04, 3.0405e-05, 5.6052e-45, -6.2574e-04, 3.9199e-06,\n 1.2306e-05, -2.7718e-04, 2.3346e-17, 5.6905e-39, 4.9593e-34,\n -7.6822e-04, 8.4728e-14, 1.6328e-08, 2.9529e-04, -7.6453e-07,\n 3.7565e-04, 2.0243e-04, 1.8215e-34, 1.6978e-04, -1.9418e-05,\n 3.1847e-20, 1.0820e-04, 4.8579e-04, 7.1742e-08, 5.6052e-45,\n 2.5511e-04, 5.7119e-20, 6.4309e-05, -1.5516e-04, -2.0177e-05,\n 7.1613e-09, 5.6052e-45, 2.5350e-13, 5.8009e-29, 2.4599e-04,\n 2.4395e-04, -3.7157e-04, -1.9739e-04, -1.3765e-04, 5.6052e-45,\n 2.7911e-04, 5.6052e-45, 2.4672e-04, -4.9174e-04, -6.8502e-05,\n -8.3649e-05, 1.5599e-05, 5.6052e-45, -1.6585e-05, 4.0139e-05,\n 2.9731e-04, 8.4497e-08, 2.5546e-07, 2.8841e-35, 5.6052e-45,\n 8.6095e-05, 3.6328e-04, 2.5533e-04, 1.3146e-03, 1.7070e-04,\n 3.8743e-04, 5.6052e-45, -4.7014e-04, 4.5412e-05, -3.2610e-04,\n -3.0897e-04, 5.6052e-45, -4.7544e-05, 2.2350e-04, 1.6483e-08,\n 1.3765e-04, 5.6052e-45, 3.6315e-05, 4.5507e-05, -1.0489e-03,\n 3.7407e-04, -5.4334e-04, -2.2857e-03, 1.0393e-33, 5.6052e-45,\n 7.3345e-33, 1.5414e-04, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -6.5093e-04, -1.4926e-04, -3.1952e-04, 1.7657e-07, 7.7445e-04,\n 5.6052e-45, -4.8834e-04, -9.2296e-07, -6.0505e-04, 5.6052e-45,\n 5.6052e-45, -3.2284e-04, 5.6052e-45, 5.6052e-45, 4.2095e-42,\n -3.9422e-04, -1.3901e-04, 1.1123e-04, 8.2342e-15, 5.6052e-45,\n 5.9790e-04, 1.1872e-04, -5.3718e-04, 3.5846e-04, 7.2883e-04,\n -1.4106e-04, 2.9866e-08, 4.4480e-04, 5.5912e-43, 1.8141e-07,\n 4.0878e-10, -2.1845e-04, 5.6052e-45, -4.4449e-04, 5.6052e-45,\n -2.7358e-04, 5.6782e-05, 5.2037e-05, -7.2313e-05, -6.5192e-05,\n 2.0171e-04, 5.6052e-45, 5.6052e-45, 4.2101e-04, 5.6052e-45,\n 2.6803e-37, -2.0666e-07, -1.4383e-04, 3.7621e-18, 4.3796e-04,\n 4.4754e-04, 3.2286e-04, -1.9190e-04, -4.2962e-04, -1.3110e-05,\n 3.3386e-04, -4.6300e-04, -5.3814e-04, 7.3730e-05, -7.4109e-04,\n -8.2087e-05, 5.6052e-45, -6.3055e-04, 2.7288e-04, 8.9807e-05,\n 5.6052e-45, -1.4269e-04, 5.5829e-05, -1.4574e-04, -1.1367e-04,\n 2.8679e-14, -1.1732e-03, 1.3771e-04, 5.4366e-07, -2.0518e-04,\n 1.0191e-06, -3.7106e-04, -1.7129e-04, 5.6052e-45, 2.0274e-04,\n -1.7718e-04, -5.9050e-05, -1.4819e-04, -3.4906e-04, 1.1461e-40,\n 1.6469e-08, -8.1696e-05, 1.2102e-04, 2.7143e-21, -9.1091e-07,\n 6.4320e-04, -6.5085e-05, -3.0696e-04, -6.2679e-06, 1.4956e-05,\n 1.4310e-04, 1.1640e-09, 1.1597e-03, -5.7752e-04, 2.4327e-05,\n -4.4174e-04, -2.4569e-04, 5.3230e-04, -3.9954e-05, 5.6052e-45,\n 8.2504e-20, -1.0823e-04, 6.2652e-12, -2.6155e-05, -6.7134e-04,\n 7.0425e-04, -5.0158e-04, 5.9339e-04, -2.0269e-04, 7.5749e-04,\n 5.6052e-45, -3.2619e-06, -1.5872e-03, 8.0820e-04, 1.2370e-06,\n 3.6122e-04, 3.1083e-04, 2.7292e-04, 4.5809e-04, -8.2813e-05,\n 5.6052e-45, 3.0311e-05, -7.7129e-05, 5.6052e-45, 2.3292e-04,\n 5.6052e-45, -3.7187e-05, 2.1344e-26, -1.0217e-04, -9.4743e-07,\n 4.2678e-04, 4.1857e-05, 5.6052e-45, -2.1576e-04, 5.6052e-45,\n 2.1321e-05, -7.5512e-06, -7.2515e-04, 5.6052e-45, -2.2833e-04,\n -3.6974e-04, 1.6655e-04, 8.4603e-08, -2.6396e-05, 7.9812e-05,\n 1.2428e-04, 3.3136e-09, 5.1999e-09, -6.2667e-05, 5.6052e-45,\n -1.8404e-04, 4.1423e-04, 6.9310e-04, -2.5808e-04, -8.0821e-06,\n -3.8919e-06, 5.6052e-45, 8.7584e-05, -4.2317e-04, 1.2604e-05,\n 2.9426e-04, 2.5232e-05, -1.3666e-04, 1.4815e-04, -7.9855e-07,\n 4.9999e-05, 8.5039e-05, 2.2255e-04, -1.1264e-09, 2.5471e-04,\n 3.6019e-04, -3.6965e-04, 1.1358e-13, -2.3170e-04, 3.6616e-04,\n -1.5764e-04, 5.9709e-05, 1.8939e-07, 5.6052e-45, 7.6144e-05,\n 5.6052e-45, 5.0188e-10, 1.9223e-32, -4.5194e-04, -3.0563e-04,\n 1.1113e-04, -2.1723e-04, -1.3396e-04, -1.2334e-03, 6.4252e-06,\n -9.6372e-04, 5.6052e-45, 4.8890e-05, -1.7205e-04, 5.6052e-45,\n 5.6052e-45, 3.5136e-35, 5.6052e-45, 5.6052e-45, 1.1885e-04,\n 2.4666e-07, 5.6052e-45, -8.8639e-05, 5.9116e-04, -9.4945e-05,\n 3.9502e-05, 5.6052e-45, -4.1640e-04, 5.7558e-41, 5.6052e-45,\n -1.5666e-04, 5.6052e-45, -2.7820e-04, -4.2402e-04, 2.9701e-04,\n 3.1877e-04, -4.6690e-06, 6.6976e-04, 3.4616e-04, 6.1442e-05,\n -3.5794e-04, -2.8278e-04, -1.7150e-04, 4.4884e-06, 4.4483e-12,\n -4.4531e-04, 2.3326e-24, -2.4075e-04, 5.6052e-45, 5.1110e-05,\n 7.1042e-04, 7.8490e-05, 5.6052e-45, -5.8747e-06, 1.2398e-08,\n -4.9089e-04, 5.6052e-45, -2.5268e-04, -4.3913e-04, -1.7341e-04,\n 5.1760e-04, -7.4397e-04, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 2.3848e-04, 1.5510e-04, 2.1766e-04, 5.6052e-45, 3.8660e-04,\n 5.6052e-45, 2.2806e-05, -9.8839e-07, -5.6887e-05, 2.4856e-04,\n 2.9393e-12, 5.0904e-22, 5.6052e-45, 5.4609e-07, 4.5710e-42,\n -1.9395e-04, 8.4836e-05, 4.8425e-04, 1.8691e-11, 6.2520e-07,\n 9.4244e-04, 1.1704e-04, -2.5014e-04, -1.5512e-04, 2.8406e-04,\n -4.1554e-04, 5.6052e-45, -8.8677e-05, -1.2047e-04, 5.6052e-45,\n -4.8120e-05, -2.2179e-05, 2.0070e-32, -2.5375e-04, -2.8027e-04,\n 5.6052e-45, -3.6313e-04, 2.3223e-04, -7.2957e-05, 1.8224e-04,\n 5.6052e-45, -2.6673e-05, -1.8215e-04, 2.1180e-04, 9.9072e-14,\n 1.6484e-11, -2.4268e-04, -3.0438e-04, -3.6011e-04, 4.3778e-04,\n 2.8486e-12, 2.1854e-04, 5.6052e-45, -2.3703e-05, 4.5993e-34,\n -1.3364e-06, -2.7262e-04, 1.8741e-04, -2.1678e-04, 4.0780e-07,\n -2.8199e-04, 4.9196e-05, 3.1032e-06, 1.8501e-04, 5.6052e-45,\n 1.4732e-05, 1.3301e-04, 5.6052e-45, -2.0621e-04, 2.1345e-41,\n -2.1755e-06, 3.1857e-28, 8.7392e-10, -1.9154e-04, -2.5642e-04,\n 2.8721e-04, 2.8977e-30, 8.9714e-05, 6.4323e-04, -4.2397e-05,\n 2.6927e-06, 9.8263e-05, 2.4579e-04, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 7.2791e-07, 1.1419e-04], device='cuda:0')", - "exp_avg_sq": "tensor([2.7635e-06, 8.2423e-07, 2.6001e-06, 4.4758e-07, 7.6642e-06, 7.6808e-06,\n 1.1055e-05, 2.5434e-05, 6.7623e-06, 7.3278e-06, 4.8224e-06, 4.2421e-06,\n 1.0958e-06, 9.1319e-06, 2.0342e-06, 2.3354e-06, 1.6900e-05, 4.6893e-06,\n 5.8218e-07, 1.4690e-05, 9.6257e-08, 1.3343e-06, 7.5355e-06, 2.7443e-06,\n 6.3642e-06, 1.4530e-06, 2.0400e-06, 1.7442e-08, 6.0682e-06, 6.2240e-06,\n 5.3140e-07, 2.0466e-06, 3.6801e-06, 1.6826e-05, 2.3466e-06, 6.1230e-09,\n 9.3673e-06, 8.8893e-06, 2.5041e-06, 5.5933e-06, 2.0168e-05, 9.0763e-07,\n 4.4732e-06, 2.6477e-06, 7.5364e-06, 7.7119e-06, 1.1337e-06, 1.1949e-06,\n 3.2596e-05, 2.2453e-06, 1.4961e-05, 9.7974e-05, 3.9626e-06, 2.9001e-06,\n 1.2551e-05, 8.7083e-06, 2.0797e-05, 1.1476e-07, 4.6326e-06, 3.2638e-06,\n 2.9113e-08, 3.0116e-06, 8.1881e-06, 4.6217e-06, 1.2633e-10, 3.1499e-06,\n 4.4544e-06, 1.5556e-05, 8.1254e-06, 1.9342e-06, 2.4515e-06, 6.8063e-06,\n 3.9216e-07, 2.4367e-06, 1.5499e-06, 1.0611e-06, 4.7240e-08, 2.1201e-06,\n 1.9380e-07, 2.6816e-07, 1.9720e-05, 1.0707e-06, 9.3989e-06, 1.3042e-07,\n 3.8750e-05, 9.6063e-07, 4.5605e-06, 8.8143e-06, 8.8979e-07, 1.1859e-05,\n 2.0681e-06, 3.8866e-06, 3.3987e-08, 3.3756e-06, 2.6734e-08, 1.1815e-05,\n 1.4255e-05, 5.6772e-05, 9.0597e-06, 4.9357e-06, 8.6550e-06, 5.7778e-06,\n 2.9124e-05, 3.7177e-05, 3.3967e-06, 5.5560e-06, 1.2809e-06, 2.3054e-06,\n 2.5995e-06, 3.2284e-08, 1.7538e-06, 1.8689e-05, 4.4649e-06, 6.5428e-06,\n 1.0186e-05, 9.1332e-06, 1.8178e-06, 1.1373e-07, 6.3444e-06, 3.5708e-06,\n 7.9360e-06, 1.2610e-05, 5.6133e-06, 8.9479e-06, 2.5459e-06, 3.2453e-06,\n 1.4225e-05, 2.8527e-07, 4.3806e-06, 3.9827e-05, 4.9039e-06, 6.4620e-06,\n 3.9087e-07, 2.3398e-06, 4.1882e-06, 5.1322e-07, 1.5762e-09, 1.0374e-05,\n 1.2307e-06, 6.4191e-06, 3.9408e-06, 2.1664e-06, 7.1447e-07, 6.1204e-10,\n 8.3959e-09, 1.1168e-05, 4.2433e-06, 4.9932e-06, 3.0005e-05, 2.6369e-07,\n 5.3896e-08, 9.8654e-07, 6.6309e-07, 4.6892e-07, 2.5723e-08, 4.9584e-06,\n 9.6611e-06, 9.1801e-06, 1.2126e-05, 1.4420e-05, 3.4475e-09, 4.4199e-06,\n 2.1153e-06, 1.9039e-06, 5.8109e-06, 3.1363e-06, 4.7447e-06, 1.5787e-11,\n 1.8691e-06, 5.0461e-07, 1.2875e-05, 1.1117e-05, 1.6662e-06, 2.0908e-06,\n 4.5878e-06, 1.3043e-06, 1.5576e-09, 2.4455e-06, 8.6583e-06, 2.6111e-05,\n 9.1380e-06, 9.5527e-06, 3.1162e-06, 3.6288e-10, 6.6277e-10, 3.8118e-16,\n 5.4426e-06, 1.1652e-06, 2.3855e-07, 1.5424e-08, 3.1613e-06, 3.1236e-07,\n 4.6236e-06, 1.3960e-05, 7.5890e-06, 2.5207e-06, 1.9452e-06, 6.8358e-06,\n 3.8253e-07, 1.0103e-16, 1.1990e-05, 2.1094e-06, 4.7269e-06, 6.8602e-06,\n 3.3418e-06, 8.4926e-07, 7.9136e-06, 6.4455e-06, 4.1708e-06, 7.5726e-08,\n 4.6451e-06, 1.0624e-07, 2.3946e-06, 8.1357e-06, 4.7993e-06, 1.0426e-06,\n 3.2290e-06, 1.5974e-06, 5.7175e-06, 1.7559e-06, 3.6356e-06, 5.8189e-06,\n 2.4541e-06, 9.9111e-07, 1.7420e-05, 1.4958e-05, 3.3310e-05, 3.7194e-06,\n 4.2554e-06, 2.9765e-07, 3.0352e-06, 4.2935e-06, 3.9751e-06, 1.3449e-06,\n 9.4652e-06, 9.3510e-07, 1.3593e-05, 1.7949e-09, 2.0589e-06, 2.9155e-06,\n 6.1750e-06, 4.9236e-06, 5.0906e-06, 2.3622e-06, 9.3690e-07, 1.6861e-06,\n 6.7642e-07, 9.8416e-06, 8.2588e-07, 6.2805e-06, 2.7127e-05, 3.5601e-06,\n 1.0432e-05, 1.1015e-05, 1.6681e-06, 5.2195e-06, 1.9196e-09, 3.0939e-06,\n 1.4243e-14, 2.2847e-05, 1.1307e-14, 4.9401e-06, 2.7682e-06, 5.1914e-06,\n 2.4829e-05, 1.2108e-05, 5.0223e-07, 3.2724e-06, 1.5393e-05, 2.2256e-05,\n 7.1956e-06, 7.9809e-07, 2.3644e-06, 8.9256e-06, 5.7172e-06, 1.7009e-06,\n 7.3657e-06, 9.7895e-09, 1.7859e-08, 1.5188e-05, 2.7915e-05, 1.2584e-05,\n 1.6261e-05, 5.1083e-06, 7.5614e-06, 2.6763e-07, 3.9161e-06, 2.2760e-06,\n 5.7604e-09, 3.3778e-05, 3.5035e-07, 1.2192e-05, 3.5018e-05, 1.5584e-07,\n 2.4436e-06, 3.4135e-05, 8.5208e-06, 2.6426e-05, 2.6068e-06, 5.2863e-08,\n 5.6054e-07, 3.0880e-05, 7.8967e-08, 3.8191e-17, 1.0476e-06, 2.2543e-08,\n 2.6412e-07, 6.6212e-07, 5.0764e-06, 1.1731e-05, 1.3041e-05, 1.4668e-09,\n 3.4768e-06, 2.4514e-07, 1.8413e-06, 3.2195e-09, 1.2458e-08, 5.0837e-05,\n 6.3851e-06, 9.5732e-07, 6.1195e-06, 1.4582e-07, 1.1189e-09, 3.9474e-06,\n 7.4354e-07, 1.1680e-05, 2.1765e-05, 2.3609e-06, 2.9188e-06, 1.8701e-06,\n 3.4824e-06, 6.6610e-07, 5.8816e-06, 6.9383e-07, 1.1291e-07, 2.3145e-05,\n 8.8313e-07, 1.8833e-06, 8.7727e-15, 1.4838e-08, 1.4422e-05, 5.7285e-06,\n 3.7528e-05, 1.8159e-06, 1.0544e-05, 2.5835e-06, 3.9738e-06, 8.8034e-05,\n 1.2251e-06, 9.4887e-06, 6.0755e-06, 4.8507e-06, 5.3185e-07, 2.3883e-06,\n 2.6655e-06, 4.2074e-06, 2.0790e-08, 2.5173e-06, 6.3141e-07, 5.0400e-05,\n 4.6013e-06, 3.0301e-06, 9.9046e-06, 1.2544e-05, 2.0839e-06, 3.2442e-06,\n 3.4710e-07, 6.3130e-06, 3.2889e-08, 1.5440e-08, 3.2169e-06, 5.0759e-06,\n 3.4324e-07, 1.3033e-06, 1.6576e-06, 2.2029e-05, 1.7243e-06, 1.0396e-10,\n 4.7026e-06, 8.7802e-07, 3.5323e-06, 5.2635e-06, 1.5909e-06, 2.8940e-06,\n 9.8276e-06, 5.8029e-07, 2.5229e-08, 1.5997e-06, 1.1792e-05, 1.7611e-06,\n 1.0263e-05, 3.0424e-06, 6.7804e-09, 6.1549e-08, 4.7294e-06, 8.4267e-06,\n 1.8893e-07, 4.1132e-06, 3.3799e-05, 4.3778e-06, 1.8254e-05, 6.7081e-07,\n 4.1423e-06, 8.4735e-06, 3.6044e-06, 1.6995e-06, 2.8538e-05, 8.8125e-06,\n 6.0234e-06, 5.7969e-06, 1.1630e-05, 6.0837e-11, 1.0943e-05, 3.3285e-06,\n 3.7586e-07, 1.3991e-06, 5.7702e-09, 4.1101e-06, 2.6616e-07, 1.0576e-06,\n 1.0275e-05, 5.5071e-06, 3.7250e-06, 4.0906e-09, 2.6992e-16, 3.0799e-09,\n 8.3534e-06, 2.2700e-06, 6.2404e-09, 1.3051e-08, 3.3644e-06, 1.7666e-06,\n 1.2167e-05, 4.3525e-06, 9.1940e-06, 1.4465e-06, 2.3755e-06, 7.0184e-10,\n 3.9014e-06, 3.4029e-08, 5.1870e-06, 2.7011e-06, 5.4022e-09, 4.1265e-08,\n 1.9683e-06, 3.4872e-06, 2.1086e-06, 1.7690e-06, 3.3838e-09, 7.2447e-07,\n 8.4324e-07, 3.2213e-06, 5.0629e-06, 2.9049e-06, 3.3831e-06, 1.1949e-05,\n 2.2543e-06, 5.6718e-06, 1.2789e-06, 8.2205e-07, 2.9680e-09, 1.5477e-05,\n 3.1158e-10, 2.3947e-06, 2.6832e-06, 2.4993e-06, 6.5105e-08, 1.0203e-06,\n 1.0962e-05, 7.7665e-08, 2.7368e-06, 1.2613e-07, 7.8532e-08, 2.0080e-06,\n 1.1522e-07, 5.6014e-09, 8.0050e-07, 1.1106e-07, 2.8275e-05, 5.3317e-06,\n 5.7615e-06, 6.8117e-06, 2.9477e-06, 9.4296e-06, 1.1397e-06, 4.0063e-06,\n 7.8655e-06, 1.0989e-05, 1.9478e-06, 2.6493e-06, 7.9995e-06, 8.3976e-09,\n 7.2869e-06, 1.0398e-05, 1.3317e-06, 1.3967e-05, 3.6518e-06, 1.1920e-05,\n 2.6141e-06, 5.3034e-06, 6.7520e-05, 4.4231e-06, 2.4173e-06, 1.0679e-08,\n 6.1482e-06, 7.5624e-06, 1.3259e-06, 3.4549e-06, 3.4735e-07, 8.2579e-06,\n 8.2085e-06, 2.9985e-06, 1.1818e-06, 1.1183e-05, 4.1715e-07, 3.6036e-06,\n 1.3245e-06, 8.5177e-07, 9.3521e-06, 4.5151e-06, 5.3930e-06, 8.7343e-08,\n 4.0790e-06, 2.6213e-06, 9.7652e-07, 4.8756e-07, 2.6712e-10, 4.2550e-06,\n 1.8962e-06, 1.0766e-06, 6.6679e-06, 1.0320e-05, 4.3428e-06, 8.1872e-06,\n 1.5235e-06, 1.0617e-06, 1.0776e-05, 3.1228e-05, 9.0577e-06, 1.7002e-06,\n 4.6403e-06, 1.2928e-06, 7.4892e-06, 6.5739e-06, 3.5542e-05, 1.1237e-07,\n 4.4597e-06, 2.1408e-06, 2.8802e-06, 4.9050e-05, 1.5864e-05, 1.5140e-06,\n 8.1816e-06, 5.7483e-06, 9.7043e-08, 7.0867e-05, 7.0706e-07, 5.8030e-06,\n 7.7502e-07, 6.7973e-06, 1.8975e-09, 2.1978e-05, 9.3858e-07, 2.3881e-06,\n 9.1155e-06, 9.9408e-07, 7.4750e-07, 4.4526e-08, 1.4871e-05, 2.1717e-06,\n 6.4667e-07, 1.7443e-09, 2.2929e-06, 1.8590e-06, 8.9147e-06, 2.0009e-05,\n 3.1006e-06, 8.8249e-09, 5.3805e-06, 3.0253e-06, 6.6780e-06, 5.6370e-09,\n 6.8988e-09, 5.4272e-06, 5.5881e-13, 6.4459e-06, 2.3978e-06, 5.0918e-06,\n 1.1587e-06, 9.8417e-06, 4.2385e-07, 8.7130e-10, 7.4782e-06, 9.0731e-06,\n 5.5543e-06, 2.4838e-06, 2.9560e-06, 5.5438e-07, 2.3324e-06, 1.0982e-05,\n 1.6531e-07, 8.5394e-06, 5.4014e-06, 1.2213e-06, 2.9334e-07, 8.3539e-06,\n 8.8837e-07, 2.1152e-08, 5.0797e-06, 1.0892e-06, 4.8637e-06, 1.3747e-07,\n 5.6260e-06, 1.5568e-05, 8.6679e-06, 3.9408e-09, 5.3034e-06, 4.5725e-06,\n 8.9083e-07, 5.9902e-06, 4.1320e-06, 3.1970e-06, 1.1695e-05, 6.4857e-06,\n 1.6124e-05, 3.9665e-06, 3.9097e-10, 5.8513e-06, 1.1892e-06, 1.3367e-06,\n 3.4750e-06, 1.4724e-06, 1.0265e-07, 3.1993e-09, 9.2003e-07, 2.5418e-05,\n 1.2111e-11, 4.3551e-06, 2.3199e-06, 1.7161e-06, 1.1696e-05, 1.2982e-09,\n 6.2876e-06, 3.3889e-05, 7.6548e-10, 3.4753e-06, 7.3415e-06, 8.5450e-07,\n 1.4555e-05, 1.2171e-06, 4.1200e-06, 7.9647e-06, 1.7999e-06, 3.8644e-06,\n 1.7869e-06, 1.4648e-06, 7.8012e-06, 1.1652e-06, 2.0371e-05, 2.9475e-07,\n 1.0262e-05, 2.9415e-05, 6.5699e-06, 3.5011e-05, 2.5001e-06, 2.0804e-05,\n 7.1644e-06, 1.4837e-09, 1.6807e-05, 4.6603e-07, 3.1761e-06, 1.1410e-10,\n 1.0977e-06, 7.3931e-07, 3.4925e-06, 4.4718e-07, 5.3529e-06, 5.7546e-07,\n 5.1306e-06, 1.2285e-08, 4.3652e-06, 6.3057e-06, 5.8609e-06, 2.9062e-05,\n 2.5618e-06, 5.3666e-06, 5.8160e-06, 5.1699e-08, 2.8844e-06, 1.5069e-05,\n 4.6277e-08, 1.6703e-09, 4.4050e-05, 3.2656e-08, 2.6022e-07, 2.2618e-06,\n 9.3212e-06, 5.5249e-06, 3.4869e-09, 1.1305e-05, 3.4726e-06, 1.2189e-06,\n 4.2793e-06, 5.6784e-06, 6.4796e-06, 4.4786e-06, 2.7759e-08, 4.1061e-06,\n 3.9746e-07, 2.1879e-10, 3.0849e-06, 9.8819e-07, 3.0723e-06, 1.1628e-05,\n 6.6023e-06, 3.6294e-07, 2.1752e-06, 9.0967e-07, 2.4828e-07, 8.9622e-06,\n 1.9525e-05, 1.0774e-05, 3.4450e-06, 2.9301e-06, 2.9697e-05, 1.0362e-08,\n 3.0917e-06, 8.2352e-06, 2.4926e-06, 1.2623e-05, 2.6111e-06, 2.9703e-05,\n 2.2415e-10, 1.3668e-06, 2.6213e-05, 1.1685e-06, 9.2140e-06, 4.2463e-07,\n 1.0327e-06, 8.9171e-08, 3.5471e-06, 1.3164e-05, 5.4360e-05, 4.7926e-06,\n 1.0364e-06, 2.0245e-07, 2.1811e-05, 1.7465e-05, 6.4295e-06, 5.2477e-05,\n 2.7913e-06, 2.3232e-08, 2.1540e-06, 1.8578e-06, 6.2524e-07, 5.7028e-06,\n 4.1694e-07, 3.4700e-06, 1.4467e-06, 2.0266e-06, 4.4553e-06, 3.1180e-07,\n 8.8490e-06, 2.3461e-05, 2.7787e-08, 4.2001e-15, 1.8561e-05, 1.4299e-07],\n device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 1.4322e-04, 3.4533e-29, 6.5650e-11, -3.6571e-05, -6.6848e-05,\n 3.6434e-44, -9.7217e-05, 3.9947e-05, -1.7099e-04, 1.7532e-04,\n -3.9754e-04, 4.6762e-40, 1.3526e-04, 1.1270e-04, -5.7818e-04,\n 2.2875e-04, -5.5307e-05, 5.6052e-45, 5.6052e-45, -2.4281e-04,\n 5.6052e-45, -4.6699e-04, 1.1730e-04, 7.8408e-05, -2.5211e-04,\n -2.0803e-04, 1.4951e-04, 5.6052e-45, -6.7371e-05, 1.4645e-04,\n -4.7437e-05, 1.0607e-04, 1.0702e-04, 1.1955e-04, -1.3234e-05,\n -2.3357e-04, 5.6052e-45, 2.8735e-04, 8.1930e-05, -2.9507e-05,\n -4.6797e-04, -6.8474e-05, 3.4104e-04, -1.1711e-04, -2.0275e-04,\n -1.0806e-04, 6.7718e-05, -2.1533e-04, 1.7281e-04, 8.3198e-05,\n 5.6052e-45, -1.6009e-04, -1.3909e-04, -1.1147e-05, -1.9390e-04,\n -6.7170e-04, 2.2166e-17, -5.4672e-04, -5.7652e-16, -2.5850e-04,\n -8.6837e-05, -1.8233e-04, 1.1021e-04, 3.0163e-04, 2.4248e-05,\n -2.3600e-04, -6.3410e-05, 1.1184e-04, 5.6052e-45, 1.5415e-14,\n 3.8166e-04, 5.6052e-45, 3.0127e-05, 2.8628e-04, 2.5693e-05,\n 1.8250e-04, 5.6052e-45, 1.2301e-04, -2.1564e-04, 6.1370e-05,\n -1.8293e-08, -8.4287e-05, 7.3126e-05, 5.6052e-45, 2.4430e-05,\n 3.0213e-04, 1.8963e-04, 7.7238e-05, 2.1611e-04, 1.9266e-05,\n -9.2679e-05, -4.5723e-05, 1.0678e-04, -5.4022e-05, -3.1432e-04,\n -8.5947e-05, -9.7346e-05, 7.6312e-06, -1.8071e-04, -8.0175e-05,\n 2.9171e-04, -5.8724e-04, -4.9106e-05, 2.8464e-04, -4.5895e-05,\n 3.9347e-04, 3.7891e-05, -4.3048e-06, 4.7548e-04, -7.3704e-04,\n 7.4217e-05, -2.9715e-04, 3.5441e-05, 1.7988e-39, -1.6515e-04,\n 8.3038e-05, 1.1400e-04, -5.6444e-05, -4.2922e-07, 5.6052e-45,\n 2.7274e-05, 7.6963e-05, 2.6680e-05, 4.9977e-23, 4.0178e-05,\n 4.3908e-04, 9.0774e-05, 5.6052e-45, 3.1026e-04, 5.6052e-45,\n 6.6408e-05, 1.3555e-04, 5.6052e-45, 1.8844e-04, -1.5136e-18,\n 3.4767e-04, 1.6727e-18, 8.4772e-05, -3.9698e-05, 5.6052e-45,\n 7.9293e-30, -3.2685e-09, -3.3293e-06, 2.2372e-19, 5.6052e-45,\n 1.1082e-33, -2.3096e-04, -3.1814e-05, 5.6052e-45, -4.2766e-05,\n -2.2378e-05, 6.8677e-05, 3.7686e-05, -1.4206e-04, 2.1133e-30,\n 9.5181e-05, 5.6052e-45, -1.1860e-04, 5.6052e-45, 3.6836e-05,\n 8.6598e-06, 3.9245e-16, -1.4324e-04, -2.8696e-05, -8.9833e-05,\n -1.8606e-04, 7.8131e-18, 5.6052e-45, 4.8642e-05, 8.9579e-06,\n 1.0195e-04, 1.7994e-04, -5.4282e-05, 2.7488e-04, 4.0205e-04,\n -2.2136e-04, 1.4525e-10, 9.1646e-05, 2.0917e-04, 4.4284e-04,\n 5.7151e-04, 3.5650e-04, 2.0491e-04, 4.8932e-39, 9.9381e-24,\n 5.6490e-17, 5.3077e-04, 3.8222e-04, -9.6247e-05, -8.1921e-09,\n -5.0868e-05, 1.3761e-04, -4.5532e-05, 1.0723e-05, 3.4258e-04,\n 7.1746e-05, -7.6874e-05, -4.1260e-04, -4.6133e-05, 5.6052e-45,\n 5.6052e-45, 1.1629e-04, -6.1120e-04, 2.4319e-04, -5.4965e-06,\n -1.5510e-04, -1.7364e-05, 1.2989e-04, 5.6052e-45, 1.5166e-17,\n 2.1531e-05, 5.6052e-45, 1.2791e-05, 3.7798e-04, 2.7118e-04,\n -3.4054e-04, 4.8963e-04, 5.0734e-05, 2.3378e-05, -9.1435e-05,\n -4.1026e-05, 1.3175e-04, -2.0030e-05, -1.5977e-04, -1.6675e-04,\n 5.6052e-45, 2.2647e-08, 2.2562e-04, 1.9427e-04, 3.5270e-22,\n 1.1228e-04, -2.0312e-04, 5.9812e-05, 1.3641e-04, 1.0867e-41,\n 6.6329e-06, 3.3919e-04, 5.6052e-45, 1.2423e-04, 2.5550e-04,\n 5.6052e-45, 5.6052e-45, 2.7216e-04, -5.7222e-05, -2.0794e-05,\n -1.6444e-04, 3.4311e-04, -2.6302e-04, 9.6363e-05, 8.2548e-31,\n 7.3710e-05, 8.6174e-05, 8.6137e-06, 5.5764e-05, -1.7415e-04,\n 6.9814e-05, 5.6052e-45, 1.7774e-04, 5.6052e-45, -9.2232e-05,\n 5.6052e-45, 2.4521e-04, 7.6949e-05, -2.1472e-04, -7.1995e-05,\n 1.7149e-05, 3.9930e-05, -5.6790e-05, 2.5929e-04, -5.6052e-45,\n 8.2794e-05, 1.9250e-04, -1.3704e-04, -2.6978e-04, -3.1217e-04,\n -2.2599e-04, -2.3416e-04, 5.6052e-45, -1.6286e-04, 3.4079e-05,\n 5.6052e-45, -1.4093e-04, 5.6052e-45, 1.1633e-04, -2.1137e-05,\n -1.7026e-04, -3.3621e-04, 1.4733e-04, 5.6052e-45, -1.2498e-04,\n -1.9543e-04, 9.5395e-05, 5.6052e-45, -8.3478e-06, 2.9504e-04,\n 1.1157e-04, -5.2103e-04, -1.0053e-04, -2.2356e-04, -2.6378e-05,\n 4.5542e-05, -2.9292e-04, 9.5021e-05, -2.0734e-04, 5.6052e-45,\n 5.6052e-45, 1.0791e-14, -2.8591e-04, -3.9580e-05, -1.1984e-04,\n 3.0108e-04, 8.5024e-22, 1.6614e-05, 6.2520e-05, -7.9132e-05,\n 5.6052e-45, 1.5350e-12, 4.1572e-05, 9.6424e-10, 1.6133e-04,\n 8.9847e-05, -2.1067e-04, 1.4022e-05, 1.3755e-04, -1.1779e-04,\n 5.6052e-45, 2.0621e-04, 1.0332e-04, 2.5141e-04, 2.3641e-04,\n 7.7428e-05, -1.0527e-04, -2.6925e-05, 3.9117e-05, 1.4416e-07,\n -3.8762e-05, 3.6420e-04, 1.0793e-16, 5.6052e-45, 5.6052e-45,\n 4.9071e-05, 1.6219e-04, 7.7055e-05, 1.2669e-05, -1.1096e-05,\n 5.8260e-06, -1.1388e-04, -4.7981e-05, -3.0762e-05, 1.3368e-04,\n -5.9877e-06, 2.0230e-05, -2.0799e-05, 5.6052e-45, 3.9164e-05,\n 3.1191e-05, 2.5170e-10, 1.7391e-09, -7.1702e-06, 2.4974e-05,\n 9.8189e-05, -4.7543e-05, 5.6052e-45, 7.8405e-04, -1.0121e-05,\n 3.5048e-33, 2.9689e-04, -1.0116e-03, 8.3434e-05, -2.7620e-05,\n 4.8570e-05, -1.9765e-12, -1.9992e-05, -5.6363e-04, -1.4458e-04,\n 1.4438e-08, 5.6052e-45, 1.1052e-28, 2.3418e-28, -1.2132e-04,\n -1.2714e-05, -9.2795e-05, -1.6649e-04, 2.9925e-04, 5.6052e-45,\n -3.3937e-05, 5.6052e-45, -1.0752e-04, -1.6724e-04, -1.3534e-04,\n 3.9026e-04, 1.4994e-04, 5.6052e-45, -9.9074e-05, 6.4861e-05,\n 3.2268e-04, -3.2140e-07, 1.8217e-44, 3.2350e-24, 8.9012e-11,\n 1.6683e-04, 1.2794e-05, 1.8565e-04, 7.1517e-04, -2.7104e-04,\n 2.8276e-04, 5.6052e-45, 1.3167e-04, 2.6313e-04, -3.9169e-04,\n -1.1415e-04, 5.6052e-45, 4.6734e-05, -6.3397e-05, -2.5383e-06,\n 3.8966e-04, 5.6052e-45, 3.0111e-04, -3.4834e-04, 4.8336e-04,\n -4.9244e-04, 3.7109e-04, -2.0977e-04, 5.6052e-45, 5.6052e-45,\n 3.0751e-04, 7.8714e-07, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 7.1192e-05, -3.3616e-04, 2.8604e-04, 1.4790e-05, -8.3563e-05,\n 5.6052e-45, 7.7951e-05, -1.5048e-04, -6.7123e-04, 5.4905e-11,\n 5.6052e-45, 2.8351e-04, 7.1779e-06, 5.6052e-45, 8.5511e-05,\n 4.1778e-04, 1.6799e-04, -3.6826e-04, 6.0020e-12, 5.7071e-05,\n 4.0068e-04, -3.5485e-04, -2.8680e-04, -3.4258e-04, 1.4456e-04,\n -7.9463e-05, 2.8951e-13, -9.9962e-04, 9.8091e-45, -1.4106e-07,\n 4.6222e-05, -9.1472e-05, 5.6052e-45, 7.0233e-04, 5.6052e-45,\n 7.3851e-05, -6.0297e-05, -7.8765e-05, -1.4578e-04, 3.8607e-04,\n 3.4501e-04, 5.6052e-45, 5.6052e-45, 3.2170e-04, 3.3393e-05,\n 5.6052e-45, -9.2121e-05, 2.9594e-05, -1.0630e-19, -4.8263e-04,\n -4.9817e-04, -2.1474e-04, 1.5538e-04, 4.7020e-04, -6.3547e-05,\n -6.8207e-05, -9.1073e-05, 1.3091e-04, 2.1159e-04, 2.4608e-04,\n -1.0880e-03, -4.9823e-05, -4.4494e-05, -5.1705e-05, -1.8738e-04,\n -5.4606e-06, 1.3675e-04, -1.5774e-04, -1.0909e-03, -7.5536e-06,\n 5.6052e-45, -9.9769e-05, 4.9528e-05, 8.5745e-05, 3.3818e-04,\n 2.3689e-32, -2.5089e-04, 1.3137e-05, -2.9222e-05, 3.9141e-04,\n -7.5030e-05, 1.7190e-04, 2.6388e-04, -1.4062e-04, 3.9513e-24,\n 2.7132e-09, -7.4528e-05, -1.2651e-05, 1.5064e-42, 1.5316e-04,\n 2.4685e-04, 1.7313e-05, -8.7868e-05, 1.6602e-04, -5.1641e-08,\n -9.4725e-04, 4.4381e-33, -1.7365e-04, -2.2353e-04, 1.8567e-04,\n -9.2060e-05, -2.1319e-05, -6.2851e-05, -9.9499e-05, 6.4591e-05,\n -3.3036e-20, -6.8205e-05, 5.6052e-45, 1.3440e-04, -6.7255e-05,\n -9.4208e-05, 6.2517e-05, 3.3733e-05, -2.4861e-04, -2.4551e-04,\n 5.6052e-45, -8.3394e-05, 4.9821e-04, 2.5486e-04, 4.9623e-07,\n 6.6697e-05, 1.0025e-04, -2.7777e-04, -4.3108e-04, 9.7762e-06,\n 5.6052e-45, -1.8240e-05, 2.6031e-04, -8.4522e-05, 4.4094e-04,\n 5.6052e-45, 1.9444e-04, 8.8964e-23, 3.0764e-04, 1.7529e-04,\n -8.1915e-05, 2.2594e-04, 5.6052e-45, 4.6705e-04, 5.6052e-45,\n -3.4266e-04, -3.3431e-05, 2.1835e-04, 5.6052e-45, 2.0551e-04,\n 7.6803e-05, -2.4565e-04, -1.0524e-03, -2.0234e-04, 2.5220e-04,\n -1.4348e-04, 5.7707e-19, 1.6536e-05, -8.4527e-05, 5.6052e-45,\n 1.5139e-04, -2.0999e-04, 1.1975e-04, -2.1019e-04, -2.5023e-05,\n 6.7035e-05, 5.6052e-45, 9.4806e-05, 1.4627e-04, -4.8571e-05,\n 1.6756e-05, 1.0664e-04, 1.0193e-04, -7.7285e-06, -1.4664e-05,\n 9.5082e-05, 2.7579e-04, -3.3822e-04, 3.7190e-04, 1.8629e-04,\n -1.0317e-04, -1.3995e-05, 1.4553e-24, 6.9384e-04, 1.9461e-04,\n -3.0829e-05, -2.7351e-04, -3.0769e-06, -1.7694e-20, -4.8194e-04,\n 5.6052e-45, 1.6847e-04, 1.5109e-41, -2.3231e-04, 2.8603e-04,\n -2.0266e-04, 6.6389e-05, -9.3772e-05, 2.7580e-04, -3.4778e-05,\n 1.4767e-04, 5.6052e-45, 1.5654e-04, -2.2580e-05, -2.6966e-09,\n 8.6333e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45, 6.6889e-05,\n 4.2646e-05, 5.6052e-45, -7.7037e-05, 3.5572e-04, 1.0305e-04,\n -2.8835e-04, 5.6052e-45, 3.4140e-05, 5.6052e-45, -1.0390e-05,\n -2.1077e-04, 1.5674e-05, 7.9610e-05, 3.5305e-04, -1.2788e-04,\n -6.8740e-05, 3.4249e-05, -1.8854e-04, 1.3930e-05, 2.3807e-05,\n -2.7075e-04, 3.4273e-04, 4.0979e-04, -1.2544e-04, 1.4097e-11,\n 4.4107e-04, 5.6052e-45, -8.7699e-06, 5.6052e-45, -3.8756e-05,\n -2.9300e-04, -8.4199e-05, 1.9581e-04, -1.3556e-04, 5.6052e-45,\n 2.4229e-04, 5.6052e-45, -1.0273e-04, 1.8812e-04, 1.6293e-04,\n 5.9536e-06, -1.6237e-04, 1.5821e-17, 5.6052e-45, 5.6052e-45,\n -2.1835e-04, 2.1701e-04, -2.3411e-04, 3.7574e-24, -4.9357e-04,\n 5.6052e-45, 6.1089e-04, 8.7090e-05, -7.2234e-05, -1.7241e-04,\n 4.5766e-08, 1.6947e-04, 5.6052e-45, 5.6281e-05, -5.6052e-45,\n 1.4642e-04, -1.6603e-04, -8.2690e-05, 5.6052e-45, 9.6808e-05,\n -2.9264e-04, 2.2817e-05, 4.3639e-04, 4.4569e-04, 5.7497e-05,\n -7.4553e-05, 5.6052e-45, -2.3102e-04, 1.0971e-04, 5.6052e-45,\n 1.1701e-04, 4.3481e-05, -1.0202e-05, -1.7046e-04, -1.6170e-04,\n 4.5094e-05, 6.3836e-04, 1.3358e-04, -1.0849e-04, -1.4310e-04,\n 5.6052e-45, -2.9752e-04, -8.1707e-05, 1.8192e-04, 5.6052e-45,\n 1.4066e-04, -1.1180e-04, 5.3549e-05, -7.0059e-05, -1.9310e-04,\n -1.1519e-04, -5.1895e-05, 5.6052e-45, -2.0593e-04, 5.6052e-45,\n -1.9151e-05, -3.2400e-04, -1.4885e-05, 2.6205e-04, 6.2573e-06,\n -1.2601e-04, 1.2892e-04, -3.9843e-05, 2.5519e-05, -2.7967e-05,\n 6.1098e-06, -1.5548e-04, -1.1650e-14, 2.0624e-04, 5.6052e-45,\n -3.3741e-04, 5.6052e-45, -1.5479e-06, 1.1650e-04, 2.4428e-04,\n 1.3543e-04, 1.4263e-04, 6.5750e-05, -1.8114e-04, 1.4803e-04,\n -1.9102e-04, 1.1764e-04, 2.6771e-04, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 1.1430e-04, -6.6519e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.2342e-06, 2.3553e-07, 7.4301e-07, 4.2122e-07, 3.1955e-06, 2.1949e-06,\n 4.0330e-06, 7.5430e-06, 2.1155e-06, 3.0190e-06, 2.0372e-06, 1.2122e-06,\n 7.9250e-07, 2.6306e-06, 1.1175e-06, 7.4447e-07, 5.6495e-06, 1.3400e-06,\n 1.6636e-07, 4.6131e-06, 2.7506e-08, 5.6136e-07, 2.2238e-06, 1.4625e-06,\n 2.6214e-06, 9.6434e-07, 7.0657e-07, 4.9843e-09, 2.2368e-06, 2.2791e-06,\n 3.2826e-07, 7.7211e-07, 1.5296e-06, 5.0952e-06, 1.0137e-06, 1.7005e-07,\n 2.6768e-06, 3.0430e-06, 1.2018e-06, 2.0201e-06, 6.1835e-06, 6.2264e-07,\n 2.0084e-06, 1.4445e-06, 2.8741e-06, 2.3127e-06, 7.6381e-07, 6.9796e-07,\n 1.0215e-05, 1.1382e-06, 4.2752e-06, 2.8962e-05, 1.5605e-06, 1.2622e-06,\n 3.6828e-06, 2.6060e-06, 5.9429e-06, 2.5466e-07, 1.3238e-06, 1.4978e-06,\n 2.7682e-08, 1.2220e-06, 3.4751e-06, 1.8988e-06, 2.0474e-07, 9.2864e-07,\n 2.1289e-06, 4.6202e-06, 2.3219e-06, 5.5272e-07, 1.2912e-06, 1.9449e-06,\n 4.1527e-07, 1.9172e-06, 9.4251e-07, 7.9661e-07, 1.3499e-08, 1.3153e-06,\n 2.4820e-07, 3.6930e-07, 5.6352e-06, 7.8537e-07, 3.5317e-06, 3.7269e-08,\n 1.1346e-05, 8.8569e-07, 2.0363e-06, 3.2290e-06, 7.0268e-07, 4.1262e-06,\n 8.6875e-07, 1.2242e-06, 1.0988e-07, 1.5875e-06, 2.5384e-07, 4.5176e-06,\n 4.4649e-06, 1.6223e-05, 3.0349e-06, 2.0404e-06, 2.9235e-06, 2.3948e-06,\n 9.2525e-06, 1.1088e-05, 1.4217e-06, 2.1893e-06, 6.8731e-07, 1.0942e-06,\n 1.2270e-06, 1.7645e-07, 1.0958e-06, 5.9989e-06, 1.8585e-06, 1.8697e-06,\n 4.0439e-06, 3.1063e-06, 1.0261e-06, 2.3009e-07, 1.8130e-06, 1.0204e-06,\n 2.7011e-06, 4.7989e-06, 1.6207e-06, 2.5569e-06, 1.2296e-06, 1.6923e-06,\n 4.1428e-06, 8.1518e-08, 2.1705e-06, 1.1381e-05, 1.8709e-06, 2.1573e-06,\n 1.1169e-07, 1.2208e-06, 1.1968e-06, 5.7163e-07, 4.5208e-10, 3.3164e-06,\n 8.9651e-07, 1.8343e-06, 1.1261e-06, 6.1926e-07, 4.7246e-07, 1.7635e-10,\n 2.3992e-09, 3.1913e-06, 1.9731e-06, 2.1343e-06, 8.5741e-06, 1.2596e-07,\n 1.5757e-07, 8.3277e-07, 2.4885e-07, 5.1176e-07, 7.3506e-09, 1.9737e-06,\n 2.7607e-06, 2.8011e-06, 3.4650e-06, 4.3590e-06, 2.7378e-07, 1.2630e-06,\n 1.1960e-06, 1.4520e-06, 1.8688e-06, 1.3642e-06, 1.3558e-06, 4.5111e-12,\n 1.1397e-06, 4.9661e-07, 4.1134e-06, 3.6591e-06, 9.3414e-07, 1.1707e-06,\n 1.8983e-06, 8.3217e-07, 4.6129e-10, 1.3218e-06, 3.4356e-06, 8.1310e-06,\n 3.5847e-06, 3.1714e-06, 1.4368e-06, 1.0372e-10, 1.8939e-10, 1.0894e-16,\n 2.5394e-06, 8.3050e-07, 7.3964e-08, 4.4076e-09, 1.2834e-06, 3.5154e-07,\n 1.9335e-06, 4.6954e-06, 2.7665e-06, 1.0292e-06, 1.3930e-06, 2.6483e-06,\n 5.0426e-07, 2.8870e-17, 3.4264e-06, 8.8617e-07, 2.0287e-06, 2.8348e-06,\n 1.7927e-06, 4.1582e-07, 3.0447e-06, 2.2217e-06, 1.1918e-06, 2.1640e-08,\n 2.2302e-06, 3.0361e-08, 9.6797e-07, 3.2734e-06, 1.8114e-06, 8.6986e-07,\n 1.4335e-06, 5.3629e-07, 2.2100e-06, 9.7094e-07, 1.0494e-06, 2.5223e-06,\n 1.5104e-06, 6.4713e-07, 5.0279e-06, 4.2745e-06, 9.5186e-06, 2.0408e-06,\n 2.0141e-06, 8.5056e-08, 1.3523e-06, 1.7987e-06, 1.3589e-06, 1.0986e-06,\n 2.7048e-06, 8.3487e-07, 5.1013e-06, 5.1290e-10, 9.5415e-07, 1.4967e-06,\n 1.7645e-06, 1.4070e-06, 2.2859e-06, 1.0473e-06, 6.2103e-07, 9.7371e-07,\n 6.4394e-07, 3.6812e-06, 4.5378e-07, 1.7947e-06, 8.2648e-06, 2.0629e-06,\n 3.4719e-06, 4.1655e-06, 8.9826e-07, 2.1467e-06, 5.4853e-10, 1.0637e-06,\n 4.0699e-15, 7.3606e-06, 3.2311e-15, 2.0414e-06, 1.3360e-06, 1.7683e-06,\n 7.6960e-06, 3.4635e-06, 4.4732e-07, 1.2941e-06, 4.9409e-06, 6.3597e-06,\n 2.8984e-06, 7.0937e-07, 1.0557e-06, 3.1518e-06, 2.7082e-06, 1.0173e-06,\n 2.7312e-06, 2.7974e-09, 1.4046e-07, 5.1184e-06, 7.9771e-06, 4.5327e-06,\n 4.6466e-06, 1.9182e-06, 3.1384e-06, 3.4927e-07, 1.9813e-06, 1.3051e-06,\n 1.6461e-09, 1.0402e-05, 3.6370e-07, 3.9338e-06, 1.0007e-05, 5.4587e-08,\n 1.4431e-06, 9.8578e-06, 2.9241e-06, 8.1838e-06, 1.1636e-06, 3.1689e-08,\n 7.3654e-07, 9.3861e-06, 1.0931e-07, 1.0095e-08, 2.9935e-07, 6.4418e-09,\n 7.9273e-08, 4.8544e-07, 1.6774e-06, 3.7217e-06, 4.1799e-06, 4.1914e-10,\n 1.5523e-06, 3.9449e-07, 8.8361e-07, 9.1999e-10, 3.5598e-09, 1.5150e-05,\n 1.8247e-06, 5.4027e-07, 2.1855e-06, 2.3093e-07, 3.3966e-10, 1.7560e-06,\n 4.5349e-07, 3.3377e-06, 6.7455e-06, 1.2701e-06, 1.4549e-06, 1.0958e-06,\n 1.6948e-06, 4.3207e-07, 2.2138e-06, 5.5533e-07, 3.2284e-08, 7.2661e-06,\n 7.3374e-07, 5.3816e-07, 2.5069e-15, 4.2402e-09, 5.2093e-06, 1.8741e-06,\n 1.1191e-05, 6.1929e-07, 3.8158e-06, 1.1430e-06, 1.5517e-06, 2.5159e-05,\n 9.2608e-07, 3.3793e-06, 2.4856e-06, 1.9730e-06, 1.7131e-07, 6.8248e-07,\n 8.2455e-07, 1.9525e-06, 5.9412e-09, 7.1935e-07, 6.0823e-07, 1.4402e-05,\n 1.9038e-06, 1.4084e-06, 2.8303e-06, 4.6445e-06, 1.0350e-06, 9.2705e-07,\n 3.6169e-07, 2.5135e-06, 7.7473e-08, 6.5724e-08, 1.2564e-06, 1.4505e-06,\n 4.2685e-07, 7.3962e-07, 9.4232e-07, 6.2950e-06, 4.9274e-07, 1.7835e-09,\n 1.3438e-06, 6.0871e-07, 1.8644e-06, 2.2317e-06, 7.4971e-07, 1.4943e-06,\n 2.8083e-06, 8.2071e-07, 7.2095e-09, 1.0187e-06, 4.5966e-06, 8.7032e-07,\n 3.4520e-06, 1.5587e-06, 1.9376e-09, 1.5169e-07, 2.1828e-06, 3.2811e-06,\n 5.3995e-08, 1.1754e-06, 9.6584e-06, 1.2510e-06, 5.4765e-06, 7.7861e-07,\n 1.8883e-06, 3.6099e-06, 1.6932e-06, 1.0361e-06, 8.1549e-06, 3.4259e-06,\n 2.3535e-06, 2.3915e-06, 3.6304e-06, 1.7386e-11, 3.4499e-06, 1.5176e-06,\n 1.0764e-07, 9.0875e-07, 1.6489e-09, 1.9576e-06, 5.0778e-07, 6.8152e-07,\n 3.3342e-06, 2.4314e-06, 1.7620e-06, 1.1689e-09, 7.7133e-17, 1.4296e-07,\n 2.4895e-06, 6.4866e-07, 1.7833e-09, 3.7294e-09, 1.6310e-06, 1.0366e-06,\n 3.8879e-06, 1.2527e-06, 3.5171e-06, 4.1335e-07, 1.3027e-06, 1.6604e-07,\n 1.8134e-06, 9.7241e-09, 1.4822e-06, 1.2445e-06, 9.6486e-09, 1.1792e-08,\n 6.0053e-07, 1.9073e-06, 9.7377e-07, 8.9003e-07, 9.6927e-10, 2.2416e-07,\n 9.8008e-07, 1.1416e-06, 2.0961e-06, 1.4093e-06, 1.7170e-06, 3.7436e-06,\n 6.4417e-07, 2.8944e-06, 3.6545e-07, 2.3491e-07, 1.0743e-09, 4.5410e-06,\n 8.9035e-11, 1.2580e-06, 7.6673e-07, 1.2939e-06, 2.3281e-07, 6.0000e-07,\n 3.6705e-06, 2.3029e-07, 1.0003e-06, 3.6043e-08, 2.2441e-08, 1.3760e-06,\n 6.0573e-08, 1.6006e-09, 4.1849e-07, 3.5218e-07, 8.0797e-06, 2.4469e-06,\n 2.2216e-06, 2.8469e-06, 1.5181e-06, 3.7686e-06, 8.0410e-07, 1.6776e-06,\n 3.2533e-06, 3.7778e-06, 1.0898e-06, 1.2510e-06, 2.8922e-06, 1.2033e-07,\n 2.8669e-06, 3.2232e-06, 8.4868e-07, 3.9920e-06, 1.6553e-06, 3.9551e-06,\n 1.5488e-06, 1.6749e-06, 1.9294e-05, 2.3888e-06, 1.0952e-06, 5.5434e-08,\n 2.4100e-06, 2.1610e-06, 9.5865e-07, 1.6894e-06, 4.5556e-07, 3.0249e-06,\n 2.6194e-06, 1.3612e-06, 9.6548e-07, 4.0148e-06, 1.1920e-07, 1.0298e-06,\n 6.8147e-07, 5.6453e-07, 2.6725e-06, 1.9617e-06, 2.0231e-06, 1.5765e-07,\n 1.4835e-06, 1.2730e-06, 5.8101e-07, 5.8430e-07, 7.6578e-11, 1.9978e-06,\n 9.6193e-07, 8.0785e-07, 2.4777e-06, 3.9382e-06, 1.8740e-06, 2.9094e-06,\n 6.3963e-07, 3.0339e-07, 3.4361e-06, 8.9237e-06, 2.7063e-06, 9.1326e-07,\n 2.1668e-06, 8.9477e-07, 3.0268e-06, 3.1117e-06, 1.0998e-05, 3.2109e-08,\n 1.3875e-06, 1.2396e-06, 1.5395e-06, 1.4016e-05, 5.2351e-06, 1.1303e-06,\n 2.7549e-06, 2.0478e-06, 3.5359e-07, 2.0251e-05, 6.5283e-07, 2.3265e-06,\n 4.2137e-07, 2.4615e-06, 5.4222e-10, 6.8261e-06, 2.6821e-07, 9.9649e-07,\n 2.7931e-06, 7.6039e-07, 4.8441e-07, 1.2724e-08, 4.8742e-06, 6.2060e-07,\n 6.1465e-07, 2.1435e-07, 1.1846e-06, 5.3122e-07, 2.8448e-06, 6.1203e-06,\n 1.5231e-06, 1.8601e-07, 1.9391e-06, 1.2727e-06, 2.5766e-06, 1.6111e-09,\n 7.5429e-08, 1.6444e-06, 1.5968e-13, 2.6782e-06, 1.3129e-06, 2.0987e-06,\n 6.8217e-07, 2.8695e-06, 5.1171e-07, 2.4898e-10, 2.5211e-06, 3.1402e-06,\n 1.6797e-06, 1.1125e-06, 1.0601e-06, 5.2986e-07, 9.6843e-07, 3.1576e-06,\n 4.1137e-07, 2.7210e-06, 2.3700e-06, 4.9400e-07, 4.9038e-07, 3.2205e-06,\n 9.0888e-07, 6.0443e-09, 1.9733e-06, 7.5287e-07, 2.0740e-06, 3.8841e-07,\n 1.6077e-06, 4.4487e-06, 3.2210e-06, 1.1261e-09, 1.7682e-06, 1.3066e-06,\n 7.4057e-07, 2.7610e-06, 2.0111e-06, 1.6941e-06, 4.3709e-06, 3.0512e-06,\n 4.6203e-06, 1.7072e-06, 1.1172e-10, 2.1940e-06, 8.7076e-07, 3.8197e-07,\n 1.0159e-06, 4.2075e-07, 2.9333e-08, 9.1423e-10, 6.5351e-07, 7.7729e-06,\n 3.4609e-12, 1.5363e-06, 1.2570e-06, 1.2254e-06, 4.2506e-06, 3.7096e-10,\n 2.6076e-06, 9.6842e-06, 4.3476e-09, 1.4353e-06, 2.0991e-06, 7.5750e-07,\n 5.3920e-06, 6.4765e-07, 1.6487e-06, 2.2763e-06, 1.0748e-06, 1.8438e-06,\n 8.8718e-07, 8.7676e-07, 3.3009e-06, 5.6425e-07, 6.0389e-06, 8.4232e-08,\n 4.0337e-06, 8.4054e-06, 2.7262e-06, 1.0005e-05, 1.0007e-06, 6.8124e-06,\n 2.5241e-06, 1.3547e-07, 5.2569e-06, 1.3317e-07, 1.4245e-06, 3.2605e-11,\n 6.3829e-07, 5.4756e-07, 1.3543e-06, 5.3845e-07, 2.5300e-06, 1.6444e-07,\n 1.4661e-06, 3.5104e-09, 2.1672e-06, 2.3464e-06, 2.4530e-06, 8.3048e-06,\n 1.2423e-06, 1.5335e-06, 2.5051e-06, 2.6706e-07, 1.2984e-06, 4.9161e-06,\n 1.3224e-08, 7.3419e-08, 1.2588e-05, 5.6375e-08, 7.4361e-08, 9.8109e-07,\n 3.6097e-06, 2.0040e-06, 9.9640e-10, 3.3286e-06, 1.6219e-06, 8.1969e-07,\n 2.0379e-06, 2.4106e-06, 2.6993e-06, 1.6373e-06, 7.9324e-09, 2.9212e-06,\n 6.6845e-07, 6.2520e-11, 1.6803e-06, 5.5341e-07, 9.8433e-07, 3.8181e-06,\n 2.4972e-06, 1.0685e-07, 1.1707e-06, 7.6252e-07, 3.6902e-07, 3.3998e-06,\n 5.5794e-06, 4.1344e-06, 1.4437e-06, 1.4553e-06, 8.4862e-06, 1.1338e-07,\n 1.2819e-06, 3.1262e-06, 1.8125e-06, 4.5328e-06, 7.7319e-07, 8.8453e-06,\n 6.4052e-11, 1.1625e-06, 7.4907e-06, 3.9933e-07, 3.3975e-06, 6.0516e-07,\n 6.7741e-07, 2.8122e-07, 1.4127e-06, 4.7937e-06, 1.5535e-05, 1.7112e-06,\n 3.4087e-07, 3.5979e-07, 6.6651e-06, 4.9908e-06, 2.3517e-06, 1.4996e-05,\n 1.2469e-06, 6.6386e-09, 6.1552e-07, 9.3368e-07, 6.8865e-07, 2.5898e-06,\n 2.6155e-07, 1.2554e-06, 8.9826e-07, 1.2451e-06, 1.3213e-06, 4.1140e-07,\n 3.4265e-06, 6.7042e-06, 7.9404e-09, 1.2002e-15, 5.4385e-06, 3.3372e-07],\n device='cuda:0')" }, "4": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 1.6488e-05, -1.4424e-31, -1.3237e-14, ..., 5.6052e-45,\n 4.5902e-06, 7.1103e-06],\n [ 2.0914e-05, -2.4680e-31, 1.7005e-14, ..., -5.6052e-45,\n 1.2571e-05, 4.9623e-06],\n [-5.2860e-06, -4.6505e-31, 4.9668e-14, ..., -5.6052e-45,\n -9.4278e-06, -2.1789e-06],\n ...,\n [ 1.2960e-05, -1.4827e-32, 1.4761e-13, ..., -5.6052e-45,\n -6.5906e-06, -5.4997e-06],\n [ 1.3405e-05, 2.9384e-31, -2.1457e-13, ..., -5.6052e-45,\n 1.1084e-05, -2.2056e-05],\n [ 1.1067e-06, 1.1508e-31, 6.9854e-14, ..., -5.6052e-45,\n -1.9899e-05, -1.3157e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.0222e-09, 2.9364e-12, 6.5881e-10, ..., 3.3641e-18, 6.2046e-09,\n 5.3830e-10],\n [1.3911e-08, 5.4147e-11, 1.6519e-10, ..., 1.7193e-16, 2.4183e-09,\n 2.2894e-09],\n [2.0818e-08, 3.2965e-13, 4.4880e-11, ..., 7.4982e-17, 2.0536e-09,\n 2.5456e-09],\n ...,\n [1.9563e-08, 5.3595e-11, 1.3424e-11, ..., 6.5601e-17, 1.6696e-09,\n 1.4470e-09],\n [2.5444e-08, 4.4919e-12, 6.2387e-10, ..., 9.7767e-19, 3.3787e-09,\n 4.0470e-09],\n [1.8861e-08, 6.8987e-12, 4.9939e-10, ..., 6.2656e-17, 4.7172e-09,\n 3.3056e-09]], device='cuda:0')" + }, + "5": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6052e-45, 3.9702e-26, 0.0000e+00, ..., 2.8262e-25,\n 5.6052e-45, 5.6052e-45],\n [ 2.8750e-06, -3.4558e-06, -1.9541e-39, ..., -1.7206e-06,\n -2.4016e-06, 1.0311e-05],\n [ 1.4448e-07, 3.8939e-07, -1.1574e-39, ..., -3.8548e-07,\n -2.4723e-07, 1.2512e-06],\n ...,\n [ 1.9303e-06, -9.3097e-06, -6.4635e-40, ..., -1.6230e-06,\n 3.4935e-08, -1.6698e-05],\n [-2.5685e-07, -3.0748e-06, -1.5835e-42, ..., -4.1745e-06,\n 1.5720e-06, 1.5001e-08],\n [-7.5800e-07, 1.2750e-06, -1.6718e-39, ..., 7.4812e-06,\n 1.5872e-06, 1.1566e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5982e-11, 1.5239e-11, 0.0000e+00, ..., 2.8985e-12, 7.7933e-11,\n 2.0601e-12],\n [5.5887e-10, 3.4808e-10, 3.6153e-12, ..., 1.0094e-09, 1.0207e-09,\n 1.4900e-10],\n [7.1554e-11, 7.0524e-11, 2.3953e-13, ..., 1.4232e-10, 3.5385e-10,\n 1.4243e-10],\n ...,\n [7.5388e-10, 4.4527e-10, 5.2433e-12, ..., 1.5097e-10, 2.0495e-10,\n 3.8287e-09],\n [1.1055e-11, 1.3709e-10, 5.0357e-14, ..., 2.9572e-10, 5.7164e-10,\n 7.6276e-11],\n [4.1424e-10, 2.5341e-10, 9.0561e-13, ..., 8.3957e-10, 2.4020e-10,\n 8.6336e-10]], device='cuda:0')" + }, + "6": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-2.7876e-14, 2.1512e-05, -1.4557e-05, ..., 1.4872e-05,\n -6.0187e-06, -1.4926e-07], device='cuda:0')", + "exp_avg_sq": "tensor([1.5950e-09, 1.0827e-07, 2.4486e-08, ..., 6.4185e-08, 2.2175e-08,\n 6.6246e-08], device='cuda:0')" + }, + "7": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-4.9243e-17, -7.5301e-08, -1.5296e-06, ..., -8.2674e-07,\n -2.0414e-06, 2.7959e-07],\n [ 1.6864e-16, -5.2434e-06, -9.0352e-07, ..., -2.9519e-06,\n 1.4543e-06, 1.2484e-07],\n [ 3.5097e-17, 4.5890e-06, -9.4728e-07, ..., -1.1508e-06,\n 3.1703e-07, -4.4113e-06],\n ...,\n [ 4.6054e-16, 3.5197e-07, -2.7686e-06, ..., -1.0305e-06,\n -1.2226e-07, 1.3850e-06],\n [-5.3855e-16, -3.4545e-06, -2.1339e-07, ..., -1.0924e-06,\n 4.2353e-08, -5.3837e-07],\n [ 9.1176e-16, -2.5962e-06, -1.3834e-06, ..., -6.8402e-07,\n 1.6414e-06, 4.9500e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.0808e-11, 5.7454e-11, 9.0231e-11, ..., 4.5614e-11, 5.4293e-11,\n 7.9972e-11],\n [3.0057e-10, 1.7043e-10, 1.3969e-10, ..., 1.3365e-10, 6.3195e-11,\n 1.2874e-10],\n [2.7659e-11, 1.1931e-10, 3.9111e-10, ..., 8.9023e-11, 7.4569e-11,\n 1.4400e-10],\n ...,\n [7.0189e-11, 1.6190e-10, 5.9274e-11, ..., 1.0649e-10, 1.3345e-10,\n 2.3981e-10],\n [8.4020e-11, 1.7071e-10, 1.5698e-10, ..., 1.0911e-10, 1.4494e-10,\n 1.3377e-10],\n [4.6804e-11, 1.5156e-10, 2.8898e-10, ..., 1.1255e-10, 5.3664e-11,\n 1.1334e-10]], device='cuda:0')" + }, + "14": { + "step": "tensor(1252.)", + "exp_avg": "tensor([5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.5368e-06], device='cuda:0')" + }, + "15": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.7230e-09, 5.5040e-07, 4.1382e-07], device='cuda:0')" + }, + "16": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.8550e-04, 6.0329e-05, 6.5278e-05, 6.9730e-05], device='cuda:0')" + }, + "18": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.2423e-10, 3.8968e-10, 0.0000e+00, ..., 2.5713e-09, 9.2475e-10,\n 2.9682e-11],\n [1.3318e-10, 3.4236e-10, 0.0000e+00, ..., 3.1475e-10, 1.1068e-09,\n 3.1192e-10],\n [4.0026e-11, 1.4126e-10, 0.0000e+00, ..., 4.7206e-11, 3.0604e-10,\n 1.0023e-10],\n ...,\n [5.0717e-12, 2.7278e-11, 0.0000e+00, ..., 3.8900e-11, 5.3871e-10,\n 3.0349e-12],\n [9.7692e-10, 1.0270e-09, 0.0000e+00, ..., 1.2431e-09, 5.2102e-09,\n 3.1268e-10],\n [3.0515e-12, 2.3715e-12, 0.0000e+00, ..., 4.6937e-12, 4.6558e-11,\n 1.1027e-11]], device='cuda:0')" + }, + "19": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.8308e-07, 4.0264e-07, 9.1999e-08, 1.6702e-07, 6.0763e-08, 5.8881e-10,\n 1.0454e-08, 1.6582e-08, 4.2378e-08, 2.3990e-09, 1.3440e-07, 6.8348e-07,\n 2.8164e-07, 3.4117e-07, 5.1507e-08, 3.2122e-07, 7.6560e-07, 6.3462e-08,\n 4.2448e-07, 6.9534e-07, 8.3002e-09, 2.9215e-07, 6.3157e-08, 6.2446e-07,\n 1.2634e-07, 2.5567e-07, 1.1395e-07, 7.0883e-08, 1.9302e-08, 1.1958e-07,\n 3.5772e-07, 1.7928e-07, 6.5142e-09, 1.6444e-06, 8.5263e-07, 4.1454e-07,\n 4.2983e-08, 4.7403e-09, 3.2581e-07, 3.1752e-07, 1.5578e-06, 3.8533e-08,\n 6.8763e-09, 2.1776e-08, 5.4974e-08, 3.9941e-07, 1.7173e-07, 1.1195e-07,\n 1.0893e-07, 1.2770e-07, 5.2238e-09, 1.8388e-07, 4.6704e-07, 2.8493e-07,\n 6.0421e-08, 3.1508e-08, 2.1472e-07, 2.7351e-09, 8.2593e-08, 2.9695e-08,\n 9.2496e-08, 2.0435e-07, 1.3179e-07, 7.6380e-08, 7.4859e-09, 4.8749e-08,\n 1.7727e-07, 1.0428e-08, 2.2344e-07, 2.1031e-07, 2.0114e-09, 6.7896e-08,\n 1.7961e-07, 8.1649e-09, 7.2057e-08, 6.1735e-10, 4.7775e-08, 1.2200e-06,\n 2.4256e-08, 2.2419e-09, 3.7398e-07, 5.0057e-07, 8.2036e-11, 2.7913e-09,\n 8.9173e-08, 3.0973e-07, 9.3193e-09, 1.5852e-07, 2.0321e-08, 3.5246e-07,\n 5.5969e-08, 5.1121e-07, 6.5161e-07, 4.6108e-08, 6.4555e-08, 2.5698e-07,\n 1.6941e-08, 1.0103e-06, 3.3699e-07, 1.2070e-06, 3.4449e-07, 2.3870e-08,\n 4.7633e-09, 1.4577e-07, 1.2115e-08, 1.2033e-06, 3.0206e-08, 8.5584e-08,\n 1.9284e-07, 1.4878e-09, 1.5525e-06, 2.8761e-08, 4.6174e-07, 1.9610e-07,\n 6.6521e-08, 9.1507e-07, 2.2799e-08, 1.1092e-07, 3.0627e-07, 1.4654e-07,\n 1.5985e-07, 2.9578e-07, 2.9372e-08, 4.9372e-09, 1.9254e-09, 1.0756e-07,\n 2.5650e-06, 2.0001e-08, 1.6836e-07, 3.3391e-08, 4.4933e-09, 8.2488e-07,\n 6.9326e-08, 1.1611e-07, 4.6991e-07, 3.3555e-07, 2.8359e-07, 8.6947e-07,\n 3.7491e-07, 8.2480e-11, 3.4199e-09, 5.5487e-09, 3.0194e-07, 1.2391e-06,\n 6.9398e-07, 4.5595e-08, 4.6634e-08, 6.0201e-08, 3.7138e-07, 8.4102e-10,\n 1.9246e-08, 3.8663e-07, 1.3347e-07, 1.4865e-07, 3.3254e-07, 1.1440e-08,\n 1.0446e-07, 2.7445e-07, 5.3318e-08, 8.8894e-07, 4.2149e-07, 6.9165e-08,\n 1.6406e-07, 4.4545e-07, 2.8159e-08, 2.0147e-07, 1.9833e-06, 3.0135e-07,\n 2.0822e-07, 9.6013e-08, 1.3887e-08, 3.3126e-07, 1.3542e-06, 3.4060e-07,\n 4.4760e-07, 3.2605e-09, 4.5131e-08, 1.5583e-07, 1.7759e-07, 1.9039e-07,\n 2.3365e-07, 1.0910e-07, 2.1906e-08, 1.3154e-07, 2.5038e-07, 6.9478e-10,\n 6.5579e-08, 2.6934e-07, 6.8317e-08, 1.0857e-06, 8.1157e-07, 8.4168e-08,\n 5.5717e-08, 2.4134e-08, 1.6107e-10, 3.8891e-06, 4.5577e-08, 7.8472e-08,\n 8.6639e-08, 1.8995e-06, 1.8925e-11, 1.7739e-06, 3.3367e-07, 5.0767e-08,\n 3.2411e-07, 3.9825e-06, 3.6616e-07, 1.1427e-07, 1.6584e-09, 5.1402e-07,\n 4.1501e-08, 7.5014e-07, 1.1680e-07, 2.3798e-07, 1.8218e-09, 3.0304e-07,\n 2.3303e-06, 1.3192e-09, 2.5685e-08, 7.5333e-08, 5.5460e-08, 1.5503e-06,\n 1.9524e-07, 1.1147e-06, 1.5454e-07, 1.8765e-10, 3.9814e-09, 2.2809e-08,\n 8.4537e-09, 3.2547e-07, 2.7826e-09, 2.9742e-09, 5.6495e-07, 2.2094e-07,\n 1.7231e-07, 3.8392e-08, 4.5956e-08, 3.4786e-07, 4.3552e-08, 3.8269e-08,\n 4.4395e-07, 1.8068e-09, 9.7741e-08, 5.9932e-07, 1.0447e-07, 2.9928e-09,\n 5.5931e-09, 2.5151e-07, 2.8396e-08, 9.0457e-07, 4.7159e-07, 4.7385e-08,\n 4.0736e-07, 4.8099e-08, 1.3605e-06, 2.4056e-08], device='cuda:0')" + }, + "20": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.1787e-09, 1.9923e-09, 1.3398e-10, 8.7434e-10, 1.0257e-10, 8.4596e-12,\n 1.8602e-11, 8.6380e-11, 5.1293e-11, 3.0132e-12, 2.4756e-10, 1.2397e-09,\n 7.1375e-10, 1.6056e-09, 1.6528e-10, 7.5660e-10, 2.4816e-09, 1.1007e-10,\n 1.3731e-09, 1.9783e-09, 4.0998e-12, 1.2288e-09, 1.3894e-10, 2.5486e-09,\n 4.9537e-10, 4.9623e-10, 2.8818e-10, 2.3552e-10, 7.0048e-11, 2.1165e-10,\n 1.0925e-09, 5.3189e-10, 4.1487e-11, 4.4273e-09, 3.3036e-09, 1.0338e-09,\n 8.5337e-11, 8.6364e-14, 6.7701e-10, 5.7400e-10, 6.5895e-09, 9.1923e-11,\n 2.9254e-11, 9.5536e-11, 1.0983e-10, 8.9846e-10, 3.7408e-10, 2.3616e-10,\n 1.6660e-10, 1.9730e-10, 4.0930e-12, 5.0242e-10, 9.6966e-10, 6.2112e-10,\n 8.2766e-11, 1.4858e-10, 6.9466e-10, 5.7227e-12, 2.7259e-10, 8.4108e-11,\n 1.6700e-10, 1.5244e-09, 3.6965e-10, 1.8345e-10, 1.1328e-13, 7.7880e-11,\n 3.8705e-10, 5.6615e-12, 4.1256e-10, 6.8090e-10, 4.7333e-11, 1.5600e-10,\n 1.9942e-10, 2.7413e-11, 1.3179e-10, 3.9898e-11, 2.7857e-10, 6.1944e-09,\n 2.1230e-11, 6.3353e-15, 1.5664e-09, 1.2978e-09, 1.1927e-11, 1.3789e-11,\n 1.7541e-10, 6.9202e-10, 3.7138e-11, 7.4208e-10, 5.9557e-11, 6.5722e-10,\n 1.6953e-10, 1.1681e-09, 2.3687e-09, 5.8462e-11, 1.7860e-10, 9.0574e-10,\n 3.6683e-11, 2.1518e-09, 1.3115e-09, 3.3940e-09, 1.0325e-09, 1.1112e-10,\n 5.8248e-11, 2.5837e-10, 5.0535e-12, 2.8278e-09, 3.1576e-11, 2.0177e-10,\n 4.0153e-10, 2.1027e-12, 3.9626e-09, 8.9652e-11, 1.3086e-09, 4.1175e-10,\n 1.2138e-10, 3.6934e-09, 8.9823e-11, 2.3651e-10, 9.2345e-10, 3.5547e-10,\n 2.9411e-10, 6.1601e-10, 2.4684e-11, 4.7641e-13, 1.9648e-11, 1.6552e-10,\n 1.1599e-08, 7.5850e-11, 5.3774e-10, 5.0928e-11, 1.6465e-12, 2.4600e-09,\n 1.6965e-10, 3.4545e-10, 9.0204e-10, 7.5777e-10, 7.9654e-10, 2.2441e-09,\n 1.0256e-09, 1.3915e-11, 8.7698e-11, 3.1795e-11, 6.6344e-10, 3.6420e-09,\n 4.3607e-09, 1.8049e-10, 1.8733e-10, 1.4092e-10, 7.6932e-10, 2.1330e-13,\n 1.9265e-10, 8.4779e-10, 4.3093e-10, 1.6923e-10, 7.8013e-10, 6.9309e-11,\n 1.5416e-10, 4.6758e-10, 8.7710e-11, 3.5159e-09, 8.2192e-10, 7.8275e-11,\n 3.8223e-10, 1.4849e-09, 8.1830e-11, 2.6183e-10, 6.8177e-09, 5.7153e-10,\n 6.3715e-10, 2.4468e-10, 7.1168e-12, 1.3213e-09, 3.2619e-09, 1.2712e-09,\n 8.4256e-10, 1.2543e-11, 7.0528e-11, 6.9705e-10, 5.4397e-10, 4.9724e-10,\n 6.0340e-10, 2.1113e-10, 2.6602e-11, 1.9750e-10, 6.2679e-10, 3.7468e-16,\n 1.0103e-10, 5.9918e-10, 6.0165e-11, 2.7186e-09, 2.1182e-09, 1.0601e-10,\n 1.6570e-10, 1.3204e-10, 1.5182e-11, 1.5076e-08, 5.8023e-11, 1.2565e-10,\n 2.6221e-10, 5.2206e-09, 4.4975e-12, 4.7662e-09, 6.0211e-10, 6.9347e-11,\n 1.0781e-09, 1.0150e-08, 7.9311e-10, 2.8836e-10, 5.4363e-13, 8.0324e-10,\n 5.6846e-11, 1.8699e-09, 7.9309e-10, 7.2626e-10, 1.0887e-12, 5.2627e-10,\n 1.1087e-08, 4.5231e-13, 9.6430e-11, 9.0240e-11, 7.0839e-11, 6.8193e-09,\n 1.0864e-09, 2.7127e-09, 4.5687e-10, 3.9926e-12, 1.9353e-13, 6.7630e-11,\n 8.3731e-12, 9.1515e-10, 2.2491e-11, 2.0946e-12, 3.3325e-09, 9.7562e-10,\n 5.2151e-10, 8.9432e-11, 5.8780e-11, 1.0118e-09, 1.1953e-10, 3.8210e-11,\n 1.0993e-09, 4.2089e-13, 2.9326e-10, 1.5436e-09, 1.5843e-10, 5.1308e-13,\n 1.3174e-12, 4.8110e-10, 4.4786e-11, 2.4412e-09, 9.5826e-10, 6.6602e-11,\n 9.1244e-10, 9.5188e-11, 4.3569e-09, 1.2668e-10], device='cuda:0')" + }, + "21": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.7367e-09, 1.9074e-09, 2.6260e-10, 8.5836e-10, 1.3498e-10, 5.7339e-12,\n 2.6006e-11, 1.4126e-10, 8.6217e-11, 1.6896e-12, 4.1036e-10, 2.3162e-09,\n 8.1685e-10, 1.5918e-09, 2.7817e-10, 1.4673e-09, 3.3184e-09, 1.7545e-10,\n 1.2195e-09, 2.8793e-09, 4.6351e-12, 1.3759e-09, 3.1846e-10, 2.6883e-09,\n 6.6182e-10, 7.6636e-10, 5.3067e-10, 3.7236e-10, 1.4424e-10, 3.0853e-10,\n 1.6042e-09, 8.3467e-10, 6.4873e-11, 5.6596e-09, 3.6464e-09, 1.7809e-09,\n 6.1668e-11, 1.7925e-14, 1.4380e-09, 1.0921e-09, 5.0678e-09, 2.1382e-10,\n 7.0559e-11, 1.6877e-10, 1.8830e-10, 1.2839e-09, 7.9304e-10, 5.5275e-10,\n 2.7372e-10, 4.1199e-10, 6.6889e-12, 8.1296e-10, 1.5224e-09, 8.9301e-10,\n 1.5066e-10, 2.0383e-10, 9.9243e-10, 5.1600e-12, 4.6022e-10, 1.7675e-10,\n 2.2134e-10, 1.0736e-09, 6.0291e-10, 3.4940e-10, 1.8613e-12, 1.2184e-10,\n 8.3808e-10, 1.5298e-11, 9.9340e-10, 1.0192e-09, 5.2034e-11, 3.4463e-10,\n 6.2430e-10, 5.9158e-11, 1.7134e-10, 3.6567e-11, 2.9669e-10, 5.3912e-09,\n 3.1994e-11, 5.0843e-15, 1.8066e-09, 2.2541e-09, 1.3139e-11, 2.9832e-11,\n 4.5457e-10, 1.4423e-09, 6.5584e-11, 8.3927e-10, 6.5899e-11, 1.5511e-09,\n 2.9298e-10, 2.2003e-09, 2.0984e-09, 1.2229e-10, 2.9051e-10, 6.9408e-10,\n 4.8112e-11, 3.4497e-09, 1.5801e-09, 5.0554e-09, 1.5186e-09, 1.6587e-10,\n 6.6428e-11, 3.9735e-10, 7.4649e-12, 3.9717e-09, 5.3795e-11, 4.4788e-10,\n 5.4267e-10, 1.3953e-12, 5.2136e-09, 1.7501e-10, 2.0770e-09, 7.2249e-10,\n 1.5978e-10, 2.8906e-09, 1.2348e-10, 2.9912e-10, 1.4009e-09, 7.0843e-10,\n 4.5295e-10, 8.7247e-10, 4.5413e-11, 1.3138e-12, 3.6589e-11, 3.0854e-10,\n 1.0312e-08, 1.1225e-10, 8.5006e-10, 9.4024e-11, 1.9009e-12, 3.5724e-09,\n 3.5388e-10, 5.5007e-10, 1.4813e-09, 1.0951e-09, 1.2562e-09, 2.9323e-09,\n 1.6691e-09, 1.6970e-11, 6.6313e-11, 5.2007e-11, 9.6638e-10, 4.2389e-09,\n 3.2251e-09, 2.7022e-10, 2.6748e-10, 3.1463e-10, 1.2177e-09, 4.7620e-13,\n 1.7666e-10, 1.1632e-09, 6.7657e-10, 4.2638e-10, 9.2331e-10, 1.0538e-10,\n 2.6145e-10, 9.9497e-10, 1.4140e-10, 3.7677e-09, 1.3841e-09, 1.9244e-10,\n 3.8592e-10, 2.0033e-09, 1.5132e-10, 5.7542e-10, 7.0817e-09, 9.6056e-10,\n 9.5451e-10, 4.6903e-10, 1.9560e-11, 8.6265e-10, 4.6449e-09, 1.6144e-09,\n 1.3460e-09, 2.5625e-11, 1.0492e-10, 8.3307e-10, 4.0630e-10, 9.1025e-10,\n 6.1712e-10, 2.7260e-10, 4.4362e-11, 3.5937e-10, 1.0497e-09, 1.0771e-13,\n 2.0127e-10, 7.6857e-10, 1.8364e-10, 4.4568e-09, 2.7086e-09, 2.2223e-10,\n 3.0319e-10, 1.5929e-10, 1.9942e-11, 1.5958e-08, 9.2200e-11, 1.6943e-10,\n 4.2067e-10, 6.6937e-09, 1.1249e-11, 7.3854e-09, 1.0737e-09, 1.1271e-10,\n 1.5202e-09, 1.3770e-08, 1.0973e-09, 2.6858e-10, 1.3732e-12, 1.7870e-09,\n 6.8768e-11, 3.2351e-09, 6.6576e-10, 1.1497e-09, 1.3016e-12, 1.0108e-09,\n 7.8253e-09, 3.0491e-13, 1.6230e-10, 1.6811e-10, 1.0301e-10, 6.5011e-09,\n 1.0128e-09, 4.7426e-09, 7.9409e-10, 7.3511e-12, 1.3266e-13, 1.5367e-10,\n 2.1361e-11, 1.3989e-09, 4.4995e-11, 4.5242e-12, 2.5267e-09, 1.0445e-09,\n 7.9497e-10, 1.9224e-10, 1.1901e-10, 1.5605e-09, 2.4798e-10, 9.2774e-11,\n 2.0036e-09, 3.9776e-13, 4.7573e-10, 1.8753e-09, 2.9735e-10, 9.9184e-13,\n 1.2169e-12, 8.0111e-10, 4.4725e-11, 3.8947e-09, 1.5048e-09, 1.1038e-10,\n 1.2230e-09, 1.6150e-10, 4.5114e-09, 1.7652e-10], device='cuda:0')" + }, + "22": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6403e-10, 9.9180e-10, 0.0000e+00, ..., 1.4441e-09, 2.3615e-09,\n 3.6132e-10],\n [1.2111e-10, 1.3516e-10, 0.0000e+00, ..., 2.2305e-10, 6.8801e-10,\n 4.3733e-13],\n [1.6259e-10, 8.8325e-11, 0.0000e+00, ..., 1.4853e-10, 3.1735e-10,\n 2.5558e-11],\n ...,\n [2.7772e-10, 3.4241e-10, 0.0000e+00, ..., 4.2293e-10, 8.0028e-10,\n 2.5457e-10],\n [8.4557e-10, 2.6848e-10, 0.0000e+00, ..., 2.6689e-10, 1.6935e-09,\n 1.6122e-10],\n [4.1389e-12, 5.7963e-12, 0.0000e+00, ..., 6.2501e-11, 9.9615e-11,\n 2.0332e-11]], device='cuda:0')" + }, + "23": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0314e-06, 7.9389e-08, 1.7535e-07, 1.3196e-07, 4.4997e-08, 2.4137e-08,\n 1.0382e-07, 6.1771e-11, 4.1400e-08, 7.0221e-09, 7.7954e-07, 1.5436e-07,\n 2.0767e-07, 1.3474e-07, 3.9432e-08, 9.7314e-08, 4.0781e-07, 2.6676e-08,\n 7.7207e-08, 8.2238e-07, 3.2463e-09, 3.1222e-07, 1.3591e-07, 7.8519e-08,\n 1.0507e-08, 3.2748e-07, 1.0345e-07, 1.6447e-08, 1.1500e-08, 9.1599e-08,\n 4.5246e-07, 9.4210e-08, 1.1557e-08, 3.3104e-06, 3.2699e-07, 2.2913e-07,\n 3.9239e-08, 2.8437e-12, 3.0575e-07, 1.5420e-06, 1.9393e-07, 1.1838e-07,\n 2.0927e-08, 1.2039e-07, 7.0296e-07, 5.2158e-07, 2.9495e-07, 1.7258e-07,\n 4.3481e-08, 7.4903e-07, 8.3990e-09, 1.3539e-06, 9.3841e-07, 2.1401e-07,\n 2.1717e-08, 3.9236e-09, 5.1483e-07, 1.6369e-08, 2.3507e-07, 4.7398e-08,\n 1.1062e-07, 1.2685e-07, 6.7488e-07, 3.5822e-07, 1.6345e-08, 4.2290e-08,\n 2.5242e-07, 4.5016e-08, 3.5346e-07, 8.4981e-09, 1.2484e-11, 1.4502e-07,\n 2.6671e-07, 4.6853e-09, 1.1312e-07, 1.3824e-08, 3.9202e-08, 7.5237e-07,\n 1.1061e-08, 4.7283e-09, 6.0448e-08, 5.3012e-07, 1.5119e-11, 3.9288e-08,\n 1.1008e-08, 3.9934e-07, 1.6070e-07, 9.0046e-08, 1.5887e-07, 2.3271e-06,\n 3.1172e-08, 5.2469e-07, 6.6913e-07, 1.0120e-07, 1.2697e-06, 9.4870e-08,\n 1.1234e-07, 7.1323e-07, 2.3968e-07, 2.7949e-07, 6.2087e-07, 3.4955e-08,\n 6.7330e-09, 2.5084e-07, 9.2959e-09, 1.3389e-06, 7.5424e-08, 1.5267e-07,\n 4.6634e-08, 2.1565e-10, 1.0949e-06, 7.8345e-08, 1.0874e-06, 3.3195e-07,\n 3.6318e-08, 6.0410e-08, 7.1975e-08, 6.1165e-08, 4.6152e-08, 1.1217e-07,\n 3.3574e-07, 1.9635e-07, 3.3360e-08, 2.6611e-11, 1.9746e-10, 1.2082e-07,\n 6.7224e-07, 4.3588e-07, 3.5271e-08, 5.8564e-07, 4.5953e-10, 2.8942e-06,\n 1.3167e-07, 2.4476e-07, 4.1990e-07, 1.1274e-06, 3.5768e-07, 1.1365e-06,\n 3.1003e-07, 1.4143e-10, 4.0555e-09, 2.1175e-08, 1.3611e-07, 1.2255e-06,\n 2.1369e-07, 5.7867e-08, 5.1207e-08, 1.9847e-08, 1.2182e-06, 2.6159e-08,\n 2.1364e-08, 1.0177e-07, 1.1120e-07, 4.4258e-07, 1.5088e-07, 2.0207e-08,\n 1.0652e-07, 1.1313e-06, 3.1985e-08, 1.5576e-06, 3.9590e-07, 5.7553e-07,\n 6.0081e-08, 5.2372e-07, 4.0060e-07, 3.3008e-07, 3.1803e-06, 4.4740e-07,\n 4.9205e-07, 6.3635e-08, 2.0298e-08, 8.5590e-08, 5.8985e-07, 1.8146e-07,\n 3.3843e-07, 3.6247e-09, 2.9814e-08, 1.9887e-08, 4.0371e-08, 1.4213e-07,\n 2.0878e-08, 1.1517e-07, 7.6004e-08, 1.0187e-07, 7.8592e-07, 2.6712e-09,\n 7.9605e-08, 2.4373e-07, 4.6810e-08, 1.2635e-06, 1.3322e-07, 1.9034e-07,\n 2.0556e-07, 1.6243e-08, 6.9105e-11, 3.6148e-06, 4.2973e-08, 4.4549e-08,\n 2.6594e-07, 6.9245e-07, 2.8075e-11, 1.9658e-07, 7.4494e-07, 8.8139e-09,\n 4.4774e-07, 1.4902e-06, 2.3483e-07, 4.8235e-08, 1.8017e-10, 2.0367e-06,\n 4.1867e-08, 6.9847e-07, 2.1462e-08, 9.5967e-08, 1.6239e-08, 1.9371e-07,\n 2.5609e-07, 8.8062e-10, 1.9487e-08, 2.5225e-08, 8.4243e-08, 3.2731e-07,\n 6.0639e-08, 1.1604e-06, 2.0453e-07, 8.3556e-09, 2.5821e-09, 1.2472e-08,\n 4.6353e-08, 1.0622e-07, 3.2231e-09, 1.1319e-08, 1.8506e-07, 1.4986e-07,\n 6.0200e-07, 1.8861e-07, 4.4087e-08, 3.3620e-07, 5.0288e-08, 8.8206e-08,\n 3.2244e-07, 6.6979e-10, 4.2170e-07, 5.2322e-07, 1.8169e-07, 4.1324e-10,\n 1.4430e-11, 1.8029e-06, 8.7044e-09, 6.6254e-07, 2.2731e-07, 3.9090e-08,\n 2.9846e-07, 3.1226e-07, 7.2478e-07, 2.0439e-08], device='cuda:0')" + }, + "24": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.8910e-09, 2.4425e-10, 2.7698e-10, 5.9937e-10, 7.0104e-11, 9.4659e-11,\n 2.4339e-10, 8.1458e-13, 4.9805e-11, 4.7780e-12, 2.7800e-09, 3.7917e-10,\n 4.3195e-10, 3.2146e-10, 1.1046e-10, 2.6500e-10, 9.8219e-10, 5.4281e-11,\n 1.6941e-10, 1.8284e-09, 4.1289e-14, 1.0713e-09, 4.1615e-10, 2.1669e-10,\n 4.4286e-11, 6.0204e-10, 2.3088e-10, 5.7465e-11, 4.5118e-11, 2.2619e-10,\n 1.7318e-09, 1.7387e-10, 2.8579e-11, 1.3107e-08, 7.5306e-10, 4.8266e-10,\n 9.5395e-11, 8.3629e-13, 8.4191e-10, 3.4256e-09, 3.7601e-10, 3.9596e-10,\n 4.7780e-11, 9.9642e-10, 2.0982e-09, 1.0257e-09, 1.3423e-09, 4.4117e-10,\n 6.9384e-11, 2.8707e-09, 7.3828e-12, 4.0780e-09, 2.3353e-09, 3.6360e-10,\n 1.8551e-11, 1.3092e-11, 1.6899e-09, 1.0587e-11, 6.5581e-10, 1.1819e-10,\n 2.4822e-10, 3.6530e-10, 2.9090e-09, 9.8370e-10, 1.1563e-11, 5.9325e-11,\n 6.7320e-10, 4.9651e-11, 7.2811e-10, 4.2343e-11, 1.4796e-11, 2.9326e-10,\n 5.4388e-10, 2.7991e-11, 2.0282e-10, 9.4765e-11, 1.2540e-10, 1.9387e-09,\n 1.7731e-11, 5.0382e-12, 1.4371e-10, 1.3950e-09, 1.3431e-12, 3.1384e-10,\n 5.0799e-11, 9.7052e-10, 7.2219e-10, 2.0609e-10, 2.8792e-10, 8.2625e-09,\n 8.5437e-11, 1.2518e-09, 2.3414e-09, 1.4998e-10, 8.1789e-09, 1.8216e-10,\n 1.4495e-10, 1.4331e-09, 6.2346e-10, 7.1890e-10, 2.6302e-09, 2.6358e-10,\n 3.3767e-11, 6.5187e-10, 3.8892e-12, 2.4877e-09, 1.4744e-10, 3.8329e-10,\n 7.3102e-11, 9.2876e-12, 2.3828e-09, 2.9747e-10, 6.8002e-09, 6.7797e-10,\n 3.1525e-11, 1.0631e-10, 1.9955e-10, 9.2903e-11, 1.6642e-10, 3.7560e-10,\n 9.6420e-10, 4.9965e-10, 3.7209e-11, 1.1857e-13, 4.6514e-12, 2.0262e-10,\n 1.4792e-09, 2.5889e-09, 7.8714e-11, 1.9707e-09, 5.8558e-13, 9.3773e-09,\n 3.2858e-10, 5.7082e-10, 6.2289e-10, 4.0953e-09, 8.1183e-10, 3.2996e-09,\n 7.3840e-10, 1.0519e-12, 4.1634e-11, 7.7865e-11, 2.3094e-10, 3.8259e-09,\n 4.5069e-10, 1.9612e-10, 1.5908e-10, 5.6550e-11, 4.2458e-09, 3.7377e-11,\n 9.2349e-11, 1.7059e-10, 3.2409e-10, 1.5201e-09, 2.4941e-10, 9.8808e-11,\n 1.5801e-10, 3.3620e-09, 7.1773e-11, 4.7778e-09, 8.3173e-10, 2.2132e-09,\n 8.4358e-11, 1.7736e-09, 2.4360e-09, 7.3850e-10, 1.0903e-08, 8.4801e-10,\n 2.1291e-09, 2.3737e-10, 1.5058e-11, 1.2260e-10, 1.1116e-09, 5.5352e-10,\n 6.2434e-10, 1.1934e-11, 3.4284e-11, 4.7257e-11, 7.1957e-11, 2.4602e-10,\n 3.7922e-11, 2.6928e-10, 1.2869e-10, 1.7498e-10, 1.6833e-09, 1.7160e-14,\n 1.4418e-10, 5.9296e-10, 6.1769e-11, 3.4325e-09, 1.6650e-10, 4.9148e-10,\n 9.4292e-10, 5.8734e-11, 1.7081e-12, 1.4275e-08, 7.8633e-11, 6.8914e-11,\n 1.0583e-09, 9.9502e-10, 4.6915e-12, 3.3347e-10, 2.5448e-09, 1.0461e-11,\n 1.3722e-09, 2.7974e-09, 3.9131e-10, 7.5898e-11, 5.8679e-14, 6.4835e-09,\n 8.7427e-11, 1.5656e-09, 1.0186e-10, 2.1025e-10, 1.8898e-11, 3.7240e-10,\n 4.7976e-10, 3.5809e-13, 5.9874e-11, 3.0844e-11, 2.0509e-10, 9.4448e-10,\n 2.7659e-10, 3.7310e-09, 7.1032e-10, 3.2681e-11, 1.1749e-12, 1.3076e-10,\n 7.3235e-11, 3.0534e-10, 1.6743e-11, 1.8413e-11, 3.1716e-10, 6.0249e-10,\n 1.5179e-09, 6.1553e-10, 6.9084e-11, 7.3785e-10, 1.5477e-10, 1.3226e-10,\n 8.1898e-10, 2.8407e-13, 3.5653e-09, 1.2771e-09, 2.9748e-10, 2.8136e-13,\n 5.1243e-12, 8.5054e-09, 9.1946e-12, 2.0528e-09, 4.3371e-10, 5.8567e-11,\n 6.8353e-10, 6.4738e-10, 1.5805e-09, 7.6657e-11], device='cuda:0')" + }, + "25": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.1962e-09, 3.6998e-10, 4.9922e-10, 6.7708e-10, 1.0504e-10, 1.5038e-10,\n 3.0447e-10, 9.5619e-13, 9.0088e-11, 8.3277e-12, 2.4868e-09, 5.5937e-10,\n 6.2489e-10, 6.2432e-10, 1.9984e-10, 4.8667e-10, 1.7780e-09, 7.5574e-11,\n 2.4714e-10, 3.5545e-09, 6.4834e-13, 1.4232e-09, 6.7768e-10, 3.6731e-10,\n 5.5759e-11, 1.0692e-09, 4.9065e-10, 1.0094e-10, 8.0760e-11, 2.7122e-10,\n 2.0502e-09, 4.1929e-10, 7.1569e-11, 1.1670e-08, 1.4304e-09, 9.9725e-10,\n 7.4202e-11, 6.3238e-13, 1.3725e-09, 5.4211e-09, 6.7915e-10, 5.7206e-10,\n 1.1912e-10, 6.6244e-10, 2.2987e-09, 1.8235e-09, 1.3556e-09, 8.3325e-10,\n 1.1265e-10, 2.4135e-09, 7.4750e-12, 5.5284e-09, 3.0485e-09, 6.8096e-10,\n 3.9167e-11, 3.1913e-11, 2.2961e-09, 2.9293e-11, 1.0100e-09, 2.6417e-10,\n 2.8503e-10, 5.9511e-10, 2.9000e-09, 1.5560e-09, 2.0069e-11, 1.1256e-10,\n 1.1221e-09, 1.0472e-10, 1.5023e-09, 5.2300e-11, 1.2226e-11, 6.9193e-10,\n 8.5769e-10, 2.9556e-11, 3.2366e-10, 1.0688e-10, 2.3032e-10, 3.1187e-09,\n 2.9082e-11, 4.4656e-12, 3.2007e-10, 2.3751e-09, 3.5850e-12, 2.6573e-10,\n 6.6634e-11, 1.6986e-09, 8.0699e-10, 4.0713e-10, 4.9491e-10, 9.3517e-09,\n 1.5398e-10, 2.2737e-09, 2.1309e-09, 3.1462e-10, 5.4412e-09, 2.9246e-10,\n 3.5481e-10, 2.5209e-09, 1.0683e-09, 1.3067e-09, 2.6724e-09, 2.3490e-10,\n 6.4001e-11, 7.2928e-10, 1.1269e-11, 4.5741e-09, 1.8439e-10, 7.3902e-10,\n 1.3380e-10, 1.6573e-11, 3.7593e-09, 4.0259e-10, 4.6406e-09, 1.1435e-09,\n 9.5934e-11, 1.9630e-10, 3.7090e-10, 1.8520e-10, 2.2537e-10, 5.7999e-10,\n 9.9288e-10, 5.9818e-10, 7.4884e-11, 7.7641e-14, 4.9223e-12, 3.6930e-10,\n 2.8658e-09, 1.9833e-09, 1.7836e-10, 1.7836e-09, 7.7717e-13, 1.1736e-08,\n 5.9674e-10, 1.0514e-09, 1.3388e-09, 3.7768e-09, 1.5931e-09, 3.8947e-09,\n 1.3347e-09, 2.2892e-12, 5.2807e-11, 1.4194e-10, 4.2366e-10, 4.1241e-09,\n 9.0676e-10, 3.1261e-10, 2.5758e-10, 1.0395e-10, 4.1296e-09, 4.2263e-11,\n 1.5020e-10, 3.3708e-10, 5.3842e-10, 1.3107e-09, 4.3497e-10, 1.4387e-10,\n 2.8643e-10, 3.8327e-09, 9.5235e-11, 6.3504e-09, 1.3158e-09, 1.7334e-09,\n 1.4230e-10, 2.3399e-09, 1.8594e-09, 1.0370e-09, 1.1101e-08, 1.5191e-09,\n 2.0929e-09, 3.1110e-10, 3.8838e-11, 2.4298e-10, 2.0430e-09, 8.5169e-10,\n 1.0995e-09, 2.8016e-11, 6.6245e-11, 9.6710e-11, 9.3148e-11, 6.5191e-10,\n 5.5625e-11, 2.8587e-10, 2.0825e-10, 3.1532e-10, 3.2840e-09, 2.8905e-13,\n 2.5856e-10, 7.2244e-10, 1.2293e-10, 5.2505e-09, 4.4743e-10, 5.5343e-10,\n 9.7615e-10, 9.3321e-11, 4.4993e-12, 1.4502e-08, 1.0250e-10, 1.1806e-10,\n 1.2364e-09, 2.3758e-09, 1.1351e-11, 8.7310e-10, 2.3436e-09, 1.9772e-11,\n 1.8539e-09, 5.2318e-09, 7.7247e-10, 1.2438e-10, 1.0408e-13, 7.3417e-09,\n 8.1489e-11, 2.9334e-09, 1.5162e-10, 4.6777e-10, 2.5530e-11, 6.4956e-10,\n 8.5415e-10, 3.5156e-12, 1.1866e-10, 5.7183e-11, 1.7144e-10, 1.4314e-09,\n 3.2372e-10, 4.8605e-09, 9.6749e-10, 6.7156e-11, 9.3204e-13, 1.1538e-10,\n 9.7238e-11, 4.9800e-10, 3.5785e-11, 1.9833e-11, 8.3216e-10, 7.6357e-10,\n 2.5865e-09, 8.8907e-10, 1.0243e-10, 1.4218e-09, 2.9260e-10, 2.5076e-10,\n 1.4301e-09, 6.2332e-13, 1.9849e-09, 1.7402e-09, 5.3162e-10, 2.1169e-12,\n 1.0598e-11, 6.1348e-09, 1.4780e-11, 2.6888e-09, 6.9445e-10, 9.0168e-11,\n 9.2302e-10, 9.3279e-10, 2.3570e-09, 1.3844e-10], device='cuda:0')" + }, + "26": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.0234e-10, 2.7969e-10, 0.0000e+00, ..., 5.9202e-10, 9.5464e-10,\n 1.1661e-10],\n [1.8706e-11, 2.6006e-11, 0.0000e+00, ..., 7.2681e-13, 2.3430e-11,\n 7.4823e-13],\n [1.5120e-10, 1.1815e-10, 0.0000e+00, ..., 1.2162e-10, 1.3135e-10,\n 8.5434e-11],\n ...,\n [7.5290e-11, 6.1873e-11, 0.0000e+00, ..., 4.3647e-11, 4.2628e-10,\n 1.2303e-11],\n [4.4183e-10, 1.3673e-10, 0.0000e+00, ..., 1.1783e-10, 9.6572e-10,\n 2.1387e-10],\n [8.9350e-11, 1.5708e-11, 0.0000e+00, ..., 2.1673e-11, 1.8618e-10,\n 1.9573e-12]], device='cuda:0')" + }, + "27": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.4292e-07, 1.5379e-08, 1.2207e-07, 1.5861e-07, 4.8299e-08, 1.2175e-08,\n 1.6021e-07, 3.8991e-09, 7.1899e-08, 1.0973e-08, 3.6453e-07, 1.2616e-06,\n 2.1478e-07, 2.5258e-07, 2.6353e-07, 1.0908e-06, 1.2225e-07, 1.2120e-07,\n 4.0910e-07, 4.7564e-07, 1.5253e-09, 5.1103e-08, 1.0307e-07, 1.2779e-07,\n 1.1836e-07, 2.1941e-07, 1.7443e-07, 3.4260e-08, 5.0063e-09, 5.7023e-08,\n 3.0303e-07, 2.0021e-07, 3.1393e-08, 1.5398e-06, 5.5774e-07, 3.4184e-07,\n 1.0169e-08, 1.9274e-10, 1.4339e-07, 4.6304e-06, 6.9076e-07, 9.7114e-08,\n 3.0442e-08, 1.9053e-08, 7.9862e-07, 1.4064e-06, 2.1907e-07, 4.8531e-07,\n 6.0459e-08, 3.8851e-07, 7.4923e-09, 8.2714e-07, 2.1089e-07, 1.8999e-07,\n 3.0194e-08, 2.6338e-09, 3.6017e-07, 1.3366e-08, 2.3938e-07, 2.0948e-08,\n 1.2301e-07, 2.3550e-07, 5.9367e-07, 3.1640e-07, 8.9238e-09, 2.2080e-08,\n 2.3738e-07, 1.5454e-08, 1.5052e-07, 2.8601e-08, 5.2214e-10, 1.4587e-07,\n 6.1653e-07, 4.5282e-08, 1.2482e-07, 8.3442e-10, 8.1062e-09, 5.0358e-07,\n 3.2498e-08, 1.0731e-11, 2.9807e-07, 3.5872e-07, 5.1369e-10, 3.0450e-08,\n 2.1253e-07, 9.0778e-07, 7.2971e-08, 3.1344e-07, 6.2089e-08, 5.1776e-07,\n 3.6666e-07, 2.2553e-07, 5.7323e-07, 1.0459e-07, 1.8470e-07, 2.8309e-08,\n 9.6295e-08, 6.5926e-07, 1.5996e-07, 1.3314e-06, 1.1794e-06, 2.8858e-08,\n 5.1294e-10, 1.5840e-07, 8.9022e-09, 1.0445e-06, 8.3708e-08, 7.6448e-08,\n 2.2779e-07, 1.4205e-09, 1.1291e-07, 7.3680e-10, 7.3589e-07, 7.2085e-07,\n 5.6823e-08, 6.3016e-07, 5.3809e-08, 1.0278e-07, 1.5227e-07, 1.5640e-07,\n 5.4506e-08, 1.2401e-08, 2.2363e-08, 2.9666e-11, 2.9060e-09, 1.3997e-07,\n 1.1895e-06, 1.9646e-07, 4.9678e-07, 2.2589e-07, 1.2475e-09, 1.4334e-06,\n 2.3111e-07, 2.1361e-07, 5.2165e-07, 3.0438e-07, 3.3921e-07, 4.5517e-07,\n 3.0918e-07, 1.1912e-09, 9.0227e-09, 1.6624e-08, 1.2241e-07, 1.5577e-06,\n 2.9761e-07, 1.8181e-08, 2.8067e-07, 3.3905e-08, 2.8656e-07, 7.0887e-09,\n 1.3810e-08, 4.8559e-07, 3.5425e-07, 4.4332e-07, 9.4137e-08, 1.5870e-08,\n 2.8228e-07, 8.3567e-07, 1.4661e-07, 1.8473e-07, 2.2823e-07, 2.6203e-07,\n 1.4225e-07, 1.8994e-07, 1.7784e-07, 1.6739e-07, 1.2016e-06, 9.2548e-07,\n 1.3509e-07, 5.0971e-07, 5.0747e-08, 3.6620e-08, 7.5627e-07, 7.1954e-08,\n 5.4010e-07, 4.0173e-08, 1.5457e-08, 4.8767e-08, 5.2153e-08, 5.7991e-08,\n 6.8355e-08, 5.8835e-08, 2.1431e-08, 1.5267e-07, 2.3493e-06, 3.5886e-10,\n 1.1647e-07, 9.7663e-08, 6.8948e-08, 7.0462e-07, 2.0714e-07, 2.5403e-08,\n 1.0476e-07, 1.4443e-07, 1.0970e-09, 1.2797e-06, 6.7338e-08, 1.0255e-08,\n 3.1984e-07, 1.4192e-06, 1.1884e-12, 1.3878e-07, 6.8884e-07, 1.5330e-07,\n 1.2801e-07, 9.9941e-07, 2.4717e-07, 5.3252e-08, 5.3080e-11, 1.1986e-06,\n 2.4347e-08, 1.2010e-06, 7.3789e-10, 2.6632e-07, 3.8223e-09, 3.1999e-07,\n 4.5041e-07, 7.6569e-12, 3.8076e-08, 5.2944e-08, 4.5068e-08, 1.4657e-06,\n 7.7488e-08, 3.0744e-07, 6.9219e-08, 2.8481e-09, 1.5856e-09, 5.6487e-09,\n 5.1789e-08, 1.0319e-07, 8.6511e-09, 1.2694e-09, 5.3993e-07, 3.8538e-08,\n 2.8329e-08, 1.9755e-07, 1.6804e-07, 2.4726e-07, 1.0226e-07, 7.9346e-08,\n 2.6814e-08, 6.0742e-11, 2.0359e-07, 1.3201e-07, 5.9060e-08, 1.8866e-09,\n 1.2189e-09, 1.0085e-06, 4.6859e-08, 1.1628e-06, 3.6963e-07, 4.0348e-08,\n 5.5499e-07, 1.5686e-07, 3.2767e-07, 6.2278e-08], device='cuda:0')" + }, + "28": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.1357e-10, 7.6116e-11, 2.2349e-10, 5.1826e-10, 9.2447e-11, 5.7124e-11,\n 3.2980e-10, 2.1854e-11, 1.1180e-10, 1.3604e-11, 6.8368e-10, 3.3352e-09,\n 4.6885e-10, 6.3183e-10, 1.0134e-09, 4.5225e-09, 3.6887e-10, 2.9798e-10,\n 1.3793e-09, 9.4312e-10, 1.6680e-13, 1.3533e-10, 3.8361e-10, 2.6008e-10,\n 4.2203e-10, 5.2920e-10, 3.6310e-10, 1.0808e-10, 1.2863e-11, 9.9823e-11,\n 8.2028e-10, 4.7638e-10, 7.3883e-11, 3.8638e-09, 1.4590e-09, 1.0320e-09,\n 2.1786e-11, 8.2307e-14, 3.4790e-10, 2.8307e-08, 1.5500e-09, 3.1066e-10,\n 1.0166e-10, 5.6851e-11, 2.7035e-09, 6.2059e-09, 6.7633e-10, 2.2107e-09,\n 6.5660e-11, 8.0579e-10, 7.4653e-12, 2.0891e-09, 4.0478e-10, 4.1982e-10,\n 4.2732e-11, 9.9821e-12, 1.1542e-09, 1.1361e-11, 9.7795e-10, 7.2760e-11,\n 2.8495e-10, 1.6909e-09, 2.8306e-09, 1.1081e-09, 4.1752e-12, 2.5547e-11,\n 6.4271e-10, 1.3645e-11, 2.4645e-10, 9.2796e-11, 2.0463e-11, 4.3996e-10,\n 1.4336e-09, 1.0685e-10, 2.9925e-10, 8.7571e-12, 2.9134e-11, 1.4443e-09,\n 4.6088e-11, 8.2071e-13, 1.1077e-09, 7.7190e-10, 8.9657e-13, 1.4793e-10,\n 8.4217e-10, 2.6415e-09, 2.1130e-10, 1.3375e-09, 1.5195e-10, 1.1864e-09,\n 2.0778e-09, 5.6486e-10, 1.4163e-09, 1.5316e-10, 3.9951e-10, 2.7661e-11,\n 1.9439e-10, 1.1710e-09, 3.7646e-10, 3.7258e-09, 7.7219e-09, 1.3167e-10,\n 2.9453e-12, 3.6112e-10, 4.7184e-12, 2.3098e-09, 2.5524e-10, 2.0824e-10,\n 6.8160e-10, 5.8898e-15, 2.9680e-10, 1.4896e-11, 2.2516e-09, 1.4207e-09,\n 7.9371e-11, 1.6591e-09, 1.8002e-10, 1.9715e-10, 3.8195e-10, 4.1146e-10,\n 7.1506e-11, 5.0900e-11, 4.1398e-11, 1.7554e-13, 1.1573e-11, 2.6390e-10,\n 3.0294e-09, 6.1013e-10, 2.5281e-09, 5.1205e-10, 2.6915e-13, 3.7432e-09,\n 1.0725e-09, 4.1514e-10, 1.6262e-09, 5.5245e-10, 5.8051e-10, 7.7324e-10,\n 6.6626e-10, 6.5475e-14, 4.4982e-11, 1.0124e-10, 1.4560e-10, 4.4484e-09,\n 7.6407e-10, 5.3760e-11, 1.2731e-09, 8.9191e-11, 5.0683e-10, 1.4357e-12,\n 3.7796e-11, 1.2618e-09, 2.1844e-09, 1.7964e-09, 9.4807e-11, 1.3618e-10,\n 1.3369e-09, 2.2281e-09, 3.2231e-10, 3.9726e-10, 3.1078e-10, 4.3808e-10,\n 6.9766e-10, 3.5022e-10, 5.5325e-10, 3.9174e-10, 2.1765e-09, 3.0525e-09,\n 2.7795e-10, 1.2003e-09, 9.3548e-11, 5.3298e-11, 1.6596e-09, 1.4156e-10,\n 1.5795e-09, 2.4177e-10, 3.3510e-11, 1.1331e-10, 1.0172e-10, 2.0379e-10,\n 9.2085e-11, 1.2832e-10, 2.4162e-11, 2.8374e-10, 7.2241e-09, 2.4422e-12,\n 1.8499e-10, 1.7751e-10, 7.6793e-11, 1.5457e-09, 3.0601e-10, 4.2227e-11,\n 3.6811e-10, 6.6436e-10, 4.5912e-12, 2.8421e-09, 2.2770e-10, 1.5748e-11,\n 1.4846e-09, 3.5159e-09, 4.6932e-13, 3.1615e-10, 2.5123e-09, 5.0447e-10,\n 4.7339e-10, 1.9874e-09, 4.8110e-10, 1.3828e-10, 1.9403e-13, 2.5626e-09,\n 3.8373e-11, 3.2590e-09, 8.5017e-12, 7.1237e-10, 4.0002e-12, 7.2201e-10,\n 6.3470e-10, 3.4451e-12, 1.2764e-10, 9.3190e-11, 1.1396e-10, 6.6383e-09,\n 2.8101e-10, 8.0463e-10, 1.9227e-10, 3.0841e-11, 4.3818e-14, 3.2399e-11,\n 7.1311e-11, 2.3551e-10, 3.3315e-11, 2.4189e-14, 2.0119e-09, 1.3016e-10,\n 1.0294e-10, 7.1375e-10, 6.1069e-10, 5.1120e-10, 4.6088e-10, 1.7784e-10,\n 1.3840e-10, 1.2400e-12, 6.8334e-10, 2.2100e-10, 1.0772e-10, 1.8963e-12,\n 2.1963e-12, 3.3529e-09, 1.0730e-10, 3.7964e-09, 6.2923e-10, 6.9562e-11,\n 2.2586e-09, 2.8443e-10, 5.9109e-10, 4.0259e-10], device='cuda:0')" + }, + "29": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.3685e-09, 7.5201e-11, 4.0876e-10, 6.9613e-10, 1.1196e-10, 7.7763e-11,\n 4.9385e-10, 3.5380e-11, 2.0388e-10, 2.2288e-11, 1.2031e-09, 4.6042e-09,\n 7.1720e-10, 1.0280e-09, 1.1987e-09, 4.6856e-09, 6.2974e-10, 4.0899e-10,\n 1.3451e-09, 2.0523e-09, 2.3125e-14, 2.4105e-10, 4.9941e-10, 5.9053e-10,\n 5.7631e-10, 7.2841e-10, 7.7883e-10, 1.8352e-10, 3.1721e-11, 1.7787e-10,\n 1.3700e-09, 8.6115e-10, 1.6532e-10, 5.5894e-09, 2.4122e-09, 1.5470e-09,\n 1.7479e-11, 3.7817e-13, 6.3789e-10, 1.6849e-08, 2.4104e-09, 4.8971e-10,\n 1.6528e-10, 1.1222e-10, 2.7781e-09, 4.9319e-09, 1.0066e-09, 2.1939e-09,\n 1.6399e-10, 1.3557e-09, 1.3034e-11, 3.5164e-09, 6.9390e-10, 6.5187e-10,\n 7.7787e-11, 2.5677e-11, 1.5680e-09, 3.4785e-11, 1.1276e-09, 1.1811e-10,\n 3.6005e-10, 1.1561e-09, 2.5725e-09, 1.4086e-09, 1.4716e-11, 6.3426e-11,\n 1.0724e-09, 3.3479e-11, 6.6313e-10, 1.4122e-10, 1.3214e-11, 6.9884e-10,\n 2.0594e-09, 2.4538e-10, 3.8339e-10, 7.3580e-12, 4.7086e-11, 2.1978e-09,\n 6.8074e-11, 9.5487e-13, 1.3197e-09, 1.6055e-09, 3.4425e-12, 1.7231e-10,\n 1.0141e-09, 3.9285e-09, 3.3127e-10, 1.3661e-09, 2.1355e-10, 2.2830e-09,\n 1.6922e-09, 1.0054e-09, 2.0085e-09, 3.3893e-10, 8.1020e-10, 8.2801e-11,\n 3.1520e-10, 2.4298e-09, 7.6286e-10, 5.3641e-09, 5.0923e-09, 1.7323e-10,\n 5.0588e-12, 4.8789e-10, 7.0504e-12, 3.8158e-09, 2.1471e-10, 3.6814e-10,\n 7.2640e-10, 1.4271e-13, 4.0219e-10, 4.0558e-12, 3.0712e-09, 2.5710e-09,\n 1.8134e-10, 2.1985e-09, 2.6174e-10, 3.2513e-10, 6.9180e-10, 6.6035e-10,\n 1.7331e-10, 4.1173e-11, 4.7700e-11, 4.5776e-13, 2.7306e-11, 4.4251e-10,\n 5.0469e-09, 9.0766e-10, 2.1868e-09, 7.3196e-10, 1.6368e-14, 5.7708e-09,\n 1.0956e-09, 9.6138e-10, 1.7770e-09, 1.0437e-09, 1.4771e-09, 1.6762e-09,\n 1.3564e-09, 1.4086e-13, 6.7037e-11, 1.1839e-10, 4.2122e-10, 5.4551e-09,\n 1.2529e-09, 1.0167e-10, 1.2824e-09, 1.7863e-10, 9.6593e-10, 1.1710e-11,\n 8.2755e-11, 1.6407e-09, 1.6092e-09, 1.4716e-09, 2.8734e-10, 1.1537e-10,\n 8.6409e-10, 3.0519e-09, 4.5327e-10, 8.5210e-10, 8.0976e-10, 8.8896e-10,\n 3.8526e-10, 8.1250e-10, 8.0056e-10, 5.4574e-10, 4.4148e-09, 3.2040e-09,\n 6.5334e-10, 2.2760e-09, 1.2647e-10, 1.2471e-10, 2.7020e-09, 3.3285e-10,\n 1.8105e-09, 2.4726e-10, 4.4562e-11, 2.4575e-10, 1.3402e-10, 2.9268e-10,\n 2.0309e-10, 1.7133e-10, 5.7228e-11, 4.7482e-10, 9.2146e-09, 4.5782e-12,\n 3.8935e-10, 3.0638e-10, 1.9926e-10, 2.9080e-09, 7.5911e-10, 7.8716e-11,\n 4.9382e-10, 7.2591e-10, 1.6838e-11, 5.5413e-09, 1.8213e-10, 2.6992e-11,\n 1.4897e-09, 5.3045e-09, 4.3077e-12, 6.3591e-10, 2.3764e-09, 4.2166e-10,\n 6.2755e-10, 3.8968e-09, 8.3113e-10, 1.6256e-10, 1.6535e-12, 4.3664e-09,\n 5.3242e-11, 4.8353e-09, 8.0175e-12, 1.1712e-09, 3.1695e-12, 1.1855e-09,\n 1.5564e-09, 4.0490e-12, 2.1136e-10, 1.6904e-10, 9.4946e-11, 6.0819e-09,\n 3.8507e-10, 1.4228e-09, 3.6179e-10, 4.0544e-11, 6.8266e-14, 5.2631e-11,\n 1.3619e-10, 4.5937e-10, 5.9480e-11, 7.2016e-13, 2.3813e-09, 1.9199e-10,\n 1.3793e-10, 9.1461e-10, 4.8923e-10, 1.1198e-09, 5.1198e-10, 2.3357e-10,\n 1.0897e-10, 2.9078e-12, 9.3567e-10, 4.5923e-10, 1.9727e-10, 2.3068e-12,\n 9.5820e-13, 3.4940e-09, 1.1141e-10, 4.8012e-09, 1.2457e-09, 1.0951e-10,\n 1.8249e-09, 5.0482e-10, 1.2479e-09, 3.4591e-10], device='cuda:0')" + }, + "30": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2601e-10, 4.2558e-10, 0.0000e+00, ..., 1.4640e-09, 8.3305e-10,\n 8.9057e-10],\n [2.0131e-10, 7.5190e-11, 0.0000e+00, ..., 6.2886e-11, 7.2841e-10,\n 3.2847e-11],\n [1.6396e-10, 2.8932e-10, 0.0000e+00, ..., 2.0125e-10, 1.3653e-09,\n 8.0719e-11],\n ...,\n [3.9639e-11, 1.0792e-10, 0.0000e+00, ..., 1.3726e-10, 2.0400e-10,\n 1.2594e-12],\n [9.0506e-10, 8.0456e-10, 0.0000e+00, ..., 1.1700e-09, 4.9905e-09,\n 1.0174e-09],\n [7.3065e-12, 9.7694e-12, 0.0000e+00, ..., 3.7683e-11, 8.8126e-11,\n 9.1702e-13]], device='cuda:0')" + }, + "31": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.8087e-07, 2.4273e-07, 3.0864e-07, 9.6051e-09, 8.7272e-08, 9.2314e-09,\n 9.4683e-08, 8.0973e-09, 5.5327e-08, 3.5461e-08, 7.2848e-07, 7.1804e-07,\n 9.8548e-08, 7.1144e-08, 1.3920e-07, 1.8832e-08, 3.5354e-07, 1.0517e-07,\n 3.5314e-07, 9.8322e-07, 1.6173e-09, 2.8185e-07, 6.2106e-08, 5.7699e-07,\n 4.2558e-08, 9.3457e-08, 6.1794e-07, 9.9897e-09, 3.5086e-08, 1.0713e-07,\n 1.8584e-07, 6.1174e-07, 8.0845e-10, 1.1010e-06, 6.3347e-07, 7.5201e-07,\n 2.8523e-08, 1.0167e-09, 4.0864e-07, 2.6982e-06, 1.1340e-06, 3.1934e-08,\n 2.9023e-08, 2.9254e-08, 2.0622e-07, 4.7072e-07, 2.8689e-07, 1.3259e-07,\n 2.8880e-08, 3.9887e-07, 1.0691e-08, 9.9886e-07, 5.0062e-07, 3.3355e-07,\n 1.4765e-08, 3.0665e-08, 2.2449e-07, 2.6263e-08, 1.6643e-07, 5.2623e-08,\n 6.4303e-08, 1.6642e-07, 2.7394e-07, 6.7383e-07, 7.0593e-09, 1.6795e-07,\n 1.6176e-07, 3.9108e-08, 3.2224e-07, 4.6783e-08, 3.2810e-10, 1.3037e-07,\n 5.3505e-07, 5.2145e-08, 3.1930e-08, 4.1332e-10, 8.3934e-08, 9.3905e-07,\n 3.6742e-08, 2.6368e-10, 3.8263e-07, 1.2219e-06, 5.4614e-10, 8.9877e-09,\n 1.5176e-07, 9.0462e-07, 4.0843e-08, 1.8554e-07, 1.3573e-07, 3.4745e-08,\n 1.9221e-07, 6.9305e-07, 2.1111e-07, 2.4494e-07, 7.8548e-07, 1.9578e-07,\n 3.4715e-07, 1.1778e-06, 1.8359e-07, 1.4248e-06, 1.3272e-07, 2.7181e-08,\n 5.2954e-11, 1.4054e-07, 1.5427e-09, 1.6983e-06, 4.8613e-08, 1.9212e-07,\n 1.4609e-07, 1.0155e-09, 1.2877e-06, 5.6962e-08, 4.0000e-07, 6.0240e-07,\n 1.0525e-07, 8.3871e-07, 1.8079e-08, 6.6598e-08, 1.0419e-07, 5.5920e-07,\n 5.8316e-08, 4.4652e-08, 2.4860e-08, 4.1747e-10, 4.1325e-09, 3.0046e-07,\n 5.0280e-07, 7.4075e-08, 2.4916e-07, 5.1063e-07, 5.5753e-09, 1.1449e-06,\n 2.7960e-07, 4.1858e-08, 4.8466e-07, 1.8336e-07, 9.1748e-07, 1.7779e-07,\n 1.4011e-07, 1.0080e-10, 3.5728e-10, 3.4536e-09, 6.2834e-07, 2.3296e-06,\n 4.0792e-07, 8.0309e-08, 9.8551e-08, 1.5591e-07, 1.3769e-07, 1.1766e-08,\n 3.6021e-08, 3.1910e-07, 2.3141e-07, 5.6637e-08, 6.9315e-08, 4.1000e-08,\n 1.1026e-07, 1.2410e-06, 5.4911e-08, 5.9485e-08, 1.9658e-07, 6.1541e-08,\n 5.8599e-08, 1.7556e-07, 5.5720e-08, 2.6147e-07, 1.1989e-06, 8.6739e-08,\n 4.0947e-07, 7.4244e-08, 3.7634e-09, 1.3597e-07, 2.5130e-07, 3.9310e-08,\n 5.0864e-07, 8.6468e-08, 6.0041e-08, 5.7348e-08, 6.3926e-08, 1.7284e-07,\n 2.0870e-07, 6.2268e-08, 9.3162e-08, 1.5472e-07, 1.3670e-06, 7.9890e-10,\n 1.0718e-07, 4.0291e-07, 7.1547e-08, 1.3129e-06, 2.2808e-07, 1.3430e-07,\n 7.7213e-08, 2.6924e-08, 1.1813e-10, 3.6409e-06, 5.0872e-08, 1.0860e-08,\n 1.0672e-07, 4.9926e-07, 5.7301e-10, 1.4811e-06, 1.0209e-07, 1.2740e-07,\n 1.2517e-07, 3.4393e-07, 6.2440e-07, 1.2715e-08, 2.9080e-10, 2.3835e-07,\n 6.8855e-08, 1.7504e-07, 6.5233e-09, 4.3563e-07, 8.6268e-09, 9.4862e-07,\n 1.1007e-06, 4.3325e-09, 1.0840e-08, 1.1027e-07, 4.4915e-08, 6.1457e-07,\n 5.7153e-08, 7.4411e-07, 1.0036e-07, 1.3777e-11, 1.2763e-10, 9.8398e-09,\n 1.5272e-08, 1.6082e-07, 4.2331e-09, 1.4280e-08, 1.6615e-07, 9.6387e-08,\n 6.5777e-07, 5.0627e-09, 4.6227e-08, 3.0795e-07, 8.6034e-08, 1.0078e-07,\n 1.8275e-07, 7.4527e-10, 7.2365e-08, 2.5818e-07, 1.0609e-07, 2.5597e-09,\n 5.9656e-09, 4.9311e-07, 3.4177e-08, 1.1036e-07, 8.8750e-07, 1.3595e-08,\n 5.5350e-07, 4.8941e-08, 1.3178e-06, 3.1519e-08], device='cuda:0')" + }, + "32": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.6831e-09, 7.7800e-10, 8.6211e-10, 5.1025e-11, 1.9332e-10, 2.7403e-11,\n 1.3810e-10, 6.5898e-11, 7.5491e-11, 8.5115e-11, 1.3411e-09, 1.1806e-09,\n 2.0397e-10, 1.8028e-10, 4.5964e-10, 1.3674e-10, 8.4367e-10, 1.8302e-10,\n 8.6407e-10, 2.3379e-09, 7.7815e-13, 9.8946e-10, 2.9600e-10, 1.2153e-09,\n 1.4285e-10, 1.0815e-10, 2.9509e-09, 3.0758e-11, 1.2202e-10, 2.0562e-10,\n 2.8117e-10, 4.4558e-09, 7.6024e-12, 1.8825e-09, 1.8839e-09, 2.8640e-09,\n 3.5720e-11, 1.1518e-12, 1.1922e-09, 6.8297e-09, 3.3538e-09, 5.3110e-11,\n 7.7332e-11, 1.3318e-10, 2.6734e-10, 8.4428e-10, 1.1835e-09, 3.1928e-10,\n 3.6861e-11, 9.2630e-10, 1.1676e-11, 3.0601e-09, 1.0404e-09, 7.2657e-10,\n 1.7277e-11, 2.1867e-10, 4.8365e-10, 2.9064e-11, 8.4259e-10, 1.8144e-10,\n 1.4788e-10, 6.8256e-10, 8.6972e-10, 3.0259e-09, 8.5107e-12, 4.8876e-10,\n 4.4075e-10, 6.2711e-11, 7.4847e-10, 1.0143e-10, 9.5036e-14, 4.7546e-10,\n 1.3677e-09, 1.7242e-10, 4.8273e-11, 1.5630e-12, 5.9665e-10, 2.7297e-09,\n 7.6509e-11, 1.4123e-14, 2.2336e-09, 6.7254e-09, 3.3165e-12, 5.0481e-11,\n 6.8288e-10, 2.6417e-09, 1.5640e-10, 7.2399e-10, 2.8911e-10, 1.8246e-10,\n 5.2308e-10, 1.6607e-09, 3.0571e-10, 6.4686e-10, 2.6851e-09, 3.9064e-10,\n 9.2850e-10, 2.6958e-09, 4.6798e-10, 4.1268e-09, 2.9345e-10, 1.6471e-10,\n 7.5557e-12, 2.6921e-10, 7.8305e-13, 5.2288e-09, 9.6588e-11, 9.7357e-10,\n 2.3478e-10, 3.7353e-16, 3.9439e-09, 1.6772e-10, 1.0080e-09, 1.0484e-09,\n 1.8741e-10, 2.6348e-09, 5.6579e-11, 1.0897e-10, 2.0757e-10, 2.4170e-09,\n 7.1065e-11, 6.3977e-11, 2.6377e-11, 1.9531e-11, 3.8357e-11, 9.9468e-10,\n 9.0030e-10, 1.8637e-10, 7.6683e-10, 1.9252e-09, 2.9054e-13, 2.1488e-09,\n 1.0789e-09, 1.2813e-10, 8.0332e-10, 2.6387e-10, 3.1312e-09, 4.0109e-10,\n 3.5486e-10, 1.5522e-14, 1.4681e-13, 1.6997e-11, 1.5100e-09, 9.6412e-09,\n 1.4138e-09, 3.0532e-10, 2.4186e-10, 9.3105e-10, 1.6296e-10, 1.1726e-11,\n 2.3616e-10, 6.5155e-10, 9.1573e-10, 9.2390e-11, 9.1397e-11, 3.2764e-10,\n 1.9512e-10, 3.1527e-09, 8.2959e-11, 2.5895e-10, 4.3565e-10, 9.1498e-11,\n 7.0917e-11, 5.9831e-10, 1.3364e-10, 5.6772e-10, 2.2469e-09, 1.6621e-10,\n 1.9716e-09, 2.4454e-10, 1.1857e-12, 2.5520e-10, 4.0830e-10, 1.1999e-10,\n 1.4437e-09, 5.9943e-10, 8.8445e-11, 1.8510e-10, 1.3354e-10, 3.6953e-10,\n 4.7275e-10, 9.5648e-11, 1.9874e-10, 2.5353e-10, 3.1739e-09, 5.1548e-14,\n 1.8066e-10, 1.4697e-09, 8.1846e-11, 2.8644e-09, 3.8391e-10, 3.3062e-10,\n 2.9876e-10, 8.4625e-11, 3.0474e-13, 1.4916e-08, 5.4649e-11, 8.0472e-12,\n 2.2269e-10, 9.5686e-10, 1.9745e-11, 6.5039e-09, 1.7913e-10, 3.5164e-10,\n 2.4962e-10, 7.6769e-10, 1.6107e-09, 2.2344e-11, 2.0981e-14, 4.1481e-10,\n 1.4201e-10, 3.9657e-10, 2.0587e-11, 1.5256e-09, 6.2294e-12, 2.7495e-09,\n 2.5936e-09, 5.2496e-13, 3.0171e-11, 2.3636e-10, 8.7519e-11, 1.4940e-09,\n 1.9320e-10, 1.5824e-09, 2.7501e-10, 1.2494e-12, 9.2642e-13, 3.5999e-11,\n 2.1698e-11, 4.8555e-10, 1.2675e-11, 8.2060e-12, 5.7499e-10, 3.3720e-10,\n 2.0734e-09, 3.4339e-11, 6.8304e-11, 6.5773e-10, 3.6760e-10, 1.8458e-10,\n 3.5899e-10, 4.0571e-13, 2.0545e-10, 7.3601e-10, 1.7349e-10, 2.0216e-13,\n 2.5639e-12, 8.0680e-10, 5.4005e-11, 3.7704e-10, 2.6099e-09, 1.2708e-11,\n 1.9847e-09, 9.2230e-11, 3.5142e-09, 9.9344e-11], device='cuda:0')" + }, + "33": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.2281e-09, 1.1382e-09, 9.0442e-10, 5.8763e-11, 1.8876e-10, 7.3388e-11,\n 2.5988e-10, 8.5104e-11, 1.2730e-10, 5.9210e-11, 2.3783e-09, 2.5553e-09,\n 3.0680e-10, 3.4005e-10, 7.2304e-10, 7.8269e-11, 1.5844e-09, 2.6955e-10,\n 1.1357e-09, 4.0995e-09, 6.4140e-13, 1.3071e-09, 3.6540e-10, 2.4245e-09,\n 2.3674e-10, 2.9544e-10, 2.7736e-09, 5.8569e-11, 2.1247e-10, 2.8088e-10,\n 8.8237e-10, 2.8641e-09, 8.5501e-12, 3.7704e-09, 2.6862e-09, 3.3253e-09,\n 6.0033e-11, 2.0674e-12, 1.8212e-09, 9.4490e-09, 3.7966e-09, 1.8806e-10,\n 1.7206e-10, 1.8242e-10, 6.9541e-10, 1.5712e-09, 1.3852e-09, 6.4710e-10,\n 7.6603e-11, 1.3504e-09, 1.0054e-11, 4.2472e-09, 1.7016e-09, 1.0119e-09,\n 2.9374e-11, 2.3443e-10, 9.9679e-10, 6.4544e-11, 8.5966e-10, 2.8836e-10,\n 1.4313e-10, 8.5008e-10, 1.3057e-09, 2.9993e-09, 1.1741e-11, 4.3942e-10,\n 8.0442e-10, 8.3815e-11, 1.3935e-09, 2.3833e-10, 2.3775e-15, 6.5324e-10,\n 1.7262e-09, 2.9622e-10, 8.8615e-11, 7.1837e-12, 4.8630e-10, 3.8813e-09,\n 5.6040e-11, 3.8488e-13, 1.8122e-09, 5.3097e-09, 4.5287e-12, 8.1004e-11,\n 7.7050e-10, 3.7803e-09, 2.3358e-10, 9.1869e-10, 4.4226e-10, 1.5315e-10,\n 8.8428e-10, 3.0621e-09, 6.8722e-10, 7.8098e-10, 3.4265e-09, 5.6370e-10,\n 1.0613e-09, 4.0240e-09, 9.1710e-10, 5.9857e-09, 6.3995e-10, 1.7817e-10,\n 1.6012e-11, 3.9277e-10, 4.7452e-13, 5.7308e-09, 9.9642e-11, 9.5649e-10,\n 4.2729e-10, 4.9480e-13, 4.3349e-09, 3.0969e-10, 1.8020e-09, 2.0712e-09,\n 2.6169e-10, 2.7833e-09, 9.6641e-11, 1.8819e-10, 4.7053e-10, 2.5713e-09,\n 1.7726e-10, 1.2773e-10, 4.6830e-11, 1.0337e-11, 4.7666e-11, 8.8332e-10,\n 2.1155e-09, 3.7327e-10, 1.1537e-09, 1.5389e-09, 1.5682e-12, 4.6910e-09,\n 1.3134e-09, 2.1943e-10, 1.5910e-09, 5.9262e-10, 3.8814e-09, 6.1334e-10,\n 6.5852e-10, 6.3209e-13, 1.3686e-12, 3.0017e-11, 1.9284e-09, 7.8514e-09,\n 1.7922e-09, 4.2469e-10, 4.6970e-10, 8.3466e-10, 4.5728e-10, 1.2476e-11,\n 2.4300e-10, 1.0146e-09, 1.1258e-09, 1.7123e-10, 2.1254e-10, 2.9658e-10,\n 3.0983e-10, 4.2385e-09, 1.5665e-10, 2.6689e-10, 6.5681e-10, 1.8052e-10,\n 1.4408e-10, 8.0680e-10, 2.8647e-10, 8.1910e-10, 4.2218e-09, 2.8684e-10,\n 1.9455e-09, 3.5854e-10, 6.5442e-12, 3.8775e-10, 8.8646e-10, 2.1466e-10,\n 1.6462e-09, 5.2842e-10, 1.5110e-10, 3.2686e-10, 1.3440e-10, 7.8529e-10,\n 5.7177e-10, 1.6392e-10, 2.2328e-10, 4.4455e-10, 5.7737e-09, 4.2655e-13,\n 3.3726e-10, 1.1613e-09, 1.9676e-10, 5.3242e-09, 7.6892e-10, 3.8983e-10,\n 3.9645e-10, 1.7235e-10, 2.8918e-13, 1.4708e-08, 1.1965e-10, 1.9965e-11,\n 5.2889e-10, 1.7906e-09, 3.4677e-11, 6.3762e-09, 3.6187e-10, 3.2140e-10,\n 5.9483e-10, 1.2972e-09, 1.9193e-09, 3.6342e-11, 9.9455e-13, 8.5450e-10,\n 1.4922e-10, 7.3322e-10, 4.9443e-11, 1.9179e-09, 6.9934e-12, 3.1303e-09,\n 3.7256e-09, 6.4230e-13, 6.1503e-11, 2.7560e-10, 6.8772e-11, 2.6803e-09,\n 3.2166e-10, 3.1873e-09, 4.8110e-10, 3.3111e-12, 7.5852e-13, 7.6003e-11,\n 3.6292e-11, 7.7822e-10, 2.8149e-11, 1.6705e-11, 7.5451e-10, 4.9786e-10,\n 2.8566e-09, 2.5949e-11, 1.1123e-10, 1.3819e-09, 4.7050e-10, 2.6682e-10,\n 8.2127e-10, 4.2365e-13, 3.8645e-10, 9.1147e-10, 3.1574e-10, 9.1802e-15,\n 3.5643e-12, 1.6014e-09, 7.6213e-11, 5.0744e-10, 2.9833e-09, 2.7653e-11,\n 1.6532e-09, 1.5192e-10, 4.5810e-09, 1.9403e-10], device='cuda:0')" + }, + "34": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1506e-11, 4.1548e-11, 2.5346e-11, ..., 5.7366e-12, 3.1409e-11,\n 1.8874e-11],\n [8.1894e-13, 1.2252e-13, 1.3353e-12, ..., 3.5635e-12, 4.1426e-12,\n 1.4765e-13],\n [3.9919e-12, 1.7556e-11, 5.5357e-12, ..., 2.8327e-12, 1.7058e-11,\n 3.2640e-12],\n ...,\n [9.9888e-11, 2.3694e-10, 2.4555e-10, ..., 4.3631e-11, 3.6840e-10,\n 5.3317e-10],\n [6.6701e-10, 1.0550e-09, 1.6283e-09, ..., 3.6158e-10, 1.5932e-09,\n 2.9252e-09],\n [9.1514e-09, 2.0298e-08, 2.4256e-08, ..., 5.1638e-09, 3.0552e-08,\n 4.7933e-08]], device='cuda:0')" + }, + "35": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.3199e-10, 7.4555e-12, 6.6557e-11, 4.3799e-12, 2.9327e-11, 4.3356e-11,\n 6.4021e-12, 5.5383e-12, 2.6980e-13, 4.5201e-11, 3.6663e-11, 6.3410e-11,\n 1.5749e-12, 1.3333e-12, 1.6222e-11, 1.0640e-11, 3.5097e-12, 1.3891e-10,\n 4.7405e-11, 6.0717e-13, 2.3904e-12, 1.9398e-11, 4.4788e-11, 1.7277e-12,\n 8.3677e-11, 9.8135e-12, 1.3895e-11, 1.4168e-12, 3.2293e-12, 6.6056e-12,\n 6.5930e-12, 1.0966e-10, 1.5891e-11, 8.5065e-13, 3.2088e-14, 3.2784e-11,\n 1.1666e-11, 4.7001e-12, 1.4054e-13, 6.2663e-15, 1.9039e-11, 2.3826e-12,\n 1.1634e-11, 5.4932e-12, 9.3972e-13, 1.0698e-11, 1.3546e-11, 1.3744e-11,\n 7.2567e-12, 3.0836e-11, 1.1966e-11, 3.4035e-12, 4.4410e-12, 2.6153e-12,\n 9.8106e-14, 1.3339e-13, 3.0530e-12, 2.4505e-13, 9.8229e-12, 4.1776e-13,\n 3.4693e-11, 1.1570e-11, 2.7304e-14, 2.1049e-11, 4.0708e-11, 1.3865e-11,\n 2.0789e-11, 4.1163e-12, 5.2718e-13, 3.2496e-13, 2.1300e-11, 2.2500e-12,\n 1.7903e-13, 8.7959e-12, 5.1251e-12, 1.3815e-11, 1.0161e-11, 2.6315e-12,\n 4.3581e-17, 9.5428e-12, 7.2675e-12, 6.3967e-11, 9.7885e-13, 9.0434e-12,\n 1.2302e-12, 6.2073e-12, 3.6184e-12, 1.0013e-14, 6.1569e-12, 7.9909e-13,\n 2.8582e-14, 4.6372e-12, 4.1467e-12, 1.1818e-10, 3.8936e-11, 2.7077e-11,\n 2.9499e-12, 1.3920e-10, 9.6151e-12, 1.7831e-11, 1.6573e-12, 1.9064e-12,\n 9.3897e-13, 1.0198e-11, 1.2396e-13, 3.5564e-11, 5.8400e-11, 6.4913e-12,\n 7.3709e-12, 1.1081e-12, 1.1209e-11, 1.2729e-10, 9.6290e-13, 1.4583e-10,\n 2.0228e-11, 9.0112e-11, 8.8429e-18, 8.2660e-11, 6.0213e-11, 2.9800e-11,\n 1.1405e-10, 5.1022e-13, 5.9541e-11, 2.2451e-13, 3.2495e-11, 5.4124e-11,\n 4.9051e-11, 3.5528e-11, 6.5580e-11, 9.9242e-11, 1.1413e-11, 2.2260e-11,\n 6.7038e-13, 3.8252e-11, 1.0990e-11, 1.0052e-11, 2.3242e-11, 3.1471e-11,\n 2.6442e-14, 6.1399e-12, 7.7194e-11, 2.8408e-13, 1.6236e-11, 5.9904e-12,\n 3.7500e-11, 1.5599e-10, 3.1017e-11, 3.9836e-13, 9.8380e-11, 1.4919e-12,\n 3.1730e-12, 2.5776e-11, 1.0703e-10, 8.7970e-12, 3.7796e-11, 5.6704e-11,\n 4.1755e-13, 1.0530e-13, 3.8598e-12, 1.8976e-11, 1.8104e-11, 5.0208e-12,\n 7.5581e-11, 3.9094e-11, 4.3534e-12, 4.2006e-14, 8.3885e-13, 1.1884e-13,\n 2.3731e-12, 1.0686e-11, 2.1609e-12, 1.3109e-12, 9.0497e-12, 1.0963e-11,\n 9.9788e-13, 2.7997e-13, 1.0490e-11, 2.4220e-12, 1.5042e-12, 2.1438e-13,\n 1.7479e-11, 5.0841e-14, 3.7804e-12, 1.6320e-11, 4.6461e-12, 2.1664e-12,\n 1.6189e-11, 5.4350e-12, 5.4537e-11, 1.7806e-12, 1.5208e-11, 1.2535e-12,\n 1.3775e-11, 1.3678e-10, 1.9305e-11, 4.1139e-11, 2.1043e-11, 6.1509e-11,\n 7.0352e-15, 7.4696e-13, 3.5779e-12, 5.3122e-12, 9.1583e-12, 7.5444e-11,\n 1.8940e-12, 5.6709e-12, 4.2408e-13, 6.3873e-12, 7.3573e-13, 1.7085e-11,\n 2.9237e-15, 1.1489e-11, 7.7622e-13, 8.4620e-12, 7.7858e-11, 3.9469e-11,\n 5.7982e-11, 1.5681e-11, 1.9843e-11, 6.1731e-11, 1.5677e-11, 2.9936e-13,\n 7.9370e-12, 7.0600e-11, 4.5160e-13, 9.3206e-12, 5.6441e-11, 2.0067e-11,\n 1.7702e-11, 1.7126e-11, 1.4822e-11, 4.4132e-13, 6.5098e-12, 3.4388e-12,\n 1.5953e-11, 4.3784e-12, 1.1025e-13, 1.0114e-12, 4.0843e-12, 4.3014e-13,\n 3.9567e-11, 1.0913e-11, 1.6707e-12, 1.3226e-11, 4.0508e-13, 1.5125e-11,\n 4.0626e-11, 5.2128e-12, 1.7272e-11, 5.3843e-12, 4.1076e-12, 4.2243e-11,\n 1.6407e-11, 4.6744e-11, 4.3727e-11, 9.4784e-12, 1.5038e-27, 6.6474e-29,\n 2.7120e-26, 1.9068e-27, 3.0364e-26, 1.1934e-27, 7.8278e-28, 1.6582e-26,\n 1.0867e-26, 1.5817e-26, 4.7750e-28, 5.9144e-28, 1.6587e-26, 1.4907e-26,\n 8.2181e-28, 1.8109e-27, 6.6713e-27, 4.2502e-30, 4.2172e-26, 6.6752e-27,\n 2.1966e-27, 1.5004e-26, 6.6731e-27, 6.8503e-27, 5.0088e-27, 1.5998e-27,\n 3.9306e-27, 4.4984e-28, 3.3989e-26, 6.9859e-27, 6.0684e-27, 5.4048e-27,\n 2.9255e-26, 8.8619e-27, 9.5594e-28, 3.3874e-27, 5.3830e-27, 7.6809e-29,\n 7.3938e-28, 2.3193e-28, 1.5968e-29, 7.1364e-28, 3.5339e-27, 5.0731e-29,\n 5.7959e-27, 1.7039e-27, 9.0042e-27, 4.6524e-29, 7.3644e-27, 6.3441e-29,\n 1.9414e-27, 2.2175e-31, 2.7263e-27, 1.2979e-26, 3.7659e-27, 7.2278e-28,\n 2.8196e-26, 3.6274e-27, 5.2393e-26, 4.2159e-27, 1.2112e-28, 6.7370e-28,\n 6.1160e-27, 3.6262e-27, 9.3428e-28, 4.8691e-27, 1.7820e-26, 2.3351e-26,\n 1.2435e-26, 5.1101e-28, 4.3508e-26, 3.1715e-26, 3.3329e-26, 5.5628e-26,\n 1.1509e-25, 3.6188e-27, 7.1719e-26, 1.5252e-27, 5.2822e-27, 1.0534e-27,\n 1.5524e-26, 6.1490e-26, 3.6739e-26, 2.3938e-26, 6.3691e-27, 1.1519e-26,\n 5.6060e-26, 1.2608e-25, 7.0608e-27, 1.9339e-26, 1.1132e-26, 5.8293e-26,\n 6.1164e-27, 3.5740e-28, 4.0559e-27, 3.8723e-27, 3.6012e-26, 1.2243e-28,\n 7.3423e-27, 5.3094e-29, 4.1746e-27, 1.9711e-26, 2.8980e-26, 2.4836e-27,\n 6.5518e-28, 2.8619e-27, 4.9520e-27, 2.8400e-28, 1.7444e-28, 8.1542e-28,\n 9.4262e-28, 1.6119e-28, 3.7307e-27, 4.6631e-27, 6.5961e-27, 5.9824e-27,\n 4.2545e-27, 5.6182e-29, 1.8977e-30, 1.0933e-28, 7.7434e-29, 8.0438e-27,\n 1.1153e-26, 8.1630e-27, 3.8543e-27, 1.0458e-26, 5.1833e-29, 1.7525e-26,\n 1.8658e-28, 2.1431e-27, 8.7294e-28, 2.7817e-26, 1.1119e-26, 1.7384e-26,\n 1.4463e-26, 3.0634e-28, 2.7248e-28, 3.2164e-27, 6.4395e-27, 1.9931e-26,\n 2.1068e-27, 9.9949e-28, 5.7349e-28, 8.6865e-29, 3.2696e-26, 3.0753e-27,\n 9.2427e-27, 6.9640e-28, 1.0950e-27, 7.0826e-26, 1.2040e-26, 1.3690e-28,\n 9.7714e-27, 1.1599e-26, 3.3666e-26, 2.0012e-26, 2.5634e-26, 1.2263e-26,\n 1.8506e-28, 1.4431e-25, 3.2695e-26, 4.0098e-28, 3.9548e-26, 1.0047e-26,\n 8.7440e-27, 5.1492e-28, 5.9364e-27, 2.8297e-27, 2.9476e-27, 1.1585e-26,\n 9.2656e-27, 1.1026e-30, 8.5968e-27, 8.6894e-28, 1.3378e-28, 2.0446e-27,\n 2.0936e-26, 1.4815e-26, 7.6602e-29, 1.1669e-26, 3.3875e-27, 2.4330e-27,\n 4.0771e-26, 4.0836e-26, 4.3222e-27, 2.2212e-28, 4.1726e-26, 3.8661e-26,\n 1.8028e-26, 2.2929e-27, 2.3398e-27, 9.2923e-27, 1.0897e-26, 1.2204e-26,\n 6.0053e-26, 4.2270e-27, 1.3241e-27, 2.0906e-26, 4.0354e-27, 2.0168e-26,\n 3.1829e-27, 3.4502e-27, 3.2721e-27, 1.5586e-26, 3.0411e-26, 2.1930e-26,\n 1.4608e-28, 5.5392e-28, 7.1180e-27, 2.9409e-30, 2.1587e-27, 1.3544e-26,\n 1.4456e-27, 4.1948e-27, 6.1890e-27, 1.2353e-26, 3.8594e-27, 6.1407e-27,\n 1.2709e-26, 6.9553e-27, 6.0059e-28, 1.3925e-27, 4.4361e-28, 8.0621e-28,\n 5.4794e-27, 3.6338e-27, 1.0249e-26, 3.5201e-26, 2.2194e-26, 1.5231e-26,\n 1.0901e-26, 1.3019e-26, 4.7020e-28, 2.6969e-26, 8.1618e-28, 1.2299e-27,\n 5.8018e-27, 3.3642e-27, 6.1297e-27, 1.2383e-27, 9.4999e-28, 1.9069e-31,\n 5.7545e-28, 4.7618e-28, 8.8578e-27, 1.5747e-26, 1.2250e-27, 1.0584e-26,\n 4.6566e-27, 1.3917e-26, 1.1500e-28, 3.9081e-27, 1.1139e-27, 1.6488e-26,\n 2.5570e-27, 1.4744e-27, 3.0598e-08, 4.7900e-07, 8.4316e-08, 8.0408e-08,\n 6.3620e-07, 5.6528e-07, 7.7109e-09, 4.1100e-09, 1.0304e-08, 1.0313e-07,\n 1.3660e-10, 2.7494e-08, 4.9909e-07, 1.6788e-08, 2.7624e-08, 1.2648e-07,\n 1.8569e-07, 3.8142e-08, 3.3998e-07, 1.2828e-07, 1.4227e-10, 3.9955e-10,\n 6.9000e-08, 4.5457e-07, 3.8296e-07, 1.3342e-09, 1.7250e-07, 9.2776e-09,\n 4.8536e-08, 9.8469e-09, 2.1466e-07, 1.7924e-07, 6.9473e-07, 7.4573e-09,\n 1.4213e-07, 2.0403e-07, 1.2036e-07, 1.2182e-07, 1.0744e-07, 4.2463e-09,\n 6.5794e-07, 2.1546e-09, 1.0644e-08, 1.8740e-08, 3.1862e-07, 2.7247e-08,\n 5.9585e-08, 2.4864e-08, 4.7825e-09, 4.5539e-08, 9.1284e-08, 1.6306e-08,\n 2.6626e-07, 2.5130e-07, 2.7083e-08, 1.8979e-08, 2.9367e-08, 2.1285e-09,\n 1.7196e-07, 6.5230e-08, 1.3904e-07, 1.3761e-07, 1.4488e-08, 3.7517e-09,\n 7.7458e-12, 3.7763e-10, 1.1664e-09, 3.4883e-09, 5.0308e-08, 3.5852e-09,\n 4.4799e-08, 1.1683e-07, 1.6814e-07, 2.6801e-08, 6.0736e-08, 7.6687e-10,\n 3.4251e-08, 1.0143e-06, 5.3217e-09, 5.1130e-08, 6.8645e-08, 4.6946e-07,\n 1.7230e-10, 1.3315e-07, 3.8367e-08, 1.8530e-07, 1.1831e-07, 1.3738e-07,\n 1.1225e-11, 4.3820e-07, 2.9457e-07, 2.6816e-10, 1.3192e-08, 3.1733e-07,\n 1.4739e-08, 6.1560e-08, 8.7007e-10, 2.1865e-07, 4.2099e-08, 2.0462e-07,\n 1.0665e-07, 3.6984e-07, 4.6579e-09, 8.9426e-08, 1.8335e-07, 6.6813e-07,\n 2.9545e-07, 5.2146e-10, 1.1202e-08, 1.1218e-07, 7.0116e-08, 1.2948e-08,\n 1.9755e-08, 2.3354e-08, 6.9260e-08, 1.0301e-08, 8.7271e-08, 1.7320e-09,\n 2.6805e-07, 3.3879e-09, 2.6264e-07, 3.0088e-07, 9.2707e-08, 2.7320e-09,\n 3.1684e-08, 3.7091e-09, 8.6737e-08, 2.0557e-07, 2.0527e-08, 5.3250e-09,\n 2.1374e-07, 1.0605e-08, 1.2483e-07, 7.0338e-08, 3.6236e-08, 2.4680e-10,\n 5.0354e-09, 5.3333e-07, 6.8729e-09, 3.7001e-07, 6.3890e-11, 1.8315e-07,\n 6.9394e-08, 6.9321e-08, 2.0459e-08, 2.4927e-07, 9.3773e-08, 3.6419e-08,\n 1.5117e-07, 4.7180e-10, 3.4357e-07, 2.1641e-08, 2.6644e-09, 3.5726e-08,\n 7.0635e-08, 2.7704e-07, 5.3387e-08, 1.2690e-07, 1.9683e-09, 3.1918e-07,\n 9.3320e-08, 9.5370e-08, 7.4840e-08, 5.8532e-07, 5.3307e-10, 1.6560e-08,\n 5.0789e-07, 3.9640e-08, 4.6829e-08, 2.4362e-09, 8.1148e-08, 2.0995e-07,\n 1.8243e-07, 2.7743e-07, 6.7319e-09, 8.2571e-08, 4.8186e-08, 4.3119e-08,\n 4.4069e-08, 2.7761e-08, 7.0244e-08, 1.7623e-07, 1.3535e-08, 3.0051e-07,\n 3.3560e-07, 8.1546e-08, 8.0089e-08, 7.6227e-08, 5.5132e-10, 1.6312e-08,\n 1.6613e-08, 3.5827e-08, 1.6207e-07, 2.0528e-08, 8.9322e-09, 1.2355e-07,\n 3.4095e-11, 3.0037e-07, 2.5399e-09, 3.2911e-07, 1.4002e-07, 3.9358e-07,\n 1.0159e-08, 6.7321e-08, 2.6436e-07, 9.9473e-08, 9.6577e-09, 2.5516e-07,\n 1.9440e-08, 1.2368e-07, 6.2826e-10, 1.9055e-08, 5.5657e-07, 1.2142e-07,\n 5.5860e-07, 6.2851e-08, 8.9740e-08, 2.2228e-10, 3.1040e-08, 7.8274e-08,\n 7.1977e-10, 5.2690e-09, 6.7617e-08, 2.1650e-08, 3.1107e-08, 1.7719e-08,\n 2.0075e-07, 2.0110e-08, 5.0782e-09, 1.6763e-09, 5.5506e-10, 9.6669e-10,\n 1.2113e-08, 1.6602e-07, 1.1583e-08, 2.3683e-08, 2.9600e-09, 2.8265e-07,\n 1.3861e-07, 2.6876e-07, 6.1154e-07, 1.2666e-08, 3.3101e-07, 1.3509e-07,\n 2.9998e-08, 1.1235e-06, 2.4246e-08, 1.1562e-07, 3.8766e-08, 2.7818e-08,\n 1.4471e-08, 1.2431e-07, 6.6965e-08, 3.2344e-09, 1.6751e-08, 2.8784e-07],\n device='cuda:0')" + }, + "36": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0249e-08, 5.7233e-12, 5.7234e-08, ..., 1.4822e-10, 1.4921e-09,\n 4.1247e-09],\n [1.3893e-08, 1.4479e-12, 8.1180e-08, ..., 2.3234e-10, 2.2049e-09,\n 5.8231e-09],\n [7.8449e-09, 9.9788e-13, 4.5882e-08, ..., 1.2123e-10, 1.3058e-09,\n 2.9669e-09],\n ...,\n [5.9362e-09, 8.4729e-13, 3.6001e-08, ..., 5.5682e-11, 8.9820e-10,\n 2.3880e-09],\n [7.7985e-10, 2.0857e-12, 4.2957e-09, ..., 1.4941e-11, 8.4439e-11,\n 3.2375e-10],\n [4.1263e-09, 4.9721e-12, 2.4213e-08, ..., 7.0467e-11, 5.6390e-10,\n 1.6570e-09]], device='cuda:0')" + }, + "37": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.7079e-07, 5.1584e-07, 2.7586e-07, 1.4646e-06, 4.4450e-09, 4.2481e-08,\n 7.7796e-07, 1.4608e-06, 1.1815e-06, 5.8837e-07, 2.5502e-08, 1.2285e-06,\n 6.0692e-08, 3.0266e-09, 1.5976e-07, 1.5264e-09, 6.6728e-07, 1.2636e-06,\n 3.5340e-07, 7.5499e-08, 1.3760e-07, 1.0127e-07, 4.8209e-07, 6.1799e-09,\n 1.6064e-08, 1.3631e-07, 1.4055e-08, 2.0549e-07, 9.7382e-08, 3.0573e-07,\n 6.6853e-10, 1.6406e-07, 1.6601e-07, 5.7817e-08, 7.4436e-09, 3.2548e-07,\n 1.1145e-08, 1.8715e-06, 3.1384e-07, 2.2336e-08, 7.0505e-07, 4.5927e-08,\n 1.3495e-07, 1.2203e-08, 2.4625e-07, 4.2233e-07, 5.3029e-09, 5.5168e-10,\n 3.4644e-07, 3.1476e-07, 4.4965e-07, 1.8740e-08, 1.9542e-07, 7.3247e-07,\n 9.3572e-09, 8.1845e-08, 7.4036e-07, 2.9373e-09, 3.3975e-08, 6.0997e-08,\n 2.5605e-07, 2.0041e-07, 4.1230e-08, 3.9175e-07, 2.4628e-09, 6.1749e-07,\n 8.4105e-08, 1.6079e-07, 1.8106e-07, 6.6298e-08, 3.1147e-08, 6.1021e-07,\n 7.4765e-07, 1.5664e-06, 1.0108e-08, 1.2230e-09, 6.1682e-08, 5.2665e-07,\n 7.9803e-09, 1.5146e-09, 1.2380e-07, 7.8499e-09, 3.1387e-07, 5.3916e-08,\n 5.9328e-08, 3.0667e-08, 5.9924e-08, 8.6155e-08, 8.8815e-08, 1.0362e-06,\n 9.0299e-10, 6.1603e-07, 2.5546e-07, 6.8638e-07, 1.6728e-06, 1.0662e-06,\n 1.0827e-07, 1.4597e-06, 5.2471e-08, 1.8449e-08, 6.4971e-07, 2.2335e-10,\n 2.8323e-07, 1.7111e-07, 1.6131e-08, 4.5271e-07, 2.7209e-10, 2.6661e-06,\n 5.5236e-07, 5.7303e-07, 2.0251e-09, 1.6575e-08, 5.1330e-08, 5.1206e-07,\n 1.6141e-07, 4.3030e-07, 9.0993e-10, 5.3185e-07, 6.6878e-07, 2.6811e-07,\n 8.9415e-07, 3.5323e-07, 2.7609e-07, 3.9567e-08, 1.8706e-06, 9.8093e-07,\n 2.4932e-07, 1.0304e-07, 2.1361e-07, 7.6028e-10, 2.2244e-06, 2.1618e-07,\n 2.8082e-09, 8.2931e-10, 1.4448e-06, 5.5733e-07, 8.3920e-08, 3.6876e-08,\n 3.4101e-09, 1.1166e-09, 9.2110e-07, 1.9159e-09, 1.3821e-06, 2.6893e-08,\n 3.1201e-08, 6.6141e-08, 5.5035e-07, 4.6162e-07, 1.3407e-07, 1.6144e-09,\n 2.6581e-08, 8.4818e-09, 1.0065e-09, 9.1002e-07, 8.0521e-08, 1.1939e-06,\n 2.8644e-08, 6.6924e-07, 9.0549e-08, 9.0694e-07, 2.3708e-07, 3.8590e-08,\n 4.0714e-07, 3.5420e-07, 4.9141e-07, 1.0830e-06, 2.2755e-06, 1.0751e-09,\n 4.2696e-09, 2.0706e-07, 1.1973e-07, 1.8229e-09, 7.3427e-07, 9.4512e-08,\n 1.0616e-06, 1.3238e-09, 4.9609e-07, 5.5358e-08, 2.3884e-07, 2.3970e-07,\n 2.5555e-06, 2.5445e-07, 1.6431e-07, 2.4427e-10, 4.7873e-08, 7.4204e-07,\n 3.0310e-09, 2.7110e-07, 6.9066e-07, 6.1538e-08, 7.0565e-07, 6.1359e-08,\n 1.7789e-07, 1.6283e-07, 1.0759e-06, 5.7575e-07, 6.7742e-07, 4.9729e-09,\n 2.1580e-07, 1.3821e-06, 1.4777e-09, 1.2820e-07, 6.6082e-08, 9.6904e-07,\n 9.4674e-07, 1.3541e-07, 4.4791e-07, 9.2397e-08, 6.6184e-09, 2.5246e-09,\n 2.9983e-09, 1.5078e-07, 7.7688e-07, 6.9147e-07, 2.4952e-07, 8.6800e-07,\n 1.0543e-07, 5.2152e-07, 3.7991e-09, 1.3685e-08, 2.6132e-10, 7.3657e-08,\n 8.1701e-07, 6.0887e-08, 2.0534e-08, 4.2051e-07, 1.1838e-07, 3.6964e-07,\n 2.6286e-08, 5.2140e-07, 7.2970e-09, 2.8531e-07, 8.8684e-07, 3.7408e-09,\n 7.3846e-07, 1.6570e-08, 1.7534e-07, 6.5818e-10, 1.3923e-06, 2.8062e-08,\n 1.2762e-07, 2.8810e-08, 3.3707e-08, 1.9743e-07, 2.9055e-07, 2.0135e-07,\n 7.2784e-07, 2.7678e-07, 1.1783e-06, 1.1322e-07, 6.1281e-07, 1.3285e-11,\n 4.8302e-08, 2.2758e-07, 2.9418e-08, 1.5659e-07], device='cuda:0')" + }, + "38": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5943e-06, 6.8672e-07, 9.8298e-07, ..., 1.2520e-07, 7.6421e-07,\n 1.4112e-06],\n [2.6787e-07, 6.9628e-08, 1.0424e-07, ..., 1.3882e-08, 7.7159e-08,\n 1.3990e-07],\n [2.9181e-07, 7.5233e-08, 1.0861e-07, ..., 1.3366e-08, 8.6616e-08,\n 1.6180e-07],\n [3.0573e-07, 8.4408e-08, 1.1494e-07, ..., 1.4498e-08, 9.1272e-08,\n 1.6946e-07]], device='cuda:0')" + }, + "39": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.5055e-05, 6.7032e-06, 7.2531e-06, 7.7478e-06], device='cuda:0')" + }, + "40": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5943e-06, 6.8672e-07, 9.8298e-07, ..., 1.2520e-07, 7.6421e-07,\n 1.4112e-06],\n [2.6787e-07, 6.9628e-08, 1.0424e-07, ..., 1.3882e-08, 7.7159e-08,\n 1.3990e-07],\n [2.9181e-07, 7.5233e-08, 1.0861e-07, ..., 1.3366e-08, 8.6616e-08,\n 1.6180e-07],\n [3.0573e-07, 8.4408e-08, 1.1494e-07, ..., 1.4498e-08, 9.1272e-08,\n 1.6946e-07]], device='cuda:0')" + }, + "41": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.5055e-05, 6.7032e-06, 7.2531e-06, 7.7478e-06], device='cuda:0')" + }, + "42": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5943e-06, 6.8672e-07, 9.8298e-07, ..., 1.2520e-07, 7.6421e-07,\n 1.4112e-06],\n [2.6787e-07, 6.9628e-08, 1.0424e-07, ..., 1.3882e-08, 7.7159e-08,\n 1.3990e-07],\n [2.9181e-07, 7.5233e-08, 1.0861e-07, ..., 1.3366e-08, 8.6616e-08,\n 1.6180e-07],\n [3.0573e-07, 8.4408e-08, 1.1494e-07, ..., 1.4498e-08, 9.1272e-08,\n 1.6946e-07]], device='cuda:0')" + }, + "43": { "step": "tensor(1252.)", - "exp_avg": "tensor([[ 6.2476e-06, 1.5620e-24, 5.6052e-45, ..., 5.6052e-45,\n -1.0678e-08, 3.5135e-07],\n [ 2.0481e-05, 2.4674e-23, -5.6052e-45, ..., -5.6052e-45,\n 1.1884e-09, -1.4128e-06],\n [ 9.2974e-06, 4.8254e-24, -5.6052e-45, ..., -5.6052e-45,\n -2.5342e-08, 8.8364e-07],\n ...,\n [ 3.9494e-05, 1.0036e-23, -5.6052e-45, ..., -5.6052e-45,\n 7.4303e-09, 1.3260e-05],\n [-3.1277e-05, -1.7812e-23, 5.6052e-45, ..., -5.6052e-45,\n 1.6134e-08, 3.6615e-06],\n [-3.0890e-05, -6.1184e-24, 5.6052e-45, ..., -5.6052e-45,\n -1.5985e-08, -2.5147e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.9864e-08, 1.0276e-11, 2.3055e-09, ..., 1.1772e-17, 1.8261e-08,\n 2.0070e-10],\n [2.7359e-08, 1.8948e-10, 5.7807e-10, ..., 6.0166e-16, 2.3274e-09,\n 3.5971e-10],\n [4.3654e-08, 1.1519e-12, 1.5706e-10, ..., 2.6240e-16, 6.9820e-11,\n 3.4333e-10],\n ...,\n [3.9805e-08, 1.8755e-10, 4.6976e-11, ..., 2.2957e-16, 1.6913e-09,\n 3.4654e-10],\n [5.4508e-08, 1.5719e-11, 2.1832e-09, ..., 3.4213e-18, 4.7717e-09,\n 1.0817e-09],\n [3.9547e-08, 2.4142e-11, 1.7476e-09, ..., 2.1926e-16, 1.0526e-08,\n 8.0925e-10]], device='cuda:0')" + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.5055e-05, 6.7032e-06, 7.2531e-06, 7.7478e-06], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "shared", "betas": [ 0.9, @@ -52,7 +212,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_384", "betas": [ 0.9, @@ -75,7 +235,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_768", "betas": [ 0.9, @@ -98,7 +258,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_1024", "betas": [ 0.9, @@ -121,7 +281,7 @@ ] }, { - "lr": 0.00975530705321762, + "lr": 0.00904518046337755, "name": "scale_1280", "betas": [ 0.9, @@ -144,7 +304,7 @@ ] }, { - "lr": 0.004877665762479736, + "lr": 0.004522637977440181, "name": "fusion", "betas": [ 0.9, @@ -200,7 +360,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 1, + "T_cur": 2, "base_lrs": [ 0.01, 0.01, @@ -209,24 +369,25 @@ 0.01, 0.005 ], - "last_epoch": 1, + "last_epoch": 2, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.00975530705321762, - 0.004877665762479736 + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.00904518046337755, + 0.004522637977440181 ] }, "metrics": { - "best_val_acc": 79.318, - "best_epoch": 0, + "best_val_acc": 80.924, + "best_epoch": 1, "scale_accuracies": { - "384": 79.318 + "384": 80.924, + "768": 80.638 } }, "train_config": {