diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,301 +1,316 @@ { - "epoch": 8, + "epoch": 9, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(11268.)", - "exp_avg": "tensor([[-1.5343e-05, -2.9916e-05, 8.0613e-06, ..., 5.4684e-07,\n -1.3229e-05, 3.6274e-05],\n [ 1.3143e-04, 1.0377e-04, -1.0416e-04, ..., -6.9462e-05,\n -3.5507e-05, 6.6690e-05],\n [ 2.0145e-05, -2.0488e-05, 5.1333e-05, ..., -8.9157e-05,\n -4.8126e-05, -1.1310e-05],\n ...,\n [ 2.2608e-05, -6.8028e-06, 1.1318e-05, ..., 3.2938e-05,\n 1.5136e-05, 6.1005e-06],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-3.4981e-05, -1.6545e-05, 7.6290e-05, ..., -3.7448e-05,\n -1.8311e-05, 5.9050e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4443e-08, 1.0509e-08, 6.4694e-09, ..., 7.3627e-09, 6.9091e-09,\n 7.3090e-09],\n [6.1750e-09, 6.5877e-09, 4.0047e-09, ..., 3.1493e-09, 3.3491e-09,\n 2.2819e-09],\n [1.3491e-08, 1.1312e-08, 1.5966e-08, ..., 1.0809e-08, 8.5882e-09,\n 8.3468e-09],\n ...,\n [1.3088e-08, 1.5454e-08, 2.3230e-08, ..., 1.1529e-08, 9.3906e-09,\n 9.5801e-09],\n [2.0930e-15, 1.0919e-14, 3.3181e-15, ..., 1.9453e-17, 1.0470e-14,\n 6.1699e-17],\n [1.2837e-08, 1.2301e-08, 1.3881e-08, ..., 1.2517e-08, 7.1009e-09,\n 6.3818e-09]], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([[-6.3960e-05, -6.0342e-06, 2.5991e-05, ..., 7.9031e-06,\n -1.0817e-05, 1.0993e-06],\n [ 8.8662e-06, -1.3754e-05, -6.6039e-06, ..., -7.5104e-06,\n 6.7735e-06, 1.1299e-05],\n [ 2.0843e-05, -5.0735e-05, -6.4116e-05, ..., 4.3576e-05,\n 6.4431e-05, -3.8258e-05],\n ...,\n [-3.0679e-06, -6.5986e-05, -5.2217e-05, ..., 1.6762e-06,\n 6.4565e-05, -6.0786e-05],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 1.9304e-05, 9.2353e-05, -7.0835e-05, ..., 7.7714e-05,\n 4.0903e-05, -2.3749e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4227e-08, 9.8451e-09, 5.8651e-09, ..., 7.5263e-09, 6.9033e-09,\n 6.9766e-09],\n [5.8534e-09, 5.5103e-09, 3.1861e-09, ..., 2.9842e-09, 3.1072e-09,\n 2.1654e-09],\n [1.4612e-08, 1.2444e-08, 1.6592e-08, ..., 1.1548e-08, 9.1652e-09,\n 8.7804e-09],\n ...,\n [1.3218e-08, 1.5949e-08, 2.4706e-08, ..., 1.1718e-08, 9.9901e-09,\n 1.0700e-08],\n [5.9810e-16, 3.1201e-15, 9.4819e-16, ..., 5.5587e-18, 2.9919e-15,\n 1.7631e-17],\n [1.3356e-08, 1.3326e-08, 1.5134e-08, ..., 1.3426e-08, 7.8721e-09,\n 6.7177e-09]], device='cuda:0')" }, "1": { - "step": "tensor(11268.)", - "exp_avg": "tensor([-2.5786e-06, 2.3335e-03, -2.3068e-03, ..., -5.5620e-04,\n 5.6052e-45, -2.2192e-03], device='cuda:0')", - "exp_avg_sq": "tensor([1.4529e-05, 6.4480e-06, 2.1745e-05, ..., 2.4178e-05, 7.0869e-11,\n 2.0185e-05], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([-9.0691e-04, 7.2409e-04, 3.0193e-03, ..., 1.0748e-03,\n 5.6052e-45, 3.6514e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.4690e-05, 6.2727e-06, 2.1278e-05, ..., 2.5137e-05, 2.0251e-11,\n 2.1901e-05], device='cuda:0')" }, "2": { - "step": "tensor(11268.)", - "exp_avg": "tensor([[ 4.3837e-06, 6.6444e-08, 6.5168e-06, ..., 1.0892e-05,\n 5.6052e-45, 9.9768e-07],\n [-1.7445e-06, -1.7880e-06, -3.4509e-06, ..., 3.9121e-06,\n -5.6052e-45, 1.0129e-05],\n [-6.9749e-07, -8.9262e-07, 2.2429e-07, ..., 2.6828e-06,\n -5.6052e-45, 2.5236e-06],\n ...,\n [-1.6380e-08, -2.9910e-08, 8.3911e-08, ..., 1.6073e-06,\n 5.6052e-45, 2.2116e-09],\n [ 2.7342e-07, -3.6762e-06, 1.5606e-07, ..., 2.0683e-07,\n -5.6052e-45, -3.1494e-06],\n [ 9.8010e-07, -3.1956e-06, -4.2292e-06, ..., -1.0124e-05,\n 5.6052e-45, 1.5897e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.8636e-09, 2.6598e-10, 1.5985e-09, ..., 7.0807e-09, 1.6688e-15,\n 7.9274e-10],\n [2.2948e-09, 9.3642e-10, 5.7460e-10, ..., 2.6606e-10, 9.2589e-14,\n 4.0976e-09],\n [2.6984e-10, 4.2794e-10, 3.6995e-10, ..., 6.4811e-10, 6.3807e-13,\n 1.1953e-09],\n ...,\n [6.4829e-11, 4.0342e-11, 5.6010e-10, ..., 8.6937e-11, 6.4074e-14,\n 1.7563e-11],\n [7.3777e-10, 3.3662e-11, 2.9484e-10, ..., 3.1220e-10, 1.2925e-16,\n 7.0558e-10],\n [3.1164e-09, 3.4633e-10, 7.0920e-09, ..., 2.2102e-09, 1.1451e-14,\n 2.4112e-09]], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([[-2.7164e-05, -1.9900e-07, 9.5126e-07, ..., 9.4927e-06,\n 5.6052e-45, -9.6058e-07],\n [-2.7432e-05, 5.1023e-07, -2.3293e-06, ..., 2.8908e-07,\n -5.6052e-45, -2.5127e-06],\n [-3.1435e-07, -3.2044e-07, -1.2302e-07, ..., 2.4341e-06,\n -5.6052e-45, -1.9378e-07],\n ...,\n [ 5.8933e-07, 1.5870e-07, -7.7837e-07, ..., 9.6177e-07,\n 5.6052e-45, 2.0668e-06],\n [ 3.8947e-06, -6.5624e-09, -9.0244e-06, ..., -9.2989e-07,\n -5.6052e-45, 6.2904e-06],\n [-1.1632e-05, -7.0983e-06, 8.5217e-06, ..., 1.5592e-06,\n 5.6052e-45, 2.1561e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9624e-09, 2.2615e-10, 1.6196e-09, ..., 6.6825e-09, 4.7686e-16,\n 6.7964e-10],\n [2.0070e-09, 7.9523e-10, 4.2056e-10, ..., 2.2196e-10, 2.6458e-14,\n 4.2702e-09],\n [2.3410e-10, 3.6115e-10, 3.2898e-10, ..., 6.0066e-10, 1.8233e-13,\n 1.4097e-09],\n ...,\n [4.7294e-11, 4.8290e-11, 5.5586e-10, ..., 7.2295e-11, 1.8310e-14,\n 1.7603e-11],\n [5.4413e-10, 2.2035e-11, 1.9991e-10, ..., 3.0986e-10, 3.6935e-17,\n 4.8808e-10],\n [3.3296e-09, 3.1063e-10, 6.0098e-09, ..., 1.7746e-09, 3.2723e-15,\n 2.6791e-09]], device='cuda:0')" }, "3": { - "step": "tensor(11268.)", - "exp_avg": "tensor([ 1.5229e-04, -2.9219e-04, -1.2801e-04, -3.4364e-04, 9.5671e-05,\n 4.8997e-05, -2.2886e-04, -1.6803e-04, 1.7185e-05, 9.2368e-05,\n 1.4420e-04, -1.6297e-04, -4.6934e-05, -1.6186e-04, -1.1104e-04,\n -4.3819e-06, -4.2037e-05, -3.7408e-04, -1.5471e-04, -1.0270e-04,\n 2.6074e-06, 8.2539e-05, -7.1725e-06, 2.3472e-04, -1.4651e-04,\n -3.3279e-04, -2.0961e-04, -2.0892e-04, 8.0952e-05, -3.5379e-04,\n 9.2425e-05, 8.0845e-05, 4.9962e-05, 1.8483e-04, -2.0881e-04,\n 3.5471e-05, 6.3195e-05, 4.1524e-04, 1.2191e-04, 1.3449e-04,\n -2.8320e-04, 7.7556e-05, -1.5116e-04, 2.2942e-05, 2.8454e-04,\n -3.1674e-04, -5.6052e-45, -1.5949e-04, 5.6884e-05, 3.5002e-04,\n 4.6816e-05, 3.7598e-06, 1.8236e-04, -2.8362e-04, 6.6865e-05,\n -2.6347e-04, -2.2549e-04, 4.0716e-05, -4.4075e-04, 2.0291e-04,\n 2.2085e-04, 1.9384e-04, 3.9979e-04, -1.6815e-04, 9.7195e-05,\n 1.9780e-04, 6.6959e-04, 1.0407e-04, -5.2698e-05, -5.3397e-06,\n -9.1514e-05, -5.3188e-05, 9.7403e-05, 9.3548e-05, 2.5342e-04,\n 2.9798e-05, 3.2833e-05, -1.0137e-05, -8.1619e-05, -7.6645e-05,\n 1.1814e-05, 1.3889e-04, 5.9521e-05, 3.9917e-05, 9.9553e-06,\n -3.0184e-04, -1.4107e-04, 3.5316e-04, 2.9205e-05, -1.9473e-04,\n 2.6450e-04, -3.5905e-04, 9.1293e-05, 3.1483e-05, 1.0005e-04,\n -1.3656e-04, 4.3081e-05, 3.5351e-05, -6.3808e-04, -2.5330e-04,\n 4.1012e-04, 9.1998e-05, 1.1210e-04, 9.1816e-05, 3.3382e-04,\n 2.2303e-05, 4.2458e-05, 1.5800e-04, -3.0466e-04, -4.6722e-06,\n 2.0434e-04, -5.2888e-05, 8.0016e-05, 2.8135e-05, 2.2964e-05,\n 1.0689e-04, 8.6122e-05, -3.9746e-05, 3.4084e-05, 9.1869e-06,\n 6.7812e-05, 5.0680e-04, -5.2910e-05, 2.0601e-04, 4.4756e-04,\n -1.1057e-04, 2.1201e-04, 6.5971e-05, -1.7056e-04, 6.1104e-05,\n -9.5285e-05, -1.2622e-04, -1.7720e-04, 8.3173e-06, 6.8743e-05,\n -1.4036e-04, 2.0822e-04, -3.7766e-04, 3.5963e-04, -9.7682e-05,\n 7.6612e-06, 3.8310e-06, -6.8941e-05, -1.1112e-04, 3.5452e-05,\n -3.7110e-04, -2.3343e-04, -1.2359e-04, -7.4859e-05, 6.3316e-06,\n -1.7249e-04, -1.0408e-04, -4.8535e-05, -1.9113e-04, -2.6144e-05,\n -1.7043e-05, 3.5163e-05, -3.5447e-05, -7.7567e-05, -2.0112e-04,\n -1.1606e-04, 3.6766e-05, -9.7139e-05, -1.2948e-05, -2.0277e-04,\n 2.7818e-05, -1.1609e-04, 6.5892e-04, 2.7178e-04, 2.6926e-04,\n 1.8817e-04, 3.0370e-04, 3.5504e-05, -1.6215e-04, 6.3222e-05,\n 1.2291e-04, -2.7731e-04, -3.5287e-05, 1.3415e-05, -1.3244e-05,\n 1.1431e-05, 4.3161e-05, 4.3297e-05, 5.3435e-06, -1.4040e-04,\n 1.9187e-04, 1.9658e-04, 2.3720e-04, -8.2670e-05, -7.4134e-05,\n -1.0299e-04, 6.5903e-05, 1.0671e-05, 1.7196e-05, 3.8074e-04,\n -1.6844e-04, -4.5394e-05, -1.6590e-04, 6.4303e-06, -1.3504e-06,\n 9.8468e-04, 1.0775e-04, 5.1250e-05, -4.9287e-05, 2.1062e-04,\n 2.7151e-04, -3.4830e-06, -1.0807e-04, 1.6426e-04, 1.3161e-04,\n 1.9526e-04, -2.2069e-04, 5.6052e-45, 1.2185e-04, 2.8381e-05,\n 1.5901e-04, -1.5507e-04, -2.3662e-05, 3.0013e-04, 9.1942e-05,\n -4.1790e-05, -9.2152e-05, -6.2299e-05, -2.2772e-04, -2.1145e-04,\n 1.6266e-04, -5.6191e-04, 9.7258e-05, -3.2974e-05, -4.0231e-04,\n -2.1738e-05, -1.2598e-04, 1.9880e-04, -1.4613e-04, 1.1519e-04,\n -2.7870e-04, 1.3778e-04, -1.5295e-05, -1.1578e-04, -1.9365e-04,\n -5.2622e-05, -8.2147e-05, -4.4753e-05, 3.3654e-04, 1.1808e-04,\n 6.8941e-05, 7.6050e-05, -1.7478e-04, 2.1500e-04, -1.7520e-05,\n 7.2670e-05, 9.0456e-05, -1.6727e-04, 1.4467e-04, 2.4953e-05,\n 6.7948e-05, -1.4747e-04, 4.0561e-05, -6.7893e-04, 1.3536e-04,\n 2.8986e-04, 1.0582e-04, 2.2390e-05, 8.4432e-05, 6.6921e-05,\n 1.2139e-04, 1.5982e-04, -5.8609e-05, 8.7098e-05, -4.4338e-05,\n 3.2509e-05, -3.6579e-05, -3.3608e-04, 1.5402e-04, 6.9273e-05,\n 1.3649e-04, -3.5791e-05, -9.1216e-05, 5.3865e-05, 1.5280e-04,\n -4.2523e-04, 4.9081e-05, -7.7856e-05, -1.3650e-04, -1.5560e-05,\n -3.8984e-05, 1.5914e-04, -2.4416e-04, -7.2311e-05, -2.9961e-04,\n 2.7047e-04, 2.4877e-04, 2.6457e-04, 1.1013e-04, -5.8835e-05,\n 4.2717e-06, 2.1048e-04, 7.0403e-05, 2.1980e-04, -5.2683e-05,\n 1.3578e-05, 2.4864e-04, -1.0618e-04, -1.1770e-04, -5.9297e-05,\n 3.4439e-05, -3.5771e-06, 1.1658e-04, 8.5610e-05, 1.6608e-04,\n -2.9284e-05, 7.7598e-05, 5.5339e-07, -9.4520e-05, 2.9793e-04,\n 1.8167e-04, -1.5612e-04, 2.5042e-04, -1.9842e-05, -1.5637e-04,\n 2.2985e-04, 1.2404e-04, 2.4799e-04, -8.3271e-06, 3.0838e-05,\n -1.9754e-04, -2.4900e-05, -2.4865e-04, 1.6674e-04, -1.3708e-04,\n 1.3966e-04, 1.3792e-04, 2.9294e-05, 2.4573e-04, 3.1070e-04,\n -6.8794e-05, -3.0605e-04, 1.0557e-04, 5.2921e-05, -1.8375e-04,\n -1.9304e-04, -1.2493e-04, 1.4038e-04, 6.1003e-05, -3.6518e-04,\n -2.9476e-05, 4.3739e-05, -4.6015e-05, -3.0393e-06, -1.1751e-04,\n 9.3789e-05, 1.7479e-04, 2.3947e-05, -3.0682e-04, 3.1597e-04,\n -1.5001e-04, 8.2501e-04, 1.1845e-04, -3.5118e-04, 2.1722e-04,\n -3.4041e-04, 1.1784e-04, 3.0923e-04, 7.2952e-06, -8.0025e-05,\n -1.9046e-04, -9.1373e-06, 7.9670e-05, 1.1091e-04, -1.6488e-04,\n 3.7483e-05, -3.3692e-04, -7.2778e-05, -2.0617e-04, 1.0152e-04,\n 2.8278e-04, 5.7331e-05, 9.3630e-05, -3.0906e-04, -1.2777e-04,\n -4.2197e-05, -2.5509e-04, 2.6364e-04, 1.9492e-05, -7.7376e-04,\n -6.6202e-05, 4.2200e-05, -8.8475e-05, 2.8754e-04, -6.7222e-05,\n 1.4772e-04, 6.9926e-05, 3.2623e-04, -1.4134e-04, 1.3683e-04,\n -5.2892e-05, -8.9700e-05, 1.3334e-05, -1.1221e-04, -1.2480e-04,\n 1.2755e-04, 6.8539e-05, -6.5429e-05, -2.5807e-05, -3.9333e-04,\n -5.5534e-05, 1.1339e-04, -1.5995e-04, 8.1842e-05, -1.1484e-04,\n 1.4207e-04, 1.5680e-04, 7.0450e-05, 3.2238e-04, -2.7028e-04,\n -1.1824e-04, 3.0010e-04, -1.0347e-05, 1.7322e-04, 1.9967e-04,\n 5.9531e-05, -4.9493e-05, 9.9186e-05, -7.1483e-05, -8.8750e-05,\n 5.8170e-05, 7.8102e-05, -1.1151e-04, -6.8837e-05, 1.8265e-04,\n 1.2505e-05, 7.9890e-05, 2.5825e-05, 1.0739e-04, -1.9385e-04,\n 6.0349e-06, -2.2406e-04, -2.4324e-04, -1.9040e-04, 3.5842e-04,\n 4.1158e-04, 1.0126e-04, -1.8945e-04, 1.6112e-05, 2.4980e-04,\n 1.0767e-04, 1.4455e-04, -4.2564e-05, 1.6563e-04, 5.8149e-04,\n -1.2088e-04, 3.1214e-05, -3.1510e-05, -2.8824e-04, -2.8342e-04,\n 8.4168e-05, -2.3589e-06, 2.7631e-04, -1.0592e-04, -1.8831e-05,\n -7.6251e-05, 1.2220e-04, -6.9987e-06, 8.6507e-05, 4.7519e-05,\n 1.4535e-04, -8.3508e-05, -2.1179e-04, -6.3078e-06, -3.4268e-04,\n 1.0571e-04, 8.3764e-05, -7.8774e-06, 5.8167e-06, 3.6839e-04,\n 5.7583e-05, -4.1581e-05, -5.3662e-05, -2.9042e-04, -1.3293e-05,\n 3.2602e-05, -1.5973e-06, -9.6672e-05, 1.2181e-04, 1.0235e-04,\n 2.1583e-04, -5.5438e-05, -1.3240e-04, 1.0263e-04, 3.9766e-04,\n -1.3819e-04, -2.2814e-05, -5.8962e-05, 8.8468e-05, 1.2026e-04,\n 3.7707e-04, -1.3652e-04, -2.6984e-04, 1.7916e-04, -2.0370e-04,\n -2.0017e-05, 3.3012e-05, 5.8549e-05, -1.9374e-04, -4.8018e-06,\n 4.3375e-05, 1.1050e-05, -1.9575e-04, -6.6394e-06, -7.7931e-05,\n 8.5722e-05, -6.5492e-05], device='cuda:0')", - "exp_avg_sq": "tensor([1.7803e-07, 2.3964e-07, 1.9902e-07, 3.3806e-07, 1.7414e-07, 2.9508e-07,\n 1.5384e-07, 2.8728e-07, 1.7856e-07, 1.8355e-07, 3.7140e-07, 2.6759e-07,\n 1.1042e-07, 2.4060e-07, 1.1885e-07, 2.9829e-07, 2.5338e-07, 3.2596e-07,\n 2.2953e-07, 1.5077e-07, 1.5147e-07, 2.1988e-07, 3.8888e-07, 2.2474e-07,\n 2.9612e-07, 3.1480e-07, 3.2770e-07, 4.0255e-07, 1.2832e-07, 1.3322e-07,\n 3.0065e-07, 3.6437e-07, 3.1909e-07, 4.5067e-07, 1.4544e-07, 2.5752e-07,\n 2.4253e-07, 4.1259e-07, 3.6068e-07, 3.0766e-07, 4.6250e-07, 4.0612e-07,\n 3.3905e-07, 1.6448e-07, 3.0983e-07, 3.3710e-07, 3.1158e-09, 2.3513e-07,\n 3.9940e-07, 2.1685e-07, 2.0466e-07, 1.7958e-07, 3.4395e-07, 2.2241e-07,\n 2.5642e-07, 4.3040e-07, 3.3548e-07, 2.9221e-07, 3.6896e-07, 3.4259e-07,\n 2.3068e-07, 3.1603e-07, 2.7869e-07, 3.2463e-07, 2.2689e-07, 1.5499e-07,\n 2.9973e-07, 2.1318e-07, 2.6262e-07, 1.8337e-07, 2.8875e-07, 8.5985e-08,\n 2.4598e-07, 2.8379e-07, 2.2453e-07, 3.1481e-07, 3.7386e-07, 3.1128e-07,\n 3.9810e-07, 2.8007e-07, 1.8709e-07, 2.0321e-07, 2.4407e-07, 2.8688e-07,\n 2.3561e-07, 1.6466e-07, 2.7246e-07, 4.5844e-07, 2.0854e-07, 3.1558e-07,\n 2.1554e-07, 3.2455e-07, 1.8170e-07, 3.2131e-07, 1.1213e-07, 4.0139e-07,\n 3.7406e-07, 3.8877e-07, 5.3326e-07, 2.1932e-07, 3.4936e-07, 2.6132e-07,\n 3.0075e-07, 2.3937e-07, 1.8154e-07, 1.5911e-07, 3.1457e-07, 1.8362e-07,\n 2.0956e-07, 1.7995e-07, 7.5829e-08, 3.6385e-07, 1.6908e-07, 3.6995e-07,\n 2.8945e-07, 3.1709e-07, 2.7901e-07, 3.8465e-07, 2.1378e-07, 1.9609e-07,\n 3.0732e-07, 2.5855e-07, 5.2280e-07, 3.3978e-07, 3.9229e-07, 1.1932e-07,\n 3.9207e-07, 1.8772e-07, 2.2416e-07, 1.5421e-07, 1.2790e-07, 1.6641e-07,\n 2.1054e-07, 1.3133e-07, 1.6891e-07, 2.5519e-07, 4.0948e-07, 2.9278e-07,\n 2.6721e-07, 4.0309e-07, 3.9818e-07, 3.0512e-07, 1.6214e-07, 5.3810e-07,\n 2.5920e-07, 2.9788e-07, 3.6241e-07, 2.4131e-07, 2.5794e-07, 4.2049e-07,\n 3.4626e-07, 3.2201e-07, 1.2243e-07, 3.7399e-07, 2.0422e-07, 8.4769e-08,\n 3.0668e-07, 3.1214e-07, 1.4302e-07, 2.1261e-07, 2.6143e-07, 1.8730e-07,\n 2.3498e-07, 2.2588e-07, 2.7707e-07, 2.2387e-07, 4.7494e-07, 3.3990e-07,\n 3.7995e-07, 2.1469e-07, 3.8857e-07, 1.7226e-07, 6.9942e-08, 3.6269e-07,\n 2.4580e-07, 2.2036e-07, 3.5808e-07, 2.8428e-07, 2.3868e-07, 1.6717e-07,\n 1.6093e-07, 2.2625e-07, 1.9444e-07, 1.4322e-07, 3.6049e-07, 3.5732e-07,\n 3.2590e-07, 2.9302e-07, 3.0785e-07, 3.0177e-07, 3.2362e-07, 4.1424e-07,\n 3.9192e-07, 3.0503e-07, 3.9918e-07, 3.1583e-07, 4.2948e-07, 1.7299e-07,\n 1.3532e-07, 4.3103e-07, 4.6638e-07, 2.1411e-07, 2.9270e-07, 4.2386e-07,\n 1.9392e-07, 2.2913e-07, 1.5654e-07, 4.9317e-07, 1.3789e-07, 1.0880e-07,\n 2.5604e-07, 2.3620e-07, 2.2603e-09, 2.5170e-07, 1.5616e-07, 2.5975e-07,\n 2.6539e-07, 1.4282e-07, 2.9162e-07, 2.8621e-07, 2.1910e-07, 1.9284e-07,\n 1.9040e-07, 2.5322e-07, 3.3520e-07, 3.7375e-07, 4.1572e-07, 2.2223e-07,\n 3.4386e-07, 2.4089e-07, 2.2263e-07, 2.9529e-07, 3.0464e-07, 2.9579e-07,\n 2.7637e-07, 1.9389e-07, 3.2818e-07, 2.2962e-07, 2.7148e-07, 3.3040e-07,\n 1.6515e-07, 3.5451e-07, 2.4885e-07, 2.3696e-07, 1.2397e-07, 2.0000e-07,\n 2.9807e-07, 2.0003e-07, 1.8887e-07, 2.9758e-07, 2.2182e-07, 3.6397e-07,\n 3.0399e-07, 3.6594e-07, 4.1733e-07, 2.9969e-07, 2.4000e-07, 2.0542e-07,\n 4.1806e-07, 2.3651e-07, 3.5256e-07, 3.6022e-07, 2.7605e-07, 2.7103e-07,\n 1.0017e-07, 2.0294e-07, 2.9107e-07, 3.0693e-07, 4.6967e-07, 3.3497e-07,\n 3.1232e-07, 2.8614e-07, 2.1571e-07, 5.3601e-07, 1.3459e-07, 1.5064e-07,\n 1.7036e-07, 3.6634e-07, 2.3937e-07, 1.4522e-07, 3.3177e-07, 2.2916e-07,\n 2.6331e-07, 2.4922e-07, 2.7352e-07, 3.6734e-07, 1.7392e-07, 2.6433e-07,\n 2.4238e-07, 3.9964e-07, 3.3276e-07, 2.7950e-07, 3.9597e-07, 1.7272e-07,\n 1.0609e-07, 3.6388e-07, 2.3873e-07, 2.4154e-07, 3.5400e-07, 2.4698e-07,\n 1.6338e-07, 2.2714e-07, 3.7924e-07, 3.6481e-07, 2.2449e-07, 1.8338e-07,\n 2.3531e-07, 3.9737e-07, 1.9471e-07, 3.3130e-07, 2.5865e-07, 1.7713e-07,\n 1.3222e-07, 2.6939e-07, 3.6850e-07, 3.1160e-07, 1.6502e-07, 3.3689e-07,\n 1.1478e-07, 2.8709e-07, 2.6123e-07, 1.9589e-07, 4.4381e-07, 1.8994e-07,\n 1.7576e-07, 3.0935e-07, 2.8470e-07, 1.9826e-07, 4.1855e-07, 2.5359e-07,\n 3.6023e-07, 3.5569e-07, 2.7096e-07, 2.0957e-07, 3.3991e-07, 2.7975e-07,\n 3.7429e-07, 1.8173e-07, 9.8321e-08, 2.8260e-07, 2.9627e-07, 2.6396e-07,\n 2.7511e-07, 8.4601e-08, 1.7940e-07, 1.9515e-07, 1.3316e-07, 1.0403e-07,\n 1.9798e-07, 3.0380e-07, 2.3756e-07, 2.1810e-07, 2.4720e-07, 2.7512e-07,\n 3.5669e-07, 1.7089e-07, 2.9254e-07, 2.9211e-07, 2.7510e-07, 2.2915e-07,\n 3.0719e-07, 1.1502e-07, 2.7429e-07, 3.3793e-07, 3.5077e-07, 2.1645e-07,\n 3.6969e-09, 3.8462e-07, 1.5695e-07, 3.6394e-07, 2.5553e-07, 3.9000e-07,\n 4.6180e-07, 1.9430e-07, 2.2096e-07, 2.6487e-07, 1.4734e-07, 2.2269e-07,\n 1.3973e-07, 2.0455e-07, 8.3810e-08, 2.6105e-07, 3.7402e-07, 3.6319e-07,\n 4.2724e-07, 2.9190e-07, 3.3441e-07, 1.2426e-07, 2.3501e-07, 3.4327e-07,\n 2.3907e-07, 2.6177e-07, 3.2895e-07, 3.7386e-07, 1.0504e-07, 1.6915e-07,\n 2.0918e-07, 2.4129e-07, 2.3367e-07, 3.0492e-07, 4.3113e-07, 1.5176e-07,\n 4.0990e-07, 2.1068e-07, 4.5841e-07, 3.1190e-07, 2.7430e-07, 4.3270e-07,\n 2.8060e-07, 3.4266e-07, 3.2365e-07, 2.1732e-07, 2.8285e-07, 3.6567e-07,\n 3.2427e-07, 1.7247e-07, 3.0577e-07, 2.3954e-07, 2.2944e-07, 1.9341e-07,\n 1.3467e-07, 2.6395e-07, 2.1222e-07, 1.4132e-07, 4.4635e-07, 2.6535e-07,\n 1.6347e-07, 2.7150e-07, 1.7992e-07, 3.9549e-07, 3.2034e-07, 2.8901e-07,\n 1.8621e-07, 2.7717e-07, 1.7457e-07, 1.5651e-07, 2.9332e-07, 1.4138e-07,\n 1.1984e-07, 1.8223e-07, 4.0238e-07, 3.0803e-07, 1.5970e-07, 1.6083e-07,\n 4.1018e-07, 2.4732e-07, 1.0750e-07, 3.9827e-07, 2.7106e-07, 3.7496e-07,\n 4.0408e-07, 1.3479e-07, 2.0873e-07, 3.2692e-07, 4.0519e-07, 3.3073e-07,\n 1.4502e-07, 2.7080e-07, 1.0851e-07, 2.2787e-07, 3.2231e-07, 2.3689e-07,\n 2.6508e-07, 3.5950e-07, 4.0663e-07, 1.0502e-07, 1.9308e-07, 3.2943e-07,\n 1.7652e-07, 2.9372e-07, 4.2486e-07, 1.4260e-07, 2.8083e-07, 2.9453e-07,\n 2.6566e-07, 2.0865e-07, 2.2006e-07, 3.6773e-07, 1.6867e-07, 2.8067e-07,\n 4.6259e-07, 2.3024e-07, 2.2337e-07, 2.8255e-07, 3.0705e-07, 3.5043e-07,\n 9.4398e-08, 3.2634e-07, 3.6823e-07, 4.0335e-07, 3.8260e-07, 3.1174e-07,\n 2.2468e-07, 3.6026e-07, 2.7339e-07, 1.1580e-07, 2.6471e-07, 4.3107e-07,\n 1.8181e-07, 2.9640e-07, 4.1116e-07, 2.9477e-07, 2.2260e-07, 3.0659e-07,\n 2.6745e-07, 2.8173e-07, 1.2837e-07, 2.6343e-07, 1.4263e-07, 9.3482e-08,\n 1.7442e-07, 2.7146e-07], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([-2.3097e-05, 9.5465e-05, -8.4887e-05, 1.8510e-04, 1.3161e-04,\n -2.0138e-05, -1.4139e-04, 3.0738e-05, -1.0147e-04, 1.1279e-04,\n 5.3551e-04, -5.0594e-05, 2.7870e-05, -2.8186e-05, -1.8885e-04,\n 1.8148e-04, -1.8156e-05, -1.8771e-05, 1.0264e-04, 4.3141e-05,\n 1.0310e-05, -9.1166e-05, 7.8901e-05, -3.3479e-06, 5.7091e-05,\n 1.1384e-04, -7.9914e-05, 1.9652e-04, -2.5717e-05, 3.2720e-06,\n -6.0814e-06, 4.1105e-06, 4.4543e-05, -1.6229e-05, -3.3145e-05,\n 1.2382e-04, 3.2658e-05, -1.1043e-04, 1.5420e-04, -3.8682e-05,\n -1.1848e-05, 5.8904e-05, 1.2257e-05, -9.3945e-06, 1.8034e-04,\n 9.1726e-05, -5.6052e-45, -5.7588e-05, -4.4043e-05, 1.0230e-04,\n -1.8961e-05, 7.8207e-05, 1.1652e-04, -2.0387e-04, 1.1119e-04,\n -1.0507e-04, -9.7519e-05, -1.4766e-05, 6.2325e-05, 9.5417e-05,\n 1.9512e-04, 1.9354e-04, -7.6378e-05, 1.7317e-05, -7.5884e-05,\n -3.8450e-05, 4.8033e-05, 9.6934e-05, 1.7354e-04, 5.6695e-05,\n -1.0674e-04, -9.1333e-05, 1.1460e-04, -1.6287e-04, 1.4066e-04,\n -3.2969e-04, -1.7241e-04, -8.8311e-05, 7.6578e-05, 2.0743e-05,\n -3.3686e-04, -9.5934e-05, 9.1859e-05, 9.0062e-05, -4.4188e-05,\n 5.8586e-04, -2.1020e-06, -1.5128e-04, 6.2619e-05, 1.3123e-04,\n -1.0706e-04, 3.3948e-04, -9.1203e-05, 2.3025e-05, 6.6483e-05,\n -3.3932e-05, 2.9560e-04, 3.8678e-04, 8.5746e-05, -1.4657e-04,\n 3.5971e-04, 7.8842e-05, -3.0828e-05, 1.7587e-04, -2.1526e-05,\n 5.8349e-05, -8.9016e-05, -1.5025e-04, 6.3920e-06, -3.5173e-05,\n -1.8645e-05, 6.7288e-05, -8.4056e-05, 1.0504e-04, -4.8835e-04,\n 9.3352e-05, 2.5125e-04, -2.2435e-04, 2.8625e-04, 2.9043e-04,\n 1.4384e-04, -1.1013e-04, -7.8196e-04, -3.0329e-05, -1.2110e-04,\n -5.3806e-05, -5.4022e-05, 2.8966e-04, 2.5352e-04, -2.1548e-05,\n -1.6928e-04, 3.4110e-05, -8.5611e-05, -2.3438e-04, 1.0973e-04,\n 2.1551e-05, 3.3693e-04, -1.3017e-04, -1.4229e-04, -1.7197e-04,\n 1.2198e-04, 3.2057e-04, 3.5783e-05, -9.1998e-04, -9.7898e-05,\n -4.7958e-05, -2.3403e-04, 1.7019e-04, -1.2190e-04, -1.2435e-04,\n -2.1264e-04, -7.0273e-05, -4.7520e-05, -3.4855e-05, 1.7900e-04,\n 3.7459e-05, -1.8914e-05, 1.6363e-04, 2.8416e-04, -9.4851e-06,\n -6.3028e-05, -6.1105e-05, -7.1743e-05, 1.0626e-04, 4.5321e-05,\n 3.9030e-06, -1.0642e-07, 8.2713e-05, 1.6905e-04, 1.7992e-04,\n 1.3156e-04, 1.2906e-04, 8.0033e-05, 2.7912e-04, 1.1921e-05,\n 3.6470e-04, 2.8856e-05, -1.0755e-04, -2.6323e-04, 1.1200e-04,\n 1.0333e-05, -5.7287e-05, -1.6896e-04, 1.3798e-04, -3.5929e-04,\n 8.5352e-06, 9.7074e-05, -7.6636e-05, 7.1150e-05, -1.5857e-04,\n -4.2728e-05, 1.3065e-04, 2.5683e-04, 1.0638e-04, -1.5501e-04,\n 2.9864e-05, -7.6376e-04, -1.4754e-04, 1.8833e-04, -1.5678e-04,\n -2.7757e-04, -2.0727e-05, -8.6255e-05, -5.9114e-06, 1.3423e-04,\n -1.0213e-04, 3.3842e-05, 4.7509e-05, -6.0057e-05, 5.8698e-05,\n 1.2088e-04, 1.6870e-05, 5.6052e-45, 2.6568e-04, -2.7662e-05,\n 8.2736e-05, 1.2345e-05, 8.8218e-05, -8.5139e-06, -3.0057e-04,\n 1.6387e-04, -6.1195e-05, -3.4784e-04, 2.3584e-05, 6.3180e-05,\n -3.1708e-04, 3.9810e-04, -2.6866e-06, -4.5599e-05, 8.1166e-07,\n -2.4455e-04, -6.2089e-05, -9.8747e-05, 6.3951e-05, -5.1049e-04,\n -1.6604e-04, -2.6183e-04, 1.9005e-04, -5.2653e-05, 1.1844e-04,\n -1.5078e-04, 1.0557e-04, -5.6954e-05, 1.8391e-05, 1.1196e-05,\n -1.3285e-04, -1.6921e-04, -6.2150e-05, -6.3824e-05, 2.3192e-04,\n -1.4176e-05, -5.5748e-05, 3.4016e-05, -4.3338e-05, 2.5997e-04,\n -1.2537e-04, -5.1138e-05, -4.3932e-05, -1.2502e-04, 3.3399e-05,\n -4.4549e-04, 9.2137e-05, -7.2620e-05, 1.0123e-04, 2.1431e-05,\n 2.0540e-05, 1.9755e-04, -6.9849e-05, 7.0310e-05, -9.9446e-05,\n 1.3434e-05, -1.0451e-04, -7.7842e-05, 1.7967e-04, -6.7510e-05,\n 1.2798e-04, 2.3808e-04, 1.1131e-04, 8.8340e-05, -5.5274e-06,\n 3.2821e-05, -1.6958e-04, 3.7256e-05, 3.1093e-05, 1.2233e-04,\n -2.5047e-04, 3.2305e-05, 5.0378e-05, 1.0068e-04, -1.5496e-04,\n -3.9147e-05, -3.3405e-04, 4.4631e-04, -1.5522e-04, 1.4786e-05,\n -1.7125e-05, 1.8812e-04, 2.0777e-05, 8.9404e-05, -6.4116e-05,\n 6.1663e-06, 1.7845e-04, -2.6063e-04, -7.2249e-05, -9.1835e-05,\n -2.9728e-05, 1.1194e-06, -5.4563e-04, -1.3660e-04, 1.0344e-04,\n -2.5862e-04, 1.8876e-04, 2.7539e-04, -2.9561e-04, 1.7747e-04,\n 4.3308e-05, -5.8373e-05, 2.6540e-05, -1.6754e-04, -5.1464e-05,\n 5.2062e-05, -6.8142e-05, 1.2304e-05, -1.4105e-04, -1.4604e-05,\n 2.8377e-06, -3.8877e-05, -5.3468e-05, 1.5921e-04, -1.0798e-04,\n -1.3162e-04, -1.5323e-04, 8.4145e-05, 1.0660e-04, -5.6187e-04,\n -3.3303e-05, -6.9691e-05, -1.6655e-05, -4.8823e-05, -2.5034e-05,\n 5.9345e-05, -1.3957e-05, 6.7960e-05, 1.4905e-04, 2.0097e-04,\n 3.5805e-04, 1.8035e-04, -5.3022e-06, -3.9822e-05, 5.5282e-04,\n 1.3070e-04, 1.3386e-04, 3.2436e-04, 1.5350e-04, 2.6154e-05,\n 1.4979e-05, -3.8418e-05, -1.9032e-04, 7.4158e-05, 6.3868e-05,\n -6.4798e-05, 9.5889e-06, -7.5351e-05, 5.8865e-06, -1.4279e-05,\n 1.6290e-04, 1.6383e-04, 4.6368e-06, -1.1138e-04, 2.8730e-04,\n 9.0168e-05, 3.9317e-04, -7.7682e-05, -2.2131e-04, -5.3471e-05,\n 7.2889e-05, -4.3777e-05, 1.9659e-04, -2.1230e-04, -3.5902e-04,\n 9.0394e-06, 6.1946e-05, -2.6224e-04, -2.8862e-04, 4.7006e-04,\n 1.3675e-05, 9.7516e-05, -1.8979e-05, -5.3750e-05, 1.3230e-04,\n 2.3560e-06, 8.6359e-05, 2.6651e-05, -1.6681e-05, -1.2589e-05,\n -1.4659e-04, -5.5320e-05, 1.7960e-05, -6.4628e-05, -1.9698e-04,\n 2.0112e-04, 1.1762e-04, -4.8798e-05, 2.2323e-04, 2.1938e-04,\n -9.1546e-05, -1.2086e-04, -1.0658e-04, -1.1210e-04, 1.3857e-04,\n -2.4781e-04, 3.3030e-04, 3.5094e-05, 2.1677e-04, 8.2244e-05,\n -1.7420e-04, 2.6636e-04, 7.7121e-05, 3.5095e-04, -1.4248e-05,\n 2.7391e-06, 4.9044e-06, 6.8081e-05, 4.7585e-05, 3.2127e-04,\n -6.0759e-05, -7.9097e-05, 1.6906e-04, -2.3544e-05, -2.9171e-05,\n -1.6745e-04, 1.5603e-04, 4.6529e-05, -1.1040e-04, 3.6129e-05,\n -1.3145e-04, 5.1521e-05, -1.4818e-04, -4.8436e-05, 1.4861e-04,\n -2.1237e-05, 2.7486e-04, 1.0794e-04, -1.8279e-05, 4.2193e-05,\n -1.3471e-04, 7.8152e-05, 1.3767e-04, 6.3685e-05, -1.7406e-04,\n -1.4160e-04, 5.6353e-05, -1.4992e-05, -1.9234e-05, 2.8758e-05,\n -2.9798e-04, 2.7565e-05, 6.8054e-05, -1.4859e-05, -1.6320e-04,\n -2.6463e-05, 1.3570e-04, -1.1376e-04, 1.6880e-04, 5.2559e-04,\n -7.0014e-05, -1.8083e-04, -6.5876e-05, -4.1378e-05, 5.8489e-05,\n -1.6953e-04, 3.5184e-05, -1.0442e-04, -2.4437e-05, 2.0582e-04,\n 1.6997e-04, -5.2732e-05, -1.0792e-06, -5.5454e-05, -8.3733e-05,\n -1.9213e-05, -8.9905e-05, 7.2450e-05, 1.7878e-04, 1.3550e-04,\n -1.9496e-05, 1.1908e-04, -3.5069e-04, 2.0520e-04, -3.6586e-04,\n -1.0590e-04, 6.5640e-05, -7.6636e-05, -8.0065e-05, 1.2526e-04,\n -5.9940e-05, 2.7638e-04, -1.4376e-04, 1.3936e-05, -5.2534e-05,\n 4.7914e-04, 1.6016e-05, 1.3879e-04, 2.0653e-04, 5.4986e-05,\n -1.4737e-06, 7.5473e-05, 1.4020e-04, -9.5014e-05, 3.4207e-05,\n 9.7805e-05, 2.9363e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.8256e-07, 2.1425e-07, 1.8096e-07, 3.1086e-07, 1.5436e-07, 2.5972e-07,\n 1.4867e-07, 2.7652e-07, 1.7006e-07, 1.7839e-07, 3.9224e-07, 2.4319e-07,\n 1.0806e-07, 2.2200e-07, 1.0709e-07, 2.9199e-07, 2.3372e-07, 3.3276e-07,\n 2.1700e-07, 1.4956e-07, 1.4182e-07, 2.2349e-07, 3.9689e-07, 2.1595e-07,\n 3.0088e-07, 3.0177e-07, 3.0431e-07, 4.0482e-07, 1.2525e-07, 1.2114e-07,\n 3.0681e-07, 3.4064e-07, 3.0594e-07, 4.1693e-07, 1.4970e-07, 2.5318e-07,\n 2.2596e-07, 4.0247e-07, 3.4422e-07, 2.9641e-07, 4.3696e-07, 4.2409e-07,\n 3.1674e-07, 1.5198e-07, 3.0235e-07, 3.3255e-07, 8.9036e-10, 1.9861e-07,\n 3.8588e-07, 2.1598e-07, 2.0388e-07, 1.7505e-07, 3.5890e-07, 2.2420e-07,\n 2.3949e-07, 4.0970e-07, 3.1663e-07, 2.9832e-07, 3.6877e-07, 3.3916e-07,\n 2.4072e-07, 3.1500e-07, 2.7670e-07, 2.9994e-07, 2.0455e-07, 1.4815e-07,\n 2.6749e-07, 2.0407e-07, 2.9794e-07, 1.6954e-07, 2.6729e-07, 1.1179e-07,\n 2.4321e-07, 2.9745e-07, 2.0795e-07, 2.8625e-07, 3.4843e-07, 3.0315e-07,\n 3.7308e-07, 2.8408e-07, 2.0312e-07, 2.0092e-07, 2.3537e-07, 2.5766e-07,\n 2.0972e-07, 1.7113e-07, 2.6095e-07, 4.3421e-07, 2.0788e-07, 2.9926e-07,\n 2.1054e-07, 3.3935e-07, 1.9373e-07, 3.1144e-07, 1.0913e-07, 3.9414e-07,\n 3.6177e-07, 4.0089e-07, 4.9945e-07, 1.9958e-07, 3.4982e-07, 2.5925e-07,\n 3.0261e-07, 2.3794e-07, 1.6194e-07, 1.4839e-07, 3.0522e-07, 1.7743e-07,\n 2.0349e-07, 1.8151e-07, 7.4737e-08, 3.7157e-07, 1.5902e-07, 3.3497e-07,\n 2.8562e-07, 3.2404e-07, 2.5718e-07, 3.6781e-07, 2.0814e-07, 1.8925e-07,\n 3.3441e-07, 2.5547e-07, 5.9334e-07, 3.0809e-07, 3.7081e-07, 1.1751e-07,\n 4.0517e-07, 1.8862e-07, 2.1909e-07, 1.4162e-07, 1.1979e-07, 1.7339e-07,\n 2.1070e-07, 1.3662e-07, 1.5900e-07, 2.3966e-07, 3.8422e-07, 2.7259e-07,\n 2.4649e-07, 4.2156e-07, 3.7968e-07, 3.0587e-07, 1.6132e-07, 6.2563e-07,\n 2.5627e-07, 2.9693e-07, 3.4808e-07, 2.3952e-07, 2.3560e-07, 4.3328e-07,\n 3.5887e-07, 3.1825e-07, 1.2514e-07, 3.5611e-07, 1.9743e-07, 9.0684e-08,\n 3.1427e-07, 2.8987e-07, 1.5878e-07, 1.8582e-07, 2.4428e-07, 1.9156e-07,\n 2.1997e-07, 2.2396e-07, 2.8756e-07, 2.1900e-07, 4.5996e-07, 3.1181e-07,\n 3.7335e-07, 1.8851e-07, 3.9430e-07, 1.5814e-07, 7.1152e-08, 3.3474e-07,\n 2.2523e-07, 2.2151e-07, 3.3801e-07, 2.8776e-07, 2.3862e-07, 1.6238e-07,\n 1.5302e-07, 2.3336e-07, 1.9019e-07, 1.3591e-07, 3.5657e-07, 3.3209e-07,\n 3.1976e-07, 2.7448e-07, 2.9121e-07, 2.9496e-07, 3.1630e-07, 4.0501e-07,\n 3.6675e-07, 2.9018e-07, 4.0665e-07, 3.0313e-07, 4.4880e-07, 1.6705e-07,\n 1.4267e-07, 3.9530e-07, 4.0331e-07, 2.1055e-07, 3.1743e-07, 4.3339e-07,\n 1.8589e-07, 2.1722e-07, 1.3684e-07, 4.7503e-07, 1.2302e-07, 1.0313e-07,\n 2.4157e-07, 2.2642e-07, 6.4589e-10, 2.7511e-07, 1.5819e-07, 2.6258e-07,\n 2.6209e-07, 1.4568e-07, 2.8857e-07, 2.8944e-07, 2.2921e-07, 1.9293e-07,\n 1.8708e-07, 2.5509e-07, 3.2730e-07, 3.7669e-07, 4.1516e-07, 2.1599e-07,\n 3.2565e-07, 2.1794e-07, 2.0155e-07, 2.9262e-07, 3.0381e-07, 2.8204e-07,\n 2.9562e-07, 1.8101e-07, 3.2179e-07, 2.3236e-07, 2.7288e-07, 3.1693e-07,\n 1.4705e-07, 3.3113e-07, 2.3241e-07, 2.2905e-07, 1.1740e-07, 2.0800e-07,\n 3.1191e-07, 1.8555e-07, 1.7903e-07, 2.9601e-07, 2.2307e-07, 3.4995e-07,\n 3.1301e-07, 3.5587e-07, 3.8555e-07, 2.8922e-07, 2.2781e-07, 2.0783e-07,\n 3.7546e-07, 2.3805e-07, 3.5527e-07, 3.5037e-07, 2.8838e-07, 2.6903e-07,\n 9.8683e-08, 2.0528e-07, 2.8616e-07, 2.7834e-07, 4.4921e-07, 3.2371e-07,\n 2.9430e-07, 3.0961e-07, 1.9665e-07, 4.7703e-07, 1.3199e-07, 1.4511e-07,\n 1.6614e-07, 3.3142e-07, 2.3435e-07, 1.3907e-07, 3.2215e-07, 2.1930e-07,\n 2.4806e-07, 2.3886e-07, 2.7431e-07, 3.8069e-07, 1.5978e-07, 2.5274e-07,\n 2.3241e-07, 3.9467e-07, 3.3011e-07, 2.9075e-07, 3.8669e-07, 1.6693e-07,\n 1.0705e-07, 3.2581e-07, 2.2301e-07, 2.4748e-07, 3.3591e-07, 2.5318e-07,\n 1.7251e-07, 2.2753e-07, 3.6364e-07, 3.8079e-07, 2.1385e-07, 1.7824e-07,\n 2.1364e-07, 4.4789e-07, 1.8411e-07, 3.2213e-07, 2.4491e-07, 1.7791e-07,\n 1.3328e-07, 2.6314e-07, 3.6003e-07, 3.1611e-07, 1.6867e-07, 3.3040e-07,\n 1.0868e-07, 2.9242e-07, 2.3903e-07, 1.7826e-07, 4.1388e-07, 1.8012e-07,\n 1.6430e-07, 3.0997e-07, 2.7268e-07, 1.8449e-07, 4.0652e-07, 2.5353e-07,\n 3.5883e-07, 3.4857e-07, 2.4792e-07, 1.9784e-07, 3.7018e-07, 2.7227e-07,\n 3.7122e-07, 1.7250e-07, 9.2883e-08, 2.7772e-07, 2.9366e-07, 2.6413e-07,\n 2.6411e-07, 9.2334e-08, 1.7285e-07, 1.8836e-07, 1.3916e-07, 1.0605e-07,\n 1.9253e-07, 2.9649e-07, 2.1877e-07, 2.1372e-07, 2.4435e-07, 2.7261e-07,\n 3.4448e-07, 1.5889e-07, 2.6325e-07, 3.0022e-07, 2.6954e-07, 2.2011e-07,\n 2.7123e-07, 1.1561e-07, 2.6991e-07, 3.4243e-07, 3.4209e-07, 2.0396e-07,\n 2.4188e-08, 3.6758e-07, 1.5635e-07, 3.6905e-07, 2.6259e-07, 3.8687e-07,\n 4.7996e-07, 1.8078e-07, 2.1399e-07, 2.5266e-07, 1.4999e-07, 2.1441e-07,\n 1.4788e-07, 2.1741e-07, 9.1990e-08, 2.6614e-07, 3.7700e-07, 3.6182e-07,\n 3.6236e-07, 2.6526e-07, 3.3645e-07, 1.1933e-07, 2.2411e-07, 3.3755e-07,\n 2.1433e-07, 2.4179e-07, 3.1439e-07, 3.4441e-07, 1.0257e-07, 1.6522e-07,\n 1.9009e-07, 2.2387e-07, 2.0718e-07, 2.9718e-07, 4.3104e-07, 1.5074e-07,\n 3.9462e-07, 1.9716e-07, 4.2145e-07, 2.9546e-07, 2.5878e-07, 4.3282e-07,\n 2.6977e-07, 3.2924e-07, 3.1842e-07, 2.2467e-07, 2.6467e-07, 3.6254e-07,\n 3.0816e-07, 1.7705e-07, 3.1104e-07, 2.2346e-07, 2.1714e-07, 1.9056e-07,\n 1.3208e-07, 2.5582e-07, 2.1585e-07, 1.4291e-07, 4.6411e-07, 2.5803e-07,\n 1.5700e-07, 2.7100e-07, 1.7117e-07, 3.7821e-07, 3.3190e-07, 2.8111e-07,\n 1.8160e-07, 2.7962e-07, 1.7832e-07, 1.5373e-07, 2.8135e-07, 1.4392e-07,\n 1.2374e-07, 1.5946e-07, 3.9163e-07, 2.9864e-07, 1.5609e-07, 1.6107e-07,\n 3.6032e-07, 2.3995e-07, 1.0812e-07, 3.3769e-07, 2.4657e-07, 3.6158e-07,\n 3.8523e-07, 1.2528e-07, 2.2048e-07, 3.0105e-07, 4.2159e-07, 3.1306e-07,\n 1.5309e-07, 2.6877e-07, 1.0997e-07, 2.1401e-07, 2.8960e-07, 2.2950e-07,\n 2.6709e-07, 3.3779e-07, 4.1425e-07, 9.7348e-08, 1.9150e-07, 3.3333e-07,\n 1.7449e-07, 2.9185e-07, 4.2516e-07, 1.4202e-07, 2.8163e-07, 2.7556e-07,\n 2.6623e-07, 1.8766e-07, 2.1143e-07, 3.4654e-07, 1.7227e-07, 2.8115e-07,\n 4.5815e-07, 2.0989e-07, 1.9896e-07, 2.8303e-07, 3.1401e-07, 3.3674e-07,\n 9.0994e-08, 3.1324e-07, 3.4415e-07, 3.7619e-07, 3.6785e-07, 3.0748e-07,\n 2.2138e-07, 3.8593e-07, 2.5920e-07, 1.2004e-07, 2.4800e-07, 4.2623e-07,\n 1.6435e-07, 2.9217e-07, 4.2009e-07, 2.9591e-07, 2.2528e-07, 3.0166e-07,\n 2.8132e-07, 2.9005e-07, 1.2789e-07, 2.5149e-07, 1.3761e-07, 8.1217e-08,\n 1.5388e-07, 2.6906e-07], device='cuda:0')" }, "4": { - "step": "tensor(11268.)", - "exp_avg": "tensor([[ 9.7181e-06, 1.2194e-05, -1.2142e-05, ..., -1.0210e-06,\n -3.8989e-07, -1.2667e-05],\n [-7.6793e-06, 5.1672e-06, 8.0782e-06, ..., 1.0473e-06,\n -1.2139e-06, -3.1953e-05],\n [ 7.1169e-06, 8.0726e-06, -5.6278e-06, ..., -4.5993e-07,\n -6.7431e-07, 5.4280e-05],\n ...,\n [-1.1072e-05, 1.0498e-05, -1.4943e-05, ..., 3.9305e-07,\n 1.6919e-06, 1.3458e-05],\n [-1.6084e-05, 4.0618e-06, 6.6641e-06, ..., 8.7840e-07,\n 6.2655e-06, 1.7319e-05],\n [ 2.9388e-05, 2.0403e-05, 4.6667e-05, ..., 1.2119e-06,\n 2.7194e-06, -1.5418e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.5980e-09, 8.4700e-10, 9.2848e-10, ..., 6.2271e-11, 3.6013e-10,\n 3.0340e-09],\n [4.9079e-09, 2.8522e-09, 3.1050e-09, ..., 7.1686e-11, 6.0037e-10,\n 4.5012e-09],\n [3.1506e-09, 2.1573e-09, 1.5505e-09, ..., 1.7339e-10, 1.0334e-09,\n 2.3982e-09],\n ...,\n [1.9848e-08, 1.8182e-09, 4.2952e-09, ..., 8.7813e-11, 2.1629e-09,\n 4.6721e-09],\n [4.2066e-09, 1.5965e-09, 3.3846e-09, ..., 9.4656e-11, 3.5054e-09,\n 5.9671e-09],\n [5.7673e-09, 2.5246e-09, 5.6542e-09, ..., 1.5631e-10, 5.8562e-10,\n 5.7798e-09]], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([[-6.2645e-06, 2.7620e-05, -6.3752e-06, ..., -2.2096e-07,\n 1.1970e-05, 8.1451e-06],\n [-5.5997e-06, -2.0422e-05, 5.4662e-06, ..., -2.0322e-06,\n 3.8756e-06, -6.0927e-06],\n [-1.9815e-06, -3.1985e-05, 1.1588e-05, ..., 5.0815e-07,\n -2.6333e-05, 1.2598e-06],\n ...,\n [-3.2060e-05, -3.0144e-06, 4.5311e-06, ..., -1.0798e-06,\n -5.0732e-05, -9.6518e-06],\n [ 7.6311e-06, -1.6229e-06, -7.7973e-06, ..., -1.1816e-06,\n -6.2412e-05, 9.8432e-06],\n [-1.5716e-05, -9.9179e-05, -1.7823e-05, ..., -3.2049e-06,\n 6.9133e-06, -3.0023e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4652e-09, 7.9788e-10, 8.8448e-10, ..., 5.6255e-11, 3.4621e-10,\n 2.6910e-09],\n [4.6996e-09, 2.7530e-09, 3.1170e-09, ..., 6.2600e-11, 5.3921e-10,\n 4.2599e-09],\n [3.0225e-09, 2.1399e-09, 1.4565e-09, ..., 1.4477e-10, 1.0545e-09,\n 2.2051e-09],\n ...,\n [1.8733e-08, 1.7064e-09, 4.4232e-09, ..., 8.1151e-11, 2.0680e-09,\n 4.5656e-09],\n [4.0706e-09, 1.4898e-09, 3.2587e-09, ..., 7.6016e-11, 3.5762e-09,\n 5.8001e-09],\n [5.5760e-09, 3.0821e-09, 5.4045e-09, ..., 1.3457e-10, 5.7061e-10,\n 6.1333e-09]], device='cuda:0')" }, "5": { - "step": "tensor(10016.)", - "exp_avg": "tensor([[-1.8016e-06, -1.5406e-08, -1.0055e-06, ..., -2.3729e-08,\n 0.0000e+00, 1.1375e-07],\n [ 7.0122e-07, -1.4908e-08, -1.0336e-06, ..., -3.0063e-07,\n 0.0000e+00, 6.1195e-08],\n [ 7.5080e-10, -3.7815e-07, 2.5921e-07, ..., 1.1794e-06,\n 0.0000e+00, -1.8667e-06],\n ...,\n [ 6.9816e-08, 2.0089e-07, -6.9579e-07, ..., 6.4976e-08,\n 0.0000e+00, 7.7260e-07],\n [-5.5986e-08, -5.8681e-08, 5.0926e-07, ..., -4.2409e-07,\n 0.0000e+00, -6.7658e-08],\n [ 3.6202e-07, 3.0465e-07, -1.1973e-07, ..., -3.3080e-06,\n 0.0000e+00, 4.7525e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.8573e-11, 4.0773e-12, 1.5021e-11, ..., 2.7107e-12, 0.0000e+00,\n 6.5522e-12],\n [1.3914e-11, 3.6121e-12, 1.4838e-11, ..., 2.0370e-11, 0.0000e+00,\n 8.6740e-12],\n [1.2513e-11, 5.9873e-12, 8.4755e-12, ..., 5.4414e-11, 0.0000e+00,\n 6.8138e-11],\n ...,\n [3.7293e-11, 7.4319e-12, 8.3803e-12, ..., 1.4982e-12, 0.0000e+00,\n 3.4745e-12],\n [8.3990e-13, 1.5232e-12, 5.2959e-11, ..., 6.9723e-11, 0.0000e+00,\n 1.2371e-11],\n [2.3282e-12, 1.0610e-11, 7.4924e-12, ..., 2.2800e-10, 0.0000e+00,\n 2.5137e-11]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-1.8525e-06, 2.0083e-07, 3.1452e-07, ..., 7.3335e-07,\n 0.0000e+00, 5.2947e-07],\n [ 1.7694e-06, -9.4981e-08, 5.4940e-07, ..., -9.8179e-07,\n 0.0000e+00, 4.1946e-08],\n [ 1.5789e-07, -1.6118e-07, 1.0848e-06, ..., 3.7259e-06,\n 0.0000e+00, 2.9067e-06],\n ...,\n [-4.1457e-06, -1.7088e-07, -2.2955e-07, ..., 3.0773e-07,\n 0.0000e+00, 9.7976e-07],\n [-3.9881e-08, -1.9297e-07, -3.0809e-07, ..., -6.9249e-07,\n 0.0000e+00, 6.4987e-08],\n [ 3.2264e-07, 1.2982e-07, 1.1459e-07, ..., -1.1472e-06,\n 0.0000e+00, -1.5853e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7611e-11, 3.6099e-12, 1.5492e-11, ..., 2.4540e-12, 0.0000e+00,\n 6.1661e-12],\n [1.3127e-11, 5.1008e-12, 1.4483e-11, ..., 2.0982e-11, 0.0000e+00,\n 8.0593e-12],\n [1.1453e-11, 8.4014e-12, 8.0252e-12, ..., 5.5600e-11, 0.0000e+00,\n 6.6102e-11],\n ...,\n [3.4881e-11, 7.9718e-12, 7.6355e-12, ..., 1.5760e-12, 0.0000e+00,\n 3.5873e-12],\n [9.0826e-13, 1.3714e-12, 4.4607e-11, ..., 6.1792e-11, 0.0000e+00,\n 1.2949e-11],\n [2.1942e-12, 9.8990e-12, 7.6443e-12, ..., 2.1875e-10, 0.0000e+00,\n 2.6375e-11]], device='cuda:0')" }, "6": { - "step": "tensor(10016.)", - "exp_avg": "tensor([ 7.6263e-06, -9.9588e-06, -3.0441e-06, ..., 2.1436e-06,\n 8.0679e-06, -1.3797e-05], device='cuda:0')", - "exp_avg_sq": "tensor([3.0652e-09, 2.0270e-09, 2.7295e-09, ..., 1.3572e-09, 1.7728e-09,\n 1.9715e-09], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([ 8.4450e-06, -1.1573e-05, 2.1310e-05, ..., -5.5773e-06,\n 2.5801e-06, 1.5663e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.8729e-09, 2.0459e-09, 2.6377e-09, ..., 1.2660e-09, 1.7396e-09,\n 1.8250e-09], device='cuda:0')" }, "7": { - "step": "tensor(10016.)", - "exp_avg": "tensor([[ 9.6924e-08, -6.4704e-07, -1.3257e-07, ..., 2.6272e-07,\n 2.8101e-07, -1.1390e-06],\n [-1.4681e-07, -3.2878e-07, -2.7861e-07, ..., -7.1975e-07,\n 3.4175e-08, -2.1453e-06],\n [-7.0761e-07, -3.8526e-07, 2.4708e-07, ..., 7.3957e-07,\n 1.3427e-07, 4.4255e-07],\n ...,\n [ 1.9646e-06, 1.9400e-06, 3.0984e-07, ..., 3.8027e-07,\n -2.1975e-07, -2.1944e-06],\n [ 2.2285e-07, -1.6522e-07, 1.1065e-07, ..., -8.9064e-09,\n -3.8318e-08, -2.7100e-07],\n [ 7.0027e-07, -1.1210e-06, 3.2453e-08, ..., -1.8131e-06,\n 1.7138e-07, 4.4063e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.0445e-12, 2.4391e-12, 3.0524e-12, ..., 1.8594e-12, 2.7767e-12,\n 3.0334e-12],\n [7.5429e-12, 3.8529e-12, 4.2821e-12, ..., 4.7988e-12, 5.4369e-12,\n 6.0973e-12],\n [7.3855e-12, 4.5987e-12, 4.2149e-12, ..., 3.6965e-12, 4.7375e-12,\n 7.9860e-12],\n ...,\n [6.8776e-12, 8.3986e-12, 7.5646e-12, ..., 5.0598e-12, 7.8171e-12,\n 6.0397e-12],\n [6.3193e-12, 3.8484e-12, 5.1846e-12, ..., 2.9339e-12, 4.9425e-12,\n 6.5392e-12],\n [9.1869e-12, 4.1080e-12, 6.4307e-12, ..., 3.1447e-11, 5.3341e-12,\n 3.4032e-12]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 4.5972e-07, 3.1196e-08, 5.7611e-07, ..., 8.5632e-08,\n 2.1753e-08, -4.2801e-07],\n [ 1.6404e-06, 2.7880e-07, -6.3306e-07, ..., 6.9177e-07,\n 3.6146e-07, -1.0307e-06],\n [ 4.5100e-07, 5.2791e-07, -1.6244e-07, ..., -1.9920e-07,\n 9.6318e-07, -1.0921e-06],\n ...,\n [-7.4474e-08, -2.5112e-06, 1.7036e-07, ..., -5.1389e-08,\n -3.4643e-07, 4.7856e-07],\n [ 4.3694e-07, -1.3849e-06, -3.8582e-07, ..., -1.0422e-07,\n 5.3094e-08, -2.7910e-07],\n [-5.7219e-08, 5.0992e-07, 3.4745e-08, ..., 7.7025e-07,\n -2.7766e-08, -1.0441e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.2503e-12, 2.0841e-12, 2.7646e-12, ..., 1.8498e-12, 2.7767e-12,\n 2.9103e-12],\n [7.1288e-12, 3.6513e-12, 3.8564e-12, ..., 4.4297e-12, 5.1838e-12,\n 6.0143e-12],\n [6.6456e-12, 4.2578e-12, 3.7655e-12, ..., 3.2873e-12, 4.3643e-12,\n 7.3331e-12],\n ...,\n [5.9260e-12, 7.8217e-12, 6.7143e-12, ..., 4.9780e-12, 7.2787e-12,\n 5.6242e-12],\n [5.6961e-12, 3.5072e-12, 4.5105e-12, ..., 2.6611e-12, 4.2406e-12,\n 6.4120e-12],\n [8.5862e-12, 3.6779e-12, 5.7739e-12, ..., 3.1581e-11, 5.1929e-12,\n 3.1326e-12]], device='cuda:0')" }, "32": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.5879e-09], device='cuda:0')" + "exp_avg_sq": "tensor([4.5374e-10], device='cuda:0')" }, "33": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.4454e-11, 7.0263e-11, 2.1396e-11], device='cuda:0')" + "exp_avg_sq": "tensor([4.1304e-12, 2.0078e-11, 6.1141e-12], device='cuda:0')" }, "34": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([8.6257e-08, 7.8203e-10, 1.0563e-09, 1.0961e-09, 1.2567e-09, 1.4436e-09,\n 1.1477e-09, 8.4994e-10, 1.0736e-09, 9.5401e-10], device='cuda:0')" + "exp_avg_sq": "tensor([2.4649e-08, 2.2347e-10, 3.0183e-10, 3.1322e-10, 3.5911e-10, 4.1252e-10,\n 3.2797e-10, 2.4288e-10, 3.0678e-10, 2.7262e-10], device='cuda:0')" }, "36": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3321e-16, 6.0028e-17, 8.9187e-17, ..., 3.8986e-16, 0.0000e+00,\n 1.3158e-16],\n [2.3980e-17, 5.7974e-17, 1.7437e-16, ..., 1.9278e-16, 0.0000e+00,\n 7.7390e-17],\n [2.1520e-14, 5.0917e-15, 2.8291e-13, ..., 9.0703e-14, 0.0000e+00,\n 2.5309e-13],\n ...,\n [5.9977e-15, 2.4943e-16, 4.7238e-14, ..., 2.6361e-14, 0.0000e+00,\n 7.0928e-14],\n [1.6166e-16, 5.8172e-17, 1.7097e-17, ..., 6.3477e-16, 0.0000e+00,\n 3.2731e-16],\n [5.4539e-18, 1.0234e-16, 1.3369e-17, ..., 3.4277e-17, 0.0000e+00,\n 1.3963e-16]], device='cuda:0')" + "exp_avg_sq": "tensor([[3.8067e-17, 1.7153e-17, 2.5486e-17, ..., 1.1141e-16, 0.0000e+00,\n 3.7600e-17],\n [6.8525e-18, 1.6567e-17, 4.9828e-17, ..., 5.5088e-17, 0.0000e+00,\n 2.2115e-17],\n [6.1496e-15, 1.4550e-15, 8.0845e-14, ..., 2.5919e-14, 0.0000e+00,\n 7.2322e-14],\n ...,\n [1.7139e-15, 7.1278e-17, 1.3499e-14, ..., 7.5328e-15, 0.0000e+00,\n 2.0268e-14],\n [4.6196e-17, 1.6623e-17, 4.8855e-18, ..., 1.8139e-16, 0.0000e+00,\n 9.3531e-17],\n [1.5585e-18, 2.9243e-17, 3.8203e-18, ..., 9.7949e-18, 0.0000e+00,\n 3.9900e-17]], device='cuda:0')" }, "37": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.3657e-13, 8.1648e-14, 3.0788e-10, 4.0853e-13, 1.4861e-10, 5.1182e-11,\n 9.7922e-12, 1.1237e-11, 2.8396e-10, 1.0391e-12, 1.4437e-10, 1.7567e-11,\n 1.5572e-13, 3.2480e-12, 2.2930e-14, 1.6471e-12, 2.3498e-11, 2.6611e-11,\n 1.7205e-12, 2.0663e-12, 2.5202e-11, 1.1627e-12, 1.7526e-11, 1.0440e-11,\n 8.8234e-12, 8.7906e-11, 1.6709e-11, 1.4549e-11, 1.1072e-11, 2.2068e-10,\n 8.1075e-11, 5.0170e-11, 5.8443e-10, 9.5044e-13, 7.8304e-12, 1.7514e-12,\n 2.4349e-11, 1.0821e-12, 9.8285e-11, 2.5959e-12, 9.7329e-11, 1.6598e-12,\n 5.1702e-12, 9.2151e-11, 1.5299e-13, 5.5839e-12, 8.6386e-12, 8.6132e-12,\n 1.7591e-12, 6.2040e-11, 2.1437e-11, 2.2987e-11, 1.5399e-10, 1.0071e-11,\n 1.7111e-12, 1.2331e-11, 2.4589e-15, 6.0053e-12, 2.6452e-11, 2.5931e-10,\n 3.1971e-11, 2.8364e-12, 9.0414e-14, 9.6561e-12, 1.0972e-11, 1.4513e-10,\n 1.3243e-12, 9.7238e-12, 3.1913e-12, 2.2306e-11, 1.3654e-11, 1.1910e-11,\n 3.6747e-12, 5.4356e-11, 7.6407e-11, 4.3146e-11, 5.2349e-11, 1.2640e-11,\n 3.7801e-12, 1.0452e-13, 1.2945e-11, 5.0294e-12, 1.2935e-12, 7.2839e-12,\n 1.0255e-11, 3.2227e-14, 1.0703e-10, 1.7175e-11, 3.1335e-12, 1.4005e-13,\n 1.0042e-12, 1.0249e-11, 5.8587e-11, 9.1721e-11, 8.5936e-11, 6.5258e-12,\n 7.6946e-11, 3.2258e-11, 2.2223e-11, 3.2030e-12, 9.4350e-12, 3.9598e-13,\n 1.6862e-12, 1.0110e-10, 4.0627e-11, 7.0965e-11, 2.3126e-11, 2.2177e-13,\n 9.5135e-11, 7.9481e-12, 1.3793e-11, 1.9073e-12, 2.0202e-11, 9.8203e-12,\n 9.0911e-12, 4.2984e-10, 3.1391e-13, 1.4507e-11, 2.7945e-12, 1.3431e-10,\n 1.3680e-10, 1.8668e-11, 4.2722e-11, 1.6830e-12, 2.1097e-12, 3.6858e-11,\n 1.9910e-11, 2.2491e-11, 3.0112e-12, 7.0543e-11, 5.5671e-14, 1.5905e-12,\n 3.8347e-10, 3.4962e-11, 6.5869e-13, 7.5870e-12, 2.3759e-11, 9.4561e-12,\n 1.5560e-10, 2.9971e-10, 7.0052e-12, 2.1673e-11, 1.8403e-12, 3.0367e-13,\n 3.3538e-12, 1.5195e-11, 1.5986e-11, 3.0144e-11, 1.1359e-12, 6.4256e-11,\n 5.7506e-13, 1.5820e-12, 5.2782e-11, 2.3538e-11, 1.9873e-11, 1.1480e-10,\n 1.6574e-12, 8.8077e-11, 7.6241e-13, 4.1159e-12, 9.8763e-14, 7.1011e-11,\n 6.5905e-14, 5.3617e-12, 9.1435e-11, 5.1898e-11, 2.4444e-12, 3.2100e-11,\n 2.7323e-13, 1.5998e-12, 6.8572e-12, 3.8047e-11, 9.0304e-11, 3.1797e-11,\n 1.9206e-11, 6.5352e-12, 9.4521e-11, 1.1452e-10, 2.7441e-13, 2.0995e-14,\n 2.0852e-13, 3.4123e-11, 2.7497e-12, 1.0402e-12, 3.5738e-12, 1.3343e-10,\n 8.3065e-12, 1.6371e-13, 2.9747e-12, 4.9275e-12, 5.8162e-11, 3.4383e-13,\n 5.4401e-11, 1.0865e-10, 3.2310e-11, 8.7993e-12, 2.6356e-12, 2.9763e-11,\n 8.0501e-12, 3.3685e-11, 8.9990e-12, 5.4121e-13, 2.8173e-10, 5.5485e-13,\n 1.9312e-11, 3.6168e-14, 2.1776e-11, 7.2436e-11, 1.9042e-13, 4.9505e-12,\n 8.3264e-11, 5.0326e-12, 3.8384e-11, 2.5836e-10, 2.3817e-10, 2.6256e-11,\n 3.9191e-13, 1.1958e-11, 6.9652e-11, 1.4318e-13, 1.6438e-11, 2.8964e-10,\n 8.9879e-12, 1.5737e-11, 5.2105e-11, 6.7030e-12, 2.5647e-12, 2.3414e-11,\n 1.4107e-11, 1.1762e-11, 6.6610e-12, 2.7522e-11, 1.2359e-11, 6.5834e-12,\n 2.8587e-11, 4.5268e-12, 9.1673e-12, 1.1482e-11, 2.7916e-11, 4.9178e-13,\n 1.3820e-10, 4.7761e-12, 1.2968e-12, 4.7138e-11, 1.6173e-11, 2.4199e-13,\n 2.5199e-11, 1.6763e-10, 1.2473e-10, 1.8279e-10, 6.1373e-12, 2.5758e-11,\n 1.9585e-11, 8.5332e-11, 6.7589e-13, 8.5014e-14], device='cuda:0')" + "exp_avg_sq": "tensor([6.7602e-14, 2.3332e-14, 8.7980e-11, 1.1674e-13, 4.2467e-11, 1.4626e-11,\n 2.7982e-12, 3.2110e-12, 8.1145e-11, 2.9692e-13, 4.1254e-11, 5.0198e-12,\n 4.4499e-14, 9.2815e-13, 6.5526e-15, 4.7067e-13, 6.7148e-12, 7.6044e-12,\n 4.9165e-13, 5.9047e-13, 7.2017e-12, 3.3226e-13, 5.0081e-12, 2.9832e-12,\n 2.5213e-12, 2.5120e-11, 4.7747e-12, 4.1576e-12, 3.1639e-12, 6.3060e-11,\n 2.3168e-11, 1.4336e-11, 1.6701e-10, 2.7160e-13, 2.2376e-12, 5.0049e-13,\n 6.9579e-12, 3.0923e-13, 2.8086e-11, 7.4181e-13, 2.7813e-11, 4.7430e-13,\n 1.4774e-12, 2.6333e-11, 4.3718e-14, 1.5957e-12, 2.4685e-12, 2.4613e-12,\n 5.0269e-13, 1.7728e-11, 6.1258e-12, 6.5688e-12, 4.4004e-11, 2.8779e-12,\n 4.8896e-13, 3.5236e-12, 7.0266e-16, 1.7161e-12, 7.5588e-12, 7.4101e-11,\n 9.1360e-12, 8.1054e-13, 2.5836e-14, 2.7593e-12, 3.1355e-12, 4.1473e-11,\n 3.7844e-13, 2.7786e-12, 9.1194e-13, 6.3740e-12, 3.9018e-12, 3.4032e-12,\n 1.0501e-12, 1.5533e-11, 2.1834e-11, 1.2329e-11, 1.4959e-11, 3.6119e-12,\n 1.0802e-12, 2.9868e-14, 3.6992e-12, 1.4372e-12, 3.6963e-13, 2.0814e-12,\n 2.9306e-12, 9.2092e-15, 3.0585e-11, 4.9078e-12, 8.9542e-13, 4.0021e-14,\n 2.8695e-13, 2.9288e-12, 1.6742e-11, 2.6210e-11, 2.4557e-11, 1.8648e-12,\n 2.1988e-11, 9.2180e-12, 6.3504e-12, 9.1528e-13, 2.6961e-12, 1.1315e-13,\n 4.8186e-13, 2.8891e-11, 1.1609e-11, 2.0279e-11, 6.6085e-12, 6.3373e-14,\n 2.7186e-11, 2.2712e-12, 3.9414e-12, 5.4501e-13, 5.7729e-12, 2.8062e-12,\n 2.5978e-12, 1.2283e-10, 8.9703e-14, 4.1454e-12, 7.9854e-13, 3.8382e-11,\n 3.9091e-11, 5.3346e-12, 1.2208e-11, 4.8093e-13, 6.0287e-13, 1.0532e-11,\n 5.6894e-12, 6.4270e-12, 8.6047e-13, 2.0158e-11, 1.5908e-14, 4.5451e-13,\n 1.0958e-10, 9.9906e-12, 1.8822e-13, 2.1680e-12, 6.7894e-12, 2.7022e-12,\n 4.4465e-11, 8.5645e-11, 2.0018e-12, 6.1932e-12, 5.2587e-13, 8.6776e-14,\n 9.5838e-13, 4.3421e-12, 4.5682e-12, 8.6140e-12, 3.2458e-13, 1.8362e-11,\n 1.6433e-13, 4.5208e-13, 1.5083e-11, 6.7263e-12, 5.6788e-12, 3.2804e-11,\n 4.7362e-13, 2.5169e-11, 2.1787e-13, 1.1762e-12, 2.8222e-14, 2.0292e-11,\n 1.8833e-14, 1.5322e-12, 2.6128e-11, 1.4830e-11, 6.9850e-13, 9.1728e-12,\n 7.8077e-14, 4.5716e-13, 1.9595e-12, 1.0872e-11, 2.5805e-11, 9.0863e-12,\n 5.4882e-12, 1.8675e-12, 2.7010e-11, 3.2724e-11, 7.8415e-14, 5.9994e-15,\n 5.9588e-14, 9.7509e-12, 7.8574e-13, 2.9725e-13, 1.0212e-12, 3.8129e-11,\n 2.3737e-12, 4.6780e-14, 8.5004e-13, 1.4081e-12, 1.6620e-11, 9.8251e-14,\n 1.5546e-11, 3.1049e-11, 9.2328e-12, 2.5145e-12, 7.5315e-13, 8.5052e-12,\n 2.3004e-12, 9.6258e-12, 2.5715e-12, 1.5466e-13, 8.0506e-11, 1.5855e-13,\n 5.5186e-12, 1.0335e-14, 6.2226e-12, 2.0699e-11, 5.4415e-14, 1.4146e-12,\n 2.3793e-11, 1.4381e-12, 1.0969e-11, 7.3829e-11, 6.8060e-11, 7.5029e-12,\n 1.1199e-13, 3.4170e-12, 1.9903e-11, 4.0915e-14, 4.6974e-12, 8.2767e-11,\n 2.5684e-12, 4.4970e-12, 1.4890e-11, 1.9154e-12, 7.3287e-13, 6.6907e-12,\n 4.0311e-12, 3.3611e-12, 1.9034e-12, 7.8647e-12, 3.5316e-12, 1.8812e-12,\n 8.1689e-12, 1.2936e-12, 2.6196e-12, 3.2810e-12, 7.9772e-12, 1.4053e-13,\n 3.9491e-11, 1.3648e-12, 3.7057e-13, 1.3470e-11, 4.6216e-12, 6.9149e-14,\n 7.2008e-12, 4.7901e-11, 3.5642e-11, 5.2234e-11, 1.7538e-12, 7.3604e-12,\n 5.5967e-12, 2.4384e-11, 1.9314e-13, 2.4293e-14], device='cuda:0')" }, "38": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.3147e-16, 6.0953e-19, 5.9147e-13, 2.0206e-15, 3.1462e-13, 9.9025e-14,\n 1.7258e-14, 2.5199e-14, 6.5353e-13, 2.5850e-15, 5.1657e-13, 3.0095e-14,\n 9.4167e-16, 4.0583e-15, 7.3693e-17, 5.9659e-15, 5.2175e-14, 7.3216e-14,\n 6.2422e-15, 4.5115e-15, 4.0982e-14, 2.8578e-15, 2.3214e-14, 1.7951e-14,\n 1.6521e-14, 1.0944e-13, 2.5627e-14, 2.6042e-14, 6.3356e-14, 6.4048e-13,\n 1.0007e-13, 8.8064e-14, 1.1045e-12, 1.3950e-15, 1.3622e-14, 6.6366e-15,\n 3.0964e-14, 3.2563e-15, 2.4907e-13, 3.6460e-15, 2.7432e-13, 5.6539e-15,\n 1.5225e-14, 1.2241e-13, 1.5342e-16, 6.1474e-15, 1.3587e-14, 1.2366e-14,\n 4.8070e-15, 1.4771e-13, 2.8317e-14, 6.0427e-14, 4.1665e-13, 1.4268e-14,\n 2.7978e-15, 1.8292e-14, 1.4788e-16, 7.7197e-15, 3.7152e-14, 7.5391e-13,\n 5.2534e-14, 4.4441e-15, 8.9014e-19, 2.1602e-14, 1.9725e-14, 4.0204e-13,\n 3.3519e-15, 1.3503e-14, 6.4974e-15, 2.8815e-14, 2.2983e-14, 2.1644e-14,\n 3.1931e-15, 1.0906e-13, 1.3169e-13, 6.5695e-14, 1.4497e-13, 1.9306e-14,\n 9.9591e-15, 2.7162e-17, 2.2859e-14, 7.2610e-15, 2.5022e-15, 9.1388e-15,\n 1.5011e-14, 9.5740e-17, 1.6306e-13, 4.1816e-14, 7.0667e-15, 2.5211e-17,\n 2.4362e-15, 1.7185e-14, 1.3186e-13, 2.5139e-13, 1.0380e-13, 8.1174e-15,\n 1.0774e-13, 4.7347e-14, 3.1629e-14, 1.4075e-14, 1.3095e-14, 5.7097e-16,\n 2.8092e-15, 2.0224e-13, 1.1818e-13, 1.3698e-13, 3.3444e-14, 7.4781e-16,\n 3.4123e-13, 1.4531e-14, 3.0891e-14, 4.0326e-15, 3.2355e-14, 1.2307e-14,\n 1.6976e-14, 1.1071e-12, 7.4765e-16, 1.4684e-14, 1.4628e-14, 4.2293e-13,\n 2.3616e-13, 2.5685e-14, 5.4109e-14, 1.4352e-14, 6.0179e-15, 7.9050e-14,\n 4.0604e-14, 5.2961e-14, 5.8439e-15, 2.2996e-13, 9.2494e-16, 3.1731e-15,\n 1.0410e-12, 3.1769e-14, 9.3426e-16, 1.0373e-14, 8.1270e-14, 1.3499e-14,\n 4.4223e-13, 9.6393e-13, 1.3930e-14, 4.6195e-14, 7.0133e-15, 1.2811e-15,\n 5.6772e-15, 2.3248e-14, 3.1542e-14, 5.6520e-14, 4.2674e-15, 2.2845e-13,\n 8.9548e-16, 1.8452e-15, 1.1297e-13, 4.6719e-14, 4.3462e-14, 2.0367e-13,\n 2.4069e-15, 1.0215e-13, 1.1144e-15, 6.1753e-15, 3.7396e-16, 1.1744e-13,\n 2.5345e-16, 1.2225e-14, 1.5693e-13, 8.1495e-14, 3.0249e-15, 6.6994e-14,\n 1.0721e-15, 5.3445e-15, 6.3851e-15, 2.2254e-13, 2.2312e-13, 9.4528e-14,\n 5.5872e-14, 1.5165e-14, 1.8632e-13, 2.9362e-13, 1.1688e-15, 1.7917e-17,\n 4.3799e-16, 9.3463e-14, 8.4722e-15, 1.8924e-15, 1.2597e-14, 1.7348e-13,\n 1.3745e-14, 3.4483e-16, 5.8311e-15, 8.8116e-15, 1.3022e-13, 2.1811e-15,\n 1.0153e-13, 3.9517e-13, 4.7335e-14, 2.5623e-14, 4.7317e-15, 5.1171e-14,\n 1.1365e-14, 6.3612e-14, 1.4654e-14, 3.9220e-15, 4.5629e-13, 1.8912e-15,\n 2.9617e-14, 6.9804e-17, 4.9913e-14, 1.6584e-13, 6.5321e-16, 5.5062e-15,\n 1.4676e-13, 6.8359e-15, 6.6014e-14, 4.7799e-13, 3.8209e-13, 4.9376e-14,\n 6.5557e-16, 1.9347e-14, 1.1404e-13, 1.2613e-16, 2.9092e-14, 4.9666e-13,\n 2.1372e-14, 2.8954e-14, 7.4275e-14, 1.5359e-14, 5.9926e-15, 3.8344e-14,\n 1.5049e-14, 2.6985e-14, 1.2688e-14, 7.5721e-14, 1.8484e-14, 1.2075e-14,\n 4.3804e-14, 1.0010e-14, 1.1550e-14, 1.6521e-14, 9.7433e-14, 1.0477e-15,\n 3.8232e-13, 7.6835e-15, 1.2584e-14, 5.0592e-14, 2.2664e-14, 1.8031e-15,\n 5.3999e-14, 5.2057e-13, 2.0361e-13, 6.3928e-13, 1.0015e-14, 5.3151e-14,\n 3.1560e-14, 1.4769e-13, 1.1682e-15, 3.1724e-16], device='cuda:0')" + "exp_avg_sq": "tensor([1.5187e-16, 1.7418e-19, 1.6902e-13, 5.7739e-16, 8.9904e-14, 2.8297e-14,\n 4.9316e-15, 7.2007e-15, 1.8675e-13, 7.3868e-16, 1.4761e-13, 8.5998e-15,\n 2.6909e-16, 1.1597e-15, 2.1058e-17, 1.7048e-15, 1.4910e-14, 2.0922e-14,\n 1.7838e-15, 1.2892e-15, 1.1711e-14, 8.1664e-16, 6.6337e-15, 5.1297e-15,\n 4.7211e-15, 3.1273e-14, 7.3232e-15, 7.4417e-15, 1.8104e-14, 1.8302e-13,\n 2.8596e-14, 2.5165e-14, 3.1561e-13, 3.9863e-16, 3.8927e-15, 1.8965e-15,\n 8.8481e-15, 9.3052e-16, 7.1173e-14, 1.0419e-15, 7.8390e-14, 1.6156e-15,\n 4.3507e-15, 3.4979e-14, 4.3840e-17, 1.7567e-15, 3.8827e-15, 3.5338e-15,\n 1.3736e-15, 4.2210e-14, 8.0917e-15, 1.7268e-14, 1.1906e-13, 4.0773e-15,\n 7.9951e-16, 5.2272e-15, 4.2257e-17, 2.2060e-15, 1.0616e-14, 2.1544e-13,\n 1.5012e-14, 1.2699e-15, 2.5436e-19, 6.1730e-15, 5.6367e-15, 1.1489e-13,\n 9.5783e-16, 3.8587e-15, 1.8567e-15, 8.2341e-15, 6.5675e-15, 6.1851e-15,\n 9.1247e-16, 3.1165e-14, 3.7633e-14, 1.8773e-14, 4.1426e-14, 5.5167e-15,\n 2.8459e-15, 7.7618e-18, 6.5322e-15, 2.0749e-15, 7.1503e-16, 2.6115e-15,\n 4.2895e-15, 2.7359e-17, 4.6596e-14, 1.1949e-14, 2.0194e-15, 7.2043e-18,\n 6.9617e-16, 4.9106e-15, 3.7680e-14, 7.1836e-14, 2.9661e-14, 2.3196e-15,\n 3.0788e-14, 1.3530e-14, 9.0382e-15, 4.0221e-15, 3.7420e-15, 1.6316e-16,\n 8.0276e-16, 5.7792e-14, 3.3770e-14, 3.9143e-14, 9.5567e-15, 2.1369e-16,\n 9.7510e-14, 4.1525e-15, 8.8275e-15, 1.1524e-15, 9.2456e-15, 3.5167e-15,\n 4.8509e-15, 3.1635e-13, 2.1365e-16, 4.1961e-15, 4.1801e-15, 1.2086e-13,\n 6.7483e-14, 7.3396e-15, 1.5462e-14, 4.1012e-15, 1.7197e-15, 2.2589e-14,\n 1.1603e-14, 1.5134e-14, 1.6699e-15, 6.5712e-14, 2.6431e-16, 9.0672e-16,\n 2.9748e-13, 9.0782e-15, 2.6697e-16, 2.9641e-15, 2.3223e-14, 3.8576e-15,\n 1.2637e-13, 2.7545e-13, 3.9806e-15, 1.3201e-14, 2.0041e-15, 3.6607e-16,\n 1.6223e-15, 6.6433e-15, 9.0134e-15, 1.6151e-14, 1.2195e-15, 6.5280e-14,\n 2.5589e-16, 5.2727e-16, 3.2282e-14, 1.3350e-14, 1.2420e-14, 5.8200e-14,\n 6.8779e-16, 2.9191e-14, 3.1846e-16, 1.7646e-15, 1.0686e-16, 3.3560e-14,\n 7.2425e-17, 3.4934e-15, 4.4843e-14, 2.3288e-14, 8.6440e-16, 1.9144e-14,\n 3.0636e-16, 1.5272e-15, 1.8246e-15, 6.3593e-14, 6.3759e-14, 2.7012e-14,\n 1.5966e-14, 4.3335e-15, 5.3243e-14, 8.3904e-14, 3.3399e-16, 5.1200e-18,\n 1.2516e-16, 2.6708e-14, 2.4210e-15, 5.4076e-16, 3.5996e-15, 4.9574e-14,\n 3.9276e-15, 9.8538e-17, 1.6663e-15, 2.5180e-15, 3.7211e-14, 6.2327e-16,\n 2.9014e-14, 1.1292e-13, 1.3526e-14, 7.3219e-15, 1.3521e-15, 1.4623e-14,\n 3.2478e-15, 1.8178e-14, 4.1876e-15, 1.1207e-15, 1.3039e-13, 5.4043e-16,\n 8.4634e-15, 1.9947e-17, 1.4263e-14, 4.7389e-14, 1.8666e-16, 1.5734e-15,\n 4.1937e-14, 1.9534e-15, 1.8864e-14, 1.3659e-13, 1.0918e-13, 1.4110e-14,\n 1.8734e-16, 5.5284e-15, 3.2587e-14, 3.6043e-17, 8.3133e-15, 1.4193e-13,\n 6.1072e-15, 8.2739e-15, 2.1225e-14, 4.3891e-15, 1.7124e-15, 1.0957e-14,\n 4.3003e-15, 7.7113e-15, 3.6258e-15, 2.1638e-14, 5.2819e-15, 3.4504e-15,\n 1.2517e-14, 2.8603e-15, 3.3005e-15, 4.7209e-15, 2.7842e-14, 2.9940e-16,\n 1.0925e-13, 2.1956e-15, 3.5961e-15, 1.4457e-14, 6.4765e-15, 5.1526e-16,\n 1.5431e-14, 1.4876e-13, 5.8184e-14, 1.8268e-13, 2.8620e-15, 1.5188e-14,\n 9.0186e-15, 4.2205e-14, 3.3383e-16, 9.0653e-17], device='cuda:0')" }, "39": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0833e-15, 4.1362e-17, 8.4662e-13, 1.8236e-15, 3.8604e-13, 1.2954e-13,\n 3.0697e-14, 3.2986e-14, 7.9179e-13, 5.5084e-15, 4.1538e-13, 5.0163e-14,\n 1.4811e-15, 9.9312e-15, 2.9801e-16, 7.5421e-15, 7.0557e-14, 7.9407e-14,\n 5.5919e-15, 7.2073e-15, 7.2451e-14, 3.9775e-15, 5.0946e-14, 3.1606e-14,\n 2.6265e-14, 2.3928e-13, 5.0589e-14, 3.9028e-14, 4.2510e-14, 6.1246e-13,\n 2.1042e-13, 1.3964e-13, 1.5710e-12, 2.4552e-15, 2.3514e-14, 6.9070e-15,\n 6.2770e-14, 5.9191e-15, 2.6656e-13, 8.7024e-15, 2.5106e-13, 6.2342e-15,\n 1.7272e-14, 2.7120e-13, 1.0883e-16, 1.2622e-14, 2.5350e-14, 2.6808e-14,\n 5.2521e-15, 1.7156e-13, 6.3859e-14, 5.3187e-14, 4.0326e-13, 2.8178e-14,\n 5.8089e-15, 3.7715e-14, 3.6253e-16, 1.6055e-14, 7.1214e-14, 7.0534e-13,\n 8.4997e-14, 8.4128e-15, 8.3118e-17, 3.1515e-14, 3.4023e-14, 3.7287e-13,\n 4.5457e-15, 2.7771e-14, 1.0174e-14, 6.4156e-14, 4.1375e-14, 3.6536e-14,\n 9.5946e-15, 1.5600e-13, 2.1984e-13, 1.1847e-13, 1.4756e-13, 3.7551e-14,\n 1.2940e-14, 2.4688e-16, 3.8254e-14, 1.3226e-14, 5.1058e-15, 2.2754e-14,\n 2.5963e-14, 4.2602e-16, 2.9008e-13, 5.1584e-14, 1.4251e-14, 2.6906e-17,\n 3.1391e-15, 2.8781e-14, 1.5031e-13, 2.3823e-13, 2.1458e-13, 1.5018e-14,\n 2.0482e-13, 9.4373e-14, 5.7822e-14, 1.5951e-14, 2.8228e-14, 1.3753e-15,\n 5.7634e-15, 2.6500e-13, 1.1928e-13, 1.6706e-13, 6.9015e-14, 1.3148e-15,\n 2.3623e-13, 2.3874e-14, 4.1847e-14, 7.2019e-15, 5.1722e-14, 2.2825e-14,\n 2.4807e-14, 1.1581e-12, 1.6750e-15, 3.9182e-14, 1.2984e-14, 3.8198e-13,\n 3.6646e-13, 5.3168e-14, 1.2002e-13, 1.4885e-14, 7.1365e-15, 1.0953e-13,\n 4.9758e-14, 6.3784e-14, 1.0492e-14, 1.9851e-13, 1.2525e-15, 5.3783e-15,\n 1.0289e-12, 9.2712e-14, 2.0446e-15, 2.3759e-14, 7.0632e-14, 2.2707e-14,\n 4.0749e-13, 7.9574e-13, 2.2784e-14, 6.5804e-14, 5.4761e-15, 1.5650e-15,\n 1.0690e-14, 4.3604e-14, 4.0773e-14, 8.4052e-14, 5.4207e-15, 1.8500e-13,\n 1.9252e-15, 3.9098e-15, 1.3807e-13, 5.8456e-14, 5.6990e-14, 3.1480e-13,\n 5.1418e-15, 2.3385e-13, 2.4096e-15, 1.1410e-14, 1.8905e-15, 2.0299e-13,\n 9.2132e-16, 1.6191e-14, 2.5803e-13, 1.3464e-13, 7.5240e-15, 8.7349e-14,\n 1.4650e-15, 5.5055e-15, 1.7182e-14, 1.1672e-13, 2.3146e-13, 9.8704e-14,\n 5.1518e-14, 2.1219e-14, 2.6687e-13, 3.2636e-13, 3.2849e-15, 6.3947e-17,\n 7.2512e-16, 9.1894e-14, 9.5282e-15, 3.6869e-15, 1.0851e-14, 3.6388e-13,\n 2.2959e-14, 7.1840e-16, 1.0527e-14, 1.4843e-14, 1.5826e-13, 2.9042e-15,\n 1.3883e-13, 3.0483e-13, 8.7510e-14, 2.6867e-14, 8.2280e-15, 7.2673e-14,\n 2.1761e-14, 9.6582e-14, 2.6908e-14, 4.3909e-15, 7.5511e-13, 3.1686e-15,\n 5.2011e-14, 9.1477e-17, 6.2482e-14, 1.8960e-13, 2.2417e-15, 1.6134e-14,\n 2.3531e-13, 1.2083e-14, 1.0970e-13, 6.7666e-13, 6.3588e-13, 7.0567e-14,\n 1.6432e-15, 3.2373e-14, 1.8555e-13, 4.7618e-16, 3.9971e-14, 7.9116e-13,\n 2.6211e-14, 4.4442e-14, 1.5254e-13, 1.6188e-14, 9.0286e-15, 6.6424e-14,\n 4.0165e-14, 3.4562e-14, 2.1577e-14, 7.4560e-14, 2.8881e-14, 1.9558e-14,\n 8.1923e-14, 1.4464e-14, 2.3398e-14, 3.4119e-14, 7.0247e-14, 2.3196e-15,\n 3.6970e-13, 1.4373e-14, 9.7071e-15, 1.3624e-13, 4.4388e-14, 2.4043e-15,\n 7.6037e-14, 4.2621e-13, 3.6243e-13, 5.1854e-13, 1.8951e-14, 6.6068e-14,\n 4.6478e-14, 2.3294e-13, 2.8624e-15, 3.0966e-16], device='cuda:0')" + "exp_avg_sq": "tensor([3.0956e-16, 1.1820e-17, 2.4193e-13, 5.2110e-16, 1.1031e-13, 3.7018e-14,\n 8.7719e-15, 9.4259e-15, 2.2626e-13, 1.5741e-15, 1.1870e-13, 1.4335e-14,\n 4.2323e-16, 2.8379e-15, 8.5158e-17, 2.1552e-15, 2.0162e-14, 2.2691e-14,\n 1.5979e-15, 2.0595e-15, 2.0703e-14, 1.1366e-15, 1.4558e-14, 9.0316e-15,\n 7.5054e-15, 6.8376e-14, 1.4456e-14, 1.1152e-14, 1.2148e-14, 1.7501e-13,\n 6.0130e-14, 3.9903e-14, 4.4894e-13, 7.0159e-16, 6.7194e-15, 1.9737e-15,\n 1.7937e-14, 1.6914e-15, 7.6171e-14, 2.4868e-15, 7.1744e-14, 1.7815e-15,\n 4.9355e-15, 7.7497e-14, 3.1098e-17, 3.6067e-15, 7.2440e-15, 7.6606e-15,\n 1.5008e-15, 4.9026e-14, 1.8248e-14, 1.5199e-14, 1.1524e-13, 8.0521e-15,\n 1.6599e-15, 1.0777e-14, 1.0359e-16, 4.5878e-15, 2.0350e-14, 2.0156e-13,\n 2.4289e-14, 2.4040e-15, 2.3752e-17, 9.0057e-15, 9.7225e-15, 1.0655e-13,\n 1.2990e-15, 7.9358e-15, 2.9072e-15, 1.8333e-14, 1.1823e-14, 1.0440e-14,\n 2.7417e-15, 4.4578e-14, 6.2820e-14, 3.3854e-14, 4.2167e-14, 1.0731e-14,\n 3.6977e-15, 7.0548e-17, 1.0931e-14, 3.7794e-15, 1.4590e-15, 6.5020e-15,\n 7.4191e-15, 1.2174e-16, 8.2893e-14, 1.4741e-14, 4.0722e-15, 7.6887e-18,\n 8.9701e-16, 8.2245e-15, 4.2952e-14, 6.8075e-14, 6.1317e-14, 4.2915e-15,\n 5.8528e-14, 2.6968e-14, 1.6523e-14, 4.5581e-15, 8.0665e-15, 3.9300e-16,\n 1.6469e-15, 7.5726e-14, 3.4086e-14, 4.7739e-14, 1.9722e-14, 3.7571e-16,\n 6.7503e-14, 6.8223e-15, 1.1958e-14, 2.0580e-15, 1.4780e-14, 6.5223e-15,\n 7.0887e-15, 3.3093e-13, 4.7863e-16, 1.1197e-14, 3.7104e-15, 1.0915e-13,\n 1.0472e-13, 1.5193e-14, 3.4298e-14, 4.2535e-15, 2.0393e-15, 3.1299e-14,\n 1.4219e-14, 1.8227e-14, 2.9983e-15, 5.6726e-14, 3.5792e-16, 1.5369e-15,\n 2.9401e-13, 2.6493e-14, 5.8427e-16, 6.7892e-15, 2.0184e-14, 6.4888e-15,\n 1.1644e-13, 2.2739e-13, 6.5108e-15, 1.8804e-14, 1.5648e-15, 4.4721e-16,\n 3.0549e-15, 1.2460e-14, 1.1651e-14, 2.4019e-14, 1.5490e-15, 5.2865e-14,\n 5.5015e-16, 1.1172e-15, 3.9455e-14, 1.6704e-14, 1.6285e-14, 8.9957e-14,\n 1.4693e-15, 6.6824e-14, 6.8855e-16, 3.2606e-15, 5.4023e-16, 5.8005e-14,\n 2.6327e-16, 4.6268e-15, 7.3734e-14, 3.8475e-14, 2.1500e-15, 2.4961e-14,\n 4.1862e-16, 1.5732e-15, 4.9100e-15, 3.3354e-14, 6.6142e-14, 2.8205e-14,\n 1.4722e-14, 6.0636e-15, 7.6261e-14, 9.3259e-14, 9.3869e-16, 1.8273e-17,\n 2.0721e-16, 2.6260e-14, 2.7228e-15, 1.0536e-15, 3.1008e-15, 1.0398e-13,\n 6.5608e-15, 2.0529e-16, 3.0080e-15, 4.2414e-15, 4.5224e-14, 8.2989e-16,\n 3.9672e-14, 8.7107e-14, 2.5007e-14, 7.6775e-15, 2.3512e-15, 2.0767e-14,\n 6.2183e-15, 2.7599e-14, 7.6892e-15, 1.2547e-15, 2.1578e-13, 9.0545e-16,\n 1.4863e-14, 2.6140e-17, 1.7855e-14, 5.4181e-14, 6.4058e-16, 4.6105e-15,\n 6.7241e-14, 3.4529e-15, 3.1347e-14, 1.9336e-13, 1.8171e-13, 2.0165e-14,\n 4.6955e-16, 9.2508e-15, 5.3024e-14, 1.3607e-16, 1.1422e-14, 2.2608e-13,\n 7.4901e-15, 1.2700e-14, 4.3591e-14, 4.6260e-15, 2.5800e-15, 1.8981e-14,\n 1.1478e-14, 9.8764e-15, 6.1658e-15, 2.1306e-14, 8.2529e-15, 5.5889e-15,\n 2.3410e-14, 4.1332e-15, 6.6861e-15, 9.7498e-15, 2.0074e-14, 6.6284e-16,\n 1.0565e-13, 4.1072e-15, 2.7739e-15, 3.8933e-14, 1.2684e-14, 6.8705e-16,\n 2.1728e-14, 1.2179e-13, 1.0357e-13, 1.4818e-13, 5.4153e-15, 1.8879e-14,\n 1.3282e-14, 6.6565e-14, 8.1795e-16, 8.8488e-17], device='cuda:0')" }, "40": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.3809e-20, 9.7444e-19, 6.6787e-17, ..., 2.7524e-17, 0.0000e+00,\n 2.7762e-16],\n [6.3421e-17, 7.3577e-18, 6.6038e-16, ..., 4.0634e-16, 0.0000e+00,\n 1.7648e-16],\n [2.0297e-14, 3.1312e-16, 2.7183e-13, ..., 4.2561e-14, 0.0000e+00,\n 1.9531e-13],\n ...,\n [4.7091e-15, 1.3109e-15, 8.3061e-14, ..., 4.7201e-14, 0.0000e+00,\n 4.2486e-14],\n [2.7146e-17, 5.9910e-17, 1.0049e-15, ..., 8.7132e-17, 0.0000e+00,\n 4.8120e-16],\n [5.0036e-17, 6.5536e-16, 4.4594e-15, ..., 5.2162e-16, 0.0000e+00,\n 4.3857e-15]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.8036e-21, 2.7845e-19, 1.9085e-17, ..., 7.8653e-18, 0.0000e+00,\n 7.9333e-17],\n [1.8123e-17, 2.1025e-18, 1.8871e-16, ..., 1.1612e-16, 0.0000e+00,\n 5.0431e-17],\n [5.7999e-15, 8.9477e-17, 7.7679e-14, ..., 1.2162e-14, 0.0000e+00,\n 5.5812e-14],\n ...,\n [1.3456e-15, 3.7460e-16, 2.3735e-14, ..., 1.3488e-14, 0.0000e+00,\n 1.2141e-14],\n [7.7572e-18, 1.7120e-17, 2.8716e-16, ..., 2.4899e-17, 0.0000e+00,\n 1.3751e-16],\n [1.4298e-17, 1.8727e-16, 1.2743e-15, ..., 1.4906e-16, 0.0000e+00,\n 1.2532e-15]], device='cuda:0')" }, "41": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.2561e-15, 4.8172e-14, 9.1500e-11, 5.3342e-14, 2.0099e-11, 2.9084e-11,\n 1.5101e-11, 1.9462e-12, 3.0862e-10, 8.6939e-12, 7.1449e-11, 6.4070e-11,\n 1.9187e-13, 6.0001e-12, 1.2457e-13, 2.4905e-13, 2.8990e-11, 1.6244e-11,\n 3.4712e-12, 8.8118e-13, 5.1150e-12, 5.0959e-13, 3.2575e-11, 1.5455e-11,\n 4.1210e-11, 5.5364e-11, 1.7743e-11, 2.5741e-11, 1.0076e-11, 1.4375e-10,\n 1.5183e-10, 5.2172e-12, 6.0765e-10, 5.3476e-12, 6.4333e-12, 5.8085e-13,\n 2.2206e-11, 4.7145e-13, 1.4491e-11, 5.1309e-12, 4.9544e-11, 4.6967e-13,\n 1.1848e-13, 3.0129e-10, 7.5256e-14, 1.3556e-11, 3.1983e-11, 2.5715e-11,\n 2.9097e-13, 1.6849e-11, 2.5270e-11, 3.2832e-12, 1.0704e-10, 7.0230e-12,\n 3.4768e-12, 7.1890e-11, 2.3019e-12, 6.5638e-12, 1.3292e-10, 1.3079e-10,\n 2.7999e-11, 3.6794e-11, 8.8437e-13, 1.7774e-11, 2.1830e-11, 1.2196e-10,\n 7.3793e-13, 3.2076e-11, 6.7585e-12, 1.5456e-11, 5.6378e-12, 6.0151e-12,\n 1.6120e-11, 1.7290e-10, 5.2260e-11, 1.5283e-10, 3.2402e-11, 4.6097e-12,\n 4.8605e-12, 2.2215e-13, 1.5002e-11, 2.1502e-12, 1.3838e-12, 3.9142e-11,\n 1.1367e-11, 1.1515e-12, 2.2813e-10, 2.2953e-11, 6.5891e-12, 5.4555e-14,\n 4.1331e-13, 1.4616e-11, 2.5706e-11, 7.0769e-12, 2.4576e-10, 9.3914e-13,\n 2.2941e-10, 1.5869e-10, 2.7584e-11, 1.8243e-12, 6.1989e-12, 3.7246e-12,\n 4.1193e-13, 1.1631e-10, 1.3541e-12, 1.9271e-11, 1.5276e-11, 2.5758e-12,\n 1.6800e-11, 7.6096e-11, 1.9608e-11, 1.3181e-12, 8.1754e-11, 8.3572e-12,\n 6.4166e-12, 2.4554e-11, 2.2204e-12, 1.8986e-11, 2.3590e-12, 6.4468e-11,\n 1.7725e-10, 2.2495e-11, 2.0122e-11, 2.6224e-13, 4.5669e-12, 2.5842e-10,\n 1.8967e-12, 1.4182e-11, 5.3698e-12, 1.6996e-11, 1.8844e-13, 5.4005e-12,\n 2.5334e-10, 1.1493e-11, 5.0696e-13, 2.1456e-11, 1.3634e-11, 2.1029e-12,\n 7.0204e-11, 1.2470e-10, 2.2490e-12, 6.2436e-12, 9.0892e-11, 1.5200e-13,\n 3.2853e-12, 1.9863e-11, 6.3561e-12, 7.4795e-11, 1.3548e-12, 4.3783e-11,\n 4.0649e-12, 2.4699e-13, 6.6038e-11, 1.7717e-11, 4.5483e-12, 6.4326e-11,\n 2.3251e-11, 8.8373e-11, 1.2877e-13, 1.1371e-11, 5.0046e-13, 6.1010e-11,\n 5.5827e-14, 9.7693e-11, 1.2961e-11, 6.4938e-11, 6.6421e-12, 2.3013e-11,\n 4.7017e-13, 1.6622e-14, 5.5605e-12, 2.1082e-11, 1.9093e-11, 2.3735e-11,\n 8.8300e-12, 7.4652e-12, 1.0522e-10, 2.3709e-11, 1.9650e-13, 3.4920e-14,\n 2.2660e-12, 2.4080e-11, 2.8455e-13, 2.6126e-12, 1.3469e-12, 2.9623e-10,\n 2.8104e-11, 4.0293e-13, 5.5655e-12, 6.6282e-12, 9.2969e-11, 2.6597e-13,\n 3.9449e-11, 1.0054e-10, 6.5775e-11, 7.7893e-13, 9.2785e-13, 2.9022e-12,\n 2.8834e-11, 8.1348e-11, 4.1639e-11, 4.9109e-13, 1.8139e-10, 2.2618e-14,\n 1.2048e-11, 8.1359e-14, 6.9643e-12, 5.9325e-11, 1.6631e-12, 1.8740e-12,\n 1.1165e-10, 3.5088e-12, 4.3401e-11, 2.3908e-10, 8.0431e-10, 5.9569e-12,\n 9.8478e-13, 3.6878e-11, 1.8262e-11, 2.4394e-14, 1.4320e-11, 3.3037e-10,\n 4.6346e-12, 3.0291e-11, 2.7336e-11, 3.4206e-12, 1.9135e-12, 4.5384e-11,\n 5.6042e-11, 2.8269e-12, 5.4464e-11, 8.2404e-12, 2.6221e-11, 3.5099e-11,\n 3.1242e-11, 1.5887e-11, 9.1031e-12, 1.7701e-11, 5.3446e-12, 1.0295e-12,\n 9.2092e-11, 1.9279e-11, 5.9642e-13, 7.6620e-11, 7.8661e-12, 1.5778e-13,\n 7.1956e-11, 8.7543e-11, 2.2036e-10, 6.7983e-11, 2.1133e-12, 5.2409e-11,\n 4.7735e-12, 7.0013e-11, 8.3105e-13, 5.9981e-12], device='cuda:0')" + "exp_avg_sq": "tensor([6.4470e-16, 1.3765e-14, 2.6147e-11, 1.5243e-14, 5.7434e-12, 8.3110e-12,\n 4.3151e-12, 5.5613e-13, 8.8192e-11, 2.4843e-12, 2.0417e-11, 1.8309e-11,\n 5.4829e-14, 1.7146e-12, 3.5598e-14, 7.1168e-14, 8.2841e-12, 4.6418e-12,\n 9.9192e-13, 2.5180e-13, 1.4616e-12, 1.4562e-13, 9.3085e-12, 4.4165e-12,\n 1.1776e-11, 1.5821e-11, 5.0702e-12, 7.3556e-12, 2.8794e-12, 4.1077e-11,\n 4.3386e-11, 1.4909e-12, 1.7364e-10, 1.5281e-12, 1.8384e-12, 1.6598e-13,\n 6.3454e-12, 1.3472e-13, 4.1410e-12, 1.4662e-12, 1.4158e-11, 1.3421e-13,\n 3.3858e-14, 8.6095e-11, 2.1505e-14, 3.8738e-12, 9.1394e-12, 7.3482e-12,\n 8.3147e-14, 4.8147e-12, 7.2211e-12, 9.3820e-13, 3.0588e-11, 2.0069e-12,\n 9.9352e-13, 2.0543e-11, 6.5780e-13, 1.8757e-12, 3.7982e-11, 3.7374e-11,\n 8.0010e-12, 1.0514e-11, 2.5272e-13, 5.0792e-12, 6.2380e-12, 3.4852e-11,\n 2.1087e-13, 9.1659e-12, 1.9313e-12, 4.4167e-12, 1.6110e-12, 1.7188e-12,\n 4.6064e-12, 4.9407e-11, 1.4934e-11, 4.3673e-11, 9.2591e-12, 1.3173e-12,\n 1.3889e-12, 6.3481e-14, 4.2869e-12, 6.1442e-13, 3.9543e-13, 1.1185e-11,\n 3.2481e-12, 3.2904e-13, 6.5189e-11, 6.5589e-12, 1.8829e-12, 1.5589e-14,\n 1.1811e-13, 4.1768e-12, 7.3458e-12, 2.0223e-12, 7.0228e-11, 2.6837e-13,\n 6.5557e-11, 4.5347e-11, 7.8823e-12, 5.2131e-13, 1.7714e-12, 1.0643e-12,\n 1.1771e-13, 3.3235e-11, 3.8693e-13, 5.5069e-12, 4.3653e-12, 7.3605e-13,\n 4.8007e-12, 2.1745e-11, 5.6031e-12, 3.7666e-13, 2.3362e-11, 2.3881e-12,\n 1.8336e-12, 7.0165e-12, 6.3450e-13, 5.4254e-12, 6.7410e-13, 1.8422e-11,\n 5.0649e-11, 6.4280e-12, 5.7501e-12, 7.4937e-14, 1.3050e-12, 7.3845e-11,\n 5.4200e-13, 4.0525e-12, 1.5345e-12, 4.8567e-12, 5.3847e-14, 1.5432e-12,\n 7.2395e-11, 3.2841e-12, 1.4487e-13, 6.1313e-12, 3.8962e-12, 6.0092e-13,\n 2.0061e-11, 3.5635e-11, 6.4267e-13, 1.7842e-12, 2.5973e-11, 4.3436e-14,\n 9.3879e-13, 5.6760e-12, 1.8163e-12, 2.1373e-11, 3.8714e-13, 1.2511e-11,\n 1.1616e-12, 7.0578e-14, 1.8871e-11, 5.0627e-12, 1.2997e-12, 1.8382e-11,\n 6.6442e-12, 2.5253e-11, 3.6798e-14, 3.2494e-12, 1.4301e-13, 1.7434e-11,\n 1.5953e-14, 2.7917e-11, 3.7036e-12, 1.8556e-11, 1.8980e-12, 6.5761e-12,\n 1.3436e-13, 4.7498e-15, 1.5890e-12, 6.0242e-12, 5.4561e-12, 6.7826e-12,\n 2.5232e-12, 2.1332e-12, 3.0067e-11, 6.7749e-12, 5.6151e-14, 9.9788e-15,\n 6.4753e-13, 6.8810e-12, 8.1312e-14, 7.4658e-13, 3.8487e-13, 8.4649e-11,\n 8.0310e-12, 1.1514e-13, 1.5904e-12, 1.8941e-12, 2.6567e-11, 7.6002e-14,\n 1.1273e-11, 2.8730e-11, 1.8796e-11, 2.2258e-13, 2.6514e-13, 8.2934e-13,\n 8.2395e-12, 2.3246e-11, 1.1899e-11, 1.4033e-13, 5.1835e-11, 6.4631e-15,\n 3.4428e-12, 2.3249e-14, 1.9901e-12, 1.6953e-11, 4.7523e-13, 5.3551e-13,\n 3.1906e-11, 1.0027e-12, 1.2402e-11, 6.8319e-11, 2.2984e-10, 1.7022e-12,\n 2.8141e-13, 1.0538e-11, 5.2185e-12, 6.9709e-15, 4.0920e-12, 9.4407e-11,\n 1.3244e-12, 8.6558e-12, 7.8115e-12, 9.7746e-13, 5.4680e-13, 1.2969e-11,\n 1.6014e-11, 8.0782e-13, 1.5564e-11, 2.3548e-12, 7.4927e-12, 1.0030e-11,\n 8.9276e-12, 4.5397e-12, 2.6013e-12, 5.0583e-12, 1.5273e-12, 2.9417e-13,\n 2.6316e-11, 5.5092e-12, 1.7043e-13, 2.1895e-11, 2.2478e-12, 4.5088e-14,\n 2.0562e-11, 2.5016e-11, 6.2970e-11, 1.9427e-11, 6.0388e-13, 1.4976e-11,\n 1.3641e-12, 2.0007e-11, 2.3748e-13, 1.7140e-12], device='cuda:0')" }, "42": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.4307e-17, 1.0522e-16, 1.5939e-13, 4.4955e-16, 3.5590e-14, 4.9152e-14,\n 2.5432e-14, 3.1825e-15, 5.5646e-13, 1.8684e-14, 1.1411e-13, 1.0382e-13,\n 5.7205e-16, 8.4772e-15, 3.8717e-16, 6.5857e-16, 5.4684e-14, 6.7843e-14,\n 9.0699e-15, 2.9383e-15, 1.2117e-14, 1.3481e-15, 8.4763e-14, 3.1693e-14,\n 1.4760e-13, 8.6425e-14, 3.5845e-14, 7.7698e-14, 3.9016e-14, 2.3796e-13,\n 2.8057e-13, 1.2824e-14, 1.2251e-12, 5.9638e-15, 1.0689e-14, 8.6689e-16,\n 3.5128e-14, 2.5103e-15, 1.9837e-14, 1.1061e-14, 6.5153e-14, 5.9464e-15,\n 1.5735e-16, 1.0061e-12, 3.0833e-16, 2.3878e-14, 8.7946e-14, 1.0180e-13,\n 8.8649e-16, 2.6770e-14, 4.9105e-14, 4.1120e-15, 1.8699e-13, 1.1319e-14,\n 7.4379e-15, 1.1168e-13, 8.2638e-15, 8.8653e-15, 3.1495e-13, 1.7366e-13,\n 4.1146e-14, 1.0066e-13, 2.3743e-15, 2.8318e-14, 5.5953e-14, 2.6681e-13,\n 2.1760e-15, 4.7239e-14, 1.2514e-14, 1.7286e-14, 9.2011e-15, 1.3251e-14,\n 2.6262e-14, 4.1830e-13, 7.2374e-14, 3.3086e-13, 6.4430e-14, 8.1005e-15,\n 9.5806e-15, 2.3682e-16, 1.8369e-14, 3.2212e-15, 4.2314e-15, 1.5403e-13,\n 1.4773e-14, 6.2613e-15, 6.0683e-13, 8.0228e-14, 1.6595e-14, 3.2749e-16,\n 2.0018e-15, 2.3868e-14, 3.3456e-14, 1.1329e-14, 6.5498e-13, 2.2248e-15,\n 5.8565e-13, 4.0626e-13, 5.1526e-14, 7.2511e-15, 1.1991e-14, 7.7018e-15,\n 1.2671e-15, 2.1642e-13, 4.8418e-15, 2.6699e-14, 1.7909e-14, 6.6317e-15,\n 1.6886e-14, 1.7584e-13, 4.3858e-14, 1.7562e-15, 1.2519e-13, 1.0519e-14,\n 1.6125e-14, 4.1169e-14, 8.1598e-15, 2.1269e-14, 8.8073e-15, 9.0911e-14,\n 3.3702e-13, 6.6717e-14, 2.2236e-14, 6.0219e-16, 1.3945e-14, 5.2518e-13,\n 3.6996e-15, 3.9338e-14, 1.2319e-14, 2.9924e-14, 1.0791e-15, 7.2174e-15,\n 4.7990e-13, 1.8629e-14, 1.3017e-15, 4.3503e-14, 3.6773e-14, 4.6612e-15,\n 1.1262e-13, 1.9778e-13, 4.5185e-15, 9.4284e-15, 1.7375e-13, 1.0149e-15,\n 8.5431e-15, 3.8786e-14, 8.9025e-15, 2.4277e-13, 5.7006e-15, 1.4465e-13,\n 7.9743e-15, 1.5299e-16, 1.3563e-13, 3.3743e-14, 8.5004e-15, 6.6758e-14,\n 3.6742e-14, 1.1330e-13, 1.8971e-16, 2.4061e-14, 5.9084e-15, 1.3280e-13,\n 3.1274e-16, 3.1550e-13, 2.2611e-14, 1.1121e-13, 1.6097e-14, 5.0001e-14,\n 1.0781e-15, 4.8397e-17, 7.7728e-15, 5.2413e-14, 2.4037e-14, 3.3047e-14,\n 1.9754e-14, 1.6505e-14, 2.3191e-13, 4.4185e-14, 6.9473e-16, 8.3511e-17,\n 3.7131e-15, 6.3880e-14, 2.6525e-16, 5.0616e-15, 3.3868e-15, 5.1297e-13,\n 3.5680e-14, 2.1971e-15, 2.4838e-14, 1.7639e-14, 2.8551e-13, 1.9711e-15,\n 5.9096e-14, 2.1723e-13, 1.1525e-13, 2.3359e-15, 8.9811e-16, 5.5390e-15,\n 4.5265e-14, 1.9297e-13, 9.7639e-14, 2.7596e-15, 3.0963e-13, 5.2528e-16,\n 1.3297e-14, 1.5389e-16, 8.8797e-15, 1.2597e-13, 6.8151e-15, 2.9222e-15,\n 3.3890e-13, 3.6523e-15, 5.7696e-14, 4.8819e-13, 2.3251e-12, 8.7564e-15,\n 1.7309e-15, 6.5540e-14, 2.6566e-14, 2.2073e-17, 1.4202e-14, 5.9149e-13,\n 1.2174e-14, 9.4681e-14, 4.9425e-14, 5.5771e-15, 4.9932e-15, 9.4900e-14,\n 9.6925e-14, 4.3835e-15, 6.9197e-14, 1.6999e-14, 5.4474e-14, 7.2568e-14,\n 4.9986e-14, 2.9256e-14, 1.4835e-14, 3.4813e-14, 8.2147e-15, 2.0628e-15,\n 1.8969e-13, 4.8546e-14, 5.8158e-15, 1.0644e-13, 8.1316e-15, 1.0449e-15,\n 1.9594e-13, 2.1062e-13, 5.1978e-13, 1.1100e-13, 3.4184e-15, 1.1299e-13,\n 7.2045e-15, 1.5925e-13, 2.1518e-15, 2.5503e-14], device='cuda:0')" + "exp_avg_sq": "tensor([2.1234e-17, 3.0068e-17, 4.5547e-14, 1.2846e-16, 1.0170e-14, 1.4046e-14,\n 7.2674e-15, 9.0943e-16, 1.5901e-13, 5.3391e-15, 3.2609e-14, 2.9668e-14,\n 1.6347e-16, 2.4224e-15, 1.1064e-16, 1.8819e-16, 1.5626e-14, 1.9387e-14,\n 2.5918e-15, 8.3964e-16, 3.4624e-15, 3.8523e-16, 2.4222e-14, 9.0565e-15,\n 4.2178e-14, 2.4697e-14, 1.0243e-14, 2.2203e-14, 1.1149e-14, 6.7998e-14,\n 8.0174e-14, 3.6646e-15, 3.5007e-13, 1.7042e-15, 3.0545e-15, 2.4772e-16,\n 1.0038e-14, 7.1733e-16, 5.6685e-15, 3.1607e-15, 1.8618e-14, 1.6992e-15,\n 4.4965e-17, 2.8750e-13, 8.8109e-17, 6.8234e-15, 2.5131e-14, 2.9089e-14,\n 2.5332e-16, 7.6496e-15, 1.4032e-14, 1.1750e-15, 5.3433e-14, 3.2346e-15,\n 2.1255e-15, 3.1913e-14, 2.3615e-15, 2.5333e-15, 8.9998e-14, 4.9623e-14,\n 1.1758e-14, 2.8763e-14, 6.7849e-16, 8.0920e-15, 1.5989e-14, 7.6243e-14,\n 6.2181e-16, 1.3499e-14, 3.5758e-15, 4.9398e-15, 2.6293e-15, 3.7866e-15,\n 7.5045e-15, 1.1953e-13, 2.0682e-14, 9.4545e-14, 1.8411e-14, 2.3148e-15,\n 2.7377e-15, 6.7673e-17, 5.2492e-15, 9.2049e-16, 1.2092e-15, 4.4016e-14,\n 4.2216e-15, 1.7892e-15, 1.7341e-13, 2.2926e-14, 4.7421e-15, 9.3584e-17,\n 5.7202e-16, 6.8206e-15, 9.5604e-15, 3.2373e-15, 1.8716e-13, 6.3576e-16,\n 1.6735e-13, 1.1609e-13, 1.4724e-14, 2.0721e-15, 3.4265e-15, 2.2009e-15,\n 3.6207e-16, 6.1844e-14, 1.3836e-15, 7.6293e-15, 5.1177e-15, 1.8950e-15,\n 4.8253e-15, 5.0249e-14, 1.2533e-14, 5.0184e-16, 3.5774e-14, 3.0058e-15,\n 4.6077e-15, 1.1764e-14, 2.3317e-15, 6.0778e-15, 2.5168e-15, 2.5979e-14,\n 9.6308e-14, 1.9065e-14, 6.3542e-15, 1.7208e-16, 3.9849e-15, 1.5007e-13,\n 1.0572e-15, 1.1241e-14, 3.5203e-15, 8.5509e-15, 3.0835e-16, 2.0624e-15,\n 1.3714e-13, 5.3234e-15, 3.7196e-16, 1.2431e-14, 1.0508e-14, 1.3320e-15,\n 3.2183e-14, 5.6518e-14, 1.2912e-15, 2.6943e-15, 4.9650e-14, 2.9003e-16,\n 2.4412e-15, 1.1083e-14, 2.5440e-15, 6.9374e-14, 1.6290e-15, 4.1335e-14,\n 2.2787e-15, 4.3718e-17, 3.8758e-14, 9.6424e-15, 2.4291e-15, 1.9077e-14,\n 1.0499e-14, 3.2375e-14, 5.4211e-17, 6.8756e-15, 1.6884e-15, 3.7948e-14,\n 8.9368e-17, 9.0157e-14, 6.4611e-15, 3.1781e-14, 4.5999e-15, 1.4288e-14,\n 3.0808e-16, 1.3830e-17, 2.2212e-15, 1.4977e-14, 6.8688e-15, 9.4435e-15,\n 5.6448e-15, 4.7166e-15, 6.6270e-14, 1.2626e-14, 1.9852e-16, 2.3864e-17,\n 1.0611e-15, 1.8254e-14, 7.5796e-17, 1.4464e-15, 9.6780e-16, 1.4659e-13,\n 1.0196e-14, 6.2784e-16, 7.0977e-15, 5.0403e-15, 8.1586e-14, 5.6325e-16,\n 1.6887e-14, 6.2074e-14, 3.2935e-14, 6.6749e-16, 2.5664e-16, 1.5828e-15,\n 1.2935e-14, 5.5142e-14, 2.7901e-14, 7.8857e-16, 8.8479e-14, 1.5010e-16,\n 3.7997e-15, 4.3977e-17, 2.5374e-15, 3.5998e-14, 1.9475e-15, 8.3504e-16,\n 9.6844e-14, 1.0437e-15, 1.6487e-14, 1.3950e-13, 6.6441e-13, 2.5022e-15,\n 4.9461e-16, 1.8729e-14, 7.5915e-15, 6.3076e-18, 4.0583e-15, 1.6902e-13,\n 3.4789e-15, 2.7056e-14, 1.4124e-14, 1.5937e-15, 1.4268e-15, 2.7118e-14,\n 2.7697e-14, 1.2526e-15, 1.9774e-14, 4.8577e-15, 1.5566e-14, 2.0737e-14,\n 1.4284e-14, 8.3601e-15, 4.2393e-15, 9.9480e-15, 2.3474e-15, 5.8946e-16,\n 5.4205e-14, 1.3872e-14, 1.6619e-15, 3.0417e-14, 2.3237e-15, 2.9859e-16,\n 5.5991e-14, 6.0185e-14, 1.4853e-13, 3.1719e-14, 9.7684e-16, 3.2288e-14,\n 2.0587e-15, 4.5507e-14, 6.1489e-16, 7.2878e-15], device='cuda:0')" }, "43": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.5586e-16, 1.9059e-16, 2.6428e-13, 7.4466e-16, 5.4833e-14, 7.2097e-14,\n 4.2697e-14, 6.9697e-15, 8.5392e-13, 2.5262e-14, 2.0269e-13, 1.5617e-13,\n 5.7150e-16, 1.8739e-14, 5.7499e-16, 1.8908e-15, 8.7116e-14, 5.2850e-14,\n 8.3573e-15, 4.0133e-15, 1.7468e-14, 2.4266e-15, 8.5045e-14, 4.5928e-14,\n 1.2232e-13, 1.4267e-13, 5.2245e-14, 6.6327e-14, 3.8000e-14, 3.9873e-13,\n 3.7614e-13, 1.7437e-14, 1.6932e-12, 1.2993e-14, 2.1918e-14, 2.9161e-15,\n 5.7114e-14, 3.9975e-15, 4.3935e-14, 1.5371e-14, 1.1919e-13, 3.7236e-15,\n 7.1197e-16, 8.4457e-13, 3.3496e-16, 3.2005e-14, 8.0893e-14, 7.8986e-14,\n 1.6022e-15, 4.9725e-14, 7.8598e-14, 8.3295e-15, 2.7705e-13, 1.8206e-14,\n 1.1832e-14, 1.9152e-13, 9.1570e-15, 1.6726e-14, 3.4294e-13, 3.7638e-13,\n 6.8087e-14, 8.5653e-14, 4.1119e-15, 5.1484e-14, 6.3149e-14, 3.0299e-13,\n 2.6988e-15, 8.5646e-14, 2.0278e-14, 4.5480e-14, 1.7034e-14, 1.7773e-14,\n 3.3889e-14, 4.7580e-13, 1.5056e-13, 3.8891e-13, 9.3653e-14, 1.4664e-14,\n 1.6614e-14, 5.0937e-16, 4.1497e-14, 6.6555e-15, 5.9396e-15, 1.1273e-13,\n 2.7831e-14, 5.6019e-15, 6.2978e-13, 7.2298e-14, 2.3186e-14, 4.6487e-16,\n 2.1239e-15, 3.7697e-14, 6.7611e-14, 2.1291e-14, 6.1647e-13, 2.4255e-15,\n 5.8189e-13, 4.0280e-13, 6.2360e-14, 1.1626e-14, 1.8722e-14, 1.3528e-14,\n 1.2032e-15, 3.0139e-13, 4.0320e-15, 5.0494e-14, 4.3221e-14, 9.9078e-15,\n 4.3335e-14, 1.8668e-13, 5.7377e-14, 4.5064e-15, 2.1798e-13, 1.8786e-14,\n 2.0827e-14, 7.1536e-14, 8.7031e-15, 4.7957e-14, 1.1786e-14, 1.8493e-13,\n 4.8409e-13, 7.0523e-14, 5.8085e-14, 4.2127e-15, 1.6793e-14, 6.5934e-13,\n 5.1374e-15, 4.4135e-14, 1.6305e-14, 4.9050e-14, 1.6489e-15, 1.7646e-14,\n 6.5111e-13, 3.1027e-14, 2.2397e-15, 5.6306e-14, 4.3651e-14, 5.1951e-15,\n 1.7610e-13, 3.1446e-13, 8.4503e-15, 1.9904e-14, 2.2409e-13, 1.8576e-15,\n 1.1493e-14, 5.8153e-14, 1.4839e-14, 1.9437e-13, 5.2001e-15, 1.2711e-13,\n 1.3405e-14, 3.0010e-16, 1.6933e-13, 4.1977e-14, 1.5620e-14, 1.6713e-13,\n 5.6485e-14, 2.1392e-13, 7.0624e-16, 3.3526e-14, 8.0626e-15, 1.7834e-13,\n 6.2067e-16, 2.7465e-13, 3.7122e-14, 1.6562e-13, 1.6453e-14, 5.8809e-14,\n 1.7796e-15, 1.3300e-16, 1.4504e-14, 6.4423e-14, 5.3545e-14, 7.0336e-14,\n 2.5023e-14, 2.1891e-14, 2.9588e-13, 6.5997e-14, 2.7390e-15, 1.8075e-16,\n 8.4091e-15, 6.5472e-14, 1.1059e-15, 9.1681e-15, 5.4845e-15, 8.1548e-13,\n 6.8049e-14, 2.0309e-15, 2.0470e-14, 2.3094e-14, 2.2930e-13, 3.5535e-15,\n 9.9366e-14, 2.6785e-13, 1.6014e-13, 2.1789e-15, 3.4476e-15, 9.9637e-15,\n 7.6963e-14, 2.3304e-13, 1.0525e-13, 4.4981e-15, 4.7314e-13, 1.1315e-15,\n 3.1223e-14, 3.3518e-16, 2.2210e-14, 1.4628e-13, 8.3992e-15, 6.0564e-15,\n 3.1689e-13, 8.0756e-15, 1.2504e-13, 6.4154e-13, 2.1815e-12, 1.6014e-14,\n 3.2629e-15, 8.7016e-14, 5.1329e-14, 1.1712e-16, 3.2927e-14, 9.0486e-13,\n 1.5652e-14, 8.9991e-14, 8.3450e-14, 7.6346e-15, 6.7656e-15, 1.2630e-13,\n 1.5626e-13, 9.1147e-15, 1.4854e-13, 2.6103e-14, 6.2140e-14, 1.0409e-13,\n 8.9570e-14, 5.0311e-14, 2.4139e-14, 5.5829e-14, 1.3861e-14, 3.2301e-15,\n 2.3822e-13, 5.8210e-14, 7.7127e-15, 2.1149e-13, 2.1257e-14, 2.4942e-15,\n 1.9898e-13, 2.2235e-13, 6.0630e-13, 1.9987e-13, 6.3578e-15, 1.2686e-13,\n 1.1353e-14, 2.0552e-13, 4.1472e-15, 2.0667e-14], device='cuda:0')" + "exp_avg_sq": "tensor([7.3115e-17, 5.4462e-17, 7.5521e-14, 2.1279e-16, 1.5669e-14, 2.0602e-14,\n 1.2201e-14, 1.9916e-15, 2.4401e-13, 7.2189e-15, 5.7919e-14, 4.4626e-14,\n 1.6331e-16, 5.3547e-15, 1.6431e-16, 5.4033e-16, 2.4894e-14, 1.5102e-14,\n 2.3882e-15, 1.1468e-15, 4.9915e-15, 6.9343e-16, 2.4302e-14, 1.3124e-14,\n 3.4953e-14, 4.0768e-14, 1.4929e-14, 1.8953e-14, 1.0859e-14, 1.1394e-13,\n 1.0749e-13, 4.9827e-15, 4.8385e-13, 3.7130e-15, 6.2632e-15, 8.3331e-16,\n 1.6321e-14, 1.1423e-15, 1.2555e-14, 4.3924e-15, 3.4059e-14, 1.0641e-15,\n 2.0345e-16, 2.4134e-13, 9.5718e-17, 9.1456e-15, 2.3116e-14, 2.2571e-14,\n 4.5785e-16, 1.4209e-14, 2.2460e-14, 2.3802e-15, 7.9168e-14, 5.2026e-15,\n 3.3812e-15, 5.4728e-14, 2.6167e-15, 4.7797e-15, 9.7998e-14, 1.0755e-13,\n 1.9456e-14, 2.4476e-14, 1.1750e-15, 1.4712e-14, 1.8045e-14, 8.6582e-14,\n 7.7119e-16, 2.4474e-14, 5.7945e-15, 1.2996e-14, 4.8676e-15, 5.0789e-15,\n 9.6839e-15, 1.3596e-13, 4.3025e-14, 1.1113e-13, 2.6762e-14, 4.1902e-15,\n 4.7477e-15, 1.4556e-16, 1.1858e-14, 1.9019e-15, 1.6973e-15, 3.2214e-14,\n 7.9529e-15, 1.6008e-15, 1.7996e-13, 2.0660e-14, 6.6256e-15, 1.3284e-16,\n 6.0693e-16, 1.0772e-14, 1.9320e-14, 6.0839e-15, 1.7616e-13, 6.9310e-16,\n 1.6628e-13, 1.1510e-13, 1.7820e-14, 3.3223e-15, 5.3499e-15, 3.8657e-15,\n 3.4383e-16, 8.6125e-14, 1.1522e-15, 1.4429e-14, 1.2351e-14, 2.8312e-15,\n 1.2383e-14, 5.3344e-14, 1.6396e-14, 1.2877e-15, 6.2289e-14, 5.3684e-15,\n 5.9515e-15, 2.0442e-14, 2.4870e-15, 1.3704e-14, 3.3680e-15, 5.2845e-14,\n 1.3833e-13, 2.0152e-14, 1.6598e-14, 1.2038e-15, 4.7986e-15, 1.8841e-13,\n 1.4681e-15, 1.2612e-14, 4.6593e-15, 1.4016e-14, 4.7119e-16, 5.0424e-15,\n 1.8606e-13, 8.8661e-15, 6.4000e-16, 1.6090e-14, 1.2474e-14, 1.4845e-15,\n 5.0322e-14, 8.9860e-14, 2.4147e-15, 5.6877e-15, 6.4036e-14, 5.3083e-16,\n 3.2841e-15, 1.6618e-14, 4.2403e-15, 5.5542e-14, 1.4860e-15, 3.6323e-14,\n 3.8305e-15, 8.5756e-17, 4.8387e-14, 1.1995e-14, 4.4635e-15, 4.7759e-14,\n 1.6141e-14, 6.1128e-14, 2.0181e-16, 9.5804e-15, 2.3039e-15, 5.0961e-14,\n 1.7736e-16, 7.8483e-14, 1.0608e-14, 4.7326e-14, 4.7014e-15, 1.6805e-14,\n 5.0852e-16, 3.8005e-17, 4.1446e-15, 1.8409e-14, 1.5301e-14, 2.0099e-14,\n 7.1505e-15, 6.2556e-15, 8.4551e-14, 1.8859e-14, 7.8269e-16, 5.1650e-17,\n 2.4030e-15, 1.8709e-14, 3.1602e-16, 2.6198e-15, 1.5672e-15, 2.3303e-13,\n 1.9446e-14, 5.8033e-16, 5.8495e-15, 6.5993e-15, 6.5526e-14, 1.0154e-15,\n 2.8395e-14, 7.6541e-14, 4.5763e-14, 6.2262e-16, 9.8517e-16, 2.8472e-15,\n 2.1993e-14, 6.6592e-14, 3.0075e-14, 1.2854e-15, 1.3520e-13, 3.2333e-16,\n 8.9223e-15, 9.5781e-17, 6.3468e-15, 4.1802e-14, 2.4001e-15, 1.7307e-15,\n 9.0554e-14, 2.3077e-15, 3.5731e-14, 1.8333e-13, 6.2338e-13, 4.5761e-15,\n 9.3240e-16, 2.4866e-14, 1.4668e-14, 3.3469e-17, 9.4090e-15, 2.5857e-13,\n 4.4725e-15, 2.5716e-14, 2.3847e-14, 2.1817e-15, 1.9333e-15, 3.6091e-14,\n 4.4652e-14, 2.6046e-15, 4.2446e-14, 7.4593e-15, 1.7757e-14, 2.9745e-14,\n 2.5595e-14, 1.4377e-14, 6.8978e-15, 1.5954e-14, 3.9608e-15, 9.2303e-16,\n 6.8074e-14, 1.6634e-14, 2.2040e-15, 6.0434e-14, 6.0744e-15, 7.1273e-16,\n 5.6861e-14, 6.3539e-14, 1.7325e-13, 5.7116e-14, 1.8168e-15, 3.6252e-14,\n 3.2442e-15, 5.8728e-14, 1.1851e-15, 5.9057e-15], device='cuda:0')" }, "44": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.5919e-18, 1.1179e-17, 4.4568e-17, ..., 7.9995e-18, 0.0000e+00,\n 1.7673e-16],\n [2.8674e-17, 3.5428e-18, 3.2401e-16, ..., 2.1104e-16, 0.0000e+00,\n 9.6914e-16],\n [5.5865e-14, 6.6636e-15, 4.2469e-13, ..., 2.6483e-13, 0.0000e+00,\n 6.9998e-13],\n ...,\n [1.4892e-14, 1.8061e-15, 1.0182e-13, ..., 4.8077e-14, 0.0000e+00,\n 5.2380e-14],\n [6.4758e-17, 3.9291e-17, 1.4359e-16, ..., 1.0745e-15, 0.0000e+00,\n 6.4808e-16],\n [1.4252e-17, 6.0488e-18, 5.3894e-16, ..., 4.0530e-16, 0.0000e+00,\n 2.7576e-17]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.8837e-18, 3.1945e-18, 1.2736e-17, ..., 2.2859e-18, 0.0000e+00,\n 5.0502e-17],\n [8.1939e-18, 1.0124e-18, 9.2588e-17, ..., 6.0307e-17, 0.0000e+00,\n 2.7694e-16],\n [1.5964e-14, 1.9042e-15, 1.2136e-13, ..., 7.5678e-14, 0.0000e+00,\n 2.0002e-13],\n ...,\n [4.2555e-15, 5.1611e-16, 2.9096e-14, ..., 1.3738e-14, 0.0000e+00,\n 1.4968e-14],\n [1.8505e-17, 1.1228e-17, 4.1032e-17, ..., 3.0705e-16, 0.0000e+00,\n 1.8519e-16],\n [4.0727e-18, 1.7285e-18, 1.5401e-16, ..., 1.1582e-16, 0.0000e+00,\n 7.8799e-18]], device='cuda:0')" }, "45": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.7730e-14, 6.8401e-14, 3.4822e-10, 1.1324e-14, 1.1550e-10, 7.2117e-11,\n 1.7154e-11, 1.0107e-11, 3.5179e-10, 7.2898e-12, 9.3073e-12, 3.3189e-11,\n 4.0218e-13, 7.5326e-12, 1.6109e-12, 2.9207e-12, 2.6334e-11, 6.4776e-12,\n 9.0633e-13, 2.8962e-13, 2.4804e-11, 1.1993e-12, 3.3397e-11, 3.5494e-12,\n 3.0444e-11, 1.0863e-10, 1.3279e-11, 1.3646e-11, 4.8689e-12, 1.0908e-10,\n 3.7944e-12, 1.4707e-11, 5.7264e-10, 6.9554e-12, 3.7424e-12, 2.1461e-13,\n 3.7551e-11, 2.1714e-13, 2.3137e-11, 7.3705e-12, 1.7524e-11, 6.5311e-13,\n 1.9535e-13, 5.4318e-11, 1.7667e-14, 2.0222e-11, 8.9426e-12, 1.2013e-11,\n 2.2853e-12, 8.6903e-12, 7.3086e-11, 4.6340e-12, 2.8542e-11, 5.2946e-12,\n 2.9435e-12, 1.4412e-10, 8.5067e-13, 1.2240e-11, 1.5254e-11, 2.6230e-10,\n 2.7074e-11, 2.5631e-11, 1.3618e-12, 4.0944e-11, 2.3861e-11, 9.1013e-11,\n 2.6363e-13, 4.9549e-11, 3.5995e-12, 1.2031e-11, 1.2277e-11, 3.6077e-11,\n 1.4761e-12, 9.6996e-11, 1.0397e-10, 8.4745e-11, 5.9427e-12, 6.2522e-12,\n 1.8026e-12, 5.6553e-13, 2.4980e-11, 1.7699e-12, 2.7465e-12, 1.9734e-12,\n 2.4868e-11, 6.3254e-14, 7.6955e-11, 1.0862e-11, 5.8857e-12, 2.3293e-14,\n 1.5766e-13, 1.3484e-11, 4.0013e-11, 2.7157e-11, 7.9393e-11, 4.5438e-12,\n 1.2628e-10, 8.3638e-11, 3.9612e-12, 4.3538e-12, 1.4051e-11, 4.9610e-12,\n 8.9685e-12, 1.3036e-10, 1.9072e-11, 1.7829e-11, 2.8031e-11, 1.9402e-12,\n 1.9497e-11, 2.9372e-12, 7.5076e-13, 4.7414e-12, 4.7458e-12, 1.1868e-11,\n 1.9437e-12, 2.6378e-11, 3.1167e-12, 1.4239e-10, 1.3375e-12, 8.2533e-11,\n 1.5275e-10, 1.2100e-11, 1.0827e-10, 6.3461e-13, 5.3898e-12, 1.9868e-10,\n 1.0815e-11, 5.6808e-12, 5.7820e-12, 1.7733e-11, 4.9120e-13, 8.4876e-12,\n 4.4911e-10, 3.9784e-11, 6.1772e-13, 2.6937e-11, 1.5832e-11, 1.6836e-11,\n 1.4506e-10, 6.1583e-11, 1.2588e-11, 2.7924e-12, 3.9312e-11, 2.3050e-13,\n 1.5936e-13, 2.8575e-11, 2.0494e-11, 2.4459e-11, 3.9287e-13, 6.2523e-12,\n 8.2649e-12, 4.2301e-15, 3.1933e-11, 7.7358e-12, 1.4760e-12, 1.6197e-10,\n 9.6943e-12, 4.0402e-11, 1.1496e-13, 4.8027e-12, 1.1026e-12, 9.8326e-11,\n 1.8369e-13, 4.8833e-11, 1.7054e-10, 3.1106e-11, 4.9381e-12, 2.6191e-11,\n 4.1489e-13, 9.0359e-15, 2.2687e-11, 5.2292e-11, 1.2671e-10, 1.4549e-11,\n 1.7552e-11, 3.2970e-13, 4.5562e-11, 1.0958e-11, 9.1860e-13, 1.1116e-12,\n 2.7285e-12, 2.7339e-11, 1.2187e-13, 8.4308e-13, 3.7797e-12, 4.9817e-10,\n 7.8458e-12, 1.0808e-13, 1.2516e-12, 3.9742e-12, 8.0148e-11, 3.3907e-13,\n 5.9177e-11, 1.9001e-11, 1.0375e-11, 1.2154e-11, 1.0476e-12, 3.0122e-11,\n 2.1179e-11, 3.4617e-11, 3.1470e-11, 1.5572e-13, 1.5856e-10, 4.9960e-14,\n 2.0482e-11, 9.2643e-14, 1.0979e-11, 4.8017e-11, 1.2191e-12, 9.5777e-12,\n 3.3019e-11, 1.1641e-11, 1.0959e-10, 1.5870e-10, 2.8524e-10, 4.0472e-11,\n 5.2173e-12, 6.7879e-12, 2.0044e-10, 1.0813e-12, 1.9312e-11, 2.7993e-10,\n 1.5639e-12, 2.1648e-11, 2.5764e-10, 3.9797e-12, 5.7873e-12, 2.6980e-11,\n 4.2938e-11, 1.4730e-11, 1.6588e-11, 1.2434e-11, 2.5169e-11, 9.2848e-12,\n 2.8139e-11, 3.6885e-11, 4.1941e-12, 3.1188e-12, 1.9230e-11, 5.7674e-13,\n 1.7573e-10, 1.2313e-11, 7.5948e-13, 9.2710e-11, 2.8553e-11, 1.3225e-13,\n 3.4345e-11, 1.3888e-11, 3.1008e-10, 2.1957e-10, 4.7425e-12, 1.3524e-11,\n 5.4694e-12, 8.5633e-11, 1.7447e-12, 4.7650e-13], device='cuda:0')" + "exp_avg_sq": "tensor([7.9241e-15, 1.9546e-14, 9.9506e-11, 3.2360e-15, 3.3004e-11, 2.0608e-11,\n 4.9018e-12, 2.8881e-12, 1.0053e-10, 2.0831e-12, 2.6596e-12, 9.4841e-12,\n 1.1493e-13, 2.1525e-12, 4.6033e-13, 8.3460e-13, 7.5251e-12, 1.8510e-12,\n 2.5899e-13, 8.2761e-14, 7.0879e-12, 3.4270e-13, 9.5433e-12, 1.0143e-12,\n 8.6997e-12, 3.1041e-11, 3.7945e-12, 3.8995e-12, 1.3913e-12, 3.1169e-11,\n 1.0843e-12, 4.2025e-12, 1.6364e-10, 1.9875e-12, 1.0694e-12, 6.1327e-14,\n 1.0731e-11, 6.2050e-14, 6.6116e-12, 2.1062e-12, 5.0076e-12, 1.8663e-13,\n 5.5824e-14, 1.5522e-11, 5.0485e-15, 5.7787e-12, 2.5554e-12, 3.4328e-12,\n 6.5303e-13, 2.4833e-12, 2.0885e-11, 1.3242e-12, 8.1560e-12, 1.5130e-12,\n 8.4114e-13, 4.1184e-11, 2.4309e-13, 3.4977e-12, 4.3590e-12, 7.4955e-11,\n 7.7367e-12, 7.3242e-12, 3.8915e-13, 1.1700e-11, 6.8184e-12, 2.6008e-11,\n 7.5336e-14, 1.4159e-11, 1.0286e-12, 3.4379e-12, 3.5084e-12, 1.0309e-11,\n 4.2182e-13, 2.7717e-11, 2.9710e-11, 2.4216e-11, 1.6982e-12, 1.7866e-12,\n 5.1511e-13, 1.6160e-13, 7.1383e-12, 5.0577e-13, 7.8483e-13, 5.6390e-13,\n 7.1062e-12, 1.8075e-14, 2.1990e-11, 3.1039e-12, 1.6819e-12, 6.6563e-15,\n 4.5052e-14, 3.8531e-12, 1.1434e-11, 7.7605e-12, 2.2687e-11, 1.2984e-12,\n 3.6085e-11, 2.3900e-11, 1.1319e-12, 1.2441e-12, 4.0152e-12, 1.4176e-12,\n 2.5628e-12, 3.7252e-11, 5.4500e-12, 5.0948e-12, 8.0100e-12, 5.5443e-13,\n 5.5716e-12, 8.3933e-13, 2.1454e-13, 1.3549e-12, 1.3561e-12, 3.3914e-12,\n 5.5541e-13, 7.5377e-12, 8.9061e-13, 4.0688e-11, 3.8220e-13, 2.3584e-11,\n 4.3651e-11, 3.4576e-12, 3.0938e-11, 1.8135e-13, 1.5402e-12, 5.6775e-11,\n 3.0904e-12, 1.6233e-12, 1.6523e-12, 5.0673e-12, 1.4036e-13, 2.4254e-12,\n 1.2834e-10, 1.1369e-11, 1.7652e-13, 7.6975e-12, 4.5241e-12, 4.8111e-12,\n 4.1451e-11, 1.7598e-11, 3.5970e-12, 7.9793e-13, 1.1234e-11, 6.5868e-14,\n 4.5539e-14, 8.1656e-12, 5.8563e-12, 6.9893e-12, 1.1226e-13, 1.7867e-12,\n 2.3618e-12, 1.2088e-15, 9.1252e-12, 2.2106e-12, 4.2176e-13, 4.6283e-11,\n 2.7702e-12, 1.1545e-11, 3.2851e-14, 1.3724e-12, 3.1507e-13, 2.8098e-11,\n 5.2491e-14, 1.3954e-11, 4.8732e-11, 8.8889e-12, 1.4111e-12, 7.4843e-12,\n 1.1856e-13, 2.5821e-15, 6.4829e-12, 1.4943e-11, 3.6207e-11, 4.1576e-12,\n 5.0157e-12, 9.4214e-14, 1.3020e-11, 3.1314e-12, 2.6250e-13, 3.1764e-13,\n 7.7968e-13, 7.8124e-12, 3.4825e-14, 2.4092e-13, 1.0801e-12, 1.4236e-10,\n 2.2420e-12, 3.0886e-14, 3.5767e-13, 1.1357e-12, 2.2903e-11, 9.6892e-14,\n 1.6910e-11, 5.4298e-12, 2.9648e-12, 3.4732e-12, 2.9936e-13, 8.6075e-12,\n 6.0521e-12, 9.8921e-12, 8.9928e-12, 4.4497e-14, 4.5309e-11, 1.4277e-14,\n 5.8528e-12, 2.6474e-14, 3.1374e-12, 1.3721e-11, 3.4838e-13, 2.7369e-12,\n 9.4356e-12, 3.3264e-12, 3.1315e-11, 4.5349e-11, 8.1510e-11, 1.1565e-11,\n 1.4909e-12, 1.9397e-12, 5.7276e-11, 3.0898e-13, 5.5186e-12, 7.9992e-11,\n 4.4689e-13, 6.1861e-12, 7.3624e-11, 1.1372e-12, 1.6538e-12, 7.7097e-12,\n 1.2270e-11, 4.2093e-12, 4.7402e-12, 3.5530e-12, 7.1923e-12, 2.6532e-12,\n 8.0408e-12, 1.0540e-11, 1.1985e-12, 8.9123e-13, 5.4950e-12, 1.6481e-13,\n 5.0216e-11, 3.5187e-12, 2.1703e-13, 2.6493e-11, 8.1593e-12, 3.7793e-14,\n 9.8143e-12, 3.9687e-12, 8.8609e-11, 6.2742e-11, 1.3552e-12, 3.8646e-12,\n 1.5629e-12, 2.4470e-11, 4.9858e-13, 1.3616e-13], device='cuda:0')" }, "46": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.1276e-16, 8.3218e-16, 7.7357e-13, 1.5573e-17, 2.0010e-13, 1.3479e-13,\n 2.9868e-14, 3.2102e-14, 7.6629e-13, 1.7650e-14, 1.7993e-14, 5.6872e-14,\n 7.6437e-16, 1.2457e-14, 3.7222e-15, 1.0695e-14, 5.2345e-14, 1.3436e-14,\n 2.2389e-15, 1.5243e-15, 3.7993e-14, 6.1768e-15, 9.4997e-14, 5.7833e-15,\n 1.0291e-13, 1.3247e-13, 2.7185e-14, 2.5839e-14, 2.5955e-14, 2.1050e-13,\n 1.2051e-14, 2.8540e-14, 1.1739e-12, 9.6636e-15, 5.9484e-15, 1.0482e-15,\n 6.1265e-14, 3.9824e-15, 3.4793e-14, 1.8892e-14, 2.5432e-14, 9.8662e-16,\n 4.5871e-16, 1.0494e-13, 3.4083e-17, 4.3283e-14, 1.1949e-14, 3.5470e-14,\n 7.5232e-15, 1.1808e-14, 1.3004e-13, 5.8149e-15, 3.3659e-14, 8.0910e-15,\n 1.5208e-14, 4.0161e-13, 2.7668e-15, 1.8659e-14, 2.4815e-14, 5.3123e-13,\n 3.8293e-14, 4.1902e-14, 4.4775e-15, 4.3042e-14, 4.7778e-14, 1.4539e-13,\n 5.8709e-16, 8.0961e-14, 5.3372e-15, 1.7607e-14, 2.2274e-14, 8.8996e-14,\n 1.7163e-15, 1.6730e-13, 2.6267e-13, 1.2612e-13, 8.1382e-15, 8.3886e-15,\n 2.3724e-15, 3.8529e-16, 3.5005e-14, 2.4491e-15, 5.3122e-15, 2.8355e-15,\n 6.3639e-14, 4.5913e-16, 1.0733e-13, 4.6160e-14, 1.3171e-14, 6.0833e-17,\n 2.2264e-17, 2.4803e-14, 7.2638e-14, 3.9648e-14, 9.7930e-14, 4.2509e-15,\n 2.3593e-13, 1.3122e-13, 7.8106e-15, 1.6851e-14, 2.8403e-14, 9.7520e-15,\n 2.0826e-14, 3.0714e-13, 3.5029e-14, 2.3851e-14, 4.1175e-14, 3.9378e-15,\n 1.7574e-14, 7.0343e-15, 2.3806e-15, 1.8986e-14, 1.6997e-14, 1.5971e-14,\n 2.4434e-15, 7.3013e-14, 8.9961e-15, 4.8810e-13, 5.5371e-15, 1.9305e-13,\n 2.7569e-13, 2.6398e-14, 2.5721e-13, 7.1945e-15, 1.5929e-14, 4.3331e-13,\n 9.7676e-15, 6.4810e-15, 1.2822e-14, 3.4843e-14, 1.5341e-15, 1.3480e-14,\n 1.6173e-12, 4.5049e-14, 1.6110e-15, 6.0265e-14, 5.9373e-14, 3.4437e-14,\n 3.5233e-13, 6.4067e-14, 2.5858e-14, 6.6986e-15, 6.3027e-14, 7.5133e-16,\n 5.3630e-16, 9.6347e-14, 3.2730e-14, 3.8492e-14, 1.1042e-15, 9.3266e-15,\n 2.7539e-14, 5.2652e-17, 4.0975e-14, 1.2022e-14, 2.3071e-15, 2.3102e-13,\n 1.3149e-14, 5.3426e-14, 1.1522e-16, 9.0814e-15, 2.4298e-15, 2.0616e-13,\n 1.9254e-15, 1.0923e-13, 4.0987e-13, 4.0773e-14, 1.0517e-14, 5.7893e-14,\n 1.7489e-15, 3.1590e-16, 4.4762e-14, 2.3002e-13, 2.8551e-13, 2.0172e-14,\n 4.8295e-14, 8.9197e-16, 6.1964e-14, 1.6820e-14, 6.0097e-15, 6.3067e-15,\n 6.7835e-15, 6.8870e-14, 4.0978e-16, 2.1152e-15, 1.7145e-14, 1.3880e-12,\n 9.4727e-15, 3.7090e-16, 1.9488e-15, 1.3168e-14, 2.4366e-13, 1.4558e-15,\n 9.2236e-14, 2.6989e-14, 2.2870e-14, 3.0210e-14, 1.5687e-15, 4.2365e-14,\n 2.9008e-14, 4.8205e-14, 6.0939e-14, 7.7780e-16, 1.8326e-13, 3.2642e-16,\n 3.2946e-14, 2.8049e-16, 1.6252e-14, 8.2305e-14, 8.9521e-15, 2.0972e-14,\n 5.0881e-14, 1.5249e-14, 2.4357e-13, 2.4414e-13, 4.3873e-13, 1.0727e-13,\n 1.2099e-14, 9.3513e-15, 3.9761e-13, 3.8900e-15, 2.8018e-14, 6.7643e-13,\n 3.8129e-15, 5.4348e-14, 7.1242e-13, 6.8807e-15, 2.7776e-14, 3.1750e-14,\n 7.0851e-14, 2.9717e-14, 3.0249e-14, 1.7201e-14, 6.4913e-14, 1.6325e-14,\n 3.9537e-14, 7.6847e-14, 6.6320e-15, 4.9184e-15, 3.7628e-14, 1.5546e-15,\n 5.6248e-13, 2.0333e-14, 3.1692e-15, 1.3624e-13, 3.1412e-14, 3.2430e-16,\n 6.5814e-14, 1.6147e-14, 7.8544e-13, 9.0110e-13, 7.1081e-15, 1.5574e-14,\n 9.2796e-15, 2.1142e-13, 4.3338e-15, 9.3793e-16], device='cuda:0')" + "exp_avg_sq": "tensor([3.2221e-17, 2.3780e-16, 2.2105e-13, 4.4502e-18, 5.7181e-14, 3.8518e-14,\n 8.5349e-15, 9.1733e-15, 2.1897e-13, 5.0435e-15, 5.1417e-15, 1.6252e-14,\n 2.1843e-16, 3.5596e-15, 1.0637e-15, 3.0561e-15, 1.4958e-14, 3.8395e-15,\n 6.3977e-16, 4.3558e-16, 1.0857e-14, 1.7651e-15, 2.7146e-14, 1.6526e-15,\n 2.9408e-14, 3.7856e-14, 7.7684e-15, 7.3837e-15, 7.4168e-15, 6.0152e-14,\n 3.4436e-15, 8.1554e-15, 3.3546e-13, 2.7615e-15, 1.6998e-15, 2.9954e-16,\n 1.7507e-14, 1.1380e-15, 9.9424e-15, 5.3986e-15, 7.2674e-15, 2.8193e-16,\n 1.3108e-16, 2.9988e-14, 9.7396e-18, 1.2368e-14, 3.4146e-15, 1.0136e-14,\n 2.1498e-15, 3.3743e-15, 3.7161e-14, 1.6617e-15, 9.6183e-15, 2.3121e-15,\n 4.3459e-15, 1.1476e-13, 7.9065e-16, 5.3319e-15, 7.0912e-15, 1.5180e-13,\n 1.0942e-14, 1.1974e-14, 1.2795e-15, 1.2299e-14, 1.3653e-14, 4.1547e-14,\n 1.6777e-16, 2.3135e-14, 1.5251e-15, 5.0312e-15, 6.3649e-15, 2.5431e-14,\n 4.9046e-16, 4.7807e-14, 7.5061e-14, 3.6039e-14, 2.3256e-15, 2.3971e-15,\n 6.7792e-16, 1.1010e-16, 1.0003e-14, 6.9985e-16, 1.5180e-15, 8.1027e-16,\n 1.8185e-14, 1.3120e-16, 3.0670e-14, 1.3191e-14, 3.7638e-15, 1.7384e-17,\n 6.3621e-18, 7.0876e-15, 2.0757e-14, 1.1330e-14, 2.7984e-14, 1.2147e-15,\n 6.7419e-14, 3.7497e-14, 2.2319e-15, 4.8152e-15, 8.1164e-15, 2.7867e-15,\n 5.9513e-15, 8.7768e-14, 1.0010e-14, 6.8157e-15, 1.1766e-14, 1.1253e-15,\n 5.0218e-15, 2.0101e-15, 6.8028e-16, 5.4253e-15, 4.8569e-15, 4.5639e-15,\n 6.9823e-16, 2.0864e-14, 2.5707e-15, 1.3948e-13, 1.5823e-15, 5.5167e-14,\n 7.8781e-14, 7.5435e-15, 7.3501e-14, 2.0559e-15, 4.5517e-15, 1.2382e-13,\n 2.7912e-15, 1.8520e-15, 3.6641e-15, 9.9568e-15, 4.3837e-16, 3.8521e-15,\n 4.6215e-13, 1.2873e-14, 4.6035e-16, 1.7221e-14, 1.6966e-14, 9.8406e-15,\n 1.0068e-13, 1.8308e-14, 7.3890e-15, 1.9142e-15, 1.8010e-14, 2.1470e-16,\n 1.5325e-16, 2.7532e-14, 9.3528e-15, 1.0999e-14, 3.1553e-16, 2.6652e-15,\n 7.8695e-15, 1.5046e-17, 1.1709e-14, 3.4353e-15, 6.5927e-16, 6.6015e-14,\n 3.7573e-15, 1.5267e-14, 3.2925e-17, 2.5951e-15, 6.9435e-16, 5.8913e-14,\n 5.5021e-16, 3.1213e-14, 1.1712e-13, 1.1651e-14, 3.0052e-15, 1.6543e-14,\n 4.9977e-16, 9.0270e-17, 1.2791e-14, 6.5730e-14, 8.1586e-14, 5.7642e-15,\n 1.3801e-14, 2.5489e-16, 1.7707e-14, 4.8065e-15, 1.7173e-15, 1.8022e-15,\n 1.9385e-15, 1.9680e-14, 1.1710e-16, 6.0444e-16, 4.8995e-15, 3.9662e-13,\n 2.7069e-15, 1.0599e-16, 5.5688e-16, 3.7629e-15, 6.9629e-14, 4.1602e-16,\n 2.6357e-14, 7.7124e-15, 6.5353e-15, 8.6329e-15, 4.4826e-16, 1.2106e-14,\n 8.2894e-15, 1.3775e-14, 1.7414e-14, 2.2226e-16, 5.2368e-14, 9.3278e-17,\n 9.4145e-15, 8.0152e-17, 4.6441e-15, 2.3519e-14, 2.5581e-15, 5.9930e-15,\n 1.4540e-14, 4.3575e-15, 6.9603e-14, 6.9764e-14, 1.2537e-13, 3.0654e-14,\n 3.4575e-15, 2.6722e-15, 1.1362e-13, 1.1116e-15, 8.0064e-15, 1.9330e-13,\n 1.0896e-15, 1.5530e-14, 2.0358e-13, 1.9662e-15, 7.9374e-15, 9.0728e-15,\n 2.0246e-14, 8.4919e-15, 8.6440e-15, 4.9153e-15, 1.8549e-14, 4.6651e-15,\n 1.1298e-14, 2.1960e-14, 1.8951e-15, 1.4055e-15, 1.0752e-14, 4.4425e-16,\n 1.6073e-13, 5.8103e-15, 9.0563e-16, 3.8933e-14, 8.9763e-15, 9.2670e-17,\n 1.8807e-14, 4.6141e-15, 2.2445e-13, 2.5750e-13, 2.0312e-15, 4.4503e-15,\n 2.6517e-15, 6.0416e-14, 1.2384e-15, 2.6802e-16], device='cuda:0')" }, "47": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.8646e-16, 5.8226e-16, 9.5690e-13, 1.4104e-16, 2.8312e-13, 1.7649e-13,\n 4.9183e-14, 3.3877e-14, 9.4240e-13, 2.1285e-14, 2.9554e-14, 8.2890e-14,\n 1.7317e-15, 2.4865e-14, 7.0941e-15, 9.3853e-15, 7.6033e-14, 2.1082e-14,\n 3.3861e-15, 2.0365e-15, 7.2370e-14, 6.1039e-15, 8.6067e-14, 1.2207e-14,\n 9.5342e-14, 2.7308e-13, 4.1641e-14, 3.7007e-14, 2.1387e-14, 3.0904e-13,\n 1.0857e-14, 3.8578e-14, 1.5595e-12, 1.6748e-14, 1.1643e-14, 1.5270e-15,\n 8.7399e-14, 4.1081e-15, 7.0983e-14, 2.3911e-14, 4.4523e-14, 2.7425e-15,\n 8.3182e-16, 1.6525e-13, 8.7635e-17, 4.5354e-14, 2.5832e-14, 4.2157e-14,\n 8.4729e-15, 2.6969e-14, 2.2050e-13, 9.7673e-15, 7.1318e-14, 1.4490e-14,\n 1.2271e-14, 3.6480e-13, 4.1016e-15, 2.8195e-14, 3.9811e-14, 7.3656e-13,\n 6.2381e-14, 5.9374e-14, 5.4312e-15, 1.1576e-13, 6.9142e-14, 2.1448e-13,\n 1.2958e-15, 1.2243e-13, 1.1119e-14, 3.7310e-14, 3.9364e-14, 1.1153e-13,\n 3.1255e-15, 2.7435e-13, 2.9704e-13, 2.2011e-13, 1.9212e-14, 2.0498e-14,\n 6.1384e-15, 6.2737e-16, 7.6133e-14, 5.0586e-15, 8.5514e-15, 6.4711e-15,\n 5.6503e-14, 1.3872e-15, 2.2488e-13, 3.9432e-14, 2.1207e-14, 8.4657e-17,\n 2.1995e-16, 3.4348e-14, 9.8205e-14, 7.1792e-14, 2.0392e-13, 1.0321e-14,\n 3.2820e-13, 2.1316e-13, 1.0576e-14, 1.8000e-14, 4.3580e-14, 1.5758e-14,\n 2.9893e-14, 3.3393e-13, 5.6156e-14, 4.4628e-14, 8.0574e-14, 7.3004e-15,\n 4.3942e-14, 8.6150e-15, 2.3605e-15, 1.8187e-14, 1.3039e-14, 2.6986e-14,\n 6.9236e-15, 7.4206e-14, 1.2439e-14, 3.4719e-13, 6.6869e-15, 2.3527e-13,\n 4.3053e-13, 3.9860e-14, 2.7856e-13, 1.0920e-14, 1.9811e-14, 5.3087e-13,\n 2.5251e-14, 1.7282e-14, 1.9111e-14, 5.2708e-14, 2.5728e-15, 2.7174e-14,\n 1.1448e-12, 9.9728e-14, 2.5708e-15, 6.5102e-14, 4.7266e-14, 3.8481e-14,\n 3.5883e-13, 1.6883e-13, 3.7096e-14, 8.6524e-15, 9.7163e-14, 1.6191e-15,\n 7.5871e-16, 8.6610e-14, 4.2815e-14, 6.9223e-14, 2.4217e-15, 2.0533e-14,\n 2.8691e-14, 2.0347e-18, 8.3146e-14, 1.8727e-14, 4.6100e-15, 4.0060e-13,\n 2.5746e-14, 1.0392e-13, 1.5582e-16, 1.6858e-14, 4.7080e-15, 2.7495e-13,\n 2.4172e-15, 1.3929e-13, 4.8139e-13, 7.9551e-14, 1.1747e-14, 6.5652e-14,\n 2.6403e-15, 3.5173e-16, 5.2774e-14, 1.5535e-13, 3.1462e-13, 4.4534e-14,\n 4.3597e-14, 1.3085e-15, 1.2997e-13, 3.3458e-14, 6.1978e-15, 5.4953e-15,\n 1.0956e-14, 7.3399e-14, 5.5023e-16, 3.3599e-15, 1.5117e-14, 1.3594e-12,\n 1.9367e-14, 7.6823e-16, 4.5111e-15, 1.5656e-14, 2.0143e-13, 2.9865e-15,\n 1.4759e-13, 6.0069e-14, 2.6856e-14, 3.9516e-14, 3.5030e-15, 7.5826e-14,\n 5.5438e-14, 1.0198e-13, 7.8161e-14, 2.5219e-15, 4.2474e-13, 1.0379e-15,\n 4.8008e-14, 2.0987e-16, 3.3992e-14, 1.2818e-13, 7.6664e-15, 2.9465e-14,\n 9.2223e-14, 2.6047e-14, 3.0555e-13, 4.1610e-13, 8.0381e-13, 9.6700e-14,\n 1.8981e-14, 1.6099e-14, 5.1119e-13, 4.8597e-15, 4.5315e-14, 7.4699e-13,\n 5.9044e-15, 6.7019e-14, 7.1373e-13, 6.9660e-15, 2.1088e-14, 7.7787e-14,\n 1.2524e-13, 3.9921e-14, 4.9403e-14, 3.5561e-14, 5.6794e-14, 2.8440e-14,\n 7.9106e-14, 1.0600e-13, 1.2225e-14, 9.7084e-15, 4.5019e-14, 2.2944e-15,\n 4.3764e-13, 3.4741e-14, 5.6051e-15, 2.6210e-13, 6.7665e-14, 8.1443e-16,\n 9.4177e-14, 3.4147e-14, 8.5026e-13, 6.1984e-13, 1.5476e-14, 3.3303e-14,\n 1.2410e-14, 2.3805e-13, 6.4267e-15, 2.1228e-15], device='cuda:0')" + "exp_avg_sq": "tensor([8.1858e-17, 1.6639e-16, 2.7344e-13, 4.0303e-17, 8.0904e-14, 5.0434e-14,\n 1.4054e-14, 9.6806e-15, 2.6930e-13, 6.0823e-15, 8.4453e-15, 2.3687e-14,\n 4.9485e-16, 7.1055e-15, 2.0272e-15, 2.6819e-15, 2.1727e-14, 6.0245e-15,\n 9.6759e-16, 5.8194e-16, 2.0680e-14, 1.7442e-15, 2.4594e-14, 3.4882e-15,\n 2.7245e-14, 7.8035e-14, 1.1899e-14, 1.0575e-14, 6.1116e-15, 8.8312e-14,\n 3.1024e-15, 1.1024e-14, 4.4564e-13, 4.7860e-15, 3.3271e-15, 4.3636e-16,\n 2.4975e-14, 1.1739e-15, 2.0284e-14, 6.8328e-15, 1.2723e-14, 7.8369e-16,\n 2.3770e-16, 4.7221e-14, 2.5042e-17, 1.2960e-14, 7.3817e-15, 1.2047e-14,\n 2.4212e-15, 7.7067e-15, 6.3008e-14, 2.7911e-15, 2.0380e-14, 4.1406e-15,\n 3.5066e-15, 1.0424e-13, 1.1721e-15, 8.0569e-15, 1.1376e-14, 2.1048e-13,\n 1.7826e-14, 1.6967e-14, 1.5520e-15, 3.3078e-14, 1.9758e-14, 6.1289e-14,\n 3.7028e-16, 3.4984e-14, 3.1772e-15, 1.0662e-14, 1.1249e-14, 3.1870e-14,\n 8.9313e-16, 7.8399e-14, 8.4882e-14, 6.2899e-14, 5.4898e-15, 5.8575e-15,\n 1.7541e-15, 1.7928e-16, 2.1756e-14, 1.4455e-15, 2.4436e-15, 1.8492e-15,\n 1.6146e-14, 3.9640e-16, 6.4260e-14, 1.1268e-14, 6.0600e-15, 2.4191e-17,\n 6.2853e-17, 9.8151e-15, 2.8063e-14, 2.0515e-14, 5.8271e-14, 2.9494e-15,\n 9.3787e-14, 6.0913e-14, 3.0223e-15, 5.1438e-15, 1.2453e-14, 4.5029e-15,\n 8.5421e-15, 9.5424e-14, 1.6047e-14, 1.2753e-14, 2.3025e-14, 2.0861e-15,\n 1.2557e-14, 2.4618e-15, 6.7453e-16, 5.1971e-15, 3.7260e-15, 7.7115e-15,\n 1.9785e-15, 2.1205e-14, 3.5544e-15, 9.9211e-14, 1.9108e-15, 6.7231e-14,\n 1.2303e-13, 1.1390e-14, 7.9602e-14, 3.1206e-15, 5.6612e-15, 1.5170e-13,\n 7.2157e-15, 4.9385e-15, 5.4610e-15, 1.5062e-14, 7.3519e-16, 7.7652e-15,\n 3.2715e-13, 2.8498e-14, 7.3461e-16, 1.8604e-14, 1.3507e-14, 1.0996e-14,\n 1.0254e-13, 4.8244e-14, 1.0600e-14, 2.4725e-15, 2.7765e-14, 4.6268e-16,\n 2.1681e-16, 2.4749e-14, 1.2235e-14, 1.9781e-14, 6.9202e-16, 5.8674e-15,\n 8.1988e-15, 5.8142e-19, 2.3760e-14, 5.3514e-15, 1.3173e-15, 1.1447e-13,\n 7.3570e-15, 2.9697e-14, 4.4526e-17, 4.8173e-15, 1.3454e-15, 7.8570e-14,\n 6.9072e-16, 3.9804e-14, 1.3756e-13, 2.2732e-14, 3.3569e-15, 1.8760e-14,\n 7.5448e-16, 1.0051e-16, 1.5081e-14, 4.4393e-14, 8.9906e-14, 1.2726e-14,\n 1.2458e-14, 3.7392e-16, 3.7139e-14, 9.5609e-15, 1.7711e-15, 1.5703e-15,\n 3.1307e-15, 2.0974e-14, 1.5723e-16, 9.6011e-16, 4.3199e-15, 3.8847e-13,\n 5.5342e-15, 2.1953e-16, 1.2891e-15, 4.4739e-15, 5.7559e-14, 8.5342e-16,\n 4.2175e-14, 1.7165e-14, 7.6742e-15, 1.1292e-14, 1.0010e-15, 2.1668e-14,\n 1.5842e-14, 2.9142e-14, 2.2335e-14, 7.2067e-16, 1.2137e-13, 2.9658e-16,\n 1.3719e-14, 5.9972e-17, 9.7136e-15, 3.6628e-14, 2.1907e-15, 8.4198e-15,\n 2.6353e-14, 7.4431e-15, 8.7313e-14, 1.1890e-13, 2.2969e-13, 2.7633e-14,\n 5.4240e-15, 4.6003e-15, 1.4608e-13, 1.3887e-15, 1.2949e-14, 2.1346e-13,\n 1.6872e-15, 1.9151e-14, 2.0396e-13, 1.9906e-15, 6.0261e-15, 2.2228e-14,\n 3.5790e-14, 1.1408e-14, 1.4117e-14, 1.0162e-14, 1.6229e-14, 8.1271e-15,\n 2.2605e-14, 3.0292e-14, 3.4933e-15, 2.7743e-15, 1.2864e-14, 6.5563e-16,\n 1.2506e-13, 9.9274e-15, 1.6017e-15, 7.4896e-14, 1.9336e-14, 2.3273e-16,\n 2.6912e-14, 9.7578e-15, 2.4297e-13, 1.7713e-13, 4.4225e-15, 9.5167e-15,\n 3.5463e-15, 6.8024e-14, 1.8365e-15, 6.0661e-16], device='cuda:0')" }, "48": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.9725e-17, 7.2179e-18, 1.1282e-16, ..., 1.3147e-16, 0.0000e+00,\n 1.7828e-17],\n [8.0071e-18, 3.2879e-17, 4.1564e-16, ..., 5.6193e-16, 0.0000e+00,\n 9.8232e-16],\n [6.6420e-14, 1.2985e-15, 2.7769e-13, ..., 7.0547e-14, 0.0000e+00,\n 3.9833e-13],\n ...,\n [8.7675e-15, 3.6303e-16, 1.0101e-13, ..., 2.9901e-14, 0.0000e+00,\n 3.2145e-14],\n [1.0910e-15, 1.2724e-18, 3.3119e-17, ..., 1.3904e-16, 0.0000e+00,\n 1.4252e-16],\n [4.0970e-18, 2.4336e-18, 9.0627e-16, ..., 7.6973e-17, 0.0000e+00,\n 6.0736e-16]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.1352e-17, 2.0626e-18, 3.2239e-17, ..., 3.7569e-17, 0.0000e+00,\n 5.0946e-18],\n [2.2881e-18, 9.3955e-18, 1.1877e-16, ..., 1.6057e-16, 0.0000e+00,\n 2.8070e-16],\n [1.8980e-14, 3.7107e-16, 7.9353e-14, ..., 2.0159e-14, 0.0000e+00,\n 1.1383e-13],\n ...,\n [2.5054e-15, 1.0374e-16, 2.8864e-14, ..., 8.5446e-15, 0.0000e+00,\n 9.1856e-15],\n [3.1177e-16, 3.6359e-19, 9.4639e-18, ..., 3.9731e-17, 0.0000e+00,\n 4.0726e-17],\n [1.1707e-18, 6.9542e-19, 2.5897e-16, ..., 2.1996e-17, 0.0000e+00,\n 1.7356e-16]], device='cuda:0')" }, "49": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.3971e-14, 2.9654e-13, 1.5908e-10, 1.6520e-13, 2.5947e-11, 8.3618e-11,\n 3.9222e-11, 8.8364e-12, 4.1752e-12, 8.5349e-12, 1.1015e-10, 3.2440e-11,\n 6.0565e-12, 2.1651e-11, 9.3999e-14, 6.1403e-12, 1.6327e-11, 7.7227e-12,\n 1.2339e-12, 2.7843e-12, 3.9917e-11, 7.7104e-13, 3.2584e-12, 1.2289e-11,\n 2.0583e-11, 1.0747e-11, 1.7034e-11, 3.4646e-11, 6.1817e-12, 6.2452e-11,\n 8.4644e-11, 9.8864e-11, 5.3686e-10, 1.6923e-12, 2.3410e-11, 4.6610e-13,\n 4.7933e-11, 7.9150e-13, 1.7684e-10, 5.1303e-12, 2.8234e-11, 6.6088e-13,\n 2.9258e-12, 1.1517e-10, 5.2435e-14, 5.7931e-12, 2.4692e-11, 1.7053e-11,\n 3.3512e-12, 3.2487e-11, 5.9947e-11, 2.0024e-11, 6.5409e-11, 5.3329e-12,\n 6.7799e-12, 1.5020e-10, 5.1975e-13, 2.2924e-11, 1.1982e-10, 1.4870e-10,\n 1.9013e-11, 1.6956e-11, 6.7459e-13, 1.1727e-10, 5.5984e-12, 1.5580e-10,\n 1.8929e-13, 3.9556e-11, 2.2268e-11, 2.2365e-11, 4.5967e-11, 7.4069e-11,\n 1.9769e-11, 5.4616e-11, 2.4134e-11, 9.2402e-11, 5.2958e-11, 2.0878e-11,\n 3.2147e-12, 1.2393e-13, 1.1006e-10, 7.9648e-12, 6.3496e-12, 4.2713e-11,\n 1.0055e-11, 1.4755e-12, 2.3851e-10, 7.3461e-12, 8.4094e-12, 3.5785e-13,\n 8.8668e-14, 9.0404e-12, 3.3859e-11, 7.4444e-12, 2.0782e-10, 6.4139e-12,\n 1.2300e-10, 1.6997e-10, 2.4502e-11, 1.4809e-11, 1.4976e-11, 6.3521e-12,\n 2.8331e-12, 4.9083e-11, 2.8364e-11, 2.0056e-11, 2.8977e-11, 2.2347e-12,\n 1.8836e-11, 2.2699e-11, 1.0727e-11, 2.6883e-12, 1.3153e-10, 3.3042e-12,\n 4.1174e-12, 1.3110e-10, 4.2431e-12, 2.7260e-11, 4.9561e-12, 8.3972e-12,\n 2.8379e-10, 2.3540e-11, 1.0576e-10, 1.8889e-12, 1.1237e-12, 3.3175e-10,\n 4.3297e-11, 2.2616e-12, 1.1391e-12, 4.8347e-11, 7.0299e-15, 9.8960e-12,\n 1.7386e-10, 1.0987e-10, 4.4673e-13, 1.1113e-11, 2.6870e-11, 9.0931e-12,\n 9.8330e-11, 7.1922e-11, 1.1498e-11, 1.0481e-11, 4.8858e-11, 7.5575e-14,\n 3.7426e-12, 5.6443e-12, 3.6218e-12, 5.9806e-11, 3.4822e-12, 5.6880e-12,\n 7.8814e-12, 1.0280e-11, 2.3238e-11, 1.7535e-11, 6.7379e-12, 8.0038e-11,\n 1.7800e-12, 2.6239e-10, 9.4505e-14, 4.8554e-12, 1.9692e-12, 6.7464e-12,\n 1.0133e-13, 1.2274e-11, 7.5848e-11, 4.9466e-11, 3.0407e-12, 1.4110e-11,\n 1.4240e-14, 2.4619e-12, 1.1437e-12, 2.1074e-11, 1.6914e-11, 2.0912e-11,\n 1.1014e-11, 2.7587e-12, 9.0248e-11, 5.2600e-11, 5.1907e-13, 1.3269e-13,\n 1.4331e-11, 3.8844e-11, 1.0878e-13, 8.3313e-12, 5.8062e-12, 1.1398e-10,\n 2.2987e-12, 5.8891e-13, 6.1871e-12, 2.6878e-12, 1.1467e-11, 1.3360e-13,\n 2.7132e-11, 8.8766e-12, 6.2747e-11, 7.6427e-12, 3.5027e-12, 2.5381e-11,\n 1.2333e-10, 1.1898e-11, 1.7464e-11, 3.7028e-13, 2.1067e-10, 4.6937e-14,\n 2.6086e-11, 1.0603e-13, 2.8695e-11, 2.8522e-11, 1.2025e-12, 1.7654e-11,\n 1.6640e-10, 9.8658e-12, 1.1733e-11, 1.4029e-10, 5.5617e-10, 7.0640e-12,\n 5.7973e-12, 5.8736e-12, 2.9338e-10, 3.2181e-13, 5.4464e-12, 5.5960e-10,\n 7.5611e-13, 4.2625e-11, 2.0048e-10, 3.4811e-12, 5.9981e-12, 8.8116e-11,\n 2.1455e-11, 6.3915e-12, 1.1696e-10, 2.5204e-11, 1.4428e-12, 9.3139e-12,\n 4.0062e-11, 3.9756e-11, 5.9539e-12, 7.4155e-12, 4.3303e-12, 2.7225e-12,\n 2.4617e-11, 1.8409e-11, 1.5038e-12, 2.5394e-10, 3.3936e-11, 3.1159e-13,\n 2.7842e-11, 1.1460e-10, 1.4323e-10, 1.2484e-10, 3.9441e-12, 1.3633e-11,\n 5.4485e-12, 5.1894e-11, 5.6332e-13, 1.4426e-13], device='cuda:0')" + "exp_avg_sq": "tensor([9.7074e-15, 8.4739e-14, 4.5459e-11, 4.7209e-14, 7.4147e-12, 2.3895e-11,\n 1.1208e-11, 2.5251e-12, 1.1931e-12, 2.4389e-12, 3.1478e-11, 9.2699e-12,\n 1.7307e-12, 6.1868e-12, 2.6861e-14, 1.7546e-12, 4.6655e-12, 2.2068e-12,\n 3.5258e-13, 7.9562e-13, 1.1407e-11, 2.2033e-13, 9.3112e-13, 3.5118e-12,\n 5.8817e-12, 3.0710e-12, 4.8675e-12, 9.9004e-12, 1.7665e-12, 1.7846e-11,\n 2.4188e-11, 2.8251e-11, 1.5341e-10, 4.8359e-13, 6.6895e-12, 1.3319e-13,\n 1.3697e-11, 2.2618e-13, 5.0532e-11, 1.4660e-12, 8.0680e-12, 1.8885e-13,\n 8.3608e-13, 3.2911e-11, 1.4984e-14, 1.6554e-12, 7.0559e-12, 4.8729e-12,\n 9.5765e-13, 9.2834e-12, 1.7130e-11, 5.7221e-12, 1.8691e-11, 1.5239e-12,\n 1.9374e-12, 4.2922e-11, 1.4852e-13, 6.5507e-12, 3.4238e-11, 4.2493e-11,\n 5.4331e-12, 4.8454e-12, 1.9277e-13, 3.3510e-11, 1.5998e-12, 4.4522e-11,\n 5.4091e-14, 1.1303e-11, 6.3632e-12, 6.3911e-12, 1.3135e-11, 2.1166e-11,\n 5.6491e-12, 1.5607e-11, 6.8964e-12, 2.6405e-11, 1.5133e-11, 5.9661e-12,\n 9.1863e-13, 3.5415e-14, 3.1450e-11, 2.2760e-12, 1.8144e-12, 1.2206e-11,\n 2.8732e-12, 4.2165e-13, 6.8155e-11, 2.0992e-12, 2.4031e-12, 1.0226e-13,\n 2.5338e-14, 2.5834e-12, 9.6755e-12, 2.1273e-12, 5.9385e-11, 1.8328e-12,\n 3.5148e-11, 4.8572e-11, 7.0016e-12, 4.2317e-12, 4.2796e-12, 1.8152e-12,\n 8.0959e-13, 1.4026e-11, 8.1052e-12, 5.7312e-12, 8.2805e-12, 6.3858e-13,\n 5.3826e-12, 6.4865e-12, 3.0654e-12, 7.6821e-13, 3.7586e-11, 9.4421e-13,\n 1.1766e-12, 3.7463e-11, 1.2125e-12, 7.7899e-12, 1.4163e-12, 2.3996e-12,\n 8.1095e-11, 6.7266e-12, 3.0222e-11, 5.3976e-13, 3.2111e-13, 9.4800e-11,\n 1.2372e-11, 6.4626e-13, 3.2550e-13, 1.3815e-11, 2.0089e-15, 2.8278e-12,\n 4.9681e-11, 3.1397e-11, 1.2766e-13, 3.1757e-12, 7.6783e-12, 2.5984e-12,\n 2.8098e-11, 2.0552e-11, 3.2856e-12, 2.9949e-12, 1.3961e-11, 2.1596e-14,\n 1.0695e-12, 1.6129e-12, 1.0349e-12, 1.7090e-11, 9.9508e-13, 1.6254e-12,\n 2.2522e-12, 2.9376e-12, 6.6404e-12, 5.0107e-12, 1.9254e-12, 2.2871e-11,\n 5.0864e-13, 7.4981e-11, 2.7005e-14, 1.3875e-12, 5.6270e-13, 1.9278e-12,\n 2.8955e-14, 3.5073e-12, 2.1674e-11, 1.4135e-11, 8.6892e-13, 4.0320e-12,\n 4.0693e-15, 7.0351e-13, 3.2681e-13, 6.0221e-12, 4.8332e-12, 5.9759e-12,\n 3.1472e-12, 7.8831e-13, 2.5789e-11, 1.5031e-11, 1.4833e-13, 3.7917e-14,\n 4.0953e-12, 1.1100e-11, 3.1086e-14, 2.3807e-12, 1.6592e-12, 3.2570e-11,\n 6.5686e-13, 1.6829e-13, 1.7680e-12, 7.6805e-13, 3.2768e-12, 3.8176e-14,\n 7.7533e-12, 2.5366e-12, 1.7931e-11, 2.1839e-12, 1.0009e-12, 7.2528e-12,\n 3.5241e-11, 3.4000e-12, 4.9903e-12, 1.0581e-13, 6.0200e-11, 1.3413e-14,\n 7.4542e-12, 3.0299e-14, 8.1999e-12, 8.1505e-12, 3.4362e-13, 5.0447e-12,\n 4.7551e-11, 2.8192e-12, 3.3529e-12, 4.0090e-11, 1.5893e-10, 2.0186e-12,\n 1.6566e-12, 1.6784e-12, 8.3836e-11, 9.1958e-14, 1.5563e-12, 1.5991e-10,\n 2.1606e-13, 1.2180e-11, 5.7288e-11, 9.9476e-13, 1.7140e-12, 2.5180e-11,\n 6.1308e-12, 1.8264e-12, 3.3422e-11, 7.2023e-12, 4.1228e-13, 2.6615e-12,\n 1.1448e-11, 1.1361e-11, 1.7014e-12, 2.1190e-12, 1.2374e-12, 7.7797e-13,\n 7.0346e-12, 5.2606e-12, 4.2974e-13, 7.2566e-11, 9.6976e-12, 8.9040e-14,\n 7.9561e-12, 3.2749e-11, 4.0929e-11, 3.5675e-11, 1.1271e-12, 3.8958e-12,\n 1.5570e-12, 1.4829e-11, 1.6097e-13, 4.1223e-14], device='cuda:0')" }, "50": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.6302e-17, 1.1344e-15, 2.3580e-13, 2.4999e-15, 2.2458e-14, 1.9068e-13,\n 1.0900e-13, 1.9573e-14, 4.0819e-14, 1.4727e-14, 2.3533e-13, 5.7708e-14,\n 2.6345e-14, 3.9373e-14, 3.8495e-16, 2.1964e-14, 2.7236e-14, 1.5346e-14,\n 4.7441e-15, 5.1009e-15, 7.4454e-14, 1.2591e-15, 6.2801e-15, 1.7380e-14,\n 3.9000e-14, 2.2173e-14, 2.1765e-14, 1.5111e-13, 4.0315e-14, 7.6015e-14,\n 1.1274e-13, 2.1905e-13, 7.5774e-13, 1.9957e-15, 9.1074e-14, 6.4697e-16,\n 9.5925e-14, 5.9263e-15, 5.5815e-13, 7.0315e-15, 2.9069e-14, 1.9105e-15,\n 5.2351e-15, 1.7321e-13, 4.6943e-18, 6.7332e-15, 5.9036e-14, 3.7088e-14,\n 1.7012e-14, 5.4236e-14, 1.1890e-13, 3.8396e-14, 1.1669e-13, 6.7760e-15,\n 1.4415e-14, 3.4858e-13, 1.3490e-15, 6.5656e-14, 1.7203e-13, 1.9220e-13,\n 2.0359e-14, 2.4576e-14, 1.1175e-15, 2.1759e-13, 8.7462e-15, 4.5436e-13,\n 5.3053e-16, 5.0208e-14, 5.6479e-14, 2.6563e-14, 1.0861e-13, 1.7329e-13,\n 4.8509e-14, 7.3396e-14, 3.1629e-14, 1.4004e-13, 1.1022e-13, 3.0129e-14,\n 4.4939e-15, 3.7503e-16, 1.8507e-13, 2.2937e-14, 1.6985e-14, 1.5468e-13,\n 1.2741e-14, 3.9195e-15, 5.0824e-13, 8.1502e-15, 1.6029e-14, 5.5411e-16,\n 5.5939e-18, 1.6915e-14, 5.4354e-14, 1.4319e-14, 4.3217e-13, 9.2942e-15,\n 1.6365e-13, 3.7014e-13, 3.4220e-14, 8.5886e-14, 2.8331e-14, 1.1168e-14,\n 4.8513e-15, 8.5820e-14, 5.1471e-14, 2.9514e-14, 4.8577e-14, 6.7747e-15,\n 2.4739e-14, 2.6902e-14, 1.7157e-14, 3.6794e-15, 2.3383e-13, 2.9655e-15,\n 4.5422e-15, 1.9172e-13, 1.0655e-14, 4.6680e-14, 1.7829e-14, 1.3956e-14,\n 6.9957e-13, 3.3827e-14, 2.4320e-13, 1.5859e-14, 3.3406e-15, 9.4735e-13,\n 1.2084e-13, 4.3451e-15, 3.1802e-15, 1.2333e-13, 2.7477e-17, 2.1066e-14,\n 2.4225e-13, 1.9419e-13, 1.1859e-15, 1.6810e-14, 9.0938e-14, 1.0578e-14,\n 1.7743e-13, 9.4200e-14, 2.2895e-14, 1.7957e-14, 6.3687e-14, 5.3854e-16,\n 5.2731e-15, 8.6289e-15, 2.8609e-15, 1.9207e-13, 1.1583e-14, 9.3235e-15,\n 2.3353e-14, 2.3738e-14, 3.7598e-14, 3.1079e-14, 1.4864e-14, 1.6580e-13,\n 3.0328e-15, 6.1869e-13, 1.2945e-16, 6.7481e-15, 4.4237e-15, 1.5483e-14,\n 4.2658e-16, 2.0803e-14, 1.5864e-13, 8.8079e-14, 3.1773e-15, 2.1496e-14,\n 1.8682e-16, 6.0509e-15, 1.8126e-15, 4.7004e-14, 2.4569e-14, 3.2843e-14,\n 2.0611e-14, 4.0313e-15, 1.3897e-13, 8.5074e-14, 5.7939e-15, 4.8320e-16,\n 5.1805e-14, 1.0901e-13, 1.9650e-16, 1.6078e-14, 2.2631e-14, 1.3911e-13,\n 5.3020e-15, 1.3053e-15, 1.9337e-14, 4.6911e-15, 1.7469e-14, 6.7559e-16,\n 3.2305e-14, 1.3317e-14, 1.1671e-13, 1.4050e-14, 5.1137e-15, 3.6912e-14,\n 4.0381e-13, 1.8618e-14, 3.6542e-14, 2.2486e-15, 2.4866e-13, 6.7341e-16,\n 6.4144e-14, 8.9415e-17, 9.3525e-14, 3.2633e-14, 8.0106e-15, 5.1953e-14,\n 4.9664e-13, 1.4955e-14, 2.8547e-14, 1.4560e-13, 9.2116e-13, 1.2084e-14,\n 1.2387e-14, 5.3413e-15, 8.8104e-13, 1.0118e-15, 4.4214e-15, 1.2301e-12,\n 1.9712e-15, 1.4220e-13, 4.2956e-13, 5.6348e-15, 1.1362e-14, 2.2881e-13,\n 2.1137e-14, 9.1539e-15, 2.2743e-13, 5.0713e-14, 2.4989e-15, 1.2352e-14,\n 6.5928e-14, 9.7265e-14, 1.1918e-14, 1.2366e-14, 6.7719e-15, 6.4106e-15,\n 3.1703e-14, 3.7602e-14, 6.8735e-15, 5.7535e-13, 5.3138e-14, 2.8093e-15,\n 4.1969e-14, 2.5296e-13, 1.9781e-13, 2.5597e-13, 6.2793e-15, 2.2621e-14,\n 4.6757e-15, 7.2811e-14, 1.2014e-15, 7.9701e-16], device='cuda:0')" + "exp_avg_sq": "tensor([7.5161e-18, 3.2417e-16, 6.7381e-14, 7.1437e-16, 6.4176e-15, 5.4490e-14,\n 3.1147e-14, 5.5932e-15, 1.1664e-14, 4.2085e-15, 6.7246e-14, 1.6491e-14,\n 7.5282e-15, 1.1251e-14, 1.1000e-16, 6.2764e-15, 7.7828e-15, 4.3851e-15,\n 1.3557e-15, 1.4576e-15, 2.1276e-14, 3.5980e-16, 1.7946e-15, 4.9664e-15,\n 1.1145e-14, 6.3362e-15, 6.2196e-15, 4.3180e-14, 1.1520e-14, 2.1722e-14,\n 3.2215e-14, 6.2595e-14, 2.1653e-13, 5.7028e-16, 2.6025e-14, 1.8488e-16,\n 2.7411e-14, 1.6935e-15, 1.5950e-13, 2.0093e-15, 8.3066e-15, 5.4595e-16,\n 1.4960e-15, 4.9496e-14, 1.3414e-18, 1.9241e-15, 1.6870e-14, 1.0598e-14,\n 4.8612e-15, 1.5498e-14, 3.3978e-14, 1.0972e-14, 3.3345e-14, 1.9363e-15,\n 4.1192e-15, 9.9610e-14, 3.8548e-16, 1.8762e-14, 4.9160e-14, 5.4923e-14,\n 5.8179e-15, 7.0227e-15, 3.1932e-16, 6.2177e-14, 2.4993e-15, 1.2984e-13,\n 1.5160e-16, 1.4347e-14, 1.6139e-14, 7.5905e-15, 3.1037e-14, 4.9519e-14,\n 1.3862e-14, 2.0974e-14, 9.0383e-15, 4.0017e-14, 3.1495e-14, 8.6095e-15,\n 1.2842e-15, 1.0717e-16, 5.2886e-14, 6.5543e-15, 4.8536e-15, 4.4202e-14,\n 3.6407e-15, 1.1200e-15, 1.4523e-13, 2.3290e-15, 4.5804e-15, 1.5834e-16,\n 1.5985e-18, 4.8335e-15, 1.5532e-14, 4.0917e-15, 1.2350e-13, 2.6559e-15,\n 4.6763e-14, 1.0577e-13, 9.7787e-15, 2.4542e-14, 8.0957e-15, 3.1915e-15,\n 1.3863e-15, 2.4524e-14, 1.4708e-14, 8.4338e-15, 1.3881e-14, 1.9359e-15,\n 7.0694e-15, 7.6874e-15, 4.9027e-15, 1.0514e-15, 6.6820e-14, 8.4742e-16,\n 1.2980e-15, 5.4787e-14, 3.0449e-15, 1.3339e-14, 5.0948e-15, 3.9880e-15,\n 1.9991e-13, 9.6664e-15, 6.9496e-14, 4.5320e-15, 9.5460e-16, 2.7071e-13,\n 3.4532e-14, 1.2416e-15, 9.0878e-16, 3.5242e-14, 7.8519e-18, 6.0198e-15,\n 6.9224e-14, 5.5490e-14, 3.3889e-16, 4.8035e-15, 2.5986e-14, 3.0227e-15,\n 5.0702e-14, 2.6918e-14, 6.5423e-15, 5.1314e-15, 1.8199e-14, 1.5389e-16,\n 1.5068e-15, 2.4658e-15, 8.1754e-16, 5.4884e-14, 3.3098e-15, 2.6643e-15,\n 6.6734e-15, 6.7834e-15, 1.0744e-14, 8.8811e-15, 4.2474e-15, 4.7379e-14,\n 8.6664e-16, 1.7680e-13, 3.6993e-17, 1.9283e-15, 1.2641e-15, 4.4244e-15,\n 1.2190e-16, 5.9446e-15, 4.5334e-14, 2.5169e-14, 9.0794e-16, 6.1427e-15,\n 5.3386e-17, 1.7291e-15, 5.1797e-16, 1.3432e-14, 7.0209e-15, 9.3851e-15,\n 5.8898e-15, 1.1520e-15, 3.9713e-14, 2.4310e-14, 1.6556e-15, 1.3808e-16,\n 1.4804e-14, 3.1149e-14, 5.6152e-17, 4.5944e-15, 6.4669e-15, 3.9752e-14,\n 1.5151e-15, 3.7299e-16, 5.5258e-15, 1.3405e-15, 4.9918e-15, 1.9306e-16,\n 9.2315e-15, 3.8055e-15, 3.3350e-14, 4.0148e-15, 1.4613e-15, 1.0548e-14,\n 1.1539e-13, 5.3202e-15, 1.0442e-14, 6.4257e-16, 7.1057e-14, 1.9243e-16,\n 1.8330e-14, 2.5551e-17, 2.6726e-14, 9.3251e-15, 2.2891e-15, 1.4846e-14,\n 1.4192e-13, 4.2735e-15, 8.1576e-15, 4.1608e-14, 2.6323e-13, 3.4532e-15,\n 3.5396e-15, 1.5263e-15, 2.5176e-13, 2.8912e-16, 1.2635e-15, 3.5150e-13,\n 5.6330e-16, 4.0634e-14, 1.2275e-13, 1.6102e-15, 3.2467e-15, 6.5383e-14,\n 6.0400e-15, 2.6158e-15, 6.4990e-14, 1.4492e-14, 7.1409e-16, 3.5298e-15,\n 1.8839e-14, 2.7794e-14, 3.4056e-15, 3.5335e-15, 1.9351e-15, 1.8319e-15,\n 9.0593e-15, 1.0745e-14, 1.9642e-15, 1.6441e-13, 1.5184e-14, 8.0277e-16,\n 1.1993e-14, 7.2284e-14, 5.6527e-14, 7.3144e-14, 1.7943e-15, 6.4641e-15,\n 1.3361e-15, 2.0806e-14, 3.4331e-16, 2.2775e-16], device='cuda:0')" }, "51": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.6728e-16, 1.3267e-15, 4.2368e-13, 1.4128e-15, 6.9014e-14, 2.1286e-13,\n 1.0710e-13, 2.7996e-14, 1.3444e-14, 2.6008e-14, 3.0570e-13, 7.9338e-14,\n 1.6602e-14, 6.2938e-14, 4.8848e-16, 1.9181e-14, 4.7340e-14, 2.2335e-14,\n 4.7977e-15, 8.7140e-15, 1.1273e-13, 2.3613e-15, 1.2680e-14, 3.7260e-14,\n 5.9498e-14, 3.0697e-14, 4.6577e-14, 9.2350e-14, 3.1504e-14, 1.6421e-13,\n 2.2350e-13, 2.5832e-13, 1.4505e-12, 4.2489e-15, 6.8281e-14, 1.4720e-15,\n 1.2272e-13, 6.6986e-15, 4.8355e-13, 1.5371e-14, 7.5701e-14, 2.3428e-15,\n 8.5877e-15, 3.1224e-13, 1.4500e-16, 1.4226e-14, 6.7321e-14, 5.0562e-14,\n 1.1904e-14, 9.1105e-14, 1.6458e-13, 4.6194e-14, 1.7069e-13, 1.4381e-14,\n 1.8504e-14, 3.7759e-13, 2.3864e-15, 5.8427e-14, 3.0714e-13, 3.8801e-13,\n 5.0251e-14, 4.1785e-14, 2.1486e-15, 3.0952e-13, 1.6807e-14, 4.0028e-13,\n 6.4336e-16, 1.0185e-13, 6.2047e-14, 6.0135e-14, 1.2814e-13, 2.0047e-13,\n 4.7458e-14, 1.4758e-13, 7.0588e-14, 2.3595e-13, 1.4355e-13, 5.8674e-14,\n 9.4510e-15, 3.7276e-16, 2.8981e-13, 2.1948e-14, 1.8622e-14, 1.1858e-13,\n 2.4354e-14, 5.0420e-15, 6.3350e-13, 2.0008e-14, 2.7606e-14, 4.2276e-16,\n 3.6940e-16, 2.4706e-14, 8.9799e-14, 2.1523e-14, 5.3444e-13, 1.5711e-14,\n 3.1801e-13, 4.3638e-13, 5.7833e-14, 4.8449e-14, 4.3393e-14, 1.9817e-14,\n 8.9084e-15, 1.3451e-13, 8.1754e-14, 5.3135e-14, 8.1051e-14, 8.6869e-15,\n 4.7254e-14, 6.0224e-14, 3.1700e-14, 7.5846e-15, 3.4442e-13, 7.9467e-15,\n 1.2477e-14, 3.6478e-13, 1.4380e-14, 7.1521e-14, 1.9173e-14, 2.4730e-14,\n 7.6909e-13, 6.8276e-14, 2.7315e-13, 1.8826e-14, 4.3866e-15, 8.4851e-13,\n 1.0661e-13, 7.0692e-15, 4.1300e-15, 1.3217e-13, 9.1757e-17, 3.0243e-14,\n 4.3530e-13, 2.7507e-13, 1.7006e-15, 3.0162e-14, 7.9236e-14, 2.2011e-14,\n 2.5091e-13, 1.8607e-13, 3.2982e-14, 3.1312e-14, 1.2522e-13, 2.2525e-15,\n 1.1641e-14, 1.5889e-14, 9.1308e-15, 1.5866e-13, 1.0258e-14, 1.6240e-14,\n 2.3041e-14, 2.3796e-14, 6.3202e-14, 4.3409e-14, 2.1859e-14, 2.2056e-13,\n 5.3196e-15, 6.9151e-13, 2.6584e-16, 1.5335e-14, 6.5501e-15, 2.1066e-14,\n 9.8100e-16, 3.8479e-14, 2.0050e-13, 1.2807e-13, 8.0457e-15, 4.4121e-14,\n 6.6917e-16, 8.2379e-15, 2.7695e-15, 6.1441e-14, 4.6844e-14, 6.1570e-14,\n 3.2168e-14, 8.4792e-15, 2.3640e-13, 1.4624e-13, 7.2384e-15, 6.5877e-16,\n 4.0461e-14, 1.1302e-13, 3.8491e-16, 2.3985e-14, 1.9595e-14, 3.1679e-13,\n 6.0872e-15, 2.4301e-15, 1.9747e-14, 7.9582e-15, 3.7414e-14, 2.0677e-15,\n 6.6560e-14, 2.6058e-14, 1.5478e-13, 2.4095e-14, 1.1519e-14, 6.6278e-14,\n 3.0847e-13, 3.7096e-14, 4.9756e-14, 4.1584e-15, 5.2393e-13, 1.6367e-15,\n 6.2322e-14, 1.2598e-16, 7.9871e-14, 7.1046e-14, 8.7083e-15, 5.3475e-14,\n 4.5124e-13, 2.0680e-14, 3.6285e-14, 3.9822e-13, 1.4686e-12, 2.2217e-14,\n 1.5944e-14, 1.3512e-14, 7.7866e-13, 1.5809e-15, 1.2741e-14, 1.4950e-12,\n 2.6920e-15, 1.1388e-13, 5.3409e-13, 7.7535e-15, 1.7055e-14, 2.4109e-13,\n 6.2652e-14, 1.8669e-14, 3.2002e-13, 6.7657e-14, 3.1489e-15, 2.6129e-14,\n 1.1444e-13, 1.0793e-13, 1.8014e-14, 2.3539e-14, 1.2133e-14, 7.6025e-15,\n 6.8895e-14, 5.3665e-14, 9.3061e-15, 6.6775e-13, 9.3352e-14, 3.9901e-15,\n 7.6725e-14, 3.0874e-13, 3.8458e-13, 3.4864e-13, 1.1050e-14, 3.6862e-14,\n 1.3267e-14, 1.4211e-13, 1.7284e-15, 5.9842e-16], device='cuda:0')" + "exp_avg_sq": "tensor([7.6379e-17, 3.7910e-16, 1.2107e-13, 4.0372e-16, 1.9721e-14, 6.0826e-14,\n 3.0604e-14, 8.0001e-15, 3.8417e-15, 7.4320e-15, 8.7357e-14, 2.2672e-14,\n 4.7440e-15, 1.7985e-14, 1.3959e-16, 5.4812e-15, 1.3528e-14, 6.3824e-15,\n 1.3710e-15, 2.4901e-15, 3.2214e-14, 6.7475e-16, 3.6235e-15, 1.0647e-14,\n 1.7002e-14, 8.7719e-15, 1.3310e-14, 2.6390e-14, 9.0026e-15, 4.6925e-14,\n 6.3866e-14, 7.3817e-14, 4.1449e-13, 1.2142e-15, 1.9512e-14, 4.2063e-16,\n 3.5068e-14, 1.9142e-15, 1.3818e-13, 4.3924e-15, 2.1632e-14, 6.6947e-16,\n 2.4540e-15, 8.9225e-14, 4.1435e-17, 4.0651e-15, 1.9237e-14, 1.4448e-14,\n 3.4017e-15, 2.6034e-14, 4.7031e-14, 1.3200e-14, 4.8777e-14, 4.1095e-15,\n 5.2877e-15, 1.0790e-13, 6.8194e-16, 1.6696e-14, 8.7767e-14, 1.1088e-13,\n 1.4360e-14, 1.1941e-14, 6.1398e-16, 8.8449e-14, 4.8028e-15, 1.1438e-13,\n 1.8384e-16, 2.9105e-14, 1.7730e-14, 1.7184e-14, 3.6616e-14, 5.7287e-14,\n 1.3561e-14, 4.2172e-14, 2.0171e-14, 6.7424e-14, 4.1020e-14, 1.6767e-14,\n 2.7007e-15, 1.0652e-16, 8.2815e-14, 6.2717e-15, 5.3213e-15, 3.3885e-14,\n 6.9593e-15, 1.4408e-15, 1.8103e-13, 5.7175e-15, 7.8887e-15, 1.2081e-16,\n 1.0556e-16, 7.0599e-15, 2.5661e-14, 6.1505e-15, 1.5272e-13, 4.4895e-15,\n 9.0875e-14, 1.2470e-13, 1.6526e-14, 1.3845e-14, 1.2400e-14, 5.6630e-15,\n 2.5456e-15, 3.8437e-14, 2.3362e-14, 1.5184e-14, 2.3161e-14, 2.4824e-15,\n 1.3503e-14, 1.7209e-14, 9.0586e-15, 2.1674e-15, 9.8422e-14, 2.2708e-15,\n 3.5653e-15, 1.0424e-13, 4.1090e-15, 2.0438e-14, 5.4788e-15, 7.0667e-15,\n 2.1977e-13, 1.9510e-14, 7.8055e-14, 5.3797e-15, 1.2535e-15, 2.4247e-13,\n 3.0464e-14, 2.0201e-15, 1.1802e-15, 3.7769e-14, 2.6220e-17, 8.6423e-15,\n 1.2439e-13, 7.8605e-14, 4.8596e-16, 8.6190e-15, 2.2642e-14, 6.2897e-15,\n 7.1699e-14, 5.3172e-14, 9.4249e-15, 8.9476e-15, 3.5783e-14, 6.4368e-16,\n 3.3266e-15, 4.5405e-15, 2.6092e-15, 4.5339e-14, 2.9314e-15, 4.6406e-15,\n 6.5842e-15, 6.7999e-15, 1.8060e-14, 1.2404e-14, 6.2464e-15, 6.3027e-14,\n 1.5201e-15, 1.9760e-13, 7.5966e-17, 4.3822e-15, 1.8717e-15, 6.0197e-15,\n 2.8033e-16, 1.0996e-14, 5.7294e-14, 3.6597e-14, 2.2991e-15, 1.2608e-14,\n 1.9122e-16, 2.3541e-15, 7.9142e-16, 1.7557e-14, 1.3386e-14, 1.7594e-14,\n 9.1923e-15, 2.4230e-15, 6.7554e-14, 4.1789e-14, 2.0684e-15, 1.8825e-16,\n 1.1562e-14, 3.2297e-14, 1.0999e-16, 6.8538e-15, 5.5994e-15, 9.0526e-14,\n 1.7395e-15, 6.9443e-16, 5.6429e-15, 2.2741e-15, 1.0691e-14, 5.9086e-16,\n 1.9020e-14, 7.4463e-15, 4.4230e-14, 6.8853e-15, 3.2918e-15, 1.8939e-14,\n 8.8149e-14, 1.0601e-14, 1.4218e-14, 1.1883e-15, 1.4972e-13, 4.6770e-16,\n 1.7809e-14, 3.5999e-17, 2.2824e-14, 2.0302e-14, 2.4885e-15, 1.5281e-14,\n 1.2895e-13, 5.9094e-15, 1.0369e-14, 1.1379e-13, 4.1966e-13, 6.3487e-15,\n 4.5560e-15, 3.8610e-15, 2.2251e-13, 4.5176e-16, 3.6409e-15, 4.2719e-13,\n 7.6927e-16, 3.2542e-14, 1.5262e-13, 2.2156e-15, 4.8735e-15, 6.8892e-14,\n 1.7903e-14, 5.3348e-15, 9.1449e-14, 1.9333e-14, 8.9982e-16, 7.4665e-15,\n 3.2703e-14, 3.0842e-14, 5.1475e-15, 6.7264e-15, 3.4670e-15, 2.1725e-15,\n 1.9687e-14, 1.5335e-14, 2.6593e-15, 1.9081e-13, 2.6676e-14, 1.1402e-15,\n 2.1925e-14, 8.8225e-14, 1.0990e-13, 9.9626e-14, 3.1576e-15, 1.0534e-14,\n 3.7910e-15, 4.0610e-14, 4.9390e-16, 1.7100e-16], device='cuda:0')" }, "52": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.6796e-17, 5.2398e-17, 9.7302e-17, ..., 6.3328e-18, 3.0023e-18,\n 4.7706e-16],\n [1.5262e-17, 4.5075e-17, 1.4951e-16, ..., 5.9056e-16, 3.5273e-17,\n 7.8200e-17],\n [9.3002e-17, 3.3438e-18, 4.9410e-17, ..., 2.8536e-16, 2.9473e-18,\n 4.1494e-18],\n ...,\n [1.2949e-13, 2.1075e-12, 2.8647e-12, ..., 4.9029e-12, 2.5026e-12,\n 1.2379e-12],\n [3.8577e-14, 6.3914e-13, 8.7279e-13, ..., 1.4105e-12, 7.9436e-13,\n 4.1194e-13],\n [1.5860e-14, 1.8640e-13, 2.7716e-13, ..., 4.5593e-13, 1.9690e-13,\n 9.7563e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.9088e-17, 1.4973e-17, 2.7805e-17, ..., 1.8096e-18, 8.5794e-19,\n 1.3632e-16],\n [4.3612e-18, 1.2880e-17, 4.2723e-17, ..., 1.6876e-16, 1.0080e-17,\n 2.2346e-17],\n [2.6576e-17, 9.5552e-19, 1.4119e-17, ..., 8.1545e-17, 8.4221e-19,\n 1.1857e-18],\n ...,\n [3.7001e-14, 6.0222e-13, 8.1862e-13, ..., 1.4011e-12, 7.1514e-13,\n 3.5374e-13],\n [1.1024e-14, 1.8264e-13, 2.4941e-13, ..., 4.0306e-13, 2.2700e-13,\n 1.1772e-13],\n [4.5320e-15, 5.3266e-14, 7.9200e-14, ..., 1.3029e-13, 5.6265e-14,\n 2.7879e-14]], device='cuda:0')" }, "53": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.2597e-16, 4.2109e-16, 1.2653e-15, 1.5372e-16, 6.2825e-16, 1.0028e-15,\n 2.8585e-16, 1.2204e-15, 2.9863e-15, 1.0093e-14, 3.6488e-16, 1.5152e-16,\n 4.2072e-15, 1.1494e-15, 7.7869e-15, 4.5974e-18, 7.8327e-16, 1.7587e-16,\n 2.0665e-15, 5.6995e-15, 4.8697e-16, 1.2717e-18, 2.1803e-17, 1.2118e-16,\n 7.3684e-16, 5.1167e-16, 1.1011e-15, 1.5411e-15, 1.4257e-16, 1.1718e-15,\n 3.2202e-15, 3.7333e-16, 2.2247e-15, 3.5209e-15, 7.9036e-15, 4.8582e-15,\n 1.2483e-15, 1.3282e-15, 2.4402e-15, 5.4775e-16, 9.7381e-17, 1.6922e-15,\n 1.7695e-15, 8.5446e-18, 1.0684e-14, 5.0824e-15, 4.8365e-17, 1.5227e-17,\n 5.7643e-16, 1.8181e-15, 1.9832e-15, 1.2300e-15, 1.4074e-15, 6.3935e-16,\n 5.8264e-15, 2.1764e-15, 3.2342e-16, 6.0277e-15, 4.6186e-17, 1.1485e-15,\n 1.7012e-15, 4.2874e-16, 3.5812e-17, 1.1082e-14, 2.7898e-16, 6.6558e-15,\n 4.1274e-17, 6.9684e-15, 1.2896e-15, 1.3103e-14, 1.5469e-14, 3.7490e-16,\n 3.7312e-15, 1.0303e-14, 7.3443e-15, 6.8290e-16, 1.8166e-14, 6.9559e-15,\n 2.5493e-14, 6.8662e-17, 7.2777e-16, 2.1828e-15, 8.6416e-16, 2.9162e-15,\n 9.2187e-15, 3.3930e-16, 6.8655e-16, 2.2242e-15, 2.6523e-14, 6.3680e-16,\n 5.0295e-15, 3.5796e-17, 2.0871e-14, 2.0065e-14, 3.4418e-15, 2.9017e-15,\n 2.2936e-17, 3.7125e-15, 3.8133e-15, 2.9013e-15, 8.4579e-15, 2.0643e-17,\n 1.0950e-16, 4.3198e-17, 2.2292e-17, 3.5291e-16, 8.9802e-17, 5.1359e-16,\n 2.0871e-15, 6.6201e-15, 1.0165e-15, 5.3083e-16, 3.0435e-16, 1.6746e-15,\n 4.6078e-16, 6.4114e-15, 2.3938e-15, 4.6721e-15, 7.2932e-15, 2.4646e-15,\n 9.8931e-17, 2.3388e-17, 4.6568e-15, 6.2895e-16, 4.3109e-16, 2.4825e-16,\n 8.0838e-15, 1.3041e-15, 2.0059e-16, 7.4042e-15, 3.1594e-16, 8.9381e-16,\n 1.2655e-15, 4.9023e-15, 9.2733e-15, 8.5501e-17, 3.5534e-16, 7.0569e-16,\n 5.0630e-16, 1.3286e-17, 2.0955e-15, 5.8282e-16, 1.7049e-16, 7.6617e-18,\n 1.6893e-15, 2.6852e-15, 4.0044e-15, 1.7555e-15, 3.1126e-16, 1.2782e-15,\n 2.9970e-17, 4.2495e-15, 4.4156e-17, 1.2809e-16, 6.3796e-15, 3.1471e-15,\n 1.0258e-17, 6.3296e-18, 4.2285e-16, 2.8304e-15, 1.2612e-14, 7.8232e-16,\n 7.5777e-16, 4.0088e-16, 1.1099e-15, 1.3091e-15, 1.0694e-15, 2.0903e-15,\n 2.4232e-17, 2.6175e-16, 2.9801e-15, 6.0017e-16, 7.5655e-16, 4.4263e-15,\n 2.7816e-18, 3.1990e-15, 4.1921e-15, 3.8146e-15, 3.2376e-17, 2.1241e-16,\n 2.4300e-16, 1.5443e-15, 3.9548e-18, 1.0005e-16, 7.2093e-16, 8.4204e-15,\n 8.0460e-16, 1.3142e-16, 3.5377e-15, 4.0108e-16, 3.4569e-16, 2.9008e-15,\n 2.0428e-15, 2.4330e-15, 1.3226e-15, 3.7615e-15, 4.5401e-18, 2.7374e-16,\n 2.0414e-15, 2.3218e-15, 3.8601e-17, 5.9811e-15, 2.8574e-16, 3.4571e-15,\n 1.8774e-16, 1.4722e-16, 5.7535e-17, 1.1324e-15, 7.2859e-15, 2.3373e-16,\n 1.1905e-15, 1.9232e-15, 4.6074e-15, 4.8972e-16, 2.5455e-17, 5.4857e-16,\n 3.2000e-16, 3.1069e-16, 1.5539e-16, 3.4717e-16, 8.8548e-17, 2.4082e-14,\n 6.3940e-15, 3.9745e-15, 1.2002e-17, 1.3190e-16, 1.3164e-15, 4.2525e-15,\n 1.2611e-14, 4.0245e-15, 1.4191e-16, 7.7285e-16, 4.4714e-15, 3.9126e-17,\n 2.5615e-15, 1.9528e-15, 5.2825e-15, 1.1541e-15, 8.3323e-16, 1.7192e-16,\n 3.4437e-16, 1.5324e-16, 4.0715e-15, 1.0495e-15, 1.0104e-15, 7.1381e-15,\n 1.2583e-15, 1.2048e-17, 2.7527e-15, 2.7922e-15, 1.1640e-14, 2.3880e-15,\n 3.4271e-15, 1.8742e-15, 2.9386e-18, 1.4844e-15, 8.0411e-30, 1.9692e-31,\n 1.2526e-30, 2.2786e-31, 1.2869e-30, 3.8313e-31, 2.0687e-30, 4.9874e-32,\n 1.1298e-30, 1.7570e-30, 1.6442e-30, 4.2236e-32, 1.0675e-33, 8.9740e-32,\n 5.0470e-31, 3.7718e-32, 8.7634e-31, 6.7127e-32, 7.9925e-31, 1.9232e-31,\n 1.2358e-32, 6.1981e-31, 3.5055e-32, 7.2230e-31, 1.0226e-32, 6.1808e-31,\n 1.3655e-30, 5.9054e-32, 3.5643e-30, 1.2157e-30, 1.9776e-31, 2.8487e-31,\n 3.6486e-31, 2.1834e-30, 2.0381e-31, 1.6799e-31, 6.8289e-32, 3.7009e-31,\n 9.9686e-31, 1.9710e-31, 3.4956e-32, 4.0531e-31, 8.1515e-32, 1.8027e-31,\n 1.7038e-31, 1.5366e-30, 6.2895e-31, 2.1071e-31, 2.2590e-31, 1.4028e-31,\n 2.8083e-31, 9.3053e-31, 9.3856e-31, 1.1763e-30, 4.7908e-31, 1.1017e-31,\n 9.7950e-31, 1.6860e-30, 2.3108e-30, 1.0086e-30, 8.1463e-31, 5.6461e-31,\n 3.8844e-32, 3.4961e-31, 2.6345e-33, 1.3645e-30, 3.6130e-30, 1.1263e-30,\n 3.0429e-32, 1.8083e-31, 1.0995e-30, 3.6766e-31, 5.9628e-31, 3.5864e-32,\n 3.0715e-31, 1.6612e-31, 5.8440e-31, 1.6463e-30, 5.6714e-30, 2.0776e-30,\n 2.2506e-31, 1.7050e-30, 2.0653e-31, 2.7558e-30, 3.3716e-31, 3.1118e-31,\n 2.7703e-30, 9.6318e-31, 4.9910e-31, 6.3863e-32, 1.1155e-31, 6.0723e-31,\n 6.3689e-31, 1.3402e-31, 5.1781e-31, 3.6821e-32, 4.8886e-32, 1.6003e-30,\n 8.4067e-32, 4.8173e-31, 7.2130e-31, 5.0314e-32, 4.3952e-31, 4.1532e-31,\n 1.6670e-31, 2.2873e-31, 1.4274e-31, 2.3828e-30, 9.6787e-31, 1.7234e-31,\n 1.3181e-31, 7.0100e-31, 7.5908e-31, 1.4256e-30, 6.0041e-31, 2.8095e-30,\n 7.7156e-31, 1.0453e-30, 2.4306e-30, 2.9844e-30, 1.8058e-30, 9.5118e-31,\n 1.1222e-30, 5.4953e-31, 1.2583e-31, 1.1272e-31, 6.2595e-30, 7.3537e-33,\n 1.1890e-30, 7.0788e-32, 2.7810e-31, 3.3895e-31, 8.7265e-31, 2.8198e-31,\n 7.3955e-32, 8.3054e-35, 1.8991e-32, 1.1171e-31, 2.5074e-31, 6.5162e-31,\n 6.1969e-32, 1.4722e-30, 5.6402e-30, 6.3670e-31, 1.8217e-30, 2.4274e-31,\n 9.9601e-31, 2.7472e-30, 1.2658e-30, 8.4831e-32, 9.1350e-32, 3.6996e-30,\n 6.3699e-31, 7.1612e-31, 3.3213e-32, 4.5566e-31, 1.5775e-30, 5.1653e-31,\n 2.7066e-33, 7.1242e-31, 2.0968e-31, 2.0410e-31, 1.1585e-30, 1.0709e-31,\n 7.4863e-32, 6.4979e-31, 4.2971e-31, 2.7516e-32, 2.5299e-31, 2.9455e-30,\n 1.7396e-31, 6.0450e-30, 3.0756e-30, 4.3338e-32, 1.0021e-30, 3.1776e-30,\n 3.1443e-30, 1.1886e-31, 1.4915e-31, 2.6053e-31, 1.8289e-31, 7.8673e-31,\n 1.5583e-32, 7.9335e-31, 4.0585e-31, 1.0209e-31, 3.7043e-31, 6.8519e-33,\n 1.2828e-30, 7.8560e-33, 1.5778e-30, 8.8071e-31, 9.7160e-31, 2.2071e-30,\n 2.0119e-30, 7.2926e-32, 4.1109e-30, 3.7340e-31, 9.5154e-31, 2.0548e-31,\n 6.0790e-33, 3.4450e-31, 4.7406e-31, 7.9076e-31, 9.1306e-32, 2.8340e-31,\n 1.8628e-32, 4.2987e-31, 6.4656e-31, 2.8322e-32, 1.2677e-30, 2.4073e-32,\n 7.2690e-31, 2.7098e-31, 1.3868e-30, 6.3471e-31, 3.6152e-32, 2.8721e-31,\n 6.8577e-31, 1.8706e-31, 2.0742e-31, 3.7792e-31, 7.3949e-31, 4.3413e-31,\n 5.7197e-31, 1.6544e-32, 7.4120e-31, 7.9697e-31, 2.1288e-30, 2.1611e-30,\n 5.3742e-32, 3.9560e-31, 5.6430e-32, 3.7722e-31, 8.0738e-33, 1.0405e-31,\n 1.2488e-31, 6.6635e-31, 2.2928e-32, 3.3794e-32, 1.4854e-31, 1.6601e-30,\n 1.1852e-31, 3.4043e-31, 9.0570e-32, 1.1344e-30, 2.3404e-30, 2.4367e-34,\n 2.2276e-30, 1.5678e-30, 2.1453e-30, 1.0097e-32, 3.5310e-31, 2.4197e-30,\n 1.0660e-31, 4.7748e-32, 3.0716e-12, 4.6394e-12, 4.8811e-14, 3.8984e-11,\n 1.0607e-11, 2.6609e-12, 6.7076e-12, 1.8898e-11, 3.7614e-13, 4.6980e-12,\n 3.1073e-11, 7.3736e-14, 2.1849e-12, 7.9176e-12, 3.9297e-11, 8.9006e-13,\n 1.1634e-11, 3.9087e-11, 2.6855e-12, 4.4969e-12, 1.2518e-11, 2.2143e-14,\n 1.6463e-11, 5.0294e-11, 3.1701e-12, 5.4815e-11, 1.1597e-12, 8.7675e-12,\n 7.2078e-12, 3.3022e-11, 7.0499e-12, 7.6476e-14, 1.2435e-12, 8.0936e-14,\n 1.5143e-11, 2.0997e-11, 2.1037e-11, 3.2304e-12, 6.1525e-13, 2.0568e-11,\n 3.9008e-11, 1.9593e-11, 2.3189e-11, 1.8095e-11, 7.8600e-12, 5.1518e-14,\n 1.4085e-11, 4.0110e-12, 1.9227e-11, 1.3702e-11, 7.6687e-11, 1.4915e-11,\n 2.5417e-12, 1.1815e-13, 6.5632e-12, 6.5462e-12, 4.2031e-11, 3.5870e-12,\n 1.0054e-11, 1.0158e-11, 1.0416e-11, 3.9982e-12, 3.8461e-12, 2.8253e-12,\n 1.0923e-12, 8.8003e-14, 2.2850e-11, 8.5211e-12, 1.1587e-11, 1.5029e-11,\n 1.0865e-12, 3.5247e-13, 2.1465e-13, 5.0975e-11, 1.6502e-12, 6.9613e-11,\n 1.3336e-11, 3.8653e-11, 1.7735e-11, 1.7947e-12, 1.1239e-11, 1.3121e-10,\n 4.5907e-12, 2.9006e-11, 1.0538e-12, 3.9425e-11, 1.4246e-11, 9.0287e-13,\n 2.5291e-13, 1.5779e-11, 2.2952e-11, 1.0705e-11, 7.7481e-14, 2.1976e-11,\n 5.6277e-11, 2.7735e-12, 1.1851e-12, 2.3001e-11, 2.1655e-12, 2.1770e-12,\n 2.9144e-12, 1.3697e-11, 3.6672e-13, 1.7978e-12, 4.8577e-12, 4.8504e-11,\n 2.9352e-11, 4.1788e-12, 4.4466e-13, 5.2116e-12, 2.3495e-12, 7.8377e-12,\n 1.4646e-11, 8.1144e-13, 1.1246e-11, 1.5651e-11, 4.1390e-12, 1.9551e-12,\n 2.7628e-11, 5.4158e-13, 1.9279e-12, 1.0598e-10, 1.9388e-11, 6.6946e-11,\n 2.4155e-11, 8.0112e-12, 5.9566e-13, 9.7976e-13, 1.2105e-11, 1.3570e-11,\n 7.5715e-12, 1.0705e-11, 2.6795e-12, 4.5333e-14, 1.1585e-11, 4.3333e-12,\n 6.2224e-12, 2.8694e-11, 3.2380e-12, 6.3300e-13, 5.1218e-15, 2.3729e-11,\n 1.0271e-11, 2.3937e-13, 7.0551e-12, 5.7029e-12, 1.4880e-12, 5.0744e-11,\n 2.1670e-12, 3.9158e-14, 6.7002e-12, 7.5794e-11, 3.9362e-12, 3.2144e-12,\n 3.0058e-11, 9.0156e-13, 3.4163e-13, 5.6800e-12, 4.9415e-15, 9.6318e-12,\n 2.4245e-11, 3.4220e-11, 9.8933e-13, 6.4697e-12, 9.0169e-12, 1.6220e-11,\n 7.6983e-12, 3.7368e-12, 6.2105e-13, 4.0570e-11, 2.5016e-12, 5.2342e-11,\n 3.3112e-11, 2.4983e-12, 8.4342e-12, 2.1906e-12, 1.3521e-13, 2.7751e-12,\n 2.2631e-11, 8.7291e-14, 1.5740e-11, 4.4978e-11, 1.1879e-13, 1.0698e-11,\n 2.9721e-11, 2.7962e-11, 7.5593e-12, 4.3687e-12, 3.4125e-13, 2.8763e-11,\n 5.2437e-13, 9.7383e-12, 4.4139e-11, 4.5687e-11, 7.9525e-12, 1.3395e-11,\n 2.3896e-11, 5.7763e-11, 5.5778e-13, 2.5621e-11, 3.1838e-12, 1.6045e-12,\n 5.9489e-11, 4.0811e-12, 1.8853e-12, 7.2780e-12, 2.3593e-12, 4.2804e-12,\n 6.6514e-12, 2.1805e-12, 1.2561e-11, 9.4222e-12, 3.0238e-11, 2.3445e-12,\n 3.4974e-13, 9.5159e-11, 2.9955e-12, 5.6344e-11, 1.1431e-13, 6.1601e-12,\n 4.8204e-12, 2.1412e-11, 2.4446e-11, 1.4433e-11, 3.1247e-11, 1.1071e-12,\n 1.9813e-11, 2.6254e-12, 1.2210e-10, 7.9042e-13, 2.8834e-12, 1.2003e-11,\n 1.2266e-11, 7.1675e-12, 2.5054e-11, 9.0659e-12, 2.5424e-12, 2.2562e-12,\n 1.5304e-11, 9.1688e-14, 2.2292e-11, 8.4136e-13, 2.0682e-15, 4.1729e-12,\n 2.1225e-11, 2.8024e-13, 5.6087e-12, 5.4665e-13, 7.3834e-13, 4.9563e-12,\n 1.7743e-11, 1.2815e-11, 9.6436e-12, 4.1857e-11, 1.2649e-11, 3.6906e-12],\n device='cuda:0')" + "exp_avg_sq": "tensor([1.5030e-16, 1.2033e-16, 3.6158e-16, 4.3927e-17, 1.7953e-16, 2.8655e-16,\n 8.1683e-17, 3.4874e-16, 8.5335e-16, 2.8842e-15, 1.0427e-16, 4.3297e-17,\n 1.2022e-15, 3.2846e-16, 2.2252e-15, 1.3137e-18, 2.2382e-16, 5.0257e-17,\n 5.9053e-16, 1.6287e-15, 1.3915e-16, 3.6341e-19, 6.2304e-18, 3.4628e-17,\n 2.1056e-16, 1.4621e-16, 3.1464e-16, 4.4039e-16, 4.0741e-17, 3.3485e-16,\n 9.2019e-16, 1.0668e-16, 6.3573e-16, 1.0061e-15, 2.2585e-15, 1.3883e-15,\n 3.5670e-16, 3.7955e-16, 6.9729e-16, 1.5652e-16, 2.7827e-17, 4.8357e-16,\n 5.0564e-16, 2.4417e-18, 3.0529e-15, 1.4523e-15, 1.3821e-17, 4.3511e-18,\n 1.6472e-16, 5.1954e-16, 5.6672e-16, 3.5148e-16, 4.0218e-16, 1.8270e-16,\n 1.6649e-15, 6.2191e-16, 9.2419e-17, 1.7225e-15, 1.3198e-17, 3.2820e-16,\n 4.8614e-16, 1.2252e-16, 1.0234e-17, 3.1667e-15, 7.9721e-17, 1.9019e-15,\n 1.1794e-17, 1.9913e-15, 3.6852e-16, 3.7442e-15, 4.4203e-15, 1.0713e-16,\n 1.0662e-15, 2.9442e-15, 2.0987e-15, 1.9514e-16, 5.1911e-15, 1.9877e-15,\n 7.2849e-15, 1.9621e-17, 2.0797e-16, 6.2376e-16, 2.4694e-16, 8.3332e-16,\n 2.6343e-15, 9.6957e-17, 1.9619e-16, 6.3558e-16, 7.5790e-15, 1.8197e-16,\n 1.4372e-15, 1.0229e-17, 5.9640e-15, 5.7337e-15, 9.8354e-16, 8.2919e-16,\n 6.5541e-18, 1.0609e-15, 1.0897e-15, 8.2907e-16, 2.4169e-15, 5.8989e-18,\n 3.1290e-17, 1.2344e-17, 6.3702e-18, 1.0085e-16, 2.5662e-17, 1.4676e-16,\n 5.9641e-16, 1.8918e-15, 2.9047e-16, 1.5169e-16, 8.6972e-17, 4.7852e-16,\n 1.3167e-16, 1.8321e-15, 6.8405e-16, 1.3351e-15, 2.0841e-15, 7.0428e-16,\n 2.8270e-17, 6.6832e-18, 1.3307e-15, 1.7973e-16, 1.2319e-16, 7.0940e-17,\n 2.3100e-15, 3.7267e-16, 5.7320e-17, 2.1158e-15, 9.0282e-17, 2.5541e-16,\n 3.6162e-16, 1.4009e-15, 2.6499e-15, 2.4433e-17, 1.0154e-16, 2.0166e-16,\n 1.4468e-16, 3.7966e-18, 5.9881e-16, 1.6655e-16, 4.8718e-17, 2.1894e-18,\n 4.8273e-16, 7.6731e-16, 1.1443e-15, 5.0164e-16, 8.8946e-17, 3.6526e-16,\n 8.5643e-18, 1.2143e-15, 1.2618e-17, 3.6602e-17, 1.8230e-15, 8.9932e-16,\n 2.9312e-18, 1.8087e-18, 1.2083e-16, 8.0881e-16, 3.6040e-15, 2.2355e-16,\n 2.1654e-16, 1.1455e-16, 3.1717e-16, 3.7409e-16, 3.0559e-16, 5.9733e-16,\n 6.9244e-18, 7.4796e-17, 8.5160e-16, 1.7150e-16, 2.1619e-16, 1.2649e-15,\n 7.9487e-19, 9.1414e-16, 1.1979e-15, 1.0900e-15, 9.2516e-18, 6.0697e-17,\n 6.9439e-17, 4.4130e-16, 1.1301e-18, 2.8590e-17, 2.0601e-16, 2.4062e-15,\n 2.2992e-16, 3.7554e-17, 1.0109e-15, 1.1461e-16, 9.8784e-17, 8.2893e-16,\n 5.8375e-16, 6.9525e-16, 3.7795e-16, 1.0749e-15, 1.2974e-18, 7.8222e-17,\n 5.8334e-16, 6.6346e-16, 1.1030e-17, 1.7091e-15, 8.1652e-17, 9.8791e-16,\n 5.3649e-17, 4.2069e-17, 1.6441e-17, 3.2359e-16, 2.0820e-15, 6.6790e-17,\n 3.4020e-16, 5.4958e-16, 1.3166e-15, 1.3994e-16, 7.2739e-18, 1.5676e-16,\n 9.1442e-17, 8.8781e-17, 4.4403e-17, 9.9208e-17, 2.5303e-17, 6.8817e-15,\n 1.8271e-15, 1.1357e-15, 3.4298e-18, 3.7691e-17, 3.7616e-16, 1.2152e-15,\n 3.6036e-15, 1.1500e-15, 4.0552e-17, 2.2085e-16, 1.2777e-15, 1.1181e-17,\n 7.3198e-16, 5.5802e-16, 1.5095e-15, 3.2981e-16, 2.3810e-16, 4.9127e-17,\n 9.8406e-17, 4.3789e-17, 1.1635e-15, 2.9990e-16, 2.8872e-16, 2.0398e-15,\n 3.5957e-16, 3.4429e-18, 7.8661e-16, 7.9789e-16, 3.3263e-15, 6.8238e-16,\n 9.7932e-16, 5.3557e-16, 8.3972e-19, 4.2418e-16, 2.2978e-30, 5.6272e-32,\n 3.5795e-31, 6.5112e-32, 3.6775e-31, 1.0948e-31, 5.9114e-31, 1.4252e-32,\n 3.2284e-31, 5.0209e-31, 4.6983e-31, 1.2069e-32, 3.0504e-34, 2.5644e-32,\n 1.4422e-31, 1.0778e-32, 2.5042e-31, 1.9182e-32, 2.2839e-31, 5.4957e-32,\n 3.5313e-33, 1.7712e-31, 1.0017e-32, 2.0640e-31, 2.9222e-33, 1.7662e-31,\n 3.9021e-31, 1.6875e-32, 1.0185e-30, 3.4740e-31, 5.6510e-32, 8.1403e-32,\n 1.0426e-31, 6.2391e-31, 5.8241e-32, 4.8005e-32, 1.9514e-32, 1.0576e-31,\n 2.8486e-31, 5.6322e-32, 9.9889e-33, 1.1582e-31, 2.3294e-32, 5.1514e-32,\n 4.8687e-32, 4.3910e-31, 1.7973e-31, 6.0212e-32, 6.4553e-32, 4.0085e-32,\n 8.0251e-32, 2.6591e-31, 2.6820e-31, 3.3614e-31, 1.3690e-31, 3.1483e-32,\n 2.7990e-31, 4.8179e-31, 6.6033e-31, 2.8822e-31, 2.3279e-31, 1.6134e-31,\n 1.1100e-32, 9.9905e-32, 7.5284e-34, 3.8992e-31, 1.0324e-30, 3.2184e-31,\n 8.6953e-33, 5.1672e-32, 3.1419e-31, 1.0506e-31, 1.7039e-31, 1.0248e-32,\n 8.7771e-32, 4.7469e-32, 1.6700e-31, 4.7043e-31, 1.6206e-30, 5.9368e-31,\n 6.4312e-32, 4.8721e-31, 5.9019e-32, 7.8748e-31, 9.6346e-32, 8.8921e-32,\n 7.9163e-31, 2.7524e-31, 1.4262e-31, 1.8249e-32, 3.1875e-32, 1.7352e-31,\n 1.8200e-31, 3.8298e-32, 1.4797e-31, 1.0522e-32, 1.3969e-32, 4.5730e-31,\n 2.4023e-32, 1.3766e-31, 2.0612e-31, 1.4378e-32, 1.2560e-31, 1.1868e-31,\n 4.7636e-32, 6.5361e-32, 4.0790e-32, 6.8090e-31, 2.7658e-31, 4.9247e-32,\n 3.7666e-32, 2.0032e-31, 2.1691e-31, 4.0737e-31, 1.7157e-31, 8.0285e-31,\n 2.2048e-31, 2.9869e-31, 6.9457e-31, 8.5282e-31, 5.1603e-31, 2.7181e-31,\n 3.2067e-31, 1.5703e-31, 3.5956e-32, 3.2210e-32, 1.7887e-30, 2.1014e-33,\n 3.3977e-31, 2.0228e-32, 7.9469e-32, 9.6858e-32, 2.4937e-31, 8.0578e-32,\n 2.1133e-32, 2.3733e-35, 5.4267e-33, 3.1921e-32, 7.1651e-32, 1.8620e-31,\n 1.7708e-32, 4.2068e-31, 1.6117e-30, 1.8194e-31, 5.2056e-31, 6.9364e-32,\n 2.8462e-31, 7.8503e-31, 3.6172e-31, 2.4241e-32, 2.6104e-32, 1.0572e-30,\n 1.8203e-31, 2.0464e-31, 9.4908e-33, 1.3021e-31, 4.5077e-31, 1.4760e-31,\n 7.7344e-34, 2.0358e-31, 5.9918e-32, 5.8322e-32, 3.3106e-31, 3.0603e-32,\n 2.1393e-32, 1.8568e-31, 1.2279e-31, 7.8630e-33, 7.2293e-32, 8.4171e-31,\n 4.9711e-32, 1.7274e-30, 8.7889e-31, 1.2384e-32, 2.8634e-31, 9.0804e-31,\n 8.9851e-31, 3.3965e-32, 4.2621e-32, 7.4448e-32, 5.2262e-32, 2.2482e-31,\n 4.4529e-33, 2.2671e-31, 1.1597e-31, 2.9173e-32, 1.0585e-31, 1.9580e-33,\n 3.6657e-31, 2.2449e-33, 4.5086e-31, 2.5167e-31, 2.7764e-31, 6.3070e-31,\n 5.7491e-31, 2.0839e-32, 1.1747e-30, 1.0670e-31, 2.7191e-31, 5.8718e-32,\n 1.7371e-33, 9.8444e-32, 1.3547e-31, 2.2597e-31, 2.6092e-32, 8.0985e-32,\n 5.3230e-33, 1.2284e-31, 1.8476e-31, 8.0932e-33, 3.6226e-31, 6.8792e-33,\n 2.0772e-31, 7.7433e-32, 3.9628e-31, 1.8137e-31, 1.0331e-32, 8.2072e-32,\n 1.9596e-31, 5.3454e-32, 5.9271e-32, 1.0799e-31, 2.1131e-31, 1.2406e-31,\n 1.6344e-31, 4.7275e-33, 2.1180e-31, 2.2774e-31, 6.0832e-31, 6.1756e-31,\n 1.5357e-32, 1.1305e-31, 1.6125e-32, 1.0779e-31, 2.3072e-33, 2.9735e-32,\n 3.5686e-32, 1.9041e-31, 6.5518e-33, 9.6570e-33, 4.2447e-32, 4.7439e-31,\n 3.3868e-32, 9.7281e-32, 2.5881e-32, 3.2417e-31, 6.6878e-31, 6.9631e-35,\n 6.3654e-31, 4.4801e-31, 6.1304e-31, 2.8852e-33, 1.0090e-31, 6.9146e-31,\n 3.0461e-32, 1.3644e-32, 8.7774e-13, 1.3257e-12, 1.3948e-14, 1.1140e-11,\n 3.0310e-12, 7.6036e-13, 1.9167e-12, 5.4003e-12, 1.0748e-13, 1.3425e-12,\n 8.8795e-12, 2.1071e-14, 6.2435e-13, 2.2625e-12, 1.1229e-11, 2.5434e-13,\n 3.3244e-12, 1.1169e-11, 7.6740e-13, 1.2850e-12, 3.5772e-12, 6.3275e-15,\n 4.7045e-12, 1.4372e-11, 9.0587e-13, 1.5664e-11, 3.3139e-13, 2.5054e-12,\n 2.0597e-12, 9.4364e-12, 2.0146e-12, 2.1854e-14, 3.5533e-13, 2.3128e-14,\n 4.3272e-12, 6.0001e-12, 6.0114e-12, 9.2311e-13, 1.7581e-13, 5.8776e-12,\n 1.1147e-11, 5.5990e-12, 6.6264e-12, 5.1708e-12, 2.2461e-12, 1.4722e-14,\n 4.0249e-12, 1.1462e-12, 5.4942e-12, 3.9154e-12, 2.1914e-11, 4.2620e-12,\n 7.2631e-13, 3.3762e-14, 1.8755e-12, 1.8706e-12, 1.2011e-11, 1.0250e-12,\n 2.8729e-12, 2.9027e-12, 2.9765e-12, 1.1425e-12, 1.0990e-12, 8.0734e-13,\n 3.1215e-13, 2.5147e-14, 6.5295e-12, 2.4350e-12, 3.3112e-12, 4.2946e-12,\n 3.1049e-13, 1.0072e-13, 6.1339e-14, 1.4566e-11, 4.7155e-13, 1.9892e-11,\n 3.8108e-12, 1.1045e-11, 5.0680e-12, 5.1284e-13, 3.2115e-12, 3.7494e-11,\n 1.3118e-12, 8.2886e-12, 3.0112e-13, 1.1266e-11, 4.0708e-12, 2.5800e-13,\n 7.2271e-14, 4.5091e-12, 6.5587e-12, 3.0592e-12, 2.2141e-14, 6.2799e-12,\n 1.6082e-11, 7.9256e-13, 3.3866e-13, 6.5728e-12, 6.1881e-13, 6.2210e-13,\n 8.3281e-13, 3.9140e-12, 1.0479e-13, 5.1375e-13, 1.3881e-12, 1.3860e-11,\n 8.3876e-12, 1.1941e-12, 1.2706e-13, 1.4892e-12, 6.7140e-13, 2.2397e-12,\n 4.1851e-12, 2.3187e-13, 3.2135e-12, 4.4724e-12, 1.1827e-12, 5.5869e-13,\n 7.8950e-12, 1.5476e-13, 5.5090e-13, 3.0284e-11, 5.5404e-12, 1.9130e-11,\n 6.9026e-12, 2.2893e-12, 1.7021e-13, 2.7997e-13, 3.4591e-12, 3.8778e-12,\n 2.1636e-12, 3.0590e-12, 7.6568e-13, 1.2954e-14, 3.3105e-12, 1.2383e-12,\n 1.7781e-12, 8.1996e-12, 9.2527e-13, 1.8089e-13, 1.4636e-15, 6.7807e-12,\n 2.9350e-12, 6.8403e-14, 2.0160e-12, 1.6296e-12, 4.2521e-13, 1.4501e-11,\n 6.1924e-13, 1.1190e-14, 1.9146e-12, 2.1659e-11, 1.1248e-12, 9.1855e-13,\n 8.5893e-12, 2.5763e-13, 9.7622e-14, 1.6231e-12, 1.4121e-15, 2.7524e-12,\n 6.9283e-12, 9.7786e-12, 2.8271e-13, 1.8488e-12, 2.5766e-12, 4.6350e-12,\n 2.1998e-12, 1.0678e-12, 1.7747e-13, 1.1593e-11, 7.1484e-13, 1.4957e-11,\n 9.4621e-12, 7.1391e-13, 2.4101e-12, 6.2597e-13, 3.8636e-14, 7.9300e-13,\n 6.4669e-12, 2.4944e-14, 4.4979e-12, 1.2853e-11, 3.3945e-14, 3.0572e-12,\n 8.4930e-12, 7.9902e-12, 2.1601e-12, 1.2484e-12, 9.7515e-14, 8.2193e-12,\n 1.4984e-13, 2.7828e-12, 1.2613e-11, 1.3055e-11, 2.2725e-12, 3.8277e-12,\n 6.8286e-12, 1.6506e-11, 1.5939e-13, 7.3215e-12, 9.0980e-13, 4.5850e-13,\n 1.6999e-11, 1.1662e-12, 5.3875e-13, 2.0797e-12, 6.7420e-13, 1.2232e-12,\n 1.9007e-12, 6.2310e-13, 3.5893e-12, 2.6925e-12, 8.6406e-12, 6.6996e-13,\n 9.9942e-14, 2.7192e-11, 8.5598e-13, 1.6101e-11, 3.2666e-14, 1.7603e-12,\n 1.3775e-12, 6.1187e-12, 6.9856e-12, 4.1244e-12, 8.9290e-12, 3.1637e-13,\n 5.6616e-12, 7.5024e-13, 3.4891e-11, 2.2587e-13, 8.2396e-13, 3.4299e-12,\n 3.5050e-12, 2.0482e-12, 7.1594e-12, 2.5906e-12, 7.2651e-13, 6.4472e-13,\n 4.3733e-12, 2.6201e-14, 6.3702e-12, 2.4042e-13, 5.9099e-16, 1.1925e-12,\n 6.0651e-12, 8.0081e-14, 1.6027e-12, 1.5621e-13, 2.1099e-13, 1.4163e-12,\n 5.0702e-12, 3.6620e-12, 2.7557e-12, 1.1961e-11, 3.6146e-12, 1.0546e-12],\n device='cuda:0')" }, "54": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.4695e-13, 2.7755e-14, 1.9330e-14, ..., 6.4128e-14, 3.4038e-13,\n 4.4395e-13],\n [4.4647e-13, 2.0450e-14, 1.4627e-14, ..., 5.4158e-14, 1.9342e-13,\n 2.5328e-13],\n [8.1670e-12, 1.8517e-13, 1.6192e-13, ..., 2.6798e-13, 3.2652e-12,\n 4.1008e-12],\n ...,\n [2.5688e-12, 6.4320e-14, 4.6682e-14, ..., 1.1819e-13, 1.0271e-12,\n 1.3638e-12],\n [6.3718e-14, 4.8257e-15, 1.4933e-15, ..., 2.4320e-14, 2.5056e-14,\n 2.3390e-14],\n [1.4402e-13, 1.1667e-14, 9.0836e-15, ..., 3.9102e-14, 7.0840e-14,\n 7.4509e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.4202e-13, 7.9313e-15, 5.5237e-15, ..., 1.8325e-14, 9.7267e-14,\n 1.2686e-13],\n [1.2758e-13, 5.8437e-15, 4.1798e-15, ..., 1.5476e-14, 5.5270e-14,\n 7.2375e-14],\n [2.3338e-12, 5.2914e-14, 4.6270e-14, ..., 7.6578e-14, 9.3305e-13,\n 1.1718e-12],\n ...,\n [7.3406e-13, 1.8380e-14, 1.3340e-14, ..., 3.3773e-14, 2.9351e-13,\n 3.8971e-13],\n [1.8208e-14, 1.3790e-15, 4.2671e-16, ..., 6.9496e-15, 7.1599e-15,\n 6.6839e-15],\n [4.1155e-14, 3.3339e-15, 2.5957e-15, ..., 1.1174e-14, 2.0243e-14,\n 2.1292e-14]], device='cuda:0')" }, "55": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.5159e-11, 8.1207e-12, 1.4428e-10, 7.7710e-11, 5.8249e-11, 7.9516e-14,\n 1.9365e-11, 2.0099e-11, 3.4726e-11, 3.4658e-12, 6.4083e-11, 8.4341e-13,\n 5.0797e-11, 2.9763e-14, 4.9988e-11, 1.2205e-13, 2.6802e-12, 2.3683e-11,\n 2.2161e-11, 3.7966e-11, 5.4370e-11, 8.3095e-12, 6.3580e-11, 2.8673e-11,\n 1.6013e-10, 4.3254e-11, 3.4463e-11, 8.0373e-12, 3.2137e-11, 7.7421e-11,\n 4.0544e-11, 1.4138e-10, 1.5735e-11, 1.8214e-12, 6.5297e-12, 7.0069e-12,\n 1.7973e-11, 5.9532e-11, 3.6884e-11, 7.5909e-12, 2.0559e-11, 5.7640e-11,\n 6.0618e-13, 2.2375e-11, 3.9866e-11, 1.4032e-11, 1.4699e-11, 4.6365e-11,\n 1.7324e-10, 8.6395e-12, 6.5793e-11, 3.3584e-11, 1.6390e-11, 1.3905e-11,\n 3.6131e-13, 1.2707e-10, 2.3992e-12, 5.3362e-11, 1.2991e-11, 1.2528e-10,\n 7.1158e-11, 4.8508e-11, 1.5337e-12, 2.5841e-11, 1.1104e-11, 1.2226e-11,\n 1.3444e-11, 1.2737e-10, 1.2752e-12, 6.0437e-11, 4.5703e-13, 4.8954e-11,\n 8.1020e-13, 5.0172e-11, 3.0536e-13, 1.3404e-11, 1.1174e-10, 6.3924e-12,\n 4.7255e-11, 8.9757e-11, 4.1547e-11, 2.9373e-11, 3.3858e-11, 4.6700e-11,\n 4.2296e-11, 5.9648e-11, 2.0475e-11, 2.8744e-11, 3.5064e-11, 1.9245e-12,\n 1.0042e-11, 1.2615e-11, 2.2309e-10, 6.0838e-12, 2.9957e-11, 3.1313e-14,\n 1.6061e-12, 5.1029e-13, 1.1290e-13, 3.4717e-12, 2.2301e-10, 1.5871e-12,\n 2.1120e-11, 1.8538e-10, 1.4597e-11, 3.8691e-12, 6.0775e-12, 1.5615e-12,\n 4.2690e-11, 1.0152e-10, 5.7554e-13, 2.9844e-11, 1.1889e-11, 1.1629e-11,\n 3.7930e-11, 8.9521e-12, 2.2207e-12, 1.2556e-12, 2.8900e-11, 2.2529e-11,\n 4.4339e-13, 3.1680e-11, 2.2410e-12, 2.2592e-10, 9.2671e-11, 2.3259e-10,\n 8.6656e-11, 5.4494e-12, 4.8185e-12, 1.3484e-11, 1.0470e-10, 1.7153e-12,\n 1.0765e-11, 4.8312e-11, 1.6919e-13, 6.7833e-13, 3.0068e-11, 7.0759e-11,\n 3.4627e-11, 8.3473e-12, 1.5535e-11, 4.0792e-11, 2.1227e-11, 1.9679e-11,\n 2.1348e-11, 4.7245e-11, 1.7621e-11, 2.6905e-11, 1.9046e-11, 1.2345e-10,\n 6.8702e-14, 1.4145e-11, 1.7289e-12, 1.4293e-12, 2.9840e-10, 1.2991e-10,\n 6.4722e-12, 1.2134e-10, 2.7550e-11, 1.6931e-10, 3.4157e-12, 3.4984e-12,\n 2.2471e-11, 9.5441e-14, 1.6493e-10, 2.7667e-11, 3.6410e-11, 5.8254e-12,\n 2.6032e-11, 1.1942e-10, 1.6356e-12, 2.8278e-11, 3.8475e-11, 1.0570e-11,\n 1.1728e-10, 2.8758e-11, 2.2926e-11, 8.0707e-13, 2.0609e-11, 6.2082e-12,\n 5.4755e-11, 2.5378e-11, 3.8315e-12, 7.8198e-12, 7.5686e-13, 1.5143e-11,\n 6.5428e-12, 9.6658e-11, 2.3900e-12, 9.1625e-12, 7.9176e-12, 1.7480e-12,\n 5.7207e-12, 6.2102e-11, 2.2512e-11, 1.9755e-11, 6.0241e-11, 3.2856e-11,\n 3.5357e-11, 1.6442e-11, 1.7587e-11, 7.3985e-11, 4.5192e-11, 1.2199e-12,\n 7.7791e-11, 2.6457e-12, 5.9912e-11, 7.9258e-11, 8.1572e-11, 7.8432e-11,\n 2.2470e-11, 1.7007e-11, 7.2617e-11, 2.9133e-12, 6.6452e-11, 3.1477e-11,\n 4.2378e-14, 4.5809e-11, 1.0927e-13, 3.4193e-11, 1.9710e-11, 4.9219e-10,\n 5.7552e-11, 1.7586e-10, 1.6040e-12, 7.9524e-12, 3.7098e-11, 2.8453e-12,\n 1.0246e-11, 5.7126e-13, 1.7639e-11, 4.5068e-11, 4.7322e-11, 7.1001e-12,\n 1.6824e-10, 2.2035e-11, 9.7330e-12, 1.0623e-12, 9.5656e-12, 1.2393e-13,\n 5.2967e-12, 4.0038e-11, 2.9329e-12, 2.6803e-12, 1.9279e-11, 1.2082e-10,\n 2.0178e-11, 6.2824e-11, 3.0753e-11, 1.4996e-11, 1.2674e-10, 2.9467e-12,\n 9.9483e-11, 4.5840e-11, 8.6484e-13, 2.5237e-12], device='cuda:0')" + "exp_avg_sq": "tensor([4.3318e-12, 2.3206e-12, 4.1229e-11, 2.2206e-11, 1.6645e-11, 2.2722e-14,\n 5.5336e-12, 5.7436e-12, 9.9233e-12, 9.9039e-13, 1.8312e-11, 2.4101e-13,\n 1.4516e-11, 8.5051e-15, 1.4284e-11, 3.4878e-14, 7.6588e-13, 6.7675e-12,\n 6.3327e-12, 1.0849e-11, 1.5537e-11, 2.3745e-12, 1.8168e-11, 8.1935e-12,\n 4.5757e-11, 1.2360e-11, 9.8480e-12, 2.2967e-12, 9.1834e-12, 2.2124e-11,\n 1.1586e-11, 4.0399e-11, 4.4965e-12, 5.2049e-13, 1.8659e-12, 2.0023e-12,\n 5.1360e-12, 1.7012e-11, 1.0540e-11, 2.1692e-12, 5.8748e-12, 1.6471e-11,\n 1.7322e-13, 6.3939e-12, 1.1392e-11, 4.0098e-12, 4.2003e-12, 1.3249e-11,\n 4.9506e-11, 2.4688e-12, 1.8801e-11, 9.5968e-12, 4.6835e-12, 3.9734e-12,\n 1.0325e-13, 3.6310e-11, 6.8560e-13, 1.5249e-11, 3.7122e-12, 3.5800e-11,\n 2.0334e-11, 1.3861e-11, 4.3827e-13, 7.3843e-12, 3.1731e-12, 3.4936e-12,\n 3.8417e-12, 3.6398e-11, 3.6439e-13, 1.7270e-11, 1.3060e-13, 1.3989e-11,\n 2.3152e-13, 1.4337e-11, 8.7260e-14, 3.8302e-12, 3.1931e-11, 1.8267e-12,\n 1.3503e-11, 2.5649e-11, 1.1872e-11, 8.3936e-12, 9.6752e-12, 1.3345e-11,\n 1.2086e-11, 1.7045e-11, 5.8510e-12, 8.2138e-12, 1.0020e-11, 5.4994e-13,\n 2.8696e-12, 3.6047e-12, 6.3750e-11, 1.7385e-12, 8.5605e-12, 8.9480e-15,\n 4.5897e-13, 1.4582e-13, 3.2263e-14, 9.9206e-13, 6.3726e-11, 4.5354e-13,\n 6.0353e-12, 5.2973e-11, 4.1712e-12, 1.1056e-12, 1.7367e-12, 4.4622e-13,\n 1.2199e-11, 2.9011e-11, 1.6446e-13, 8.5281e-12, 3.3974e-12, 3.3232e-12,\n 1.0839e-11, 2.5581e-12, 6.3458e-13, 3.5879e-13, 8.2584e-12, 6.4379e-12,\n 1.2670e-13, 9.0529e-12, 6.4039e-13, 6.4557e-11, 2.6482e-11, 6.6465e-11,\n 2.4763e-11, 1.5572e-12, 1.3769e-12, 3.8532e-12, 2.9918e-11, 4.9015e-13,\n 3.0761e-12, 1.3805e-11, 4.8348e-14, 1.9384e-13, 8.5922e-12, 2.0220e-11,\n 9.8950e-12, 2.3853e-12, 4.4393e-12, 1.1657e-11, 6.0657e-12, 5.6236e-12,\n 6.1005e-12, 1.3501e-11, 5.0354e-12, 7.6882e-12, 5.4425e-12, 3.5276e-11,\n 1.9632e-14, 4.0422e-12, 4.9404e-13, 4.0842e-13, 8.5272e-11, 3.7122e-11,\n 1.8495e-12, 3.4675e-11, 7.8726e-12, 4.8381e-11, 9.7606e-13, 9.9970e-13,\n 6.4212e-12, 2.7273e-14, 4.7131e-11, 7.9059e-12, 1.0405e-11, 1.6647e-12,\n 7.4388e-12, 3.4126e-11, 4.6739e-13, 8.0808e-12, 1.0994e-11, 3.0205e-12,\n 3.3515e-11, 8.2178e-12, 6.5513e-12, 2.3063e-13, 5.8891e-12, 1.7741e-12,\n 1.5647e-11, 7.2519e-12, 1.0949e-12, 2.2346e-12, 2.1628e-13, 4.3274e-12,\n 1.8697e-12, 2.7621e-11, 6.8295e-13, 2.6183e-12, 2.2625e-12, 4.9950e-13,\n 1.6347e-12, 1.7746e-11, 6.4331e-12, 5.6453e-12, 1.7214e-11, 9.3888e-12,\n 1.0104e-11, 4.6986e-12, 5.0256e-12, 2.1142e-11, 1.2914e-11, 3.4860e-13,\n 2.2229e-11, 7.5602e-13, 1.7120e-11, 2.2648e-11, 2.3310e-11, 2.2413e-11,\n 6.4210e-12, 4.8598e-12, 2.0751e-11, 8.3251e-13, 1.8989e-11, 8.9948e-12,\n 1.2110e-14, 1.3090e-11, 3.1224e-14, 9.7709e-12, 5.6322e-12, 1.4065e-10,\n 1.6446e-11, 5.0255e-11, 4.5836e-13, 2.2725e-12, 1.0601e-11, 8.1307e-13,\n 2.9280e-12, 1.6324e-13, 5.0406e-12, 1.2879e-11, 1.3523e-11, 2.0289e-12,\n 4.8075e-11, 6.2966e-12, 2.7813e-12, 3.0357e-13, 2.7335e-12, 3.5414e-14,\n 1.5136e-12, 1.1441e-11, 8.3809e-13, 7.6591e-13, 5.5092e-12, 3.4526e-11,\n 5.7661e-12, 1.7953e-11, 8.7878e-12, 4.2853e-12, 3.6218e-11, 8.4204e-13,\n 2.8428e-11, 1.3099e-11, 2.4713e-13, 7.2117e-13], device='cuda:0')" }, "56": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.7181e-10, 8.0659e-11, 2.2317e-10, ..., 9.6639e-11, 8.2734e-12,\n 1.2694e-12],\n [3.3503e-12, 8.7313e-13, 1.9822e-12, ..., 7.9667e-13, 7.8915e-14,\n 1.5685e-14],\n [4.5631e-12, 9.6284e-13, 2.7228e-12, ..., 1.2063e-12, 1.0207e-13,\n 1.4633e-14],\n ...,\n [3.7305e-12, 7.8659e-13, 2.2519e-12, ..., 9.5871e-13, 8.8397e-14,\n 1.8373e-14],\n [4.6214e-12, 9.9796e-13, 2.7779e-12, ..., 1.2023e-12, 8.5687e-14,\n 8.5957e-15],\n [4.1010e-12, 8.6525e-13, 2.5648e-12, ..., 1.0978e-12, 9.9048e-14,\n 2.5100e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.0625e-10, 2.3049e-11, 6.3773e-11, ..., 2.7615e-11, 2.3642e-12,\n 3.6274e-13],\n [9.5738e-13, 2.4950e-13, 5.6642e-13, ..., 2.2765e-13, 2.2551e-14,\n 4.4821e-15],\n [1.3039e-12, 2.7514e-13, 7.7805e-13, ..., 3.4471e-13, 2.9168e-14,\n 4.1816e-15],\n ...,\n [1.0660e-12, 2.2477e-13, 6.4349e-13, ..., 2.7396e-13, 2.5260e-14,\n 5.2502e-15],\n [1.3206e-12, 2.8517e-13, 7.9380e-13, ..., 3.4357e-13, 2.4486e-14,\n 2.4563e-15],\n [1.1719e-12, 2.4725e-13, 7.3292e-13, ..., 3.1371e-13, 2.8304e-14,\n 7.1725e-15]], device='cuda:0')" }, "57": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([9.5841e-09, 8.6893e-11, 1.1736e-10, 1.2179e-10, 1.3963e-10, 1.6040e-10,\n 1.2752e-10, 9.4438e-11, 1.1928e-10, 1.0600e-10], device='cuda:0')" + "exp_avg_sq": "tensor([2.7387e-09, 2.4830e-11, 3.3537e-11, 3.4802e-11, 3.9901e-11, 4.5835e-11,\n 3.6441e-11, 2.6986e-11, 3.4086e-11, 3.0291e-11], device='cuda:0')" }, "58": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.7177e-10, 8.0644e-11, 2.2311e-10, ..., 9.6639e-11, 8.2547e-12,\n 1.2566e-12],\n [3.3498e-12, 8.7291e-13, 1.9812e-12, ..., 7.9666e-13, 7.8638e-14,\n 1.5498e-14],\n [4.5627e-12, 9.6267e-13, 2.7220e-12, ..., 1.2063e-12, 1.0186e-13,\n 1.4490e-14],\n ...,\n [3.7299e-12, 7.8634e-13, 2.2508e-12, ..., 9.5870e-13, 8.8087e-14,\n 1.8160e-14],\n [4.6212e-12, 9.9790e-13, 2.7776e-12, ..., 1.2023e-12, 8.5612e-14,\n 8.5449e-15],\n [4.0999e-12, 8.6480e-13, 2.5630e-12, ..., 1.0978e-12, 9.8495e-14,\n 2.4725e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.0624e-10, 2.3045e-11, 6.3754e-11, ..., 2.7615e-11, 2.3588e-12,\n 3.5908e-13],\n [9.5722e-13, 2.4944e-13, 5.6615e-13, ..., 2.2765e-13, 2.2472e-14,\n 4.4286e-15],\n [1.3038e-12, 2.7509e-13, 7.7785e-13, ..., 3.4471e-13, 2.9108e-14,\n 4.1407e-15],\n ...,\n [1.0658e-12, 2.2470e-13, 6.4318e-13, ..., 2.7396e-13, 2.5171e-14,\n 5.1892e-15],\n [1.3206e-12, 2.8516e-13, 7.9373e-13, ..., 3.4357e-13, 2.4464e-14,\n 2.4418e-15],\n [1.1716e-12, 2.4712e-13, 7.3239e-13, ..., 3.1371e-13, 2.8146e-14,\n 7.0653e-15]], device='cuda:0')" }, "59": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([9.5841e-09, 8.6892e-11, 1.1736e-10, 1.2179e-10, 1.3963e-10, 1.6040e-10,\n 1.2752e-10, 9.4437e-11, 1.1928e-10, 1.0600e-10], device='cuda:0')" + "exp_avg_sq": "tensor([2.7387e-09, 2.4830e-11, 3.3537e-11, 3.4802e-11, 3.9901e-11, 4.5835e-11,\n 3.6441e-11, 2.6986e-11, 3.4086e-11, 3.0290e-11], device='cuda:0')" }, "60": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.7181e-10, 8.0659e-11, 2.2317e-10, ..., 9.6639e-11, 8.2734e-12,\n 1.2694e-12],\n [3.3503e-12, 8.7313e-13, 1.9822e-12, ..., 7.9667e-13, 7.8915e-14,\n 1.5685e-14],\n [4.5631e-12, 9.6284e-13, 2.7228e-12, ..., 1.2063e-12, 1.0207e-13,\n 1.4633e-14],\n ...,\n [3.7305e-12, 7.8659e-13, 2.2519e-12, ..., 9.5871e-13, 8.8397e-14,\n 1.8373e-14],\n [4.6214e-12, 9.9796e-13, 2.7779e-12, ..., 1.2023e-12, 8.5687e-14,\n 8.5957e-15],\n [4.1010e-12, 8.6525e-13, 2.5648e-12, ..., 1.0978e-12, 9.9048e-14,\n 2.5100e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.0625e-10, 2.3049e-11, 6.3773e-11, ..., 2.7615e-11, 2.3642e-12,\n 3.6274e-13],\n [9.5738e-13, 2.4950e-13, 5.6642e-13, ..., 2.2765e-13, 2.2551e-14,\n 4.4821e-15],\n [1.3039e-12, 2.7514e-13, 7.7805e-13, ..., 3.4471e-13, 2.9168e-14,\n 4.1816e-15],\n ...,\n [1.0660e-12, 2.2477e-13, 6.4349e-13, ..., 2.7396e-13, 2.5260e-14,\n 5.2502e-15],\n [1.3206e-12, 2.8517e-13, 7.9380e-13, ..., 3.4357e-13, 2.4486e-14,\n 2.4563e-15],\n [1.1719e-12, 2.4725e-13, 7.3292e-13, ..., 3.1371e-13, 2.8304e-14,\n 7.1725e-15]], device='cuda:0')" }, "61": { - "step": "tensor(10016.)", + "step": "tensor(11268.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([9.5841e-09, 8.6893e-11, 1.1736e-10, 1.2179e-10, 1.3963e-10, 1.6040e-10,\n 1.2752e-10, 9.4438e-11, 1.1928e-10, 1.0600e-10], device='cuda:0')" + "exp_avg_sq": "tensor([2.7387e-09, 2.4830e-11, 3.3537e-11, 3.4802e-11, 3.9901e-11, 4.5835e-11,\n 3.6441e-11, 2.6986e-11, 3.4086e-11, 3.0291e-11], device='cuda:0')" }, "8": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 7.1127e-07, -1.1334e-09, 1.6416e-06, ..., 7.5451e-07,\n 0.0000e+00, -2.1600e-07],\n [ 1.1585e-06, -2.0304e-08, -1.3437e-06, ..., 9.5322e-08,\n 0.0000e+00, -3.0512e-07],\n [-5.5970e-07, -1.7403e-07, -9.2124e-07, ..., 2.3764e-08,\n 0.0000e+00, -1.5400e-07],\n ...,\n [ 6.2360e-07, 9.8093e-09, 3.4336e-07, ..., 1.0255e-06,\n 0.0000e+00, 1.3901e-07],\n [-1.1767e-07, -6.6005e-11, 2.9096e-07, ..., 2.2409e-07,\n 0.0000e+00, 1.9691e-07],\n [ 7.8069e-08, 1.0753e-06, -3.0240e-08, ..., -1.4862e-06,\n 0.0000e+00, 1.1304e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.2447e-12, 4.7091e-13, 2.3883e-11, ..., 2.1091e-11, 0.0000e+00,\n 5.9657e-12],\n [1.7801e-11, 2.8543e-13, 1.8939e-11, ..., 2.5252e-12, 0.0000e+00,\n 9.6856e-12],\n [4.1287e-12, 8.9117e-13, 2.1985e-11, ..., 3.8858e-12, 0.0000e+00,\n 2.6624e-12],\n ...,\n [3.1398e-12, 9.2319e-13, 6.4868e-12, ..., 3.4168e-11, 0.0000e+00,\n 7.6595e-12],\n [1.0374e-11, 6.7254e-13, 8.0912e-11, ..., 6.4330e-11, 0.0000e+00,\n 4.3838e-11],\n [1.6799e-11, 2.7170e-12, 3.0741e-11, ..., 3.3304e-11, 0.0000e+00,\n 1.6403e-11]], device='cuda:0')" + "step": "tensor(10016.)", + "exp_avg": "tensor([[ 5.2391e-07, -9.5890e-08, -1.6794e-07, ..., 1.0383e-06,\n 0.0000e+00, 1.6587e-07],\n [-2.9076e-07, -5.4219e-09, -8.0596e-06, ..., -3.0523e-07,\n 0.0000e+00, -1.1051e-07],\n [ 3.0308e-07, 1.0616e-08, 2.8124e-07, ..., -8.6895e-08,\n 0.0000e+00, -2.3080e-07],\n ...,\n [-7.1909e-08, 1.7884e-08, -1.0837e-07, ..., 4.0920e-06,\n 0.0000e+00, 2.0505e-07],\n [-7.4948e-07, 1.5425e-07, 8.0185e-07, ..., 2.5868e-06,\n 0.0000e+00, 1.0694e-06],\n [-2.2005e-07, 6.5157e-08, 1.4630e-06, ..., 1.3091e-06,\n 0.0000e+00, 4.5262e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.7811e-12, 3.4363e-13, 2.4449e-11, ..., 1.9803e-11, 0.0000e+00,\n 5.4632e-12],\n [1.7406e-11, 1.2091e-13, 2.4161e-11, ..., 2.5411e-12, 0.0000e+00,\n 1.0146e-11],\n [3.4680e-12, 7.8053e-13, 2.0143e-11, ..., 3.2588e-12, 0.0000e+00,\n 2.1691e-12],\n ...,\n [2.2858e-12, 7.1243e-13, 6.1150e-12, ..., 3.3772e-11, 0.0000e+00,\n 8.1645e-12],\n [9.0069e-12, 5.4013e-13, 7.2687e-11, ..., 5.8154e-11, 0.0000e+00,\n 4.7426e-11],\n [1.5617e-11, 3.0288e-12, 3.6350e-11, ..., 3.1824e-11, 0.0000e+00,\n 2.1300e-11]], device='cuda:0')" }, "9": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 1.3008e-05, 3.2393e-06, -1.1118e-05, ..., 1.0196e-05,\n 5.7474e-06, -1.1814e-05], device='cuda:0')", - "exp_avg_sq": "tensor([1.4644e-09, 1.5818e-09, 1.2495e-09, ..., 1.1794e-09, 1.7696e-09,\n 1.4842e-09], device='cuda:0')" + "step": "tensor(10016.)", + "exp_avg": "tensor([ 3.6428e-06, -1.5594e-05, 4.7698e-06, ..., 5.7057e-06,\n 5.1658e-06, 3.8920e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.4399e-09, 1.6370e-09, 1.0969e-09, ..., 1.1355e-09, 1.5990e-09,\n 1.5532e-09], device='cuda:0')" }, "10": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-9.2398e-08, -8.9931e-08, -2.7301e-07, ..., -3.8736e-07,\n -1.0200e-07, -3.0045e-09],\n [ 1.6172e-07, 3.4633e-08, -8.9204e-07, ..., 3.4880e-07,\n -9.5572e-08, 3.9034e-08],\n [ 4.8591e-07, -2.4519e-07, -5.1843e-08, ..., 1.1508e-06,\n 1.9766e-07, -1.2530e-07],\n ...,\n [ 1.1949e-06, 5.7993e-08, 6.8510e-08, ..., 3.4042e-08,\n 3.0697e-07, -2.0671e-07],\n [ 3.2916e-07, -1.7900e-07, -7.1950e-07, ..., -1.2671e-06,\n -5.8147e-07, -2.3205e-07],\n [ 4.3280e-07, 4.4207e-08, -2.1287e-07, ..., -1.1003e-06,\n 3.7854e-07, 2.5036e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.3670e-13, 7.6839e-13, 1.2268e-12, ..., 1.8572e-12, 1.0994e-12,\n 6.7012e-13],\n [1.5079e-12, 1.8793e-12, 1.6123e-12, ..., 2.3724e-12, 1.9137e-12,\n 1.3360e-12],\n [1.7224e-12, 2.6368e-12, 2.0134e-12, ..., 2.1996e-12, 2.7149e-12,\n 1.3555e-12],\n ...,\n [2.2728e-12, 1.3475e-12, 2.0569e-12, ..., 2.9544e-12, 3.2887e-12,\n 1.4604e-12],\n [1.7872e-12, 1.6173e-12, 2.7772e-12, ..., 2.7901e-12, 3.7271e-12,\n 1.4227e-12],\n [2.3784e-12, 1.8834e-12, 1.5589e-12, ..., 2.3613e-12, 2.1300e-12,\n 2.3022e-12]], device='cuda:0')" + "step": "tensor(10016.)", + "exp_avg": "tensor([[ 3.9524e-08, -9.2433e-08, 1.0221e-07, ..., -4.4198e-07,\n 7.6784e-07, 2.0899e-07],\n [ 3.2126e-07, 3.1007e-07, 2.6399e-07, ..., 3.7226e-07,\n -5.8755e-07, 5.4598e-07],\n [-2.2047e-07, 3.0856e-07, 1.3871e-07, ..., -1.7447e-07,\n -9.6529e-07, -2.3211e-07],\n ...,\n [-1.7317e-07, -1.6380e-07, -9.4509e-08, ..., -3.1332e-08,\n 3.2863e-08, 1.8801e-07],\n [-2.4867e-07, -1.4015e-07, -1.4686e-07, ..., 6.6397e-07,\n -3.8798e-07, -2.3568e-07],\n [ 2.6323e-07, -3.6754e-07, -1.6236e-07, ..., 4.9175e-07,\n -6.2566e-07, -2.3958e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.7076e-13, 7.1408e-13, 1.1768e-12, ..., 1.8106e-12, 1.0345e-12,\n 6.2712e-13],\n [1.3839e-12, 1.6290e-12, 1.3338e-12, ..., 2.2275e-12, 1.6904e-12,\n 1.4582e-12],\n [1.5684e-12, 2.2788e-12, 1.8632e-12, ..., 1.9984e-12, 2.4658e-12,\n 1.2703e-12],\n ...,\n [1.9778e-12, 1.1253e-12, 1.7830e-12, ..., 2.8008e-12, 2.9287e-12,\n 1.3667e-12],\n [1.6394e-12, 1.3856e-12, 2.5231e-12, ..., 2.5221e-12, 3.2398e-12,\n 1.3361e-12],\n [2.2460e-12, 1.5797e-12, 1.4107e-12, ..., 2.1398e-12, 1.8176e-12,\n 2.2789e-12]], device='cuda:0')" }, "11": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[ 3.5158e-07, 4.0751e-07, -5.2390e-09, ..., 1.4773e-07,\n 0.0000e+00, -1.1461e-07],\n [-9.3380e-11, 1.0295e-07, 2.2734e-09, ..., 6.2574e-07,\n 0.0000e+00, 5.2535e-07],\n [ 2.4535e-06, 9.9263e-07, 3.8245e-07, ..., 1.9253e-06,\n 0.0000e+00, -7.3256e-07],\n ...,\n [ 1.3761e-09, 4.3762e-07, 1.8091e-08, ..., 6.7334e-08,\n 0.0000e+00, -9.4315e-07],\n [-1.1723e-07, -4.8408e-08, -1.5449e-06, ..., 2.1831e-07,\n 0.0000e+00, -6.1358e-07],\n [-5.5198e-09, 8.1320e-10, 2.4084e-07, ..., -2.8062e-08,\n 0.0000e+00, 7.6243e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4433e-11, 1.2915e-11, 4.4438e-12, ..., 4.4458e-12, 0.0000e+00,\n 7.1902e-12],\n [2.9458e-12, 1.2515e-11, 1.1937e-12, ..., 3.4155e-12, 0.0000e+00,\n 6.0900e-11],\n [4.5881e-12, 4.5693e-13, 2.1358e-11, ..., 8.7356e-11, 0.0000e+00,\n 6.2364e-12],\n ...,\n [3.3465e-13, 5.7126e-13, 1.0836e-11, ..., 1.9208e-12, 0.0000e+00,\n 1.1991e-11],\n [9.6349e-12, 8.4759e-12, 3.0545e-11, ..., 8.0468e-11, 0.0000e+00,\n 5.5610e-11],\n [2.1474e-12, 1.7348e-14, 1.6411e-12, ..., 8.0941e-12, 0.0000e+00,\n 2.4588e-12]], device='cuda:0')" + "step": "tensor(8764.)", + "exp_avg": "tensor([[-4.5763e-07, -5.1912e-09, -6.4911e-08, ..., -4.0983e-08,\n 0.0000e+00, -3.3231e-07],\n [-7.6240e-09, -1.2870e-07, 7.0829e-08, ..., 4.1722e-07,\n 0.0000e+00, 3.3970e-06],\n [ 1.5098e-07, 8.2674e-10, 4.2594e-07, ..., 4.2718e-07,\n 0.0000e+00, 3.4151e-07],\n ...,\n [ 3.4169e-08, 6.1919e-09, -2.5748e-07, ..., 1.3139e-07,\n 0.0000e+00, -2.2114e-07],\n [-2.9416e-07, 4.2277e-07, 9.9381e-07, ..., 1.4964e-06,\n 0.0000e+00, -3.6192e-07],\n [-2.7604e-07, 1.4446e-10, -8.1898e-08, ..., 3.2305e-08,\n 0.0000e+00, 3.6308e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3432e-11, 8.3001e-12, 4.1771e-12, ..., 4.1365e-12, 0.0000e+00,\n 7.5649e-12],\n [2.2236e-12, 8.0298e-12, 7.6686e-13, ..., 2.2507e-12, 0.0000e+00,\n 5.9823e-11],\n [3.2761e-12, 6.3425e-13, 1.8211e-11, ..., 7.7775e-11, 0.0000e+00,\n 5.9067e-12],\n ...,\n [3.2106e-13, 3.9059e-13, 7.5758e-12, ..., 2.1005e-12, 0.0000e+00,\n 1.0711e-11],\n [7.7567e-12, 7.0507e-12, 2.6024e-11, ..., 7.7393e-11, 0.0000e+00,\n 5.7496e-11],\n [1.3668e-12, 5.0223e-15, 1.4851e-12, ..., 7.4668e-12, 0.0000e+00,\n 2.2968e-12]], device='cuda:0')" }, "12": { - "step": "tensor(7512.)", - "exp_avg": "tensor([ 8.2717e-06, 1.3377e-05, 1.8875e-05, ..., 7.2173e-06,\n -5.7996e-06, 1.0398e-05], device='cuda:0')", - "exp_avg_sq": "tensor([1.1637e-09, 9.0475e-10, 1.0342e-09, ..., 1.5224e-09, 1.5881e-09,\n 1.1712e-09], device='cuda:0')" + "step": "tensor(8764.)", + "exp_avg": "tensor([-3.1734e-06, 4.8233e-06, 4.5898e-06, ..., -3.0027e-05,\n 3.4156e-07, -4.7405e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.0698e-09, 7.4755e-10, 9.0855e-10, ..., 1.2911e-09, 1.4461e-09,\n 1.0259e-09], device='cuda:0')" }, "13": { - "step": "tensor(7512.)", - "exp_avg": "tensor([[-2.2145e-07, -5.2500e-09, 3.6305e-07, ..., 1.1634e-07,\n 1.2335e-07, 1.0182e-07],\n [ 2.8138e-08, 7.9599e-08, -3.7962e-07, ..., 1.0713e-07,\n -9.6078e-08, 2.7435e-07],\n [ 1.2647e-07, -4.4712e-07, 1.3952e-07, ..., -7.9300e-08,\n -2.9413e-07, 3.5979e-07],\n ...,\n [ 1.5586e-07, 5.4410e-07, 8.1119e-08, ..., 2.9630e-07,\n 2.5929e-07, -2.2297e-07],\n [-1.4601e-07, 4.2545e-07, 1.2871e-07, ..., -2.5881e-07,\n 2.9254e-08, 2.9198e-07],\n [ 5.6778e-08, 1.4077e-07, 2.7068e-08, ..., 6.0740e-07,\n 7.7039e-08, 2.6850e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.1527e-13, 3.5789e-13, 6.7610e-13, ..., 1.1534e-12, 6.7370e-13,\n 1.6313e-12],\n [9.5567e-13, 5.6210e-13, 2.1578e-12, ..., 3.0302e-12, 1.7601e-12,\n 2.2420e-12],\n [1.0687e-12, 9.1471e-13, 2.1824e-12, ..., 3.1253e-12, 1.9043e-12,\n 8.9120e-13],\n ...,\n [1.1845e-12, 1.2876e-12, 2.3977e-12, ..., 1.7955e-12, 2.2518e-12,\n 3.0625e-12],\n [9.2555e-13, 1.1871e-12, 8.3453e-13, ..., 1.5670e-12, 1.4923e-12,\n 3.7116e-12],\n [1.0407e-12, 7.7098e-13, 1.3594e-12, ..., 3.7646e-12, 1.9047e-12,\n 1.4781e-12]], device='cuda:0')" + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 2.2182e-07, -2.0739e-09, 7.5835e-07, ..., -3.3445e-07,\n 2.1949e-08, 3.0941e-07],\n [ 1.6808e-07, 1.8245e-07, 3.3214e-07, ..., 2.7568e-07,\n 2.7083e-07, 4.0770e-07],\n [-2.0915e-07, 6.5567e-08, -1.1137e-06, ..., 6.5655e-07,\n 2.2354e-07, -9.9601e-08],\n ...,\n [-1.6749e-07, 2.1184e-07, -2.2033e-07, ..., -6.8793e-09,\n 2.4235e-07, -1.6973e-07],\n [ 1.4642e-07, 2.4461e-08, -4.6586e-07, ..., -8.2878e-10,\n -3.2903e-09, 3.3803e-09],\n [-1.5389e-07, 9.5534e-08, -1.2836e-07, ..., -8.0069e-08,\n 2.5866e-07, -2.3752e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.5088e-13, 3.1527e-13, 6.2491e-13, ..., 1.0108e-12, 5.9876e-13,\n 1.5150e-12],\n [8.1732e-13, 4.4879e-13, 1.9033e-12, ..., 2.6162e-12, 1.5164e-12,\n 1.9223e-12],\n [8.9293e-13, 7.2005e-13, 2.2119e-12, ..., 2.6867e-12, 1.7467e-12,\n 7.9776e-13],\n ...,\n [1.0520e-12, 1.0296e-12, 2.2339e-12, ..., 1.5686e-12, 1.9143e-12,\n 2.7351e-12],\n [7.8992e-13, 9.9624e-13, 7.6716e-13, ..., 1.3802e-12, 1.2542e-12,\n 3.2748e-12],\n [9.0855e-13, 6.6180e-13, 1.1952e-12, ..., 3.4612e-12, 1.5513e-12,\n 1.2847e-12]], device='cuda:0')" }, "14": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[-2.3796e-07, 3.3450e-08, -9.0917e-06, ..., -1.5764e-05,\n 0.0000e+00, 3.6126e-07],\n [-3.4231e-08, -1.6507e-06, -1.5966e-07, ..., 2.8189e-07,\n 0.0000e+00, 5.2127e-07],\n [ 3.7974e-07, 6.7047e-09, -4.8234e-07, ..., -8.3858e-08,\n 0.0000e+00, -1.1553e-07],\n ...,\n [ 2.8994e-07, 7.2693e-08, 1.1269e-06, ..., -1.7141e-07,\n 0.0000e+00, 9.9436e-08],\n [ 3.6569e-08, -2.6076e-08, -2.6538e-07, ..., -1.1463e-07,\n 0.0000e+00, 2.0413e-06],\n [ 9.2943e-09, -2.4002e-09, 6.8393e-07, ..., -2.8738e-06,\n 0.0000e+00, 4.0637e-08]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.5247e-13, 3.0665e-13, 9.5268e-11, ..., 3.1587e-10, 0.0000e+00,\n 8.9555e-12],\n [9.3897e-12, 7.1491e-13, 7.1510e-11, ..., 1.8032e-11, 0.0000e+00,\n 2.3862e-11],\n [2.3154e-11, 7.3162e-13, 4.4588e-12, ..., 1.4842e-12, 0.0000e+00,\n 1.4104e-12],\n ...,\n [3.2387e-11, 2.9470e-13, 7.9218e-12, ..., 6.0119e-12, 0.0000e+00,\n 5.0283e-12],\n [2.4030e-12, 1.3222e-12, 5.9057e-12, ..., 1.1591e-10, 0.0000e+00,\n 1.4693e-10],\n [4.6795e-12, 2.9307e-13, 1.5842e-10, ..., 1.0150e-09, 0.0000e+00,\n 1.4507e-11]], device='cuda:0')" + "step": "tensor(7512.)", + "exp_avg": "tensor([[-9.9144e-08, -3.9194e-08, 6.6145e-06, ..., 3.6352e-05,\n 0.0000e+00, 4.3041e-08],\n [ 8.1803e-08, 4.6771e-10, 9.3593e-06, ..., -1.5705e-07,\n 0.0000e+00, 7.0697e-07],\n [ 1.4673e-07, 3.3228e-08, 3.6580e-07, ..., -1.3127e-08,\n 0.0000e+00, 4.7572e-07],\n ...,\n [ 5.0273e-07, 1.8915e-09, -3.1544e-08, ..., -7.1909e-09,\n 0.0000e+00, 1.7110e-07],\n [-5.6052e-09, -9.2777e-10, 2.9593e-07, ..., 4.5681e-06,\n 0.0000e+00, 3.3314e-06],\n [ 4.4287e-08, 5.6212e-09, 1.1217e-05, ..., 8.2747e-05,\n 0.0000e+00, 6.8293e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.2447e-13, 2.2746e-13, 9.2144e-11, ..., 3.8680e-10, 0.0000e+00,\n 6.9366e-12],\n [9.2418e-12, 2.7513e-13, 7.3857e-11, ..., 1.5449e-11, 0.0000e+00,\n 2.8060e-11],\n [2.1221e-11, 4.9112e-13, 3.5593e-12, ..., 7.8030e-13, 0.0000e+00,\n 1.2318e-12],\n ...,\n [2.3565e-11, 1.7063e-13, 7.0745e-12, ..., 3.5663e-12, 0.0000e+00,\n 6.4724e-12],\n [2.8506e-12, 5.2280e-13, 5.3315e-12, ..., 1.2502e-10, 0.0000e+00,\n 1.3957e-10],\n [3.5639e-12, 1.4660e-13, 1.5281e-10, ..., 1.6086e-09, 0.0000e+00,\n 1.5458e-11]], device='cuda:0')" }, "15": { - "step": "tensor(6260.)", - "exp_avg": "tensor([-2.4162e-05, 6.8402e-06, 1.5809e-05, ..., 1.7564e-05,\n 2.2542e-05, -5.7766e-06], device='cuda:0')", - "exp_avg_sq": "tensor([2.3836e-09, 2.4163e-09, 2.4188e-09, ..., 2.7619e-09, 3.3472e-09,\n 3.4432e-09], device='cuda:0')" + "step": "tensor(7512.)", + "exp_avg": "tensor([ 3.3027e-05, 2.2930e-05, 3.7146e-06, ..., 1.8131e-05,\n -3.2139e-06, 6.8807e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.3151e-09, 2.0855e-09, 1.9548e-09, ..., 2.8112e-09, 3.1657e-09,\n 3.5868e-09], device='cuda:0')" }, "16": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 2.0833e-07, 1.2373e-07, -1.2603e-07, ..., 1.7794e-08,\n 4.1926e-08, -3.3565e-08],\n [-3.4089e-07, -2.0883e-07, -3.8116e-08, ..., -3.3353e-07,\n 3.5926e-07, -8.0729e-08],\n [ 3.9877e-07, 2.7054e-08, 1.6577e-07, ..., -5.8346e-08,\n 1.0115e-07, 4.3878e-08],\n ...,\n [ 2.7488e-07, 1.1380e-07, -3.3661e-08, ..., -3.4499e-07,\n 1.0795e-07, 3.4906e-09],\n [-2.8160e-07, -2.3850e-08, -5.1252e-07, ..., -6.2772e-08,\n 1.3890e-07, 7.2452e-08],\n [ 1.5879e-07, -5.0921e-07, 4.7472e-07, ..., -1.9866e-08,\n -3.3360e-07, 6.3981e-09]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.5884e-13, 8.9705e-13, 7.4598e-13, ..., 4.2738e-13, 1.0752e-12,\n 5.0304e-13],\n [9.2557e-13, 1.1767e-12, 1.6215e-12, ..., 2.2294e-12, 2.1189e-12,\n 1.0549e-12],\n [8.6112e-13, 1.0922e-12, 9.0558e-13, ..., 8.6149e-13, 2.5477e-12,\n 8.2274e-13],\n ...,\n [1.7882e-12, 1.3242e-12, 7.5551e-13, ..., 1.3508e-12, 8.9120e-13,\n 8.6295e-13],\n [1.0881e-12, 1.5198e-12, 1.4560e-12, ..., 1.5987e-12, 5.8468e-12,\n 7.5534e-13],\n [1.1380e-12, 2.4903e-12, 1.3301e-12, ..., 7.8992e-13, 1.8391e-12,\n 1.3495e-12]], device='cuda:0')" + "step": "tensor(7512.)", + "exp_avg": "tensor([[-2.1174e-09, -8.2970e-07, -1.1059e-07, ..., 5.8443e-08,\n -1.8422e-07, -4.0374e-07],\n [ 4.0283e-07, -2.6375e-07, 1.5211e-07, ..., -1.8824e-07,\n -1.4319e-06, 1.5411e-06],\n [-1.3496e-07, 5.9090e-07, 5.6911e-08, ..., 3.6908e-07,\n 3.0063e-07, -6.0522e-07],\n ...,\n [ 8.0747e-08, -2.1092e-07, -1.2430e-07, ..., -2.7408e-07,\n 2.8949e-07, 2.6415e-07],\n [ 6.3674e-08, -6.9401e-07, -2.4110e-07, ..., -3.4040e-07,\n -2.9516e-06, 6.0260e-08],\n [-3.6565e-07, -1.6630e-08, 6.8491e-08, ..., -1.8997e-07,\n -1.0694e-06, -1.1596e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.8353e-13, 7.9268e-13, 6.1169e-13, ..., 4.0575e-13, 9.0677e-13,\n 4.7225e-13],\n [9.2926e-13, 1.0657e-12, 1.2535e-12, ..., 2.1452e-12, 2.0690e-12,\n 1.1186e-12],\n [7.6799e-13, 9.2466e-13, 7.5990e-13, ..., 7.1637e-13, 2.1274e-12,\n 7.4540e-13],\n ...,\n [1.6328e-12, 1.1935e-12, 6.1102e-13, ..., 1.2074e-12, 7.5510e-13,\n 7.7320e-13],\n [1.0021e-12, 1.3796e-12, 1.2161e-12, ..., 1.5376e-12, 6.0830e-12,\n 6.1298e-13],\n [1.0035e-12, 2.1793e-12, 1.0239e-12, ..., 7.3054e-13, 1.6506e-12,\n 1.3283e-12]], device='cuda:0')" }, "17": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[-7.1597e-08, 2.2719e-10, 1.6526e-07, ..., -1.6091e-07,\n 0.0000e+00, -2.3640e-07],\n [-2.2614e-08, 3.6520e-07, 5.6226e-06, ..., 8.3258e-06,\n 0.0000e+00, 5.6855e-06],\n [-2.6790e-07, -7.2473e-08, -7.1747e-09, ..., 4.8490e-08,\n 0.0000e+00, 5.3141e-07],\n ...,\n [ 8.7261e-08, 7.1585e-09, 5.2969e-08, ..., -1.3986e-08,\n 0.0000e+00, -1.3055e-07],\n [-1.6224e-07, -1.3445e-06, 1.0314e-06, ..., -3.5344e-08,\n 0.0000e+00, 9.4345e-07],\n [ 2.5486e-07, -8.2496e-07, 1.9914e-06, ..., 5.2879e-07,\n 0.0000e+00, 4.8906e-08]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.1824e-12, 1.6233e-13, 5.5340e-12, ..., 3.9182e-12, 0.0000e+00,\n 1.4018e-12],\n [1.9629e-12, 7.5744e-12, 2.0060e-11, ..., 3.9744e-10, 0.0000e+00,\n 3.0806e-10],\n [6.3216e-11, 6.3397e-13, 3.0982e-11, ..., 1.7816e-12, 0.0000e+00,\n 7.8789e-12],\n ...,\n [6.9647e-12, 1.1375e-11, 8.2961e-11, ..., 1.4136e-11, 0.0000e+00,\n 1.3307e-11],\n [1.6043e-11, 1.8580e-11, 3.1790e-11, ..., 6.8612e-11, 0.0000e+00,\n 1.3649e-11],\n [1.5098e-12, 1.3773e-12, 7.5214e-11, ..., 1.4406e-11, 0.0000e+00,\n 4.5238e-12]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 9.5503e-07, -1.3270e-09, -2.9824e-07, ..., 5.8676e-08,\n 0.0000e+00, -1.4706e-07],\n [-8.1059e-08, 1.4618e-07, 1.1680e-08, ..., 2.0458e-06,\n 0.0000e+00, 2.1162e-07],\n [ 1.4763e-07, 1.2673e-09, 6.5117e-08, ..., -6.0637e-08,\n 0.0000e+00, -2.7041e-07],\n ...,\n [-4.1496e-08, -2.2324e-07, 1.0280e-07, ..., 8.3597e-07,\n 0.0000e+00, -9.0796e-08],\n [-1.2783e-07, -6.0111e-08, 7.7144e-08, ..., -5.2068e-06,\n 0.0000e+00, 1.2393e-07],\n [ 1.1435e-07, -8.7357e-09, 1.9023e-06, ..., 2.4482e-07,\n 0.0000e+00, 8.5112e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.9995e-12, 6.4075e-14, 6.3755e-12, ..., 2.2819e-12, 0.0000e+00,\n 2.2964e-12],\n [1.0422e-12, 7.2287e-12, 1.5739e-11, ..., 4.0577e-10, 0.0000e+00,\n 3.0019e-10],\n [5.9075e-11, 5.2013e-13, 2.8558e-11, ..., 1.0987e-12, 0.0000e+00,\n 3.4598e-12],\n ...,\n [5.3187e-12, 6.8257e-12, 6.3911e-11, ..., 1.2294e-11, 0.0000e+00,\n 1.1448e-11],\n [1.1078e-11, 1.3477e-11, 2.5968e-11, ..., 6.5423e-11, 0.0000e+00,\n 1.0950e-11],\n [1.8585e-12, 1.4802e-12, 7.0148e-11, ..., 9.6244e-12, 0.0000e+00,\n 2.7618e-12]], device='cuda:0')" }, "18": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 1.9906e-05, 2.1545e-05, -2.0907e-06, ..., -2.2385e-06,\n 1.4803e-05, 2.0915e-05], device='cuda:0')", - "exp_avg_sq": "tensor([2.4895e-09, 3.6101e-09, 2.9492e-09, ..., 1.6990e-09, 2.8636e-09,\n 2.3282e-09], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([-7.1890e-07, -5.6708e-05, -8.4577e-06, ..., -1.9044e-05,\n -2.6201e-05, 1.1328e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.2720e-09, 3.6701e-09, 2.6689e-09, ..., 1.4793e-09, 2.5373e-09,\n 2.0937e-09], device='cuda:0')" }, "19": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[-5.6433e-08, -9.4330e-08, 7.2399e-08, ..., -1.2613e-07,\n -2.7710e-08, -1.7631e-07],\n [-8.9386e-08, 3.4684e-07, -2.6833e-07, ..., 9.1135e-08,\n -2.8159e-07, -3.4468e-08],\n [-2.2282e-07, -2.6711e-07, 1.2957e-07, ..., -9.0066e-08,\n -2.2485e-08, 1.5679e-07],\n ...,\n [-3.6954e-08, 2.3255e-07, 6.1588e-08, ..., -1.1820e-07,\n 4.9194e-07, 1.8535e-08],\n [ 3.3866e-08, 4.0013e-08, -4.0082e-07, ..., -1.8886e-07,\n 1.3172e-07, -1.6107e-07],\n [-2.5659e-07, -1.0471e-07, 1.2490e-07, ..., -2.7572e-07,\n 1.9965e-07, 2.8518e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.6935e-13, 1.1888e-12, 3.9248e-13, ..., 4.5012e-13, 6.3255e-13,\n 6.6504e-13],\n [5.5365e-13, 2.6951e-12, 1.0462e-12, ..., 9.7470e-13, 1.5863e-12,\n 9.8249e-13],\n [1.0227e-12, 2.2406e-12, 1.6761e-12, ..., 1.1463e-12, 1.8743e-12,\n 8.7311e-13],\n ...,\n [6.1435e-13, 9.9760e-13, 8.8313e-13, ..., 1.0095e-12, 1.4963e-12,\n 9.0707e-13],\n [3.6726e-13, 2.4129e-12, 1.2730e-12, ..., 9.0564e-13, 1.8094e-12,\n 1.2675e-12],\n [4.5767e-13, 8.2033e-13, 1.1057e-12, ..., 1.1764e-12, 1.7002e-12,\n 1.0398e-12]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[-2.6417e-07, 4.7218e-08, 1.9952e-08, ..., 1.2112e-07,\n -4.0797e-08, -1.0241e-07],\n [-1.0129e-07, 2.9652e-07, -7.9153e-09, ..., -7.3317e-08,\n 2.1145e-07, -4.3682e-08],\n [-9.1283e-07, -1.3733e-07, 3.2584e-07, ..., -3.1132e-08,\n 1.0390e-07, 4.0456e-08],\n ...,\n [-4.9108e-08, 3.5919e-08, -1.1573e-07, ..., 5.2719e-09,\n -7.5300e-08, 3.9253e-10],\n [ 1.8438e-07, 3.0469e-08, -1.7891e-07, ..., -1.2010e-08,\n -4.4716e-07, -1.3029e-07],\n [-2.0279e-08, -8.9886e-08, 4.3602e-08, ..., 1.6823e-07,\n 2.5753e-07, -6.5618e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9577e-13, 1.0110e-12, 3.0728e-13, ..., 3.3254e-13, 5.0185e-13,\n 5.0305e-13],\n [4.8619e-13, 2.3776e-12, 8.1337e-13, ..., 7.3788e-13, 1.3377e-12,\n 7.5397e-13],\n [9.2196e-13, 1.8124e-12, 1.3355e-12, ..., 8.1659e-13, 1.4627e-12,\n 6.2089e-13],\n ...,\n [4.7633e-13, 8.1497e-13, 6.6300e-13, ..., 7.5343e-13, 1.2362e-12,\n 7.8757e-13],\n [2.6809e-13, 2.0698e-12, 1.0591e-12, ..., 6.5731e-13, 1.5546e-12,\n 9.0076e-13],\n [3.8023e-13, 6.2662e-13, 7.4862e-13, ..., 8.9159e-13, 1.3479e-12,\n 7.8253e-13]], device='cuda:0')" }, "20": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-1.9209e-07, 6.5273e-07, 2.7398e-06, ..., 7.8668e-06,\n 0.0000e+00, 3.0735e-06],\n [-1.7261e-07, -1.2233e-06, 4.2487e-07, ..., 9.2460e-07,\n 0.0000e+00, 1.0711e-06],\n [-2.9962e-08, 1.9932e-08, 8.2828e-07, ..., -2.2836e-07,\n 0.0000e+00, 2.3120e-07],\n ...,\n [-4.5028e-10, 1.3000e-07, -1.3680e-06, ..., 7.0789e-07,\n 0.0000e+00, -2.0037e-06],\n [ 6.3164e-08, -5.8430e-07, 8.8215e-08, ..., -1.0109e-10,\n 0.0000e+00, 3.4881e-08],\n [-1.8986e-07, -7.8567e-08, -2.2256e-07, ..., 3.3110e-08,\n 0.0000e+00, 1.1433e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.5740e-11, 8.1367e-12, 9.4823e-12, ..., 1.6728e-10, 0.0000e+00,\n 7.3287e-12],\n [3.5922e-11, 5.9315e-11, 2.9527e-11, ..., 3.4097e-11, 0.0000e+00,\n 7.4009e-11],\n [3.0157e-12, 1.2926e-12, 5.7884e-11, ..., 1.1554e-11, 0.0000e+00,\n 5.0556e-11],\n ...,\n [2.1563e-12, 5.2466e-13, 1.2233e-10, ..., 1.3322e-10, 0.0000e+00,\n 1.4375e-10],\n [2.6351e-11, 4.4854e-12, 8.7566e-12, ..., 9.4808e-12, 0.0000e+00,\n 1.2755e-11],\n [9.5835e-11, 5.4463e-12, 3.2313e-11, ..., 1.7789e-11, 0.0000e+00,\n 5.5592e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-6.1845e-07, 1.4413e-07, -1.9318e-08, ..., -1.1891e-05,\n 0.0000e+00, 6.4408e-09],\n [-5.6226e-08, 5.8805e-07, -2.1868e-07, ..., -2.5101e-07,\n 0.0000e+00, -5.6194e-06],\n [-3.4456e-09, -1.6632e-08, -1.7002e-06, ..., -5.1660e-07,\n 0.0000e+00, -5.5233e-08],\n ...,\n [ 9.0172e-08, 6.4309e-08, 1.4471e-07, ..., 2.6056e-07,\n 0.0000e+00, -4.0211e-06],\n [-4.8459e-07, 1.5604e-08, 1.1054e-07, ..., -1.5318e-07,\n 0.0000e+00, -1.9394e-07],\n [ 2.0508e-07, -8.9246e-09, -2.7086e-07, ..., -5.4503e-08,\n 0.0000e+00, 2.0587e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9100e-11, 5.6381e-12, 7.9570e-12, ..., 1.5905e-10, 0.0000e+00,\n 3.4304e-12],\n [2.2399e-11, 5.8809e-11, 2.5600e-11, ..., 2.7774e-11, 0.0000e+00,\n 7.5856e-11],\n [1.1651e-12, 5.0924e-13, 6.7110e-11, ..., 1.0249e-11, 0.0000e+00,\n 4.4959e-11],\n ...,\n [1.4415e-12, 3.9656e-13, 9.8264e-11, ..., 1.1758e-10, 0.0000e+00,\n 1.2689e-10],\n [4.0739e-11, 2.2677e-12, 4.7581e-12, ..., 6.9080e-12, 0.0000e+00,\n 2.6612e-11],\n [6.0083e-11, 2.7452e-12, 2.6890e-11, ..., 1.6219e-11, 0.0000e+00,\n 4.1770e-11]], device='cuda:0')" }, "21": { - "step": "tensor(3756.)", - "exp_avg": "tensor([ 2.5318e-05, 1.0012e-05, -2.1274e-07, ..., 7.6633e-06,\n -1.1625e-04, 8.3162e-06], device='cuda:0')", - "exp_avg_sq": "tensor([5.8892e-09, 5.9875e-09, 4.2380e-09, ..., 6.3855e-09, 7.2759e-09,\n 6.1289e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-3.7956e-05, -5.4031e-06, 7.0594e-06, ..., 2.7980e-05,\n -2.8727e-05, 1.1770e-05], device='cuda:0')", + "exp_avg_sq": "tensor([5.6596e-09, 4.7770e-09, 3.7082e-09, ..., 5.6955e-09, 6.1637e-09,\n 4.7482e-09], device='cuda:0')" }, "22": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 1.1933e-07, -4.1805e-07, 1.0370e-07, ..., -7.7419e-07,\n 2.2342e-07, -2.1200e-07],\n [-3.6433e-08, 1.3735e-07, -3.4701e-08, ..., 1.1892e-07,\n 1.0547e-06, 7.0613e-08],\n [ 9.8728e-08, -2.6052e-07, 2.8234e-07, ..., -1.2677e-06,\n -1.0721e-07, -2.1798e-07],\n ...,\n [-1.8351e-07, -6.5217e-08, 3.8720e-08, ..., -2.6817e-07,\n 6.1159e-07, 2.8806e-07],\n [ 8.8933e-08, -3.4765e-07, -2.7428e-07, ..., 3.4735e-07,\n 5.0101e-08, -3.5880e-08],\n [ 8.3315e-08, -3.3532e-07, -1.7819e-08, ..., -1.0086e-07,\n -7.2765e-07, 3.2657e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0868e-12, 1.2630e-12, 6.4575e-13, ..., 3.1898e-12, 4.9059e-13,\n 9.8704e-13],\n [1.0356e-12, 1.3398e-12, 1.1566e-12, ..., 2.4569e-12, 9.6565e-13,\n 1.2098e-12],\n [1.8458e-12, 2.1157e-12, 2.0008e-12, ..., 4.2673e-12, 5.9944e-13,\n 2.2589e-12],\n ...,\n [1.7787e-12, 2.0446e-12, 1.6373e-12, ..., 2.1008e-12, 8.3578e-13,\n 2.9565e-12],\n [9.5308e-13, 2.0353e-12, 1.7296e-12, ..., 2.4931e-12, 1.5902e-12,\n 9.9642e-13],\n [2.5965e-12, 2.5197e-12, 1.5240e-12, ..., 2.8496e-12, 1.4242e-12,\n 3.4371e-12]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-6.8812e-08, 3.2808e-07, -1.0458e-07, ..., -2.6516e-07,\n 1.2057e-07, -1.0668e-07],\n [ 3.3064e-07, 3.1144e-07, -1.5562e-07, ..., 1.8608e-07,\n -1.5108e-07, -1.7357e-08],\n [ 6.3645e-07, -5.7058e-08, -1.4178e-07, ..., -5.1062e-07,\n -1.1225e-07, -4.3968e-08],\n ...,\n [-1.6826e-07, -3.9908e-07, 6.1119e-07, ..., 2.9228e-07,\n 9.4299e-09, 7.5585e-08],\n [-5.2122e-08, -9.3836e-08, 9.5097e-08, ..., -4.7426e-07,\n -3.3345e-07, -2.4999e-08],\n [ 1.1571e-06, -4.2969e-07, -3.5850e-08, ..., 4.9074e-07,\n -1.7465e-07, -5.1722e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.1614e-13, 1.0121e-12, 4.9026e-13, ..., 2.4729e-12, 3.4346e-13,\n 6.9624e-13],\n [7.2384e-13, 9.3973e-13, 8.1467e-13, ..., 1.8977e-12, 7.2797e-13,\n 8.4920e-13],\n [1.3063e-12, 1.5015e-12, 1.0908e-12, ..., 3.6295e-12, 3.9936e-13,\n 1.2586e-12],\n ...,\n [1.1700e-12, 1.3447e-12, 1.3369e-12, ..., 1.5498e-12, 6.1317e-13,\n 2.0993e-12],\n [6.6169e-13, 1.5391e-12, 9.4662e-13, ..., 1.9525e-12, 1.1179e-12,\n 6.5295e-13],\n [2.1305e-12, 1.8973e-12, 1.0005e-12, ..., 2.1663e-12, 8.8708e-13,\n 2.3615e-12]], device='cuda:0')" }, "23": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 2.5372e-07, -1.8535e-07, 3.2837e-07, ..., 4.4095e-07,\n 0.0000e+00, -4.5844e-07],\n [ 1.3777e-07, 3.4478e-07, 5.1561e-07, ..., 7.2723e-07,\n 0.0000e+00, -1.7451e-06],\n [ 2.2605e-08, -2.3888e-07, 8.8496e-07, ..., 4.0476e-07,\n 0.0000e+00, 2.8720e-07],\n ...,\n [-1.8805e-06, -2.9881e-07, -4.9019e-06, ..., -3.6671e-06,\n 0.0000e+00, 2.6311e-07],\n [ 1.3376e-07, 2.4405e-05, -5.9606e-08, ..., -2.6227e-08,\n 0.0000e+00, 3.7713e-06],\n [ 4.7923e-07, 1.1344e-08, -1.5546e-06, ..., -2.2969e-06,\n 0.0000e+00, 2.6475e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.4099e-11, 4.6144e-11, 4.1706e-11, ..., 1.8959e-10, 0.0000e+00,\n 7.5892e-11],\n [2.1909e-11, 1.2555e-11, 2.7565e-11, ..., 5.8230e-11, 0.0000e+00,\n 3.6794e-11],\n [5.8183e-11, 1.0204e-10, 1.1272e-10, ..., 3.6086e-11, 0.0000e+00,\n 1.2759e-11],\n ...,\n [2.2794e-10, 2.0747e-11, 3.4102e-11, ..., 7.1028e-11, 0.0000e+00,\n 5.9259e-11],\n [3.0549e-11, 2.8442e-10, 7.2744e-11, ..., 3.3640e-11, 0.0000e+00,\n 7.6385e-10],\n [6.9252e-11, 3.2011e-12, 1.4709e-10, ..., 8.1099e-11, 0.0000e+00,\n 3.1671e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 8.6625e-08, -1.8288e-07, 8.3755e-07, ..., 3.9635e-06,\n 0.0000e+00, 2.1120e-08],\n [ 4.0071e-07, 1.8793e-08, -4.0375e-07, ..., -5.6561e-07,\n 0.0000e+00, -7.3566e-08],\n [-1.4025e-06, 3.7002e-07, 9.7900e-07, ..., 1.2052e-06,\n 0.0000e+00, -4.3734e-07],\n ...,\n [-3.7614e-06, 5.9165e-08, -1.9186e-06, ..., 8.2550e-07,\n 0.0000e+00, -3.2771e-07],\n [-1.5528e-07, 1.0160e-06, -6.5283e-07, ..., -2.0301e-07,\n 0.0000e+00, -1.9045e-06],\n [ 1.6437e-07, 5.3580e-09, -2.5795e-06, ..., -3.8795e-07,\n 0.0000e+00, 3.0473e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.8523e-11, 2.1524e-11, 2.5717e-11, ..., 1.3093e-10, 0.0000e+00,\n 6.7806e-11],\n [1.7258e-11, 1.5388e-11, 2.2932e-11, ..., 5.8121e-11, 0.0000e+00,\n 2.5037e-11],\n [3.2654e-11, 4.8526e-11, 1.0993e-10, ..., 2.8691e-11, 0.0000e+00,\n 1.3936e-11],\n ...,\n [1.8172e-10, 1.3802e-11, 2.6416e-11, ..., 3.6502e-11, 0.0000e+00,\n 3.8433e-11],\n [2.1074e-11, 1.9554e-10, 3.4346e-11, ..., 1.7332e-11, 0.0000e+00,\n 9.0037e-10],\n [7.1665e-11, 1.8041e-12, 1.1006e-10, ..., 6.6133e-11, 0.0000e+00,\n 2.7220e-11]], device='cuda:0')" }, "24": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.8917e-07, -3.5586e-06, 1.3365e-05, ..., -1.0406e-04,\n 5.2955e-05, 2.2643e-05], device='cuda:0')", - "exp_avg_sq": "tensor([1.5673e-08, 1.0588e-08, 1.1079e-08, ..., 1.1767e-08, 1.4554e-08,\n 1.5420e-08], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 4.2499e-05, 1.5117e-06, -1.1170e-05, ..., 2.8940e-05,\n -1.3405e-05, -2.5607e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.1779e-08, 1.0526e-08, 9.5232e-09, ..., 8.4988e-09, 1.1161e-08,\n 1.1279e-08], device='cuda:0')" }, "25": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-2.3328e-07, 1.6582e-07, 1.6090e-07, ..., -9.4751e-07,\n 1.9169e-06, -2.6419e-07],\n [ 2.0847e-07, 1.5380e-07, -1.0761e-06, ..., 6.1166e-07,\n 5.4966e-07, -8.9132e-08],\n [-5.5691e-07, 1.3937e-07, 3.5461e-07, ..., 5.4644e-07,\n -1.8015e-06, -1.4268e-07],\n ...,\n [ 2.8713e-07, 1.8556e-07, -2.1238e-07, ..., -1.3077e-06,\n 1.3911e-06, -6.5188e-07],\n [ 1.7067e-07, -1.7208e-08, 4.4386e-08, ..., 1.3498e-07,\n 1.7258e-07, 5.0684e-07],\n [ 5.9864e-07, 8.5150e-08, -6.2766e-07, ..., -2.8029e-07,\n 1.7140e-06, 3.7437e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.3407e-12, 1.1319e-12, 1.5778e-12, ..., 4.3575e-12, 4.4045e-12,\n 2.0905e-12],\n [4.3287e-12, 2.7496e-12, 3.1471e-12, ..., 5.2643e-12, 6.6980e-12,\n 7.6817e-12],\n [5.3473e-12, 3.7713e-12, 3.7376e-12, ..., 4.7671e-12, 8.4619e-12,\n 7.5098e-12],\n ...,\n [4.6325e-12, 3.2420e-12, 3.7952e-12, ..., 6.8267e-12, 5.7886e-12,\n 7.1170e-12],\n [6.4980e-12, 2.0942e-12, 3.8943e-12, ..., 9.1502e-12, 5.1498e-12,\n 6.8550e-12],\n [4.2597e-12, 2.3187e-12, 4.3442e-12, ..., 4.8745e-12, 6.6551e-12,\n 4.6529e-12]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 3.6704e-07, -6.6624e-08, 4.4290e-08, ..., -3.3636e-07,\n 1.0906e-07, -3.6141e-08],\n [ 3.8203e-07, 7.1240e-08, 4.9511e-07, ..., 4.2769e-07,\n -2.1302e-07, -1.6420e-07],\n [ 4.0396e-07, 3.3011e-07, -3.1809e-08, ..., 2.4831e-07,\n -8.7148e-08, -3.5561e-07],\n ...,\n [-4.2670e-07, -3.5578e-07, 9.9700e-07, ..., -2.2710e-07,\n -2.2648e-07, -1.0786e-07],\n [ 5.7559e-09, 3.5136e-07, -1.8028e-07, ..., -1.0323e-07,\n 3.0361e-08, 1.0445e-07],\n [ 2.6267e-07, -1.4132e-07, -2.6558e-09, ..., -1.1966e-06,\n 5.1226e-07, 1.6046e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4248e-12, 7.3370e-13, 9.7379e-13, ..., 2.6822e-12, 2.3264e-12,\n 1.3054e-12],\n [2.6117e-12, 1.6776e-12, 2.0095e-12, ..., 3.0942e-12, 3.7556e-12,\n 4.9571e-12],\n [3.1881e-12, 2.2160e-12, 2.2749e-12, ..., 2.7148e-12, 4.6848e-12,\n 5.0733e-12],\n ...,\n [2.8834e-12, 2.3681e-12, 2.6422e-12, ..., 4.3040e-12, 3.3138e-12,\n 4.8472e-12],\n [3.8981e-12, 1.3963e-12, 2.5805e-12, ..., 5.9204e-12, 2.8707e-12,\n 4.9756e-12],\n [2.6075e-12, 1.4737e-12, 2.8133e-12, ..., 3.1698e-12, 3.8211e-12,\n 2.9316e-12]], device='cuda:0')" }, "26": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 7.2725e-07, -5.2496e-07, -5.7601e-07, ..., 1.3208e-06,\n 0.0000e+00, 1.5051e-06],\n [-2.9653e-07, -2.1660e-06, -5.4385e-07, ..., -2.7894e-07,\n 0.0000e+00, 8.4366e-08],\n [-4.2870e-07, -4.7062e-06, 1.5154e-08, ..., 3.9340e-07,\n 0.0000e+00, 1.2024e-06],\n ...,\n [ 2.3313e-07, 2.2711e-08, -5.0997e-06, ..., 2.8759e-06,\n 0.0000e+00, -2.6341e-06],\n [-2.2056e-06, -1.4550e-06, -2.9628e-07, ..., -3.9467e-06,\n 0.0000e+00, 7.7755e-07],\n [ 2.8094e-07, -3.3516e-07, -4.4573e-06, ..., -1.4208e-06,\n 0.0000e+00, -9.7460e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.7492e-11, 8.4630e-11, 6.6799e-11, ..., 1.3094e-10, 0.0000e+00,\n 7.6516e-11],\n [1.2775e-10, 1.2657e-10, 3.8540e-10, ..., 6.5135e-11, 0.0000e+00,\n 1.6302e-10],\n [1.5407e-10, 2.1185e-10, 5.2360e-11, ..., 1.3644e-10, 0.0000e+00,\n 1.5052e-10],\n ...,\n [1.8137e-10, 1.1531e-11, 1.6625e-09, ..., 3.0945e-09, 0.0000e+00,\n 4.6929e-10],\n [2.0220e-10, 1.7326e-10, 8.7547e-11, ..., 4.3323e-10, 0.0000e+00,\n 1.5154e-10],\n [1.1344e-10, 4.0969e-11, 4.1576e-10, ..., 2.9838e-10, 0.0000e+00,\n 5.6939e-10]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 2.6818e-07, -5.6154e-07, 1.7891e-06, ..., 2.9804e-06,\n 0.0000e+00, -9.3856e-07],\n [ 1.0162e-06, 8.9720e-08, 1.3353e-06, ..., 6.9238e-07,\n 0.0000e+00, 2.1799e-07],\n [ 8.5368e-07, -2.2396e-06, -9.7303e-07, ..., -7.0409e-06,\n 0.0000e+00, -7.1518e-07],\n ...,\n [-1.0315e-07, 4.5212e-08, 1.8161e-05, ..., 1.7887e-04,\n 0.0000e+00, -1.0776e-06],\n [-4.4393e-07, 1.0571e-06, -3.6268e-07, ..., -7.2545e-06,\n 0.0000e+00, -2.9153e-06],\n [-1.3460e-07, 1.5739e-06, 1.3340e-06, ..., -4.3566e-06,\n 0.0000e+00, 3.0733e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.7025e-11, 6.6304e-11, 5.2935e-11, ..., 8.1728e-11, 0.0000e+00,\n 4.4678e-11],\n [6.6985e-11, 7.9299e-11, 3.3033e-10, ..., 3.8502e-11, 0.0000e+00,\n 2.0548e-10],\n [1.0850e-10, 3.2719e-10, 4.2286e-11, ..., 2.8572e-10, 0.0000e+00,\n 1.9407e-10],\n ...,\n [8.3318e-11, 7.8421e-12, 1.5768e-09, ..., 6.0843e-09, 0.0000e+00,\n 6.0611e-10],\n [1.3663e-10, 1.7230e-10, 4.3073e-11, ..., 2.6909e-10, 0.0000e+00,\n 1.2199e-10],\n [6.6905e-11, 3.0412e-11, 3.7693e-10, ..., 2.2863e-10, 0.0000e+00,\n 4.6511e-10]], device='cuda:0')" }, "27": { - "step": "tensor(1252.)", - "exp_avg": "tensor([ 2.3203e-05, -3.9301e-05, -3.7484e-05, ..., -1.8705e-05,\n -6.5615e-05, 2.5084e-05], device='cuda:0')", - "exp_avg_sq": "tensor([2.0441e-08, 2.5917e-08, 4.1598e-08, ..., 3.7004e-08, 2.3231e-08,\n 3.7824e-08], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 2.4152e-05, 4.0226e-05, -2.2483e-04, ..., 1.2204e-04,\n -1.1629e-05, -3.6174e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.9226e-08, 1.8703e-08, 5.1420e-08, ..., 3.1267e-08, 1.6952e-08,\n 2.9708e-08], device='cuda:0')" }, "28": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 7.0014e-08, -2.3550e-07, -1.8928e-07, ..., 9.9566e-07,\n 3.5126e-08, 8.7351e-07],\n [ 3.3054e-07, -1.3881e-07, 6.6509e-07, ..., 2.5783e-06,\n -2.5351e-07, 2.2954e-07],\n [-1.7870e-06, -8.3651e-08, -1.6035e-06, ..., 5.2402e-08,\n 4.0490e-08, 7.3153e-07],\n ...,\n [-1.4263e-06, 6.4761e-07, -3.9834e-07, ..., 4.7154e-07,\n 4.9860e-07, 4.2942e-07],\n [-1.4211e-07, -3.5462e-07, -1.1245e-06, ..., 8.8498e-07,\n -4.1586e-08, -1.3816e-06],\n [ 6.1833e-07, 2.5424e-07, -6.6536e-07, ..., -9.7480e-07,\n -6.8482e-07, 4.6943e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.9734e-12, 3.6468e-12, 1.1986e-11, ..., 7.9659e-12, 4.1398e-12,\n 6.7981e-12],\n [8.2350e-12, 5.4648e-12, 8.4214e-12, ..., 1.3570e-11, 9.0190e-12,\n 1.0688e-11],\n [8.6306e-12, 1.0488e-11, 1.5727e-11, ..., 1.3385e-11, 8.3640e-12,\n 1.1622e-11],\n ...,\n [1.4206e-11, 7.6523e-12, 1.6370e-11, ..., 1.7749e-11, 1.4635e-11,\n 1.3071e-11],\n [8.9737e-12, 6.8857e-12, 9.2066e-12, ..., 1.3603e-11, 9.1160e-12,\n 1.5713e-11],\n [8.0421e-12, 7.1649e-12, 9.7956e-12, ..., 1.0800e-11, 1.1423e-11,\n 1.1974e-11]], device='cuda:0')" + }, + "29": { "step": "tensor(1252.)", - "exp_avg": "tensor([[ 6.6740e-08, 4.5027e-07, 6.9554e-07, ..., 3.7485e-07,\n 1.6396e-08, 4.1967e-07],\n [-1.7465e-06, -3.9027e-07, 9.9006e-08, ..., 1.4767e-07,\n -6.5718e-07, -9.1373e-08],\n [ 4.7919e-07, 1.7972e-07, -7.1477e-07, ..., -1.3184e-06,\n 6.2917e-08, 4.8562e-07],\n ...,\n [ 8.9765e-07, -5.2517e-07, -5.9885e-07, ..., 2.1146e-07,\n 8.7962e-07, -1.4466e-06],\n [ 1.1576e-06, -1.7669e-07, 1.3090e-06, ..., -6.8327e-08,\n 2.5305e-07, 2.5753e-07],\n [-6.1162e-07, -5.0246e-07, 2.0910e-07, ..., 1.8338e-07,\n 3.2468e-07, -2.1189e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.2722e-12, 6.6119e-12, 1.5768e-11, ..., 1.2787e-11, 7.3154e-12,\n 1.1481e-11],\n [1.6149e-11, 1.1410e-11, 1.5771e-11, ..., 2.4998e-11, 1.7105e-11,\n 1.8729e-11],\n [1.4424e-11, 1.8113e-11, 2.1790e-11, ..., 2.1822e-11, 1.5442e-11,\n 2.0752e-11],\n ...,\n [2.9813e-11, 1.5832e-11, 3.1659e-11, ..., 3.3587e-11, 3.4843e-11,\n 2.7472e-11],\n [1.7944e-11, 1.3109e-11, 1.7555e-11, ..., 2.3641e-11, 1.8521e-11,\n 2.6582e-11],\n [1.6672e-11, 1.4147e-11, 1.9292e-11, ..., 2.0529e-11, 2.2558e-11,\n 2.2296e-11]], device='cuda:0')" + "exp_avg": "tensor([[ 4.6549e-06, 1.0463e-07, 1.1726e-05, ..., 3.6087e-06,\n 0.0000e+00, -1.1304e-05],\n [-9.7613e-06, 2.8244e-06, 1.8208e-06, ..., -1.0593e-06,\n 0.0000e+00, 2.3663e-06],\n [ 3.5186e-06, -2.2636e-06, -6.4493e-06, ..., 1.2065e-05,\n 0.0000e+00, -5.4349e-06],\n ...,\n [ 2.2149e-06, -7.3551e-07, -1.5728e-05, ..., 1.5083e-05,\n 0.0000e+00, -1.8443e-06],\n [ 4.7179e-06, -1.4825e-06, -4.6107e-07, ..., -3.2699e-06,\n 0.0000e+00, 2.1100e-06],\n [-3.4079e-06, -8.8482e-07, 7.7006e-06, ..., 4.0681e-06,\n 0.0000e+00, -1.1189e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.1810e-10, 1.4450e-10, 1.0865e-09, ..., 6.7395e-10, 0.0000e+00,\n 8.9202e-10],\n [5.3529e-10, 9.5948e-11, 3.6140e-10, ..., 2.2561e-10, 0.0000e+00,\n 2.8036e-10],\n [2.4466e-10, 1.3245e-10, 4.2474e-10, ..., 7.6973e-10, 0.0000e+00,\n 1.4730e-09],\n ...,\n [1.7838e-10, 1.8297e-10, 1.4252e-09, ..., 7.3969e-10, 0.0000e+00,\n 6.2450e-10],\n [3.8208e-10, 2.1327e-10, 1.6977e-10, ..., 3.3027e-10, 0.0000e+00,\n 3.0958e-10],\n [6.4949e-10, 1.5909e-10, 1.8713e-09, ..., 4.3686e-10, 0.0000e+00,\n 9.1388e-10]], device='cuda:0')" + }, + "30": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 1.0712e-04, -1.0426e-05, -2.6064e-05, ..., -6.4751e-05,\n -5.9825e-06, -4.5317e-05], device='cuda:0')", + "exp_avg_sq": "tensor([5.7841e-08, 6.2386e-08, 6.5328e-08, ..., 5.7239e-08, 5.1020e-08,\n 7.6560e-08], device='cuda:0')" + }, + "31": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-8.7127e-07, 2.3044e-06, 5.4709e-07, ..., 7.9924e-07,\n -1.5380e-06, -2.5151e-06],\n [-2.4811e-06, -9.3771e-07, 4.0047e-06, ..., -2.0239e-06,\n 5.3400e-07, -1.1437e-06],\n [-4.2748e-07, -2.8912e-06, -8.8406e-07, ..., -2.2906e-06,\n 3.9683e-06, -1.3857e-06],\n ...,\n [-5.2646e-07, -2.6523e-07, 2.5794e-07, ..., 1.8322e-06,\n 7.0432e-07, -1.9736e-06],\n [ 9.9442e-07, -2.8867e-06, -2.5019e-06, ..., -6.7045e-06,\n -3.0750e-06, -9.6310e-06],\n [-7.0356e-07, 3.3788e-06, -1.2132e-06, ..., 5.8414e-06,\n 4.4418e-06, 6.7221e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.7381e-11, 4.7652e-11, 6.1029e-11, ..., 3.9400e-11, 5.7003e-11,\n 4.7475e-11],\n [6.2163e-11, 7.9086e-11, 1.2563e-10, ..., 7.5610e-11, 8.1847e-11,\n 7.7077e-11],\n [5.6396e-11, 7.7227e-11, 1.3232e-10, ..., 6.4271e-11, 8.3451e-11,\n 7.6538e-11],\n ...,\n [5.1067e-11, 6.8155e-11, 1.2001e-10, ..., 6.5949e-11, 7.1325e-11,\n 8.0999e-11],\n [7.6909e-11, 8.6984e-11, 1.4603e-10, ..., 7.7356e-11, 8.8286e-11,\n 8.7655e-11],\n [6.5247e-11, 8.7666e-11, 1.1892e-10, ..., 7.5368e-11, 8.9398e-11,\n 8.1653e-11]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "shared", "betas": [ 0.9, @@ -317,7 +332,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_256", "betas": [ 0.9, @@ -340,7 +355,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_512", "betas": [ 0.9, @@ -363,7 +378,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_768", "betas": [ 0.9, @@ -386,7 +401,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_1024", "betas": [ 0.9, @@ -409,7 +424,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_1280", "betas": [ 0.9, @@ -432,7 +447,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_1536", "betas": [ 0.9, @@ -455,7 +470,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_1792", "betas": [ 0.9, @@ -478,7 +493,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_2048", "betas": [ 0.9, @@ -501,7 +516,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_2304", "betas": [ 0.9, @@ -524,7 +539,7 @@ ] }, { - "lr": 0.00024569294678237997, + "lr": 0.01, "name": "scale_2560", "betas": [ 0.9, @@ -547,7 +562,7 @@ ] }, { - "lr": 0.00012333423752026375, + "lr": 0.005, "name": "fusion", "betas": [ 0.9, @@ -600,10 +615,10 @@ }, "scheduler_state_dict": { "T_0": 10, - "T_i": 10, + "T_i": 20, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 9, + "T_cur": 0, "base_lrs": [ 0.01, 0.01, @@ -618,38 +633,39 @@ 0.01, 0.005 ], - "last_epoch": 9, + "last_epoch": 10, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00024569294678237997, - 0.00012333423752026375 + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 ] }, "metrics": { - "best_val_acc": 82.324, - "best_epoch": 8, + "best_val_acc": 82.374, + "best_epoch": 9, "scale_accuracies": { - "256": 82.324, - "512": 82.706, - "768": 82.654, - "1024": 82.456, - "1280": 82.404, - "1536": 82.352, - "1792": 82.274, - "2048": 82.28, - "2304": 81.792 + "256": 82.374, + "512": 82.728, + "768": 82.702, + "1024": 82.526, + "1280": 82.534, + "1536": 82.482, + "1792": 82.604, + "2048": 82.554, + "2304": 82.604, + "2560": 82.166 } }, "train_config": {