diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,211 +1,226 @@ { - "epoch": 2, + "epoch": 3, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 2.1517e-05, -2.6941e-05, -7.6814e-06, ..., -7.1187e-06,\n -2.2356e-06, 1.9026e-05],\n [ 7.0848e-06, -2.2839e-05, -2.4405e-07, ..., -3.2418e-05,\n -1.1959e-05, -2.4559e-05],\n [ 2.1240e-40, 3.4596e-40, 1.1568e-40, ..., -4.2081e-41,\n 1.2767e-40, -1.5001e-40],\n ...,\n [-1.1554e-05, -1.6690e-05, 1.2038e-05, ..., 1.7157e-05,\n -1.2429e-05, -4.7271e-06],\n [-1.9076e-05, 1.8564e-05, 7.2819e-06, ..., 1.5901e-05,\n 1.4939e-05, 2.8084e-05],\n [ 3.8714e-06, 1.4334e-06, 2.7298e-05, ..., -6.0566e-06,\n -1.2570e-05, -1.5604e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3189e-08, 1.5605e-08, 6.9154e-09, ..., 8.6941e-09, 7.6699e-09,\n 4.9935e-09],\n [1.0543e-08, 9.8523e-09, 1.0035e-08, ..., 7.7223e-09, 5.9935e-09,\n 5.0743e-09],\n [4.9501e-12, 5.9935e-12, 5.9748e-12, ..., 8.6235e-12, 1.6632e-12,\n 3.8329e-12],\n ...,\n [1.3503e-08, 1.1356e-08, 9.2611e-09, ..., 8.3496e-09, 7.0566e-09,\n 5.9478e-09],\n [1.4595e-08, 1.2197e-08, 9.5062e-09, ..., 9.2816e-09, 7.8575e-09,\n 6.2278e-09],\n [3.1345e-09, 4.8947e-09, 3.4502e-09, ..., 2.2795e-09, 2.4005e-09,\n 2.1024e-09]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-1.4727e-05, 9.6125e-05, 4.4182e-06, ..., 1.8888e-05,\n 3.2354e-05, -3.2978e-05],\n [ 1.7186e-05, 2.1444e-05, 6.2785e-05, ..., -5.0763e-05,\n -4.9622e-05, -2.0544e-06],\n [-3.5583e-17, -6.3245e-17, 3.9489e-18, ..., -1.4693e-17,\n 8.8409e-18, 1.0104e-16],\n ...,\n [ 5.4912e-05, -1.1023e-05, 8.0683e-06, ..., 5.8102e-05,\n 9.9074e-05, 9.2469e-06],\n [-2.0231e-06, -2.8735e-05, -4.6638e-05, ..., -2.6582e-06,\n 6.9262e-06, 1.7296e-05],\n [ 1.3488e-05, 2.0492e-05, -1.1510e-05, ..., -1.8018e-05,\n 9.9040e-06, 2.5564e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3783e-08, 1.7052e-08, 7.6170e-09, ..., 9.6923e-09, 8.6143e-09,\n 5.4852e-09],\n [1.2322e-08, 1.1216e-08, 1.1246e-08, ..., 9.0442e-09, 6.9648e-09,\n 5.6959e-09],\n [1.4516e-12, 1.7526e-12, 1.7107e-12, ..., 2.4980e-12, 4.7983e-13,\n 1.2027e-12],\n ...,\n [1.4175e-08, 1.1985e-08, 1.0139e-08, ..., 8.5773e-09, 8.2615e-09,\n 6.2049e-09],\n [1.5163e-08, 1.3115e-08, 9.3555e-09, ..., 1.0157e-08, 8.0871e-09,\n 6.6994e-09],\n [3.3138e-09, 4.9418e-09, 3.8416e-09, ..., 2.1946e-09, 2.6334e-09,\n 2.2108e-09]], device='cuda:0')" }, "1": { - "step": "tensor(3756.)", - "exp_avg": "tensor([-1.4859e-04, 2.3281e-04, -9.1672e-39, ..., -6.7725e-04,\n -1.2363e-03, -1.0333e-04], device='cuda:0')", - "exp_avg_sq": "tensor([1.6299e-05, 1.3848e-05, 1.0350e-08, ..., 1.6846e-05, 1.6282e-05,\n 5.2225e-06], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([ 2.9744e-03, -2.3959e-03, 4.5018e-15, ..., -6.1077e-04,\n 1.2236e-03, 7.8891e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.7595e-05, 1.5581e-05, 3.1536e-09, ..., 1.7378e-05, 1.7380e-05,\n 5.7453e-06], device='cuda:0')" }, "2": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-9.9799e-06, -4.5945e-07, 5.6052e-45, ..., -2.7008e-06,\n -2.7476e-07, -1.2597e-06],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-6.7492e-08, -2.6027e-07, 0.0000e+00, ..., 8.6992e-08,\n 6.6784e-07, 5.4310e-12],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 6.7450e-06, 7.5090e-07, -5.6052e-45, ..., 2.9990e-05,\n 6.8130e-07, 1.2804e-05],\n [ 1.9819e-06, -3.3921e-06, -5.6052e-45, ..., -9.3940e-06,\n 4.9197e-06, -9.4716e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.7625e-09, 1.1538e-09, 2.7922e-12, ..., 4.0106e-09, 6.2717e-09,\n 5.1303e-10],\n [2.9863e-12, 5.4691e-11, 0.0000e+00, ..., 1.0261e-11, 3.7961e-15,\n 3.7790e-11],\n [7.5890e-11, 2.7005e-10, 0.0000e+00, ..., 1.5915e-10, 1.1158e-11,\n 3.0124e-10],\n ...,\n [0.0000e+00, 1.2804e-17, 0.0000e+00, ..., 3.7509e-19, 4.4798e-19,\n 0.0000e+00],\n [6.1476e-09, 2.2487e-09, 4.0238e-13, ..., 3.7948e-09, 5.9777e-10,\n 4.3273e-09],\n [5.5344e-10, 9.7504e-10, 2.1450e-12, ..., 1.1998e-09, 9.3081e-09,\n 6.3259e-10]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-6.2350e-06, 6.8298e-06, 5.6052e-45, ..., -9.7226e-06,\n 9.8143e-06, 2.7689e-06],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-1.7465e-07, 8.9511e-07, 0.0000e+00, ..., 2.4717e-06,\n -3.0347e-06, -2.3281e-08],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [-4.9470e-06, -5.0006e-06, -5.6052e-45, ..., 4.7047e-06,\n -1.4633e-05, 4.3875e-06],\n [ 1.2712e-06, -3.1989e-07, -5.6052e-45, ..., 1.8128e-06,\n 1.0142e-05, -6.3883e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.8631e-09, 7.4611e-10, 7.9790e-13, ..., 2.6214e-09, 2.3772e-09,\n 5.1907e-10],\n [8.5335e-13, 1.5629e-11, 0.0000e+00, ..., 2.9320e-12, 1.0848e-15,\n 1.0799e-11],\n [1.1750e-10, 4.8641e-10, 0.0000e+00, ..., 2.1420e-10, 5.5720e-10,\n 1.0681e-10],\n ...,\n [0.0000e+00, 3.6588e-18, 0.0000e+00, ..., 1.0719e-19, 1.2801e-19,\n 0.0000e+00],\n [5.3824e-09, 1.0494e-09, 1.1498e-13, ..., 2.7933e-09, 7.2901e-10,\n 2.1291e-09],\n [1.1844e-09, 1.2613e-09, 6.1294e-13, ..., 8.5529e-10, 6.3261e-09,\n 8.3949e-10]], device='cuda:0')" }, "3": { - "step": "tensor(3756.)", - "exp_avg": "tensor([-2.7197e-04, 5.6052e-45, 6.1862e-06, -2.9774e-04, 2.9000e-04,\n 5.6052e-45, 1.5417e-04, -5.0991e-05, -2.9012e-05, -4.8245e-04,\n 1.2664e-04, -8.3094e-23, 1.3998e-04, -2.4330e-05, 1.9183e-04,\n -8.5183e-05, -2.9388e-05, 3.1215e-11, 2.2819e-05, -1.6405e-04,\n 1.0840e-12, -2.2573e-05, 2.5321e-05, 4.0705e-05, 1.6067e-04,\n 8.7898e-05, 2.1385e-05, 5.6052e-45, 5.9112e-05, -1.4310e-05,\n -5.6351e-06, 7.4121e-05, 5.4636e-05, -7.1182e-05, -9.7100e-05,\n -2.2994e-04, 5.6052e-45, -7.5110e-08, 4.1827e-05, -1.8324e-06,\n 2.1942e-04, -9.5022e-05, -1.5455e-04, 7.6379e-05, -1.6164e-04,\n 5.5655e-06, 1.0588e-04, 1.2678e-04, 5.1292e-05, -1.2479e-04,\n -1.6342e-35, -1.7196e-04, -6.5582e-05, -7.1310e-05, 1.3368e-04,\n 8.7433e-05, -4.4700e-06, 9.4420e-05, -7.3208e-05, -3.2428e-05,\n 4.9684e-05, -2.5186e-05, 8.6725e-05, -5.5476e-05, 3.7623e-05,\n -1.0068e-04, 3.6693e-05, 2.1892e-04, 5.6052e-45, 1.0065e-28,\n 1.8233e-04, 5.6052e-45, 1.7627e-04, 9.6067e-05, 1.6458e-04,\n 7.3967e-06, 5.6052e-45, -2.4910e-04, 2.5265e-05, -1.9294e-05,\n -4.6444e-06, 1.6491e-04, 1.8365e-04, 2.4745e-13, -8.0199e-04,\n -1.1084e-04, -2.0079e-04, -8.7441e-05, 2.9475e-04, -1.3690e-04,\n 6.7087e-05, -1.5930e-04, -3.6879e-06, -2.5063e-04, 8.6976e-05,\n -3.1316e-05, 1.9400e-04, -1.4038e-08, 1.8542e-05, 1.0500e-04,\n 1.1307e-04, -1.2929e-05, -5.4928e-05, 1.1323e-04, 2.0373e-04,\n 3.8913e-06, 4.9983e-05, 1.3431e-04, -1.6120e-05, -1.2842e-05,\n -5.3360e-05, -1.0518e-04, 4.9170e-05, 5.6052e-45, -2.7246e-04,\n 8.3844e-05, 2.9317e-05, -1.0397e-04, -5.0215e-06, 5.6052e-45,\n 3.7516e-05, 7.3719e-05, -4.2309e-05, 5.6052e-45, 7.9699e-05,\n 3.0156e-05, -1.7834e-04, -1.9200e-23, 1.0121e-05, 2.0687e-17,\n 2.4977e-04, 3.7485e-05, 5.6052e-45, -2.7051e-04, 1.6674e-13,\n 1.8268e-04, 5.6052e-45, -1.4405e-04, -2.7886e-04, -2.5406e-09,\n 5.6052e-45, 3.3899e-28, 4.6283e-05, 2.8794e-11, 5.6052e-45,\n 5.6052e-45, 9.9085e-05, -1.4151e-04, 5.6052e-45, -2.7200e-05,\n -1.5855e-05, -8.8834e-05, 4.9665e-05, -1.1635e-04, 5.6052e-45,\n 1.8282e-04, -7.7508e-05, -6.1249e-05, 5.6052e-45, -2.0261e-04,\n 1.0973e-04, 2.8602e-05, 1.8772e-04, 4.8478e-05, 3.2414e-05,\n -1.6312e-05, -3.6639e-13, 5.6052e-45, -1.0602e-04, 1.4927e-04,\n -3.5654e-05, 6.7847e-05, -6.3458e-05, 7.0233e-06, 3.2258e-05,\n -2.9439e-05, -1.1037e-07, 2.4340e-04, 1.0282e-04, 3.8703e-05,\n -9.1587e-05, 1.6932e-04, -3.6300e-05, 5.6052e-45, 2.3114e-11,\n 5.6052e-45, 6.8321e-05, -1.3482e-05, 1.5178e-05, 1.5293e-05,\n 6.8559e-05, -7.1032e-06, 1.8499e-04, 1.3980e-04, -2.6252e-05,\n -5.1081e-06, 3.6192e-05, 2.4450e-05, -9.5711e-05, 5.6052e-45,\n -1.3945e-05, 8.0197e-06, 3.0071e-04, -1.4174e-04, 1.5901e-04,\n -5.3548e-05, -3.4124e-04, 5.1332e-05, 2.5218e-30, 5.6052e-45,\n 3.7048e-05, 5.6052e-45, 8.9936e-05, 9.7511e-05, 1.8544e-05,\n -2.1783e-05, -1.2728e-04, -9.7020e-05, 1.9047e-05, -2.7507e-04,\n -5.8108e-05, -2.4093e-04, -4.6631e-05, 2.5515e-05, 7.9329e-05,\n 5.6052e-45, -1.7058e-04, -2.3092e-05, 2.5453e-04, 5.6052e-45,\n 2.2804e-04, 1.9779e-04, -2.2965e-05, -5.1846e-05, 5.6052e-45,\n -3.8675e-05, -1.9136e-05, 5.6052e-45, 6.2991e-05, 8.9380e-05,\n -4.7784e-05, 5.6052e-45, 1.6830e-04, 8.9307e-05, 6.5347e-06,\n -6.1316e-05, 1.1459e-04, -3.0487e-04, 3.1000e-05, 5.1483e-15,\n 8.2123e-05, 8.2426e-05, -2.4996e-04, -7.0940e-05, 2.2304e-05,\n 2.1345e-04, 5.6052e-45, -1.4850e-05, 5.6052e-45, -4.7127e-05,\n 5.6052e-45, 8.6978e-06, 1.9905e-04, -3.2898e-05, -1.2458e-04,\n 1.1960e-05, -1.2238e-04, 9.1834e-06, 2.6696e-04, -5.6052e-45,\n 3.1820e-04, -4.6181e-05, -6.7187e-05, -1.7674e-04, 2.2346e-04,\n 1.6948e-04, -1.4031e-04, 5.6052e-45, 5.9813e-05, -1.0007e-05,\n 5.6052e-45, 1.2687e-04, -4.2957e-05, 1.3414e-04, 2.5318e-05,\n 3.0422e-05, -1.2204e-04, -9.9087e-05, 3.8138e-05, 2.7195e-05,\n 1.1470e-05, 2.5279e-05, -6.5930e-17, 3.1055e-05, 1.5536e-04,\n -8.9411e-05, 1.3547e-04, 2.1328e-05, -7.0201e-05, -6.2227e-05,\n 2.0935e-05, -1.8993e-05, -3.1796e-05, -6.9201e-05, -6.7764e-19,\n 5.6052e-45, -1.1405e-07, 7.5524e-05, 2.0957e-05, 4.1945e-04,\n 7.0866e-05, 5.6052e-45, -5.8064e-04, 3.2967e-05, 1.0483e-04,\n 5.6052e-45, 8.4335e-06, -7.4314e-05, -1.0994e-04, -8.6618e-05,\n -4.1244e-05, -3.6992e-05, 2.1660e-05, 1.3446e-04, 6.7636e-05,\n 5.6052e-45, -2.3004e-04, -9.1055e-05, 3.8643e-05, 6.9657e-05,\n 2.2690e-04, 1.6341e-04, -2.4598e-05, 5.6057e-05, -2.0569e-04,\n 4.9524e-05, -8.1786e-05, 1.8336e-12, 5.6052e-45, 2.7056e-07,\n -1.9340e-04, -8.7592e-05, 9.5886e-05, -3.6200e-05, -1.2018e-04,\n -5.6607e-06, 3.6047e-05, 9.7618e-06, -2.0907e-04, 1.5156e-04,\n 1.0126e-04, 1.8733e-05, -1.6791e-04, 5.6052e-45, -1.3523e-04,\n 3.1359e-04, 4.0186e-05, -8.7976e-10, 1.5100e-04, -1.6622e-05,\n 2.8341e-04, 2.0688e-05, 2.5014e-32, -8.8644e-05, 2.0949e-04,\n -7.4947e-07, -7.6747e-05, 5.0886e-05, 5.1182e-05, -7.4682e-05,\n 9.6326e-05, -3.0874e-12, -1.6551e-04, 1.9047e-05, 1.0544e-04,\n -2.1925e-05, -6.5996e-09, 7.2938e-07, 2.9194e-19, 1.0881e-05,\n 5.9383e-05, -1.1777e-04, -3.3133e-05, 4.7211e-05, 5.6052e-45,\n -2.3213e-05, 5.6052e-45, -3.1118e-04, 4.9845e-06, -5.2574e-04,\n 4.0634e-05, -4.1606e-04, -8.4469e-05, 9.4994e-05, -2.9859e-05,\n 7.0320e-05, -2.2820e-09, 3.5886e-36, 5.8348e-16, -3.7702e-26,\n -4.5789e-05, 1.4645e-04, -9.3109e-06, -1.0897e-05, 6.3990e-05,\n 1.9604e-04, 1.0568e-18, -1.8391e-04, 5.2837e-05, -1.5669e-04,\n 1.1816e-05, 5.6052e-45, 1.3155e-05, 3.2457e-05, 1.1492e-06,\n -2.3563e-04, 2.4960e-27, 9.9720e-05, 2.6914e-05, 2.8514e-05,\n 4.7296e-05, -1.5961e-05, -1.8248e-04, 4.6205e-28, 5.6052e-45,\n 4.9044e-05, 2.6359e-05, 5.6052e-45, 5.0447e-44, 5.6052e-45,\n -1.9330e-04, -7.7495e-05, 9.9997e-06, -5.7853e-05, 2.1513e-04,\n 2.4750e-32, -8.4590e-05, -4.7036e-05, 7.2286e-05, 1.6814e-37,\n 6.0902e-09, -1.4020e-04, 2.1939e-06, 5.6052e-45, 8.4410e-05,\n -5.4605e-05, -1.0199e-04, -4.8959e-05, -6.2225e-05, 7.0820e-05,\n -2.4949e-04, -1.8071e-04, 3.1417e-04, 9.3810e-05, -1.8540e-04,\n -3.6795e-04, 2.2137e-13, -6.1994e-05, 5.6052e-45, -1.5517e-05,\n 5.7795e-05, 1.3899e-05, 5.6052e-45, -1.7321e-05, 5.6052e-45,\n -1.5170e-04, 8.5985e-05, -1.3560e-04, 6.6246e-05, 2.2613e-04,\n -6.4542e-05, 5.6052e-45, -5.1237e-10, 9.9993e-05, 5.6599e-05,\n 5.6052e-45, -2.4587e-04, 1.1440e-04, 4.5608e-05, 5.9847e-05,\n 6.7499e-05, 3.2050e-04, 2.8625e-04, 1.0268e-04, -5.2267e-05,\n 2.7519e-05, 3.9442e-05, -2.6537e-04, -1.9359e-04, 1.2529e-04,\n 5.0109e-05, -7.1149e-05, 3.2045e-04, 1.1430e-04, 7.7393e-05,\n 7.8939e-05, 7.9574e-05, 7.8275e-05, -8.9304e-05, -9.2056e-05,\n 5.6052e-45, 1.1124e-04, 1.6354e-04, -6.4147e-05, -1.1524e-04,\n -2.8964e-05, -5.1288e-05, 9.9683e-05, 1.0288e-04, -6.2678e-06,\n 1.0625e-04, -5.2388e-06, -1.9756e-05, -6.2773e-05, 3.8623e-05,\n -3.2490e-27, 5.2549e-05, 2.2645e-05, 5.6052e-45, 1.0446e-05,\n -1.1806e-04, 7.7663e-05, -2.0286e-04, -1.4781e-04, -4.3916e-07,\n 1.2635e-04, 5.6052e-45, 7.5277e-06, 3.0858e-05, 2.1860e-05,\n -1.4400e-04, -1.1795e-04, -2.2052e-04, 1.1407e-04, -6.0323e-05,\n 2.9382e-07, 1.0523e-04, 5.6052e-45, -8.6857e-05, -2.4203e-04,\n 4.6841e-05, -3.1884e-05, 8.5388e-05, -4.2835e-05, -3.5724e-05,\n -4.1905e-17, 7.5036e-05, 2.5012e-04, 3.9005e-04, -3.1379e-07,\n -1.4315e-04, 9.2852e-05, -4.9741e-05, -1.7292e-04, -1.8868e-04,\n 5.6052e-45, 7.4991e-05, 7.6131e-05, -9.2764e-05, -1.1851e-04,\n 5.6052e-45, 1.5513e-04, 5.6052e-45, 9.4664e-05, -1.3530e-04,\n -1.8565e-04, 9.2021e-06, 5.6052e-45, 1.4131e-04, 5.6052e-45,\n -3.4181e-05, 9.9309e-05, 1.9535e-04, 5.6052e-45, -1.2597e-05,\n -2.5968e-04, 2.1725e-04, -6.8999e-06, 1.9287e-05, -1.0047e-04,\n 2.4287e-04, -2.0028e-33, -5.9378e-05, -3.7998e-05, 1.6493e-39,\n 1.9648e-04, -2.8246e-05, 6.6649e-05, 3.9815e-05, 9.2276e-05,\n 2.0202e-04, 5.6052e-45, 1.1482e-04, 7.5223e-05, 3.0317e-05,\n 6.0908e-06, -7.4249e-05, -1.1544e-04, 6.8567e-05, -2.1952e-05,\n 3.1647e-04, -6.4769e-05, 2.3637e-04, -7.6074e-05, -1.6926e-04,\n 2.1360e-04, -1.4481e-04, -2.3779e-06, 1.1521e-05, -1.8595e-05,\n -1.2157e-04, 8.2667e-05, -1.3583e-04, 3.9066e-30, -1.7687e-05,\n 5.6052e-45, 1.8040e-04, 5.5298e-11, 1.5759e-04, 3.5618e-04,\n 1.9267e-04, -9.7537e-05, -1.3507e-04, -3.1299e-04, -8.7066e-05,\n 2.0294e-04, 8.2441e-15, 9.8858e-05, 7.6483e-05, -1.2555e-25,\n -4.1530e-05, 5.5543e-15, 5.6052e-45, 5.6052e-45, 8.3818e-05,\n 1.1757e-04, 5.6052e-45, 6.0960e-05, 7.0454e-05, -1.1895e-04,\n 6.1255e-05, 5.6052e-45, -5.6566e-05, 5.6052e-45, -2.4682e-04,\n -9.4397e-05, -2.6014e-04, 7.9740e-05, -9.1210e-05, -1.5157e-04,\n -4.8062e-05, -1.0497e-04, -2.5522e-04, -3.1553e-05, -5.2444e-05,\n 4.9597e-04, -1.1990e-04, 1.2030e-04, 6.6100e-05, -4.7959e-10,\n 1.2865e-04, 4.5508e-08, 4.2180e-04, -5.6052e-45, -3.3299e-05,\n 4.7784e-05, -5.7541e-05, -1.6514e-05, -1.5006e-04, 5.6052e-45,\n -1.1211e-04, 5.6052e-45, 9.0198e-05, 1.5240e-04, 1.6124e-04,\n -1.3589e-04, 1.4160e-04, -9.2734e-41, 5.6052e-45, 5.6052e-45,\n -3.8678e-05, -2.0310e-06, -7.0891e-05, 5.6052e-45, -1.9308e-04,\n 5.6052e-45, -1.9534e-06, -7.3181e-05, -5.1901e-05, 1.4131e-05,\n 1.3803e-42, 1.2441e-04, 5.6052e-45, -3.3919e-05, 5.6052e-45,\n 2.5494e-05, -2.2080e-05, 1.1969e-04, 5.6052e-45, 4.7694e-05,\n 2.3294e-04, -1.5167e-04, -9.1955e-05, -1.8953e-04, -1.0304e-04,\n -2.8700e-05, 5.6052e-45, -4.4384e-04, -5.4701e-05, 5.6052e-45,\n 2.3059e-05, -3.5691e-04, -1.3450e-04, 8.7911e-06, 1.3863e-05,\n -3.9465e-05, -2.9224e-04, 1.7523e-05, -1.5286e-04, -1.0216e-04,\n 5.6052e-45, 1.9340e-04, -4.3325e-05, 4.6669e-05, -2.3518e-10,\n -2.1366e-04, -1.6569e-04, -1.3570e-04, 2.3838e-04, 2.2108e-04,\n 1.5096e-04, 1.2108e-04, -3.3061e-05, -3.7859e-05, 5.6052e-45,\n -1.8971e-05, 1.1077e-04, -4.1044e-05, -1.4532e-04, 1.2329e-04,\n -1.9154e-04, -7.4180e-05, -1.9347e-05, -6.0849e-04, 1.7598e-05,\n -3.3117e-05, -1.8057e-04, 5.6052e-45, -1.8914e-04, -5.6052e-45,\n -2.2562e-05, -2.7832e-06, -1.2531e-05, 5.4463e-05, 2.1362e-04,\n -2.7446e-04, -1.7087e-04, 7.0099e-05, 1.4282e-05, 3.2202e-05,\n -2.0840e-05, -2.4095e-05, 5.4923e-05, -1.3832e-05, 5.6052e-45,\n 5.6052e-45, 1.8718e-04, 1.4130e-05], device='cuda:0')", - "exp_avg_sq": "tensor([5.8486e-07, 6.7305e-08, 2.1271e-07, 3.0549e-07, 1.3273e-06, 6.2720e-07,\n 1.5887e-06, 2.3324e-06, 7.2390e-07, 1.2560e-06, 8.7363e-07, 3.4640e-07,\n 4.6742e-07, 8.0218e-07, 5.8896e-07, 4.1006e-07, 1.9655e-06, 3.8291e-07,\n 5.0782e-08, 1.5670e-06, 7.8601e-09, 2.6791e-07, 7.2331e-07, 7.2412e-07,\n 1.1452e-06, 6.0510e-07, 3.1855e-07, 1.4243e-09, 9.1714e-07, 9.5761e-07,\n 2.0724e-07, 4.4289e-07, 6.4884e-07, 1.6348e-06, 4.7535e-07, 2.2039e-07,\n 7.6491e-07, 1.1045e-06, 6.0452e-07, 8.1290e-07, 2.0725e-06, 3.8307e-07,\n 9.1507e-07, 7.9083e-07, 1.1137e-06, 7.7224e-07, 4.5089e-07, 4.0657e-07,\n 3.4039e-06, 5.8783e-07, 1.2217e-06, 8.8047e-06, 6.6752e-07, 5.7676e-07,\n 1.2308e-06, 8.2569e-07, 1.7555e-06, 1.7820e-07, 3.9248e-07, 7.1190e-07,\n 7.6397e-08, 5.7588e-07, 1.5579e-06, 8.1114e-07, 1.5803e-07, 3.8394e-07,\n 1.0286e-06, 1.4942e-06, 6.6350e-07, 1.5794e-07, 6.3893e-07, 5.5578e-07,\n 2.7309e-07, 1.2165e-06, 5.5716e-07, 5.4762e-07, 3.8576e-09, 7.3741e-07,\n 2.0749e-07, 2.9355e-07, 1.6103e-06, 4.9930e-07, 1.4187e-06, 1.0650e-08,\n 3.5276e-06, 6.3142e-07, 9.2966e-07, 1.3082e-06, 4.4710e-07, 1.5247e-06,\n 4.5413e-07, 5.0332e-07, 1.6961e-07, 7.6558e-07, 2.1276e-07, 1.7768e-06,\n 1.5160e-06, 4.6359e-06, 1.0665e-06, 8.9231e-07, 1.0957e-06, 1.0281e-06,\n 3.1427e-06, 3.3817e-06, 6.7145e-07, 9.1689e-07, 3.5746e-07, 5.3008e-07,\n 5.8143e-07, 1.6473e-07, 5.7906e-07, 2.0707e-06, 7.9360e-07, 5.3427e-07,\n 1.5583e-06, 1.1565e-06, 5.3022e-07, 2.0895e-07, 5.3916e-07, 2.9158e-07,\n 9.9888e-07, 1.8331e-06, 5.4202e-07, 7.3067e-07, 5.9543e-07, 8.1662e-07,\n 1.3319e-06, 2.3294e-08, 1.0184e-06, 3.2522e-06, 7.8704e-07, 7.8408e-07,\n 3.1917e-08, 6.9062e-07, 3.4200e-07, 4.0409e-07, 1.2934e-10, 1.1698e-06,\n 5.1154e-07, 5.2417e-07, 3.2179e-07, 1.7696e-07, 3.0267e-07, 5.0393e-11,\n 6.8559e-10, 9.1193e-07, 9.1115e-07, 9.5189e-07, 2.4501e-06, 1.6752e-07,\n 1.9600e-07, 5.6579e-07, 2.3485e-07, 3.4101e-07, 2.1005e-09, 8.4503e-07,\n 7.9377e-07, 1.0071e-06, 9.9014e-07, 1.3660e-06, 3.2417e-07, 3.8327e-07,\n 6.9487e-07, 8.5898e-07, 6.7081e-07, 6.1049e-07, 3.8819e-07, 1.2891e-12,\n 6.0883e-07, 3.4680e-07, 1.4388e-06, 1.3092e-06, 5.0973e-07, 6.5577e-07,\n 8.4424e-07, 4.9309e-07, 1.3300e-10, 6.6550e-07, 1.3847e-06, 2.6365e-06,\n 1.4983e-06, 1.1356e-06, 6.9869e-07, 2.9638e-11, 5.4145e-11, 3.1130e-17,\n 1.1904e-06, 5.0787e-07, 9.1074e-08, 2.2184e-08, 5.7910e-07, 2.8942e-07,\n 8.9419e-07, 1.6842e-06, 1.0801e-06, 4.3451e-07, 8.4985e-07, 1.1028e-06,\n 3.7253e-07, 8.2499e-18, 9.7984e-07, 4.2011e-07, 9.0166e-07, 1.1720e-06,\n 9.7640e-07, 3.1239e-07, 1.2087e-06, 8.5500e-07, 3.4058e-07, 6.1838e-09,\n 1.0757e-06, 8.6760e-09, 4.3835e-07, 1.3780e-06, 7.6326e-07, 5.3528e-07,\n 6.5963e-07, 2.6045e-07, 9.0671e-07, 5.2666e-07, 3.8014e-07, 1.1267e-06,\n 8.1268e-07, 4.0897e-07, 1.5186e-06, 1.2215e-06, 2.8119e-06, 1.0392e-06,\n 1.0312e-06, 2.4305e-08, 6.4014e-07, 7.7685e-07, 5.9689e-07, 7.4781e-07,\n 7.7290e-07, 6.2347e-07, 1.9523e-06, 1.4656e-10, 4.4775e-07, 7.3036e-07,\n 5.0637e-07, 4.0205e-07, 1.0413e-06, 5.3219e-07, 3.5436e-07, 5.5503e-07,\n 4.7760e-07, 1.4421e-06, 3.0863e-07, 5.1285e-07, 2.6418e-06, 1.1078e-06,\n 1.2863e-06, 1.6111e-06, 4.6936e-07, 1.0095e-06, 1.5675e-10, 4.2362e-07,\n 1.1630e-15, 2.5368e-06, 9.2332e-16, 9.2061e-07, 6.4367e-07, 6.4506e-07,\n 2.5284e-06, 1.0492e-06, 3.0020e-07, 5.5645e-07, 1.7123e-06, 1.8173e-06,\n 1.2249e-06, 4.4884e-07, 4.8044e-07, 1.2102e-06, 1.2730e-06, 5.2293e-07,\n 1.1242e-06, 7.9942e-10, 1.5004e-07, 1.9010e-06, 2.2795e-06, 1.7769e-06,\n 1.3396e-06, 8.0307e-07, 1.2985e-06, 2.7493e-07, 9.8741e-07, 6.9259e-07,\n 3.6900e-08, 3.3425e-06, 2.6978e-07, 1.3398e-06, 2.8595e-06, 1.0765e-07,\n 7.5429e-07, 2.9283e-06, 1.0467e-06, 2.6411e-06, 5.4300e-07, 9.6525e-08,\n 4.9003e-07, 2.9545e-06, 1.3446e-07, 7.7174e-08, 8.5541e-08, 1.8408e-09,\n 2.2653e-08, 2.8438e-07, 7.1372e-07, 1.3210e-06, 1.4321e-06, 1.1977e-10,\n 7.5630e-07, 3.0710e-07, 5.1653e-07, 2.6290e-10, 1.4990e-08, 4.6652e-06,\n 5.5571e-07, 2.9821e-07, 8.8349e-07, 1.4700e-07, 4.7275e-08, 7.9849e-07,\n 2.7607e-07, 9.5377e-07, 2.1626e-06, 6.6508e-07, 6.9812e-07, 6.0029e-07,\n 8.3848e-07, 2.7599e-07, 8.9292e-07, 3.4591e-07, 7.7833e-08, 2.4878e-06,\n 4.4520e-07, 1.5379e-07, 7.1636e-16, 3.4817e-09, 2.0073e-06, 7.1493e-07,\n 3.4610e-06, 2.7943e-07, 1.4511e-06, 5.5975e-07, 6.9824e-07, 7.2428e-06,\n 5.5577e-07, 1.2911e-06, 1.0543e-06, 8.6477e-07, 1.4342e-07, 1.9502e-07,\n 4.2301e-07, 9.9569e-07, 1.2475e-08, 2.0556e-07, 4.0302e-07, 4.1481e-06,\n 8.0554e-07, 6.9444e-07, 8.0879e-07, 1.7548e-06, 5.1805e-07, 2.6492e-07,\n 2.4805e-07, 1.0640e-06, 1.8839e-07, 1.1453e-07, 5.2543e-07, 4.1449e-07,\n 2.9529e-07, 3.8180e-07, 5.0466e-07, 1.7990e-06, 1.4080e-07, 5.1564e-10,\n 3.8400e-07, 3.4136e-07, 1.0148e-06, 9.8705e-07, 3.4755e-07, 7.3117e-07,\n 8.0249e-07, 6.0858e-07, 2.0602e-09, 6.1735e-07, 1.8794e-06, 4.4716e-07,\n 1.2138e-06, 8.5758e-07, 5.0984e-08, 1.4056e-07, 1.0171e-06, 1.3524e-06,\n 1.5431e-08, 3.3588e-07, 2.7600e-06, 3.5748e-07, 1.7232e-06, 5.8755e-07,\n 8.9006e-07, 1.5649e-06, 8.2405e-07, 6.5520e-07, 2.3303e-06, 1.3869e-06,\n 9.7881e-07, 1.0458e-06, 1.2780e-06, 4.9681e-12, 1.1335e-06, 7.0893e-07,\n 7.8894e-08, 4.9371e-07, 4.7118e-10, 9.9218e-07, 4.0002e-07, 3.9096e-07,\n 1.1457e-06, 1.0767e-06, 8.8331e-07, 3.3403e-10, 2.2041e-17, 1.7459e-07,\n 7.8920e-07, 1.8536e-07, 5.0958e-10, 1.0657e-09, 8.1103e-07, 5.4135e-07,\n 1.3195e-06, 4.7695e-07, 1.4170e-06, 1.1812e-07, 6.9507e-07, 2.5021e-07,\n 8.4511e-07, 2.7787e-09, 4.2356e-07, 5.7534e-07, 1.0271e-07, 3.3696e-09,\n 2.7431e-07, 1.0747e-06, 4.8789e-07, 4.7937e-07, 2.9958e-08, 1.5074e-07,\n 7.0133e-07, 4.4668e-07, 9.0904e-07, 7.0668e-07, 8.5795e-07, 1.3056e-06,\n 1.8408e-07, 1.3976e-06, 1.0443e-07, 6.8383e-08, 9.1052e-08, 1.3667e-06,\n 2.5443e-11, 6.3652e-07, 2.1910e-07, 6.3879e-07, 1.9526e-07, 3.1935e-07,\n 1.3301e-06, 1.8577e-07, 4.1887e-07, 1.0300e-08, 6.4127e-09, 8.0370e-07,\n 1.0992e-07, 4.5740e-10, 3.2147e-07, 3.1744e-07, 2.3280e-06, 1.1528e-06,\n 9.1396e-07, 1.2202e-06, 7.2606e-07, 1.5138e-06, 4.6831e-07, 7.6289e-07,\n 1.4355e-06, 1.3786e-06, 5.9561e-07, 6.2240e-07, 1.0685e-06, 1.5100e-07,\n 1.1965e-06, 1.0568e-06, 5.5998e-07, 1.1728e-06, 7.5300e-07, 1.3953e-06,\n 8.1084e-07, 5.7104e-07, 5.5135e-06, 1.2401e-06, 5.1247e-07, 1.4647e-07,\n 1.0463e-06, 6.4147e-07, 5.5631e-07, 8.4690e-07, 3.8226e-07, 1.1633e-06,\n 9.1130e-07, 7.1735e-07, 6.6029e-07, 1.4952e-06, 7.4427e-08, 2.9427e-07,\n 3.6868e-07, 3.1176e-07, 7.6368e-07, 8.9063e-07, 8.2386e-07, 1.3683e-07,\n 6.1614e-07, 6.2491e-07, 3.5184e-07, 4.1233e-07, 2.1883e-11, 1.0088e-06,\n 4.8563e-07, 4.7879e-07, 1.0296e-06, 1.5800e-06, 8.2253e-07, 1.1187e-06,\n 3.6826e-07, 8.7401e-08, 1.1891e-06, 2.5500e-06, 8.8205e-07, 4.8460e-07,\n 1.0479e-06, 5.4647e-07, 1.2814e-06, 1.5067e-06, 3.4917e-06, 9.1755e-09,\n 5.7110e-07, 6.9218e-07, 7.8733e-07, 4.0053e-06, 1.8350e-06, 7.4901e-07,\n 1.0182e-06, 7.9896e-07, 2.7494e-07, 5.7868e-06, 4.2023e-07, 9.8054e-07,\n 2.8858e-07, 9.4421e-07, 1.5494e-10, 2.2449e-06, 7.6642e-08, 4.5723e-07,\n 9.7069e-07, 4.8916e-07, 3.1357e-07, 3.6359e-09, 1.6837e-06, 1.7734e-07,\n 4.2306e-07, 2.0147e-07, 6.2035e-07, 1.5180e-07, 9.5219e-07, 1.9761e-06,\n 7.6016e-07, 1.3163e-07, 7.5185e-07, 5.8537e-07, 1.0418e-06, 4.6039e-10,\n 1.4087e-07, 5.3565e-07, 6.1220e-14, 1.1441e-06, 6.3994e-07, 8.6216e-07,\n 3.9157e-07, 9.2755e-07, 3.4498e-07, 7.1148e-11, 9.0453e-07, 1.1713e-06,\n 5.8192e-07, 5.0824e-07, 4.3069e-07, 3.6747e-07, 4.1851e-07, 9.5970e-07,\n 4.0166e-07, 9.1797e-07, 1.0541e-06, 3.4054e-07, 3.8524e-07, 1.2907e-06,\n 6.1173e-07, 1.7278e-09, 8.2294e-07, 4.4052e-07, 8.9499e-07, 3.7058e-07,\n 5.4024e-07, 1.2713e-06, 1.2534e-06, 3.2180e-10, 7.6525e-07, 3.7338e-07,\n 4.7344e-07, 1.2873e-06, 9.9905e-07, 8.9615e-07, 1.7492e-06, 1.3731e-06,\n 1.4028e-06, 7.9521e-07, 3.1927e-11, 9.4321e-07, 5.5022e-07, 1.0915e-07,\n 3.5490e-07, 1.2023e-07, 8.3820e-09, 2.6125e-10, 3.9734e-07, 2.5277e-06,\n 9.8898e-13, 6.0548e-07, 6.5310e-07, 7.1258e-07, 1.6666e-06, 1.0600e-10,\n 1.1263e-06, 2.7673e-06, 7.3475e-08, 6.4961e-07, 6.9310e-07, 4.9224e-07,\n 2.0200e-06, 3.2431e-07, 7.2796e-07, 7.5071e-07, 6.7080e-07, 8.7545e-07,\n 4.5792e-07, 4.9774e-07, 1.4234e-06, 2.8189e-07, 1.9259e-06, 2.4070e-08,\n 1.7216e-06, 2.4019e-06, 1.1586e-06, 2.8589e-06, 4.4713e-07, 2.4244e-06,\n 1.0345e-06, 1.5155e-07, 1.7144e-06, 3.8055e-08, 6.3984e-07, 9.3172e-12,\n 3.5636e-07, 3.3954e-07, 6.0256e-07, 4.1759e-07, 1.1727e-06, 4.6991e-08,\n 4.1896e-07, 1.0031e-09, 1.1015e-06, 9.4829e-07, 1.0061e-06, 2.3732e-06,\n 6.1455e-07, 4.3822e-07, 1.1105e-06, 2.5620e-07, 6.1177e-07, 1.7062e-06,\n 3.7789e-09, 1.2076e-07, 3.5970e-06, 1.0530e-07, 2.1249e-08, 4.4448e-07,\n 1.4379e-06, 7.9509e-07, 2.8473e-10, 1.0784e-06, 7.9449e-07, 4.9015e-07,\n 9.5823e-07, 1.0845e-06, 1.1661e-06, 6.6448e-07, 2.2667e-09, 1.6183e-06,\n 5.3888e-07, 1.7866e-11, 8.7242e-07, 3.2521e-07, 3.9884e-07, 1.3442e-06,\n 1.0079e-06, 1.5342e-07, 6.2635e-07, 5.0330e-07, 2.6914e-07, 1.3293e-06,\n 1.5943e-06, 1.6918e-06, 6.5854e-07, 7.2117e-07, 2.4250e-06, 1.3364e-07,\n 5.8485e-07, 1.2992e-06, 1.1971e-06, 1.7535e-06, 3.1576e-07, 2.7208e-06,\n 6.3838e-08, 7.8176e-07, 2.1405e-06, 1.9674e-07, 1.3169e-06, 4.3461e-07,\n 4.1396e-07, 2.4920e-07, 5.9988e-07, 1.8233e-06, 4.4708e-06, 7.4075e-07,\n 2.0702e-07, 3.1660e-07, 2.1280e-06, 1.4262e-06, 9.3477e-07, 4.2852e-06,\n 5.9127e-07, 4.3328e-08, 1.8275e-07, 4.8662e-07, 4.7664e-07, 1.1740e-06,\n 2.1481e-07, 5.0966e-07, 5.2168e-07, 6.5028e-07, 4.5627e-07, 2.8062e-07,\n 1.3837e-06, 1.9243e-06, 2.2690e-09, 3.4297e-16, 1.7694e-06, 3.1399e-07],\n device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([ 1.3401e-04, 5.6052e-45, -6.2644e-06, -9.7304e-05, 4.3269e-06,\n 5.6052e-45, -2.0040e-04, -8.3697e-05, -7.2948e-06, -1.2949e-04,\n -6.2824e-05, 4.2856e-16, 9.5880e-05, -6.8601e-05, -1.0370e-04,\n 4.3672e-05, 5.7833e-05, 3.2737e-05, -1.1208e-05, -4.5552e-05,\n 8.2982e-37, 7.3079e-05, -1.1610e-05, -3.0190e-05, 1.4747e-04,\n 7.2015e-05, 1.6198e-05, 5.6052e-45, -8.1331e-05, 1.6237e-04,\n -7.9612e-05, 2.6094e-04, -1.5501e-04, 2.3547e-05, -7.1045e-05,\n 3.5669e-05, 5.6052e-45, -9.4261e-05, -1.0254e-04, 3.4336e-05,\n 5.6989e-05, 2.8937e-05, -1.8554e-04, -1.3883e-04, 7.4411e-05,\n -4.5730e-05, -5.9864e-04, -1.4483e-04, 1.4346e-04, 3.9030e-04,\n 1.0786e-34, -2.0056e-04, -2.7017e-04, 2.9667e-04, 5.9601e-05,\n -3.5043e-05, -2.6332e-05, 9.1417e-05, 1.2760e-04, 3.0233e-05,\n -1.5171e-05, -8.3641e-05, -2.4594e-04, 1.6736e-04, -4.0906e-05,\n -6.9888e-05, -4.3127e-05, -2.2948e-05, 5.0552e-13, 1.9833e-12,\n 2.5597e-05, 5.6052e-45, -2.7023e-04, -1.4586e-04, -1.3290e-04,\n -3.2931e-05, 5.6052e-45, 1.7882e-04, -8.5191e-05, 3.4768e-06,\n 3.3717e-05, 2.9722e-05, 4.2705e-05, -5.6052e-45, 1.5721e-04,\n 1.3741e-04, -1.1317e-04, 4.9899e-05, -2.9393e-05, 3.7463e-04,\n -2.0011e-05, 5.7755e-05, 8.8730e-05, 1.2077e-04, -2.7251e-04,\n 9.4886e-05, -2.7866e-06, -4.8543e-05, 3.8890e-05, 2.3308e-05,\n 6.5394e-05, -1.6433e-05, -2.1861e-04, 4.4827e-05, 2.2400e-04,\n 3.6094e-05, 1.2216e-04, 3.2721e-04, -1.3657e-04, -2.3235e-05,\n -1.6783e-04, -1.3674e-04, -3.0435e-04, -3.4201e-05, -1.8890e-04,\n -1.4058e-04, 1.3106e-04, -5.6735e-05, 1.8936e-05, 5.6052e-45,\n 5.4879e-05, 7.0258e-05, 5.8783e-05, 4.5210e-14, -7.5807e-05,\n 1.6036e-04, 3.7173e-06, 3.0738e-07, -9.3992e-05, -1.2458e-07,\n 6.4962e-06, -2.5152e-05, 5.6052e-45, -1.8557e-04, 8.4301e-16,\n 9.0713e-05, 2.1242e-10, 3.5781e-04, 4.9424e-05, 1.0746e-04,\n 1.1300e-20, 5.6052e-45, 7.9660e-05, -2.5780e-20, 5.6052e-45,\n 5.6052e-45, -1.8502e-04, 3.3052e-05, 5.1554e-21, 5.1123e-06,\n -1.1424e-05, 3.6018e-05, 9.1746e-05, -5.6057e-05, -4.7201e-06,\n -6.7439e-05, -8.9118e-06, 1.9876e-04, 7.4944e-10, 1.3363e-04,\n 1.5282e-05, 1.2615e-04, -7.2584e-05, 1.1550e-04, 8.1575e-06,\n -1.4684e-04, 4.2468e-07, 5.6052e-45, 9.0784e-05, -9.2113e-05,\n 2.0557e-04, 4.9073e-05, 1.0615e-04, 8.5641e-05, 5.9585e-05,\n 5.6973e-05, -1.1878e-04, 1.0991e-04, 1.7263e-04, -8.4363e-05,\n -9.6052e-05, 7.2729e-05, -1.2001e-05, 1.5239e-05, 5.6052e-45,\n 5.6052e-45, -1.1592e-04, -6.2606e-06, 5.5454e-05, 2.3503e-05,\n 1.8138e-04, -7.2504e-06, 3.1775e-04, -5.2582e-05, -1.9290e-05,\n -1.0439e-04, 6.1520e-05, 1.6071e-04, 2.3279e-05, 5.6052e-45,\n 1.3808e-05, 1.9843e-04, -1.7049e-04, -2.4280e-04, -1.7729e-04,\n -4.9945e-05, -5.8612e-05, 8.9853e-05, 5.6052e-45, 5.6052e-45,\n 8.5312e-05, 5.6052e-45, 4.1249e-05, 8.2501e-05, -1.0660e-04,\n 1.1107e-04, -8.7361e-05, 8.9445e-06, 1.2801e-04, 2.5612e-04,\n 1.0368e-04, -2.1521e-04, -2.0458e-04, 3.2321e-05, 3.1305e-04,\n -5.6052e-45, -6.3493e-05, -4.0146e-05, -7.6074e-07, 1.2196e-04,\n -1.9392e-04, 1.3858e-04, 9.0847e-05, 1.4484e-04, 5.6052e-45,\n -3.3414e-04, -4.0924e-04, 5.6052e-45, -6.2524e-05, 1.3521e-04,\n -5.4264e-06, 5.6052e-45, -4.2800e-05, 1.9403e-04, -1.8363e-05,\n -5.5718e-05, 5.7732e-05, -9.1581e-05, -7.3404e-06, 1.2515e-29,\n -2.2163e-05, 6.3616e-05, 7.1164e-05, 1.9167e-04, 6.0801e-05,\n 6.2982e-05, 5.6052e-45, 3.6656e-05, 9.9906e-06, 7.8404e-05,\n 5.6052e-45, -2.9096e-05, -2.7413e-05, -3.1304e-05, 1.2835e-04,\n 1.8700e-05, -9.1180e-05, 1.0435e-04, 2.9978e-04, 1.1939e-05,\n -1.2081e-04, -7.6675e-05, 1.0620e-04, 2.2361e-05, 7.3982e-05,\n -1.6413e-06, -5.5709e-05, 4.7025e-39, -7.3152e-05, -1.0914e-04,\n -5.6052e-45, 1.3320e-04, 4.0947e-05, 1.7639e-04, 1.2749e-04,\n -2.2215e-05, -5.6385e-05, -1.4954e-05, 1.8229e-05, -8.7766e-05,\n 5.3702e-05, 2.5272e-05, 2.7594e-05, -1.8341e-04, 5.0277e-05,\n 5.8091e-06, -2.7338e-05, -1.2987e-04, -2.2459e-04, -7.9666e-05,\n -6.8944e-05, -2.2566e-05, 9.8629e-06, -6.5832e-05, 2.4214e-05,\n 5.6052e-45, 7.4029e-15, -3.7809e-05, -5.6630e-05, -1.6457e-04,\n -1.7598e-04, 5.6052e-45, -8.9594e-06, -5.4417e-05, 1.5756e-04,\n 5.6052e-45, 2.9428e-06, 1.1504e-04, -1.1578e-05, 1.6289e-05,\n -8.7762e-05, 4.8958e-05, 3.3315e-05, 1.0445e-04, 1.0805e-05,\n 5.6052e-45, -1.0699e-04, -7.7283e-06, 4.1137e-04, -7.2044e-05,\n 1.9609e-04, -4.0758e-06, 3.8561e-05, -1.0593e-05, 7.6603e-05,\n -4.2874e-06, -2.0838e-04, 5.5386e-05, 5.6052e-45, -2.8946e-09,\n -1.0648e-04, 4.2920e-05, -1.7413e-05, 1.8005e-07, 7.6674e-05,\n -5.6801e-05, -3.8644e-05, -1.0763e-04, -2.1174e-04, 2.1255e-04,\n -4.5556e-05, -1.9779e-04, -3.9448e-05, 9.6265e-31, -2.2905e-04,\n 1.5332e-04, -2.3975e-05, -9.1510e-06, 5.3081e-05, 3.8210e-05,\n -9.8098e-05, -8.6392e-05, 5.6052e-45, -2.3820e-04, -6.5929e-05,\n -1.5640e-05, -5.8210e-06, 4.2587e-05, -1.0191e-04, -8.7200e-05,\n 8.9749e-05, -6.6093e-06, 5.9356e-05, 7.1627e-05, 7.0712e-05,\n 6.5979e-06, -1.1135e-22, -1.0719e-05, 5.6052e-45, 1.0914e-04,\n 2.4111e-04, -2.0877e-04, -1.9686e-05, -1.3509e-05, 5.6052e-45,\n 4.1146e-05, -5.1564e-09, 2.0104e-04, 2.5153e-04, -2.8091e-04,\n -5.0162e-05, 7.9999e-05, 2.4352e-06, 6.1628e-06, -1.4080e-04,\n -5.3284e-05, -4.5643e-05, -1.1892e-30, 5.6052e-45, -4.9967e-06,\n 7.1555e-05, 2.4361e-05, 2.5469e-04, -1.6059e-04, 9.5334e-06,\n 4.4156e-04, 5.6052e-45, 1.0917e-06, -4.0461e-07, -1.3370e-05,\n -1.2816e-04, 1.6562e-20, -5.0005e-05, 1.9150e-04, -3.0641e-05,\n -8.1052e-05, -1.1349e-09, -1.1912e-04, 1.7612e-05, 1.3307e-04,\n -3.1517e-05, -2.4467e-04, 8.8918e-05, 1.1595e-20, 5.6052e-45,\n 6.2538e-05, 3.3598e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 1.4366e-04, 6.0018e-06, 6.1842e-05, 1.4459e-04, 2.5917e-05,\n 5.6052e-45, 2.1359e-04, -1.7919e-04, -1.0860e-04, 5.6052e-45,\n 1.1594e-04, -2.3318e-04, -7.6987e-04, 5.6052e-45, -3.4201e-04,\n 1.1699e-04, 6.3067e-06, 2.2314e-04, 2.2639e-04, -2.9622e-06,\n -1.0983e-04, 3.9249e-05, -3.9467e-05, -5.2579e-05, -1.4382e-04,\n 1.7714e-04, 2.0510e-12, -2.4685e-04, -4.1326e-38, 8.0420e-06,\n 1.7039e-04, -7.5062e-05, 5.6052e-45, 6.0590e-05, 5.6052e-45,\n 1.5977e-04, 2.3573e-05, 3.0964e-04, -5.9194e-05, 1.0892e-04,\n -9.0616e-06, 5.6052e-45, 1.7595e-05, 7.9936e-05, -3.3682e-05,\n 5.6052e-45, -3.1364e-05, -7.3109e-06, 1.3499e-05, -3.6335e-05,\n -2.1982e-04, 6.6899e-06, 1.8728e-04, -6.4828e-05, -1.5052e-04,\n -2.4139e-04, 5.8454e-05, 1.5121e-05, 8.1400e-06, -7.1012e-05,\n -2.1785e-04, 5.0432e-05, -3.9808e-05, 8.7696e-05, -3.7668e-06,\n -3.7282e-05, -1.9960e-04, -1.5776e-04, 8.5493e-05, 3.6515e-05,\n 5.6052e-45, 6.6274e-05, 5.3892e-05, -4.3374e-04, 1.3706e-04,\n 1.7446e-05, 5.2743e-05, 6.3901e-05, -1.6095e-04, -2.2230e-04,\n 2.5615e-05, -1.5683e-04, -2.4848e-04, -2.4546e-04, -1.2350e-04,\n -1.3206e-06, 5.6552e-05, -5.4684e-05, 6.2284e-41, -1.2618e-05,\n 1.1526e-04, -1.5721e-04, -4.6146e-05, 1.0260e-05, 2.3262e-05,\n -5.7413e-05, 5.6052e-45, -1.0545e-04, -4.3232e-04, 1.6360e-04,\n 1.1938e-04, -5.1265e-05, -7.4470e-05, -1.9854e-04, 1.2509e-05,\n -5.4566e-06, 3.2106e-05, 5.6052e-45, -3.2203e-05, -2.5878e-04,\n -1.6058e-04, -6.8822e-05, -1.0292e-04, 1.8069e-04, 1.4212e-04,\n -1.2589e-09, -3.5599e-05, 6.8286e-05, 4.0978e-05, 1.2103e-06,\n -1.4554e-04, 1.0070e-05, 5.5421e-05, 1.7426e-04, 1.7820e-04,\n -3.1262e-05, 2.7132e-05, 8.9497e-06, -2.1564e-05, -1.3612e-04,\n 5.6052e-45, 1.6903e-04, -1.3442e-10, 9.0311e-05, 8.3781e-05,\n 2.3219e-04, -8.2306e-05, 5.6052e-45, 9.4486e-05, 5.6052e-45,\n 1.1960e-04, -1.1955e-05, -6.7594e-05, -1.2846e-31, -1.1176e-04,\n -2.8732e-04, -2.1194e-04, 3.5659e-05, 1.1161e-04, -8.9805e-05,\n 1.8802e-04, 7.5613e-24, 1.0864e-04, 8.3588e-06, 5.6052e-45,\n -1.1543e-04, 6.7276e-05, -4.0843e-04, -3.2496e-05, -3.9364e-06,\n -5.7613e-04, 5.6052e-45, 1.7492e-04, 2.4271e-05, 8.8970e-05,\n -1.8492e-05, 2.7960e-05, -6.2142e-05, 8.2053e-05, -1.5701e-05,\n -5.2802e-05, 4.2292e-04, -3.0062e-05, -5.8785e-05, -9.9055e-05,\n -6.6835e-05, 2.3692e-04, 9.8381e-05, 6.2732e-05, 9.8214e-05,\n -1.1315e-04, 8.5333e-05, -1.0048e-04, 8.8580e-29, -1.7057e-04,\n 5.6052e-45, 2.7111e-04, 1.5613e-33, 3.2786e-05, 8.3056e-05,\n 8.1977e-05, -1.6679e-04, -2.7298e-05, 5.4330e-05, -1.1984e-05,\n 1.3030e-04, 5.6052e-45, -2.9161e-05, -5.8067e-05, 3.7128e-05,\n 2.6681e-04, 4.6839e-33, 5.6052e-45, 5.6052e-45, 1.2124e-05,\n -1.4921e-04, 5.6052e-45, 1.8558e-04, 1.8449e-04, -2.9584e-04,\n 1.7813e-04, 5.6052e-45, 8.9779e-05, -1.0995e-07, -2.9693e-05,\n -2.0330e-05, -1.8277e-05, 4.8589e-05, -2.1496e-05, 1.0102e-04,\n -2.0851e-04, 1.2364e-04, -8.2080e-05, 1.2275e-04, -3.2149e-04,\n 1.7918e-04, -6.8584e-05, -1.2537e-06, 7.2832e-07, 5.6584e-27,\n 2.0016e-04, -6.4768e-05, -6.0360e-06, -4.3189e-05, -3.8007e-04,\n -1.6184e-04, -1.9628e-05, -4.1078e-05, -1.8256e-04, 5.6052e-45,\n -1.5106e-04, 5.6052e-45, -7.7646e-05, 2.2833e-04, -2.9517e-05,\n -2.0022e-04, -1.7055e-04, 4.5815e-23, -4.8939e-14, 3.4503e-17,\n -2.1483e-05, -9.6001e-05, -7.0151e-05, 4.1070e-18, -3.7208e-04,\n -5.6052e-45, 1.7335e-04, 3.0939e-05, 6.4471e-05, -9.1275e-05,\n 4.8288e-06, 1.1388e-05, 5.6052e-45, 5.0152e-05, -6.1304e-17,\n 1.0785e-04, 1.7371e-04, -2.2477e-05, 5.6052e-45, -6.1792e-05,\n -1.6953e-05, 4.7309e-05, 3.5355e-05, 1.0680e-04, 3.7922e-04,\n -3.9507e-05, 5.6052e-45, 9.6949e-05, -7.0336e-05, 5.6052e-45,\n 1.8669e-05, -8.3326e-06, 2.1600e-05, 2.1865e-04, 5.2768e-05,\n 4.2536e-05, -1.0377e-05, -8.6186e-05, -4.0438e-05, -6.3138e-05,\n 2.6625e-44, 3.2045e-04, -4.2511e-04, 5.3401e-05, -1.1481e-05,\n 3.6375e-05, -7.7616e-05, 1.9719e-04, -2.6246e-04, -6.0979e-05,\n 1.4997e-04, -2.0473e-05, -5.4263e-05, 2.4133e-04, 5.6052e-45,\n -1.9760e-05, 5.3072e-06, 3.7807e-04, -7.1875e-05, -1.2391e-05,\n -2.3003e-05, 2.9123e-04, -1.0618e-05, 1.5811e-04, -1.0353e-04,\n 1.2496e-04, 4.6253e-05, 5.6052e-45, 2.6541e-04, -5.6052e-45,\n 2.1772e-04, -5.4464e-05, -2.7460e-05, -7.9709e-05, -4.6111e-05,\n -1.3586e-04, 4.7484e-05, 1.5138e-04, -7.9942e-05, 3.7365e-05,\n -7.7960e-05, 3.8141e-05, -9.5504e-05, 4.1283e-05, 5.6052e-45,\n 5.6052e-45, -1.9719e-04, -3.6908e-05], device='cuda:0')", + "exp_avg_sq": "tensor([3.0380e-07, 1.9233e-08, 1.0556e-07, 2.1013e-07, 6.8335e-07, 1.7923e-07,\n 7.2973e-07, 8.0230e-07, 3.0135e-07, 6.1795e-07, 4.7625e-07, 9.8988e-08,\n 2.7964e-07, 2.7323e-07, 3.3026e-07, 2.9766e-07, 8.0041e-07, 1.1004e-07,\n 6.8968e-08, 5.7496e-07, 2.2461e-09, 1.4262e-07, 2.9527e-07, 4.1855e-07,\n 5.9638e-07, 4.0160e-07, 1.7916e-07, 4.0701e-10, 4.4812e-07, 4.5438e-07,\n 1.2342e-07, 3.0474e-07, 3.3277e-07, 6.0140e-07, 2.5711e-07, 2.0870e-07,\n 2.1858e-07, 4.8595e-07, 3.5936e-07, 3.8917e-07, 7.8102e-07, 2.5362e-07,\n 5.3277e-07, 4.7205e-07, 5.2115e-07, 3.0704e-07, 3.0444e-07, 2.7239e-07,\n 1.2952e-06, 3.6878e-07, 3.4910e-07, 2.8666e-06, 3.3334e-07, 3.0342e-07,\n 4.6782e-07, 3.0882e-07, 6.1505e-07, 1.3318e-07, 1.6384e-07, 3.9241e-07,\n 8.0069e-08, 3.0053e-07, 7.8173e-07, 4.0093e-07, 1.2501e-07, 2.1717e-07,\n 5.8386e-07, 5.7866e-07, 1.8960e-07, 4.5134e-08, 3.7749e-07, 1.5882e-07,\n 1.9642e-07, 7.7855e-07, 3.4774e-07, 3.7408e-07, 1.1023e-09, 4.9544e-07,\n 1.5054e-07, 2.2383e-07, 4.8735e-07, 3.2335e-07, 6.7948e-07, 3.0434e-09,\n 1.1438e-06, 4.0259e-07, 4.6238e-07, 6.6913e-07, 3.1319e-07, 6.6444e-07,\n 2.8941e-07, 2.5493e-07, 1.7481e-07, 4.0561e-07, 1.7040e-07, 7.8234e-07,\n 6.3053e-07, 1.3262e-06, 4.5120e-07, 4.5966e-07, 4.9289e-07, 4.9123e-07,\n 1.2510e-06, 1.1191e-06, 3.5053e-07, 4.8870e-07, 2.2621e-07, 3.0295e-07,\n 3.0211e-07, 1.4257e-07, 3.2761e-07, 8.6332e-07, 4.0771e-07, 1.5286e-07,\n 7.3538e-07, 4.9836e-07, 3.1888e-07, 1.7604e-07, 2.0767e-07, 8.3321e-08,\n 4.4242e-07, 8.2368e-07, 2.5955e-07, 2.0879e-07, 3.3569e-07, 4.6795e-07,\n 5.0674e-07, 6.6572e-09, 5.1733e-07, 9.2935e-07, 3.8101e-07, 3.4569e-07,\n 9.1206e-09, 4.1331e-07, 9.7729e-08, 2.6119e-07, 3.6959e-11, 5.2451e-07,\n 3.0240e-07, 1.5803e-07, 9.1956e-08, 5.0567e-08, 2.1107e-07, 1.4452e-11,\n 1.9591e-10, 2.6059e-07, 5.0701e-07, 4.9784e-07, 7.0014e-07, 1.4566e-07,\n 1.6431e-07, 3.9500e-07, 2.0949e-07, 2.6852e-07, 1.0472e-09, 4.4968e-07,\n 2.7375e-07, 4.5113e-07, 2.8294e-07, 4.8288e-07, 2.9469e-07, 1.9686e-07,\n 4.4421e-07, 5.6312e-07, 2.9010e-07, 3.3266e-07, 1.1093e-07, 3.6837e-13,\n 3.8433e-07, 2.3040e-07, 6.1631e-07, 5.4605e-07, 3.1648e-07, 3.8913e-07,\n 4.2884e-07, 3.0395e-07, 2.0576e-08, 3.9191e-07, 6.4621e-07, 1.0034e-06,\n 7.0445e-07, 4.7306e-07, 4.1150e-07, 2.9840e-08, 1.5472e-11, 8.8957e-18,\n 6.1379e-07, 3.2217e-07, 9.7838e-08, 6.5025e-08, 2.8337e-07, 2.0267e-07,\n 4.8268e-07, 7.1945e-07, 5.4258e-07, 2.3230e-07, 5.1714e-07, 5.5017e-07,\n 2.7430e-07, 2.3575e-18, 2.9973e-07, 2.3375e-07, 5.1576e-07, 5.7059e-07,\n 5.5288e-07, 2.1338e-07, 5.5459e-07, 3.8478e-07, 9.7323e-08, 1.7671e-09,\n 5.9448e-07, 2.4792e-09, 2.4035e-07, 7.1660e-07, 4.0483e-07, 3.3432e-07,\n 3.6109e-07, 1.7698e-07, 4.3250e-07, 3.0066e-07, 1.7002e-07, 5.9540e-07,\n 5.0384e-07, 2.7146e-07, 5.0855e-07, 3.4904e-07, 9.3621e-07, 6.4101e-07,\n 5.8351e-07, 2.1858e-08, 3.5078e-07, 3.9265e-07, 3.3761e-07, 5.5734e-07,\n 2.2086e-07, 4.4415e-07, 8.2942e-07, 4.1882e-11, 2.7328e-07, 3.9960e-07,\n 1.7239e-07, 1.1489e-07, 5.5878e-07, 3.3763e-07, 2.5329e-07, 3.5024e-07,\n 3.2152e-07, 6.4553e-07, 2.4551e-07, 1.4655e-07, 9.6021e-07, 6.9306e-07,\n 5.3421e-07, 7.0191e-07, 2.7405e-07, 5.7646e-07, 4.4792e-11, 2.2125e-07,\n 1.3333e-09, 1.0273e-06, 2.6385e-16, 4.6021e-07, 3.6965e-07, 2.8913e-07,\n 9.5331e-07, 3.8840e-07, 2.5962e-07, 2.9360e-07, 6.9963e-07, 5.2288e-07,\n 5.9592e-07, 3.1082e-07, 2.5969e-07, 5.4306e-07, 6.6744e-07, 3.0148e-07,\n 5.5723e-07, 2.2844e-10, 1.3626e-07, 8.1888e-07, 6.5139e-07, 8.4597e-07,\n 4.2713e-07, 4.1160e-07, 6.4585e-07, 2.1387e-07, 5.5418e-07, 4.2897e-07,\n 9.0262e-08, 1.1844e-06, 2.0439e-07, 5.3059e-07, 8.2696e-07, 1.1408e-07,\n 4.4328e-07, 9.1578e-07, 4.1502e-07, 9.4161e-07, 3.1687e-07, 9.8828e-08,\n 3.8534e-07, 1.0197e-06, 1.2808e-07, 1.2342e-07, 3.4204e-08, 5.2602e-10,\n 6.4733e-09, 1.7292e-07, 4.0258e-07, 5.4018e-07, 5.6576e-07, 3.4226e-11,\n 3.8913e-07, 2.2600e-07, 3.3617e-07, 7.5124e-11, 3.7880e-08, 1.5708e-06,\n 2.2929e-07, 1.9618e-07, 4.4145e-07, 9.5838e-08, 8.9763e-08, 4.2837e-07,\n 1.8630e-07, 2.7255e-07, 7.8292e-07, 3.9285e-07, 3.8545e-07, 3.6836e-07,\n 4.7575e-07, 1.7880e-07, 4.3758e-07, 2.4945e-07, 9.9114e-08, 1.0054e-06,\n 2.8805e-07, 7.7193e-08, 2.0470e-16, 1.0235e-09, 9.1542e-07, 3.5065e-07,\n 1.1617e-06, 1.5697e-07, 6.8922e-07, 2.9627e-07, 4.0131e-07, 2.1285e-06,\n 3.6185e-07, 5.9936e-07, 5.3026e-07, 4.4480e-07, 1.2824e-07, 5.5730e-08,\n 2.6184e-07, 5.8635e-07, 5.7782e-08, 6.3389e-08, 2.8339e-07, 1.2567e-06,\n 4.1085e-07, 3.8948e-07, 2.3112e-07, 7.6905e-07, 2.9469e-07, 9.3211e-08,\n 1.7478e-07, 5.3746e-07, 2.1484e-07, 1.0586e-07, 2.6029e-07, 1.2016e-07,\n 2.0043e-07, 2.3664e-07, 3.1045e-07, 5.4381e-07, 4.0236e-08, 2.4137e-08,\n 1.0973e-07, 2.0398e-07, 6.3087e-07, 4.9828e-07, 1.9658e-07, 4.1973e-07,\n 2.2932e-07, 4.1989e-07, 5.8898e-10, 4.0663e-07, 8.9292e-07, 2.4337e-07,\n 5.2566e-07, 4.9358e-07, 1.4879e-07, 1.0609e-07, 5.3481e-07, 6.6801e-07,\n 1.4215e-08, 9.5981e-08, 7.8868e-07, 1.0743e-07, 5.9881e-07, 4.2796e-07,\n 5.0853e-07, 8.1076e-07, 4.3308e-07, 4.3371e-07, 6.6591e-07, 6.4107e-07,\n 4.7806e-07, 5.4921e-07, 5.5221e-07, 1.4197e-12, 4.3262e-07, 4.0414e-07,\n 9.9625e-08, 3.1448e-07, 1.3464e-10, 5.2623e-07, 2.9090e-07, 2.5351e-07,\n 4.8134e-07, 5.8672e-07, 5.3430e-07, 9.5992e-11, 6.2985e-18, 1.5279e-07,\n 3.0184e-07, 5.2968e-08, 1.4562e-10, 3.0454e-10, 4.4557e-07, 3.1367e-07,\n 5.0715e-07, 2.8853e-07, 7.0969e-07, 3.3753e-08, 4.3424e-07, 2.4365e-07,\n 4.6865e-07, 7.9405e-10, 1.6416e-07, 3.0958e-07, 2.2918e-07, 9.6290e-10,\n 1.8240e-07, 6.5641e-07, 2.7917e-07, 2.9221e-07, 7.7363e-08, 1.0996e-07,\n 5.1921e-07, 2.1606e-07, 4.8488e-07, 4.1493e-07, 4.8223e-07, 5.3809e-07,\n 5.2602e-08, 7.3114e-07, 2.9842e-08, 3.9052e-08, 1.5101e-07, 4.4197e-07,\n 7.2704e-12, 3.7921e-07, 6.2610e-08, 3.7938e-07, 1.6789e-07, 2.1171e-07,\n 5.7963e-07, 1.5091e-07, 2.1527e-07, 2.9432e-09, 2.1664e-08, 4.9649e-07,\n 1.0613e-07, 1.3070e-10, 2.3585e-07, 2.4848e-07, 7.0061e-07, 6.4573e-07,\n 4.6628e-07, 5.9714e-07, 4.1805e-07, 7.1341e-07, 3.1454e-07, 4.0249e-07,\n 7.3150e-07, 6.2744e-07, 3.6217e-07, 3.5176e-07, 4.7318e-07, 1.3775e-07,\n 5.8945e-07, 3.8523e-07, 3.5848e-07, 3.9295e-07, 4.1564e-07, 5.6205e-07,\n 4.6178e-07, 2.3511e-07, 1.5755e-06, 7.3405e-07, 2.8774e-07, 1.7637e-07,\n 5.2392e-07, 2.5125e-07, 3.6837e-07, 4.6389e-07, 2.9226e-07, 5.5101e-07,\n 3.8778e-07, 4.4171e-07, 4.9384e-07, 6.4828e-07, 1.0818e-07, 8.4257e-08,\n 2.3892e-07, 1.9794e-07, 2.1823e-07, 4.6622e-07, 3.9399e-07, 1.2944e-07,\n 3.5331e-07, 3.4420e-07, 2.4789e-07, 3.1963e-07, 6.2532e-12, 5.7142e-07,\n 2.9454e-07, 3.0652e-07, 5.2712e-07, 7.3129e-07, 4.6107e-07, 5.2264e-07,\n 2.4153e-07, 3.2139e-08, 4.8467e-07, 7.2869e-07, 3.2880e-07, 3.2831e-07,\n 6.0122e-07, 3.6744e-07, 6.2868e-07, 7.8528e-07, 1.2375e-06, 2.6220e-09,\n 3.1474e-07, 4.5343e-07, 4.7370e-07, 1.1446e-06, 7.9570e-07, 5.1680e-07,\n 4.6073e-07, 3.9374e-07, 2.2723e-07, 1.6543e-06, 2.8791e-07, 5.0370e-07,\n 2.0832e-07, 4.3860e-07, 4.4276e-11, 8.4545e-07, 2.1916e-08, 2.7474e-07,\n 4.4274e-07, 3.0861e-07, 2.2163e-07, 1.0390e-09, 6.7856e-07, 5.0676e-08,\n 3.2882e-07, 1.7897e-07, 3.7333e-07, 4.3378e-08, 3.7269e-07, 7.2058e-07,\n 4.3799e-07, 1.0235e-07, 3.3094e-07, 3.1783e-07, 5.0619e-07, 1.3178e-10,\n 1.3501e-07, 2.0200e-07, 1.7494e-14, 5.7658e-07, 3.8591e-07, 4.2911e-07,\n 2.4771e-07, 3.5637e-07, 2.9421e-07, 2.0331e-11, 3.7917e-07, 5.1487e-07,\n 2.5705e-07, 2.6928e-07, 2.1682e-07, 2.5868e-07, 2.2750e-07, 3.2723e-07,\n 3.2292e-07, 3.9027e-07, 5.2630e-07, 2.4402e-07, 2.6727e-07, 6.1129e-07,\n 4.3542e-07, 2.8849e-08, 4.0093e-07, 3.1644e-07, 4.9146e-07, 2.7416e-07,\n 3.0059e-07, 3.6327e-07, 5.6547e-07, 9.1957e-11, 4.2031e-07, 1.0670e-07,\n 3.3871e-07, 7.0829e-07, 5.9199e-07, 5.2642e-07, 7.9199e-07, 7.0231e-07,\n 4.8826e-07, 4.1514e-07, 9.1235e-12, 4.6365e-07, 3.7160e-07, 5.4612e-08,\n 1.5087e-07, 3.4358e-08, 2.3952e-09, 7.4654e-11, 2.7093e-07, 9.4191e-07,\n 2.8261e-13, 2.9807e-07, 4.2693e-07, 4.7382e-07, 7.8762e-07, 3.0292e-11,\n 5.6325e-07, 7.9079e-07, 9.0142e-08, 3.4525e-07, 3.5780e-07, 3.3833e-07,\n 8.7611e-07, 1.8663e-07, 3.9496e-07, 4.0111e-07, 4.2342e-07, 4.6491e-07,\n 2.7207e-07, 3.0106e-07, 7.1026e-07, 1.7047e-07, 7.0059e-07, 6.8783e-09,\n 8.9622e-07, 6.9310e-07, 6.2852e-07, 8.3384e-07, 2.6018e-07, 1.0243e-06,\n 5.3992e-07, 1.1786e-07, 6.6475e-07, 1.0875e-08, 3.4351e-07, 2.6625e-12,\n 2.1019e-07, 2.4502e-07, 3.3709e-07, 3.0551e-07, 6.8947e-07, 1.3428e-08,\n 1.1972e-07, 2.8665e-10, 6.3546e-07, 4.5789e-07, 4.9026e-07, 6.7815e-07,\n 3.6684e-07, 1.2523e-07, 5.5975e-07, 2.1415e-07, 3.3469e-07, 7.1102e-07,\n 3.3528e-09, 1.1463e-07, 1.0279e-06, 1.1539e-07, 6.0722e-09, 2.5076e-07,\n 6.8400e-07, 4.0000e-07, 8.1364e-11, 4.1349e-07, 4.3431e-07, 3.2538e-07,\n 5.1001e-07, 6.2983e-07, 5.9404e-07, 3.2815e-07, 6.4774e-10, 1.0648e-06,\n 4.0801e-07, 5.1052e-12, 5.2051e-07, 2.0598e-07, 2.1339e-07, 5.8207e-07,\n 4.8607e-07, 1.0650e-07, 3.5299e-07, 3.6529e-07, 2.0474e-07, 6.3783e-07,\n 4.5560e-07, 8.4534e-07, 3.5970e-07, 4.3624e-07, 7.3694e-07, 1.1698e-07,\n 3.1775e-07, 6.6452e-07, 7.5309e-07, 7.9342e-07, 1.7874e-07, 9.4435e-07,\n 1.4076e-07, 4.9081e-07, 6.1167e-07, 1.3327e-07, 5.9404e-07, 3.4911e-07,\n 2.6819e-07, 1.9796e-07, 3.0262e-07, 7.9704e-07, 1.3227e-06, 3.5589e-07,\n 1.6008e-07, 2.6013e-07, 7.5773e-07, 4.0754e-07, 4.2530e-07, 1.2245e-06,\n 3.3805e-07, 9.9470e-08, 1.2866e-07, 2.7458e-07, 3.3954e-07, 5.9832e-07,\n 1.3696e-07, 2.5196e-07, 3.4275e-07, 3.7349e-07, 1.9722e-07, 1.9988e-07,\n 6.6512e-07, 5.7936e-07, 6.4840e-10, 9.8007e-17, 6.7902e-07, 2.2627e-07],\n device='cuda:0')" }, "4": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-6.9723e-06, -5.6052e-45, -1.9480e-07, ..., 5.6052e-45,\n -1.7607e-05, 7.2223e-06],\n [-1.4869e-05, -5.6052e-45, 1.6776e-07, ..., -5.6052e-45,\n 2.9163e-06, 5.0429e-06],\n [ 2.3633e-05, -5.6052e-45, -2.0383e-07, ..., -5.6052e-45,\n -1.4050e-05, 1.1134e-05],\n ...,\n [ 1.6885e-05, -5.6052e-45, 2.7860e-07, ..., -5.6052e-45,\n 7.9984e-06, 1.5818e-06],\n [-9.7993e-06, 5.6052e-45, 2.6818e-07, ..., -5.6052e-45,\n -1.4768e-05, 1.9807e-05],\n [-1.4280e-05, 5.6052e-45, -5.4336e-09, ..., -5.6052e-45,\n -5.1312e-07, 1.2072e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.5730e-09, 8.3910e-13, 1.9574e-10, ..., 9.6131e-19, 3.1881e-09,\n 5.4013e-10],\n [7.2927e-09, 1.5473e-11, 5.9144e-11, ..., 4.9130e-17, 3.4340e-09,\n 2.6113e-09],\n [1.0876e-08, 9.4201e-14, 3.1303e-11, ..., 2.1427e-17, 3.5554e-09,\n 2.7200e-09],\n ...,\n [9.9474e-09, 1.5315e-11, 2.1602e-11, ..., 1.8746e-17, 2.0396e-09,\n 1.5691e-09],\n [1.2948e-08, 1.2836e-12, 1.8681e-10, ..., 2.7938e-19, 3.5793e-09,\n 4.2711e-09],\n [1.0005e-08, 1.9714e-12, 1.5010e-10, ..., 1.7904e-17, 3.4572e-09,\n 3.1526e-09]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 1.4316e-06, -5.6052e-45, -2.0064e-06, ..., 5.6052e-45,\n 1.7084e-06, -3.0339e-07],\n [-6.7878e-06, -5.6052e-45, 6.7377e-06, ..., -5.6052e-45,\n -2.4318e-05, 4.1811e-06],\n [ 1.8044e-05, -5.6052e-45, -4.1622e-06, ..., -5.6052e-45,\n 1.5543e-05, -5.2742e-06],\n ...,\n [ 9.2628e-08, -5.6052e-45, -6.4906e-07, ..., -5.6052e-45,\n -9.4916e-06, -4.6556e-06],\n [-1.9014e-06, 5.6052e-45, 9.9980e-07, ..., -5.6052e-45,\n -8.8697e-06, -6.8626e-06],\n [-5.4511e-06, 5.6052e-45, 1.0360e-05, ..., -5.6052e-45,\n 8.2508e-06, 3.0087e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4567e-09, 2.3978e-13, 1.4296e-10, ..., 2.7470e-19, 2.2476e-09,\n 4.8657e-10],\n [4.4572e-09, 4.4215e-12, 2.5097e-10, ..., 1.4039e-17, 3.3906e-09,\n 2.2608e-09],\n [6.2393e-09, 2.6919e-14, 2.9377e-10, ..., 6.1229e-18, 3.4560e-09,\n 2.3802e-09],\n ...,\n [5.8083e-09, 4.3764e-12, 3.1514e-10, ..., 5.3568e-18, 1.9122e-09,\n 1.4335e-09],\n [7.5745e-09, 3.6680e-13, 2.8010e-10, ..., 7.9834e-20, 3.2970e-09,\n 3.3057e-09],\n [5.9110e-09, 5.6333e-13, 4.8798e-10, ..., 5.1164e-18, 2.7308e-09,\n 2.6614e-09]], device='cuda:0')" }, "5": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-1.1049e-37, -7.5380e-12, 0.0000e+00, ..., -1.9722e-10,\n 3.4509e-15, 5.6052e-45],\n [ 1.0658e-07, 9.5533e-07, -5.6052e-45, ..., -2.6661e-07,\n 4.6272e-08, 3.7274e-07],\n [-8.4002e-09, 1.2374e-07, -5.6052e-45, ..., -4.0115e-07,\n 1.4932e-06, 3.1902e-07],\n ...,\n [-1.4340e-06, -1.9331e-06, 2.6553e-41, ..., -2.3349e-07,\n 2.4483e-07, 1.3270e-06],\n [ 1.1407e-09, 2.2466e-06, -5.6052e-45, ..., -3.0531e-06,\n 1.5926e-06, 3.9220e-08],\n [ 2.7531e-06, -1.5339e-06, -5.6052e-45, ..., 7.7509e-07,\n 1.2155e-06, 3.3995e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.5670e-12, 4.3546e-12, 0.0000e+00, ..., 8.2909e-13, 2.2270e-11,\n 5.8867e-13],\n [3.8068e-10, 1.8270e-10, 1.0331e-12, ..., 6.5363e-10, 5.0998e-10,\n 7.3235e-11],\n [2.8487e-11, 2.9304e-11, 6.8448e-14, ..., 5.0903e-11, 1.7708e-10,\n 4.6909e-11],\n ...,\n [4.8653e-10, 2.1801e-10, 1.4983e-12, ..., 7.8652e-11, 9.7122e-11,\n 2.4574e-09],\n [7.9861e-12, 1.2800e-10, 1.4390e-14, ..., 1.7001e-10, 3.1079e-10,\n 2.2594e-11],\n [2.3274e-10, 1.1993e-10, 2.5879e-13, ..., 4.9258e-10, 1.1941e-10,\n 7.4302e-10]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-6.6167e-19, -5.5776e-11, 0.0000e+00, ..., -2.1565e-10,\n -2.7090e-19, -5.8115e-24],\n [ 1.7766e-06, 1.7971e-06, -5.6052e-45, ..., 2.7743e-06,\n -7.4871e-06, -4.4757e-07],\n [-6.9732e-08, -8.8972e-07, -5.6052e-45, ..., -2.2696e-07,\n 4.7031e-07, -8.5477e-07],\n ...,\n [ 2.3844e-06, -1.5031e-06, 5.6052e-45, ..., -9.9324e-08,\n -1.6296e-07, -1.8572e-05],\n [ 7.6787e-08, 1.2739e-06, -5.6052e-45, ..., 3.3988e-06,\n 3.0498e-06, 3.7120e-08],\n [ 2.5113e-06, 1.5274e-06, -5.6052e-45, ..., 1.4171e-06,\n -1.0460e-07, 4.7918e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3051e-12, 1.2449e-12, 0.0000e+00, ..., 2.3701e-13, 6.3638e-12,\n 1.6822e-13],\n [2.6424e-10, 1.1281e-10, 2.9522e-13, ..., 4.1773e-10, 2.3751e-10,\n 3.1496e-11],\n [1.1785e-11, 1.2350e-11, 1.9559e-14, ..., 2.4892e-11, 1.1284e-10,\n 1.7720e-11],\n ...,\n [3.8727e-10, 1.4767e-10, 4.2815e-13, ..., 4.3400e-11, 1.0708e-10,\n 2.1363e-09],\n [3.3517e-12, 2.5129e-10, 4.1120e-15, ..., 1.5983e-10, 2.1311e-10,\n 1.8791e-11],\n [1.4092e-10, 5.6812e-11, 7.3950e-14, ..., 2.6643e-10, 7.2655e-11,\n 5.8548e-10]], device='cuda:0')" }, "6": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 1.1606e-08, -3.6526e-05, -3.7104e-06, ..., 2.8214e-05,\n 2.0651e-05, 3.5293e-05], device='cuda:0')", - "exp_avg_sq": "tensor([4.5584e-10, 5.1557e-08, 1.2877e-08, ..., 3.5296e-08, 1.3297e-08,\n 3.0710e-08], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-2.9739e-08, -9.6404e-06, -2.4299e-05, ..., 4.5289e-06,\n 3.8405e-06, 3.5178e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.3030e-10, 2.7239e-08, 7.4001e-09, ..., 2.0782e-08, 8.5761e-09,\n 1.6234e-08], device='cuda:0')" }, "7": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 6.6865e-10, 8.2075e-07, -1.7039e-06, ..., 8.5299e-07,\n -1.4859e-06, -2.3474e-06],\n [-3.7056e-09, 2.6293e-06, 1.6175e-06, ..., 2.6888e-06,\n 7.2217e-07, 1.7641e-06],\n [-8.9940e-10, -8.0847e-07, 1.1742e-06, ..., 2.2207e-06,\n -4.1395e-07, -2.1736e-07],\n ...,\n [-3.4476e-09, -2.1548e-06, -1.7662e-07, ..., -2.8316e-07,\n 1.3020e-06, -2.6602e-06],\n [ 1.7088e-09, 1.2234e-06, 1.0442e-06, ..., 2.1703e-06,\n 8.6852e-07, -3.0169e-06],\n [-5.4819e-09, 2.3014e-06, 1.5491e-06, ..., -1.0597e-06,\n 6.9973e-07, 1.8270e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.7376e-11, 3.0709e-11, 5.3679e-11, ..., 2.6120e-11, 2.6402e-11,\n 3.8409e-11],\n [8.5892e-11, 8.3604e-11, 6.3620e-11, ..., 5.9595e-11, 3.4082e-11,\n 5.6347e-11],\n [7.9039e-12, 6.2825e-11, 1.2633e-10, ..., 4.9444e-11, 3.3846e-11,\n 7.3381e-11],\n ...,\n [2.0057e-11, 7.6697e-11, 3.4492e-11, ..., 5.6300e-11, 5.2138e-11,\n 1.0624e-10],\n [2.4010e-11, 9.0693e-11, 9.8801e-11, ..., 6.1589e-11, 5.1280e-11,\n 7.0476e-11],\n [1.3375e-11, 7.0728e-11, 2.0362e-10, ..., 5.5899e-11, 3.1467e-11,\n 5.7287e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 6.0665e-11, 5.0995e-07, 3.5536e-07, ..., -2.0912e-08,\n 1.1108e-06, 9.4519e-09],\n [ 5.1774e-10, 1.0021e-07, -3.0247e-08, ..., -1.3988e-06,\n -7.6887e-07, -1.2255e-07],\n [-1.6020e-10, 7.1003e-07, 3.6876e-07, ..., -2.2104e-07,\n -4.4563e-07, -6.5591e-08],\n ...,\n [-1.7519e-09, 5.9486e-07, -6.1973e-07, ..., 5.2647e-07,\n -7.6905e-07, -2.5892e-07],\n [-2.1909e-09, 2.0069e-08, 3.5291e-07, ..., -1.4907e-06,\n -9.7642e-07, -5.1371e-08],\n [-3.9906e-13, 2.3625e-07, -7.6400e-07, ..., 1.1686e-06,\n 1.6430e-07, 6.5122e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.9655e-12, 1.6720e-11, 3.5387e-11, ..., 1.6368e-11, 1.4670e-11,\n 1.8055e-11],\n [2.4545e-11, 4.3705e-11, 3.4227e-11, ..., 3.0594e-11, 1.8008e-11,\n 2.5034e-11],\n [2.2587e-12, 3.3762e-11, 4.4664e-11, ..., 2.8628e-11, 1.5808e-11,\n 3.6068e-11],\n ...,\n [5.7318e-12, 4.1034e-11, 2.1585e-11, ..., 3.3085e-11, 2.2133e-11,\n 4.9021e-11],\n [6.8614e-12, 5.0979e-11, 6.9217e-11, ..., 3.5820e-11, 2.0390e-11,\n 3.5244e-11],\n [3.8234e-12, 3.4248e-11, 1.4577e-10, ..., 2.9569e-11, 1.8069e-11,\n 2.7822e-11]], device='cuda:0')" }, "14": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.1537e-06], device='cuda:0')" + "exp_avg_sq": "tensor([6.1544e-07], device='cuda:0')" }, "15": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.7784e-09, 1.5728e-07, 1.1825e-07], device='cuda:0')" + "exp_avg_sq": "tensor([7.9396e-10, 4.4944e-08, 3.3791e-08], device='cuda:0')" }, "16": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.6731e-04, 1.7239e-05, 1.8654e-05, 1.9926e-05], device='cuda:0')" + "exp_avg_sq": "tensor([4.7810e-05, 4.9263e-06, 5.3304e-06, 5.6940e-06], device='cuda:0')" }, "18": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.7838e-10, 1.1135e-10, 0.0000e+00, ..., 7.3476e-10, 2.6426e-10,\n 8.4820e-12],\n [3.8057e-11, 9.7831e-11, 0.0000e+00, ..., 8.9943e-11, 3.1627e-10,\n 8.9133e-11],\n [1.1438e-11, 4.0365e-11, 0.0000e+00, ..., 1.3489e-11, 8.7453e-11,\n 2.8642e-11],\n ...,\n [1.4493e-12, 7.7948e-12, 0.0000e+00, ..., 1.1116e-11, 1.5394e-10,\n 8.6724e-13],\n [2.7916e-10, 2.9348e-10, 0.0000e+00, ..., 3.5522e-10, 1.4889e-09,\n 8.9350e-11],\n [8.7198e-13, 6.7768e-13, 0.0000e+00, ..., 1.3413e-12, 1.3304e-11,\n 3.1512e-12]], device='cuda:0')" + "exp_avg_sq": "tensor([[5.0973e-11, 3.1820e-11, 0.0000e+00, ..., 2.0996e-10, 7.5513e-11,\n 2.4238e-12],\n [1.0875e-11, 2.7956e-11, 0.0000e+00, ..., 2.5702e-11, 9.0376e-11,\n 2.5470e-11],\n [3.2684e-12, 1.1535e-11, 0.0000e+00, ..., 3.8547e-12, 2.4990e-11,\n 8.1845e-12],\n ...,\n [4.1414e-13, 2.2274e-12, 0.0000e+00, ..., 3.1765e-12, 4.3990e-11,\n 2.4782e-13],\n [7.9773e-11, 8.3864e-11, 0.0000e+00, ..., 1.0151e-10, 4.2545e-10,\n 2.5532e-11],\n [2.4918e-13, 1.9365e-13, 0.0000e+00, ..., 3.8328e-13, 3.8018e-12,\n 9.0047e-13]], device='cuda:0')" }, "19": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.5235e-07, 1.1506e-07, 2.6289e-08, 4.7726e-08, 1.7363e-08, 1.6826e-10,\n 2.9874e-09, 4.7385e-09, 1.2110e-08, 6.8553e-10, 3.8405e-08, 1.9531e-07,\n 8.0481e-08, 9.7491e-08, 1.4719e-08, 9.1790e-08, 2.1878e-07, 1.8135e-08,\n 1.2130e-07, 1.9870e-07, 2.3718e-09, 8.3483e-08, 1.8048e-08, 1.7844e-07,\n 3.6101e-08, 7.3060e-08, 3.2562e-08, 2.0256e-08, 5.5157e-09, 3.4170e-08,\n 1.0222e-07, 5.1230e-08, 1.8615e-09, 4.6991e-07, 2.4365e-07, 1.1846e-07,\n 1.2283e-08, 1.3546e-09, 9.3102e-08, 9.0735e-08, 4.4516e-07, 1.1011e-08,\n 1.9650e-09, 6.2227e-09, 1.5709e-08, 1.1413e-07, 4.9074e-08, 3.1989e-08,\n 3.1128e-08, 3.6491e-08, 1.4927e-09, 5.2545e-08, 1.3346e-07, 8.1420e-08,\n 1.7266e-08, 9.0035e-09, 6.1358e-08, 7.8158e-10, 2.3602e-08, 8.4855e-09,\n 2.6432e-08, 5.8395e-08, 3.7659e-08, 2.1826e-08, 2.1391e-09, 1.3930e-08,\n 5.0655e-08, 2.9798e-09, 6.3849e-08, 6.0097e-08, 5.7479e-10, 1.9402e-08,\n 5.1324e-08, 2.3332e-09, 2.0591e-08, 1.7641e-10, 1.3652e-08, 3.4862e-07,\n 6.9314e-09, 6.4063e-10, 1.0687e-07, 1.4304e-07, 2.3442e-11, 7.9764e-10,\n 2.5482e-08, 8.8509e-08, 2.6630e-09, 4.5297e-08, 5.8069e-09, 1.0072e-07,\n 1.5994e-08, 1.4608e-07, 1.8620e-07, 1.3176e-08, 1.8447e-08, 7.3434e-08,\n 4.8411e-09, 2.8869e-07, 9.6297e-08, 3.4492e-07, 9.8441e-08, 6.8211e-09,\n 1.3611e-09, 4.1655e-08, 3.4619e-09, 3.4387e-07, 8.6317e-09, 2.4456e-08,\n 5.5106e-08, 4.2516e-10, 4.4364e-07, 8.2186e-09, 1.3195e-07, 5.6036e-08,\n 1.9009e-08, 2.6149e-07, 6.5149e-09, 3.1698e-08, 8.7519e-08, 4.1874e-08,\n 4.5678e-08, 8.4521e-08, 8.3933e-09, 1.4109e-09, 5.5018e-10, 3.0735e-08,\n 7.3298e-07, 5.7154e-09, 4.8111e-08, 9.5418e-09, 1.2840e-09, 2.3571e-07,\n 1.9810e-08, 3.3179e-08, 1.3428e-07, 9.5886e-08, 8.1039e-08, 2.4846e-07,\n 1.0713e-07, 2.3569e-11, 9.7726e-10, 1.5856e-09, 8.6281e-08, 3.5408e-07,\n 1.9831e-07, 1.3029e-08, 1.3326e-08, 1.7203e-08, 1.0613e-07, 2.4033e-10,\n 5.4997e-09, 1.1048e-07, 3.8141e-08, 4.2477e-08, 9.5027e-08, 3.2692e-09,\n 2.9849e-08, 7.8425e-08, 1.5236e-08, 2.5402e-07, 1.2044e-07, 1.9764e-08,\n 4.6882e-08, 1.2729e-07, 8.0467e-09, 5.7572e-08, 5.6675e-07, 8.6112e-08,\n 5.9501e-08, 2.7437e-08, 3.9684e-09, 9.4661e-08, 3.8696e-07, 9.7328e-08,\n 1.2791e-07, 9.3170e-10, 1.2897e-08, 4.4531e-08, 5.0747e-08, 5.4406e-08,\n 6.6768e-08, 3.1176e-08, 6.2598e-09, 3.7589e-08, 7.1548e-08, 1.9854e-10,\n 1.8740e-08, 7.6965e-08, 1.9522e-08, 3.1025e-07, 2.3191e-07, 2.4052e-08,\n 1.5922e-08, 6.8965e-09, 4.6026e-11, 1.1113e-06, 1.3024e-08, 2.2424e-08,\n 2.4758e-08, 5.4281e-07, 5.4079e-12, 5.0690e-07, 9.5349e-08, 1.4507e-08,\n 9.2618e-08, 1.1380e-06, 1.0463e-07, 3.2654e-08, 4.7391e-10, 1.4689e-07,\n 1.1859e-08, 2.1436e-07, 3.3377e-08, 6.8006e-08, 5.2058e-10, 8.6597e-08,\n 6.6591e-07, 3.7697e-10, 7.3397e-09, 2.1527e-08, 1.5848e-08, 4.4301e-07,\n 5.5792e-08, 3.1854e-07, 4.4160e-08, 5.3624e-11, 1.1377e-09, 6.5178e-09,\n 2.4157e-09, 9.3006e-08, 7.9516e-10, 8.4989e-10, 1.6144e-07, 6.3136e-08,\n 4.9240e-08, 1.0971e-08, 1.3132e-08, 9.9402e-08, 1.2445e-08, 1.0936e-08,\n 1.2686e-07, 5.1630e-10, 2.7930e-08, 1.7126e-07, 2.9854e-08, 8.5523e-10,\n 1.5983e-09, 7.1870e-08, 8.1143e-09, 2.5849e-07, 1.3476e-07, 1.3541e-08,\n 1.1641e-07, 1.3745e-08, 3.8876e-07, 6.8741e-09], device='cuda:0')" + "exp_avg_sq": "tensor([7.2111e-08, 3.2879e-08, 7.5124e-09, 1.3638e-08, 4.9617e-09, 4.8081e-11,\n 8.5368e-10, 1.3541e-09, 3.4605e-09, 1.9590e-10, 1.0975e-08, 5.5811e-08,\n 2.2998e-08, 2.7859e-08, 4.2059e-09, 2.6230e-08, 6.2517e-08, 5.1822e-09,\n 3.4662e-08, 5.6780e-08, 6.7777e-10, 2.3856e-08, 5.1573e-09, 5.0992e-08,\n 1.0316e-08, 2.0878e-08, 9.3048e-09, 5.7882e-09, 1.5762e-09, 9.7644e-09,\n 2.9211e-08, 1.4639e-08, 5.3193e-10, 1.3428e-07, 6.9624e-08, 3.3850e-08,\n 3.5099e-09, 3.8708e-10, 2.6605e-08, 2.5928e-08, 1.2721e-07, 3.1465e-09,\n 5.6150e-10, 1.7782e-09, 4.4891e-09, 3.2614e-08, 1.4023e-08, 9.1412e-09,\n 8.8950e-09, 1.0427e-08, 4.2656e-10, 1.5015e-08, 3.8137e-08, 2.3267e-08,\n 4.9338e-09, 2.5728e-09, 1.7534e-08, 2.2334e-10, 6.7443e-09, 2.4248e-09,\n 7.5530e-09, 1.6687e-08, 1.0761e-08, 6.2370e-09, 6.1128e-10, 3.9807e-09,\n 1.4475e-08, 8.5150e-10, 1.8245e-08, 1.7173e-08, 1.6425e-10, 5.5442e-09,\n 1.4666e-08, 6.6673e-10, 5.8840e-09, 5.0412e-11, 3.9012e-09, 9.9621e-08,\n 1.9807e-09, 1.8306e-10, 3.0539e-08, 4.0876e-08, 6.6988e-12, 2.2793e-10,\n 7.2817e-09, 2.5292e-08, 7.6099e-10, 1.2944e-08, 1.6594e-09, 2.8781e-08,\n 4.5703e-09, 4.1744e-08, 5.3209e-08, 3.7651e-09, 5.2714e-09, 2.0984e-08,\n 1.3834e-09, 8.2495e-08, 2.7518e-08, 9.8563e-08, 2.8130e-08, 1.9492e-09,\n 3.8896e-10, 1.1903e-08, 9.8927e-10, 9.8263e-08, 2.4666e-09, 6.9886e-09,\n 1.5747e-08, 1.2149e-10, 1.2677e-07, 2.3485e-09, 3.7704e-08, 1.6013e-08,\n 5.4320e-09, 7.4722e-08, 1.8617e-09, 9.0578e-09, 2.5009e-08, 1.1966e-08,\n 1.3053e-08, 2.4153e-08, 2.3984e-09, 4.0316e-10, 1.5722e-10, 8.7827e-09,\n 2.0945e-07, 1.6332e-09, 1.3748e-08, 2.7266e-09, 3.6691e-10, 6.7357e-08,\n 5.6610e-09, 9.4812e-09, 3.8372e-08, 2.7400e-08, 2.3158e-08, 7.0999e-08,\n 3.0614e-08, 6.7351e-12, 2.7926e-10, 4.5310e-10, 2.4655e-08, 1.0118e-07,\n 5.6669e-08, 3.7232e-09, 3.8080e-09, 4.9158e-09, 3.0326e-08, 6.8675e-11,\n 1.5716e-09, 3.1572e-08, 1.0899e-08, 1.2138e-08, 2.7155e-08, 9.3420e-10,\n 8.5297e-09, 2.2411e-08, 4.3538e-09, 7.2589e-08, 3.4418e-08, 5.6478e-09,\n 1.3397e-08, 3.6374e-08, 2.2994e-09, 1.6452e-08, 1.6195e-07, 2.4607e-08,\n 1.7003e-08, 7.8402e-09, 1.1340e-09, 2.7050e-08, 1.1058e-07, 2.7812e-08,\n 3.6550e-08, 2.6624e-10, 3.6853e-09, 1.2725e-08, 1.4501e-08, 1.5547e-08,\n 1.9080e-08, 8.9087e-09, 1.7888e-09, 1.0741e-08, 2.0446e-08, 5.6734e-11,\n 5.3550e-09, 2.1993e-08, 5.5786e-09, 8.8656e-08, 6.6271e-08, 6.8729e-09,\n 4.5497e-09, 1.9707e-09, 1.3152e-11, 3.1757e-07, 3.7217e-09, 6.4078e-09,\n 7.0747e-09, 1.5511e-07, 1.5454e-12, 1.4485e-07, 2.7247e-08, 4.1455e-09,\n 2.6466e-08, 3.2520e-07, 2.9900e-08, 9.3312e-09, 1.3543e-10, 4.1974e-08,\n 3.3888e-09, 6.1255e-08, 9.5377e-09, 1.9433e-08, 1.4876e-10, 2.4746e-08,\n 1.9029e-07, 1.0772e-10, 2.0974e-09, 6.1515e-09, 4.5288e-09, 1.2659e-07,\n 1.5943e-08, 9.1026e-08, 1.2619e-08, 1.5323e-11, 3.2511e-10, 1.8625e-09,\n 6.9031e-10, 2.6577e-08, 2.2722e-10, 2.4286e-10, 4.6132e-08, 1.8042e-08,\n 1.4071e-08, 3.1350e-09, 3.7526e-09, 2.8405e-08, 3.5563e-09, 3.1250e-09,\n 3.6252e-08, 1.4754e-10, 7.9813e-09, 4.8939e-08, 8.5311e-09, 2.4439e-10,\n 4.5672e-10, 2.0537e-08, 2.3187e-09, 7.3865e-08, 3.8509e-08, 3.8693e-09,\n 3.3264e-08, 3.9276e-09, 1.1109e-07, 1.9643e-09], device='cuda:0')" }, "20": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.2257e-10, 5.6932e-10, 3.8286e-11, 2.4985e-10, 2.9311e-11, 2.4174e-12,\n 5.3155e-12, 2.4684e-11, 1.4657e-11, 8.6104e-13, 7.0743e-11, 3.5426e-10,\n 2.0396e-10, 4.5882e-10, 4.7230e-11, 2.1621e-10, 7.0913e-10, 3.1453e-11,\n 3.9236e-10, 5.6533e-10, 1.1715e-12, 3.5115e-10, 3.9704e-11, 7.2827e-10,\n 1.4156e-10, 1.4180e-10, 8.2349e-11, 6.7301e-11, 2.0017e-11, 6.0479e-11,\n 3.1219e-10, 1.5199e-10, 1.1855e-11, 1.2651e-09, 9.4403e-10, 2.9542e-10,\n 2.4386e-11, 2.4679e-14, 1.9346e-10, 1.6403e-10, 1.8830e-09, 2.6268e-11,\n 8.3597e-12, 2.7300e-11, 3.1384e-11, 2.5674e-10, 1.0690e-10, 6.7485e-11,\n 4.7607e-11, 5.6381e-11, 1.1696e-12, 1.4357e-10, 2.7709e-10, 1.7749e-10,\n 2.3651e-11, 4.2459e-11, 1.9850e-10, 1.6353e-12, 7.7894e-11, 2.4035e-11,\n 4.7723e-11, 4.3562e-10, 1.0563e-10, 5.2423e-11, 3.2372e-14, 2.2255e-11,\n 1.1060e-10, 1.6178e-12, 1.1789e-10, 1.9457e-10, 1.3526e-11, 4.4578e-11,\n 5.6985e-11, 7.8336e-12, 3.7659e-11, 1.1401e-11, 7.9605e-11, 1.7701e-09,\n 6.0667e-12, 1.8104e-15, 4.4762e-10, 3.7085e-10, 3.4081e-12, 3.9404e-12,\n 5.0124e-11, 1.9775e-10, 1.0613e-11, 2.1205e-10, 1.7019e-11, 1.8780e-10,\n 4.8445e-11, 3.3381e-10, 6.7688e-10, 1.6706e-11, 5.1036e-11, 2.5882e-10,\n 1.0482e-11, 6.1490e-10, 3.7476e-10, 9.6987e-10, 2.9504e-10, 3.1755e-11,\n 1.6645e-11, 7.3831e-11, 1.4441e-12, 8.0806e-10, 9.0230e-12, 5.7656e-11,\n 1.1474e-10, 6.0086e-13, 1.1323e-09, 2.5619e-11, 3.7393e-10, 1.1766e-10,\n 3.4686e-11, 1.0554e-09, 2.5668e-11, 6.7585e-11, 2.6388e-10, 1.0158e-10,\n 8.4045e-11, 1.7603e-10, 7.0538e-12, 1.3614e-13, 5.6146e-12, 4.7298e-11,\n 3.3144e-09, 2.1675e-11, 1.5366e-10, 1.4553e-11, 4.7051e-13, 7.0296e-10,\n 4.8478e-11, 9.8714e-11, 2.5776e-10, 2.1654e-10, 2.2762e-10, 6.4126e-10,\n 2.9308e-10, 3.9763e-12, 2.5060e-11, 9.0858e-12, 1.8958e-10, 1.0407e-09,\n 1.2461e-09, 5.1576e-11, 5.3532e-11, 4.0269e-11, 2.1984e-10, 6.0952e-14,\n 5.5052e-11, 2.4226e-10, 1.2314e-10, 4.8358e-11, 2.2293e-10, 1.9806e-11,\n 4.4051e-11, 1.3361e-10, 2.5064e-11, 1.0047e-09, 2.3487e-10, 2.2368e-11,\n 1.0922e-10, 4.2432e-10, 2.3383e-11, 7.4820e-11, 1.9482e-09, 1.6332e-10,\n 1.8207e-10, 6.9920e-11, 2.0337e-12, 3.7757e-10, 9.3210e-10, 3.6325e-10,\n 2.4077e-10, 3.5841e-12, 2.0154e-11, 1.9919e-10, 1.5544e-10, 1.4209e-10,\n 1.7243e-10, 6.0332e-11, 7.6017e-12, 5.6437e-11, 1.7911e-10, 1.0707e-16,\n 2.8869e-11, 1.7122e-10, 1.7193e-11, 7.7686e-10, 6.0531e-10, 3.0294e-11,\n 4.7349e-11, 3.7732e-11, 4.3385e-12, 4.3081e-09, 1.6581e-11, 3.5907e-11,\n 7.4929e-11, 1.4918e-09, 1.2852e-12, 1.3620e-09, 1.7206e-10, 1.9817e-11,\n 3.0808e-10, 2.9004e-09, 2.2664e-10, 8.2401e-11, 1.5535e-13, 2.2953e-10,\n 1.6244e-11, 5.3433e-10, 2.2663e-10, 2.0753e-10, 3.1112e-13, 1.5039e-10,\n 3.1682e-09, 1.2925e-13, 2.7556e-11, 2.5787e-11, 2.0243e-11, 1.9487e-09,\n 3.1046e-10, 7.7518e-10, 1.3056e-10, 1.1409e-12, 5.5301e-14, 1.9326e-11,\n 2.3927e-12, 2.6151e-10, 6.4271e-12, 5.9855e-13, 9.5229e-10, 2.7879e-10,\n 1.4902e-10, 2.5556e-11, 1.6797e-11, 2.8912e-10, 3.4157e-11, 1.0919e-11,\n 3.1412e-10, 1.2027e-13, 8.3802e-11, 4.4110e-10, 4.5271e-11, 1.4662e-13,\n 3.7645e-13, 1.3748e-10, 1.2798e-11, 6.9759e-10, 2.7383e-10, 1.9032e-11,\n 2.6074e-10, 2.7201e-11, 1.2450e-09, 3.6199e-11], device='cuda:0')" + "exp_avg_sq": "tensor([1.7790e-10, 1.6269e-10, 1.0941e-11, 7.1396e-11, 8.3760e-12, 6.9079e-13,\n 1.5190e-12, 7.0536e-12, 4.1884e-12, 2.4605e-13, 2.0215e-11, 1.0123e-10,\n 5.8283e-11, 1.3111e-10, 1.3496e-11, 6.1782e-11, 2.0264e-10, 8.9879e-12,\n 1.1212e-10, 1.6155e-10, 3.3478e-13, 1.0034e-10, 1.1346e-11, 2.0811e-10,\n 4.0451e-11, 4.0521e-11, 2.3532e-11, 1.9232e-11, 5.7199e-12, 1.7282e-11,\n 8.9209e-11, 4.3432e-11, 3.3877e-12, 3.6152e-10, 2.6976e-10, 8.4419e-11,\n 6.9684e-12, 7.0523e-15, 5.5283e-11, 4.6872e-11, 5.3808e-10, 7.5062e-12,\n 2.3888e-12, 7.8012e-12, 8.9683e-12, 7.3366e-11, 3.0546e-11, 1.9284e-11,\n 1.3604e-11, 1.6111e-11, 3.3422e-13, 4.1026e-11, 7.9180e-11, 5.0719e-11,\n 6.7584e-12, 1.2133e-11, 5.6724e-11, 4.6730e-13, 2.2259e-11, 6.8681e-12,\n 1.3637e-11, 1.2448e-10, 3.0185e-11, 1.4980e-11, 9.2504e-15, 6.3595e-12,\n 3.1605e-11, 4.6230e-13, 3.3689e-11, 5.5601e-11, 3.8651e-12, 1.2739e-11,\n 1.6284e-11, 2.2385e-12, 1.0761e-11, 3.2580e-12, 2.2748e-11, 5.0582e-10,\n 1.7336e-12, 5.1733e-16, 1.2791e-10, 1.0597e-10, 9.7390e-13, 1.1260e-12,\n 1.4323e-11, 5.6508e-11, 3.0326e-12, 6.0596e-11, 4.8633e-12, 5.3667e-11,\n 1.3843e-11, 9.5388e-11, 1.9342e-10, 4.7739e-12, 1.4584e-11, 7.3961e-11,\n 2.9954e-12, 1.7571e-10, 1.0709e-10, 2.7715e-10, 8.4309e-11, 9.0741e-12,\n 4.7564e-12, 2.1098e-11, 4.1266e-13, 2.3091e-10, 2.5784e-12, 1.6476e-11,\n 3.2788e-11, 1.7170e-13, 3.2358e-10, 7.3207e-12, 1.0685e-10, 3.3622e-11,\n 9.9119e-12, 3.0160e-10, 7.3347e-12, 1.9313e-11, 7.5407e-11, 2.9027e-11,\n 2.4016e-11, 5.0302e-11, 2.0157e-12, 3.8903e-14, 1.6044e-12, 1.3516e-11,\n 9.4712e-10, 6.1938e-12, 4.3911e-11, 4.1586e-12, 1.3445e-13, 2.0088e-10,\n 1.3853e-11, 2.8208e-11, 7.3658e-11, 6.1878e-11, 6.5044e-11, 1.8325e-10,\n 8.3751e-11, 1.1363e-12, 7.1612e-12, 2.5963e-12, 5.4175e-11, 2.9740e-10,\n 3.5609e-10, 1.4738e-11, 1.5297e-11, 1.1507e-11, 6.2821e-11, 1.7417e-14,\n 1.5732e-11, 6.9229e-11, 3.5189e-11, 1.3819e-11, 6.3704e-11, 5.6596e-12,\n 1.2588e-11, 3.8181e-11, 7.1622e-12, 2.8710e-10, 6.7116e-11, 6.3917e-12,\n 3.1212e-11, 1.2125e-10, 6.6820e-12, 2.1380e-11, 5.5671e-10, 4.6670e-11,\n 5.2028e-11, 1.9980e-11, 5.8114e-13, 1.0789e-10, 2.6636e-10, 1.0380e-10,\n 6.8801e-11, 1.0242e-12, 5.7591e-12, 5.6919e-11, 4.4419e-11, 4.0603e-11,\n 4.9273e-11, 1.7240e-11, 2.1723e-12, 1.6127e-11, 5.1182e-11, 3.0596e-17,\n 8.2496e-12, 4.8927e-11, 4.9129e-12, 2.2199e-10, 1.7297e-10, 8.6567e-12,\n 1.3530e-11, 1.0782e-11, 1.2397e-12, 1.2311e-09, 4.7380e-12, 1.0261e-11,\n 2.1412e-11, 4.2631e-10, 3.6725e-13, 3.8920e-10, 4.9167e-11, 5.6627e-12,\n 8.8036e-11, 8.2881e-10, 6.4764e-11, 2.3547e-11, 4.4392e-14, 6.5591e-11,\n 4.6419e-12, 1.5269e-10, 6.4762e-11, 5.9305e-11, 8.8904e-14, 4.2974e-11,\n 9.0533e-10, 3.6934e-14, 7.8743e-12, 7.3688e-12, 5.7845e-12, 5.5685e-10,\n 8.8715e-11, 2.2151e-10, 3.7307e-11, 3.2603e-13, 1.5803e-14, 5.5225e-12,\n 6.8373e-13, 7.4729e-11, 1.8366e-12, 1.7104e-13, 2.7213e-10, 7.9667e-11,\n 4.2585e-11, 7.3028e-12, 4.7998e-12, 8.2617e-11, 9.7606e-12, 3.1201e-12,\n 8.9763e-11, 3.4369e-14, 2.3947e-11, 1.2605e-10, 1.2937e-11, 4.1897e-14,\n 1.0757e-13, 3.9285e-11, 3.6571e-12, 1.9934e-10, 7.8250e-11, 5.4386e-12,\n 7.4508e-11, 7.7728e-12, 3.5577e-10, 1.0344e-11], device='cuda:0')" }, "21": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0678e-09, 5.4506e-10, 7.5041e-11, 2.4528e-10, 3.8571e-11, 1.6385e-12,\n 7.4314e-12, 4.0365e-11, 2.4637e-11, 4.8282e-13, 1.1726e-10, 6.6187e-10,\n 2.3342e-10, 4.5486e-10, 7.9489e-11, 4.1930e-10, 9.4825e-10, 5.0137e-11,\n 3.4848e-10, 8.2278e-10, 1.3245e-12, 3.9317e-10, 9.1003e-11, 7.6821e-10,\n 1.8912e-10, 2.1899e-10, 1.5164e-10, 1.0640e-10, 4.1219e-11, 8.8164e-11,\n 4.5842e-10, 2.3851e-10, 1.8538e-11, 1.6173e-09, 1.0420e-09, 5.0890e-10,\n 1.7622e-11, 5.1223e-15, 4.1091e-10, 3.1208e-10, 1.4482e-09, 6.1102e-11,\n 2.0163e-11, 4.8226e-11, 5.3808e-11, 3.6689e-10, 2.2662e-10, 1.5795e-10,\n 7.8218e-11, 1.1773e-10, 1.9114e-12, 2.3231e-10, 4.3504e-10, 2.5519e-10,\n 4.3054e-11, 5.8245e-11, 2.8359e-10, 1.4745e-12, 1.3151e-10, 5.0507e-11,\n 6.3250e-11, 3.0680e-10, 1.7229e-10, 9.9845e-11, 5.3187e-13, 3.4816e-11,\n 2.3949e-10, 4.3716e-12, 2.8387e-10, 2.9125e-10, 1.4869e-11, 9.8482e-11,\n 1.7840e-10, 1.6905e-11, 4.8963e-11, 1.0449e-11, 8.4781e-11, 1.5406e-09,\n 9.1426e-12, 1.4529e-15, 5.1624e-10, 6.4413e-10, 3.7546e-12, 8.5247e-12,\n 1.2990e-10, 4.1216e-10, 1.8741e-11, 2.3983e-10, 1.8831e-11, 4.4323e-10,\n 8.3723e-11, 6.2874e-10, 5.9965e-10, 3.4944e-11, 8.3014e-11, 1.9834e-10,\n 1.3748e-11, 9.8577e-10, 4.5153e-10, 1.4446e-09, 4.3395e-10, 4.7400e-11,\n 1.8982e-11, 1.1355e-10, 2.1332e-12, 1.1349e-09, 1.5372e-11, 1.2798e-10,\n 1.5507e-10, 3.9873e-13, 1.4898e-09, 5.0010e-11, 5.9352e-10, 2.0646e-10,\n 4.5658e-11, 8.2602e-10, 3.5286e-11, 8.5474e-11, 4.0032e-10, 2.0244e-10,\n 1.2943e-10, 2.4932e-10, 1.2977e-11, 3.7542e-13, 1.0455e-11, 8.8167e-11,\n 2.9466e-09, 3.2075e-11, 2.4291e-10, 2.6868e-11, 5.4319e-13, 1.0208e-09,\n 1.0112e-10, 1.5719e-10, 4.2328e-10, 3.1293e-10, 3.5898e-10, 8.3792e-10,\n 4.7695e-10, 4.8494e-12, 1.8949e-11, 1.4862e-11, 2.7615e-10, 1.2113e-09,\n 9.2158e-10, 7.7218e-11, 7.6436e-11, 8.9909e-11, 3.4796e-10, 1.3608e-13,\n 5.0483e-11, 3.3239e-10, 1.9334e-10, 1.2184e-10, 2.6384e-10, 3.0113e-11,\n 7.4712e-11, 2.8432e-10, 4.0406e-11, 1.0767e-09, 3.9551e-10, 5.4993e-11,\n 1.1028e-10, 5.7245e-10, 4.3240e-11, 1.6443e-10, 2.0237e-09, 2.7449e-10,\n 2.7276e-10, 1.3403e-10, 5.5895e-12, 2.4651e-10, 1.3273e-09, 4.6133e-10,\n 3.8463e-10, 7.3227e-12, 2.9980e-11, 2.3806e-10, 1.1610e-10, 2.6011e-10,\n 1.7635e-10, 7.7898e-11, 1.2677e-11, 1.0269e-10, 2.9996e-10, 3.0779e-14,\n 5.7516e-11, 2.1963e-10, 5.2477e-11, 1.2736e-09, 7.7399e-10, 6.3505e-11,\n 8.6638e-11, 4.5518e-11, 5.6985e-12, 4.5601e-09, 2.6347e-11, 4.8416e-11,\n 1.2021e-10, 1.9128e-09, 3.2144e-12, 2.1104e-09, 3.0681e-10, 3.2207e-11,\n 4.3441e-10, 3.9348e-09, 3.1358e-10, 7.6750e-11, 3.9239e-13, 5.1066e-10,\n 1.9651e-11, 9.2445e-10, 1.9025e-10, 3.2853e-10, 3.7195e-13, 2.8885e-10,\n 2.2361e-09, 8.7131e-14, 4.6379e-11, 4.8040e-11, 2.9437e-11, 1.8577e-09,\n 2.8941e-10, 1.3552e-09, 2.2692e-10, 2.1006e-12, 3.7908e-14, 4.3911e-11,\n 6.1040e-12, 3.9976e-10, 1.2858e-11, 1.2928e-12, 7.2203e-10, 2.9847e-10,\n 2.2717e-10, 5.4934e-11, 3.4007e-11, 4.4593e-10, 7.0862e-11, 2.6511e-11,\n 5.7254e-10, 1.1366e-13, 1.3594e-10, 5.3587e-10, 8.4971e-11, 2.8343e-13,\n 3.4773e-13, 2.2892e-10, 1.2780e-11, 1.1130e-09, 4.3000e-10, 3.1543e-11,\n 3.4949e-10, 4.6150e-11, 1.2892e-09, 5.0442e-11], device='cuda:0')" + "exp_avg_sq": "tensor([3.0513e-10, 1.5575e-10, 2.1444e-11, 7.0091e-11, 1.1022e-11, 4.6821e-13,\n 2.1236e-12, 1.1535e-11, 7.0403e-12, 1.3797e-13, 3.3509e-11, 1.8913e-10,\n 6.6702e-11, 1.2998e-10, 2.2715e-11, 1.1982e-10, 2.7097e-10, 1.4327e-11,\n 9.9581e-11, 2.3511e-10, 3.7849e-13, 1.1235e-10, 2.6005e-11, 2.1952e-10,\n 5.4043e-11, 6.2579e-11, 4.3333e-11, 3.0406e-11, 1.1779e-11, 2.5193e-11,\n 1.3100e-10, 6.8157e-11, 5.2974e-12, 4.6215e-10, 2.9776e-10, 1.4542e-10,\n 5.0357e-12, 1.4637e-15, 1.1742e-10, 8.9180e-11, 4.1383e-10, 1.7460e-11,\n 5.7616e-12, 1.3781e-11, 1.5376e-11, 1.0484e-10, 6.4757e-11, 4.5137e-11,\n 2.2351e-11, 3.3642e-11, 5.4620e-13, 6.6384e-11, 1.2431e-10, 7.2921e-11,\n 1.2303e-11, 1.6644e-11, 8.1039e-11, 4.2135e-13, 3.7581e-11, 1.4433e-11,\n 1.8074e-11, 8.7669e-11, 4.9232e-11, 2.8531e-11, 1.5199e-13, 9.9489e-12,\n 6.8435e-11, 1.2492e-12, 8.1119e-11, 8.3226e-11, 4.2490e-12, 2.8142e-11,\n 5.0979e-11, 4.8307e-12, 1.3991e-11, 2.9860e-12, 2.4227e-11, 4.4023e-10,\n 2.6126e-12, 4.1517e-16, 1.4752e-10, 1.8406e-10, 1.0729e-12, 2.4360e-12,\n 3.7119e-11, 1.1778e-10, 5.3554e-12, 6.8532e-11, 5.3812e-12, 1.2666e-10,\n 2.3924e-11, 1.7967e-10, 1.7135e-10, 9.9855e-12, 2.3722e-11, 5.6677e-11,\n 3.9287e-12, 2.8169e-10, 1.2903e-10, 4.1281e-10, 1.2400e-10, 1.3545e-11,\n 5.4244e-12, 3.2447e-11, 6.0957e-13, 3.2432e-10, 4.3927e-12, 3.6572e-11,\n 4.4313e-11, 1.1394e-13, 4.2573e-10, 1.4291e-11, 1.6960e-10, 5.8996e-11,\n 1.3047e-11, 2.3604e-10, 1.0083e-11, 2.4425e-11, 1.1439e-10, 5.7849e-11,\n 3.6987e-11, 7.1244e-11, 3.7083e-12, 1.0728e-13, 2.9877e-12, 2.5195e-11,\n 8.4202e-10, 9.1658e-12, 6.9414e-11, 7.6778e-12, 1.5522e-13, 2.9171e-10,\n 2.8897e-11, 4.4917e-11, 1.2096e-10, 8.9421e-11, 1.0258e-10, 2.3944e-10,\n 1.3629e-10, 1.3858e-12, 5.4149e-12, 4.2468e-12, 7.8913e-11, 3.4614e-10,\n 2.6335e-10, 2.2066e-11, 2.1842e-11, 2.5692e-11, 9.9432e-11, 3.8885e-14,\n 1.4426e-11, 9.4984e-11, 5.5247e-11, 3.4817e-11, 7.5395e-11, 8.6049e-12,\n 2.1349e-11, 8.1247e-11, 1.1546e-11, 3.0766e-10, 1.1302e-10, 1.5715e-11,\n 3.1514e-11, 1.6358e-10, 1.2356e-11, 4.6988e-11, 5.7828e-10, 7.8436e-11,\n 7.7943e-11, 3.8300e-11, 1.5973e-12, 7.0442e-11, 3.7929e-10, 1.3183e-10,\n 1.0991e-10, 2.0925e-12, 8.5671e-12, 6.8027e-11, 3.3177e-11, 7.4329e-11,\n 5.0393e-11, 2.2260e-11, 3.6225e-12, 2.9345e-11, 8.5717e-11, 8.7955e-15,\n 1.6436e-11, 6.2760e-11, 1.4996e-11, 3.6393e-10, 2.2117e-10, 1.8147e-11,\n 2.4757e-11, 1.3007e-11, 1.6284e-12, 1.3031e-09, 7.5288e-12, 1.3835e-11,\n 3.4351e-11, 5.4659e-10, 9.1853e-13, 6.0308e-10, 8.7673e-11, 9.2035e-12,\n 1.2414e-10, 1.1244e-09, 8.9607e-11, 2.1932e-11, 1.1213e-13, 1.4592e-10,\n 5.6155e-12, 2.6417e-10, 5.4364e-11, 9.3880e-11, 1.0629e-13, 8.2542e-11,\n 6.3900e-10, 2.4898e-14, 1.3253e-11, 1.3728e-11, 8.4119e-12, 5.3086e-10,\n 8.2700e-11, 3.8727e-10, 6.4844e-11, 6.0027e-13, 1.0833e-14, 1.2548e-11,\n 1.7443e-12, 1.1423e-10, 3.6742e-12, 3.6943e-13, 2.0632e-10, 8.5290e-11,\n 6.4915e-11, 1.5698e-11, 9.7177e-12, 1.2743e-10, 2.0249e-11, 7.5757e-12,\n 1.6361e-10, 3.2480e-14, 3.8847e-11, 1.5313e-10, 2.4281e-11, 8.0992e-14,\n 9.9368e-14, 6.5417e-11, 3.6521e-12, 3.1803e-10, 1.2287e-10, 9.0137e-12,\n 9.9869e-11, 1.3188e-11, 3.6839e-10, 1.4414e-11], device='cuda:0')" }, "22": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.5448e-11, 2.8341e-10, 0.0000e+00, ..., 4.1268e-10, 6.7483e-10,\n 1.0325e-10],\n [3.4607e-11, 3.8623e-11, 0.0000e+00, ..., 6.3737e-11, 1.9660e-10,\n 1.2497e-13],\n [4.6463e-11, 2.5240e-11, 0.0000e+00, ..., 4.2445e-11, 9.0686e-11,\n 7.3035e-12],\n ...,\n [7.9361e-11, 9.7847e-11, 0.0000e+00, ..., 1.2085e-10, 2.2869e-10,\n 7.2745e-11],\n [2.4163e-10, 7.6720e-11, 0.0000e+00, ..., 7.6265e-11, 4.8393e-10,\n 4.6070e-11],\n [1.1827e-12, 1.6563e-12, 0.0000e+00, ..., 1.7860e-11, 2.8466e-11,\n 5.8100e-12]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.1560e-11, 8.0988e-11, 0.0000e+00, ..., 1.1793e-10, 1.9284e-10,\n 2.9505e-11],\n [9.8892e-12, 1.1037e-11, 0.0000e+00, ..., 1.8213e-11, 5.6181e-11,\n 3.5712e-14],\n [1.3277e-11, 7.2124e-12, 0.0000e+00, ..., 1.2129e-11, 2.5914e-11,\n 2.0870e-12],\n ...,\n [2.2678e-11, 2.7961e-11, 0.0000e+00, ..., 3.4535e-11, 6.5349e-11,\n 2.0787e-11],\n [6.9048e-11, 2.1923e-11, 0.0000e+00, ..., 2.1793e-11, 1.3829e-10,\n 1.3165e-11],\n [3.3798e-13, 4.7331e-13, 0.0000e+00, ..., 5.1037e-12, 8.1343e-12,\n 1.6603e-12]], device='cuda:0')" }, "23": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.9474e-07, 2.2686e-08, 5.0107e-08, 3.7709e-08, 1.2858e-08, 6.8972e-09,\n 2.9668e-08, 1.7652e-11, 1.1830e-08, 2.0066e-09, 2.2276e-07, 4.4108e-08,\n 5.9344e-08, 3.8504e-08, 1.1268e-08, 2.7808e-08, 1.1653e-07, 7.6230e-09,\n 2.2063e-08, 2.3500e-07, 9.2765e-10, 8.9220e-08, 3.8838e-08, 2.2437e-08,\n 3.0026e-09, 9.3580e-08, 2.9560e-08, 4.6999e-09, 3.2863e-09, 2.6175e-08,\n 1.2929e-07, 2.6921e-08, 3.3025e-09, 9.4598e-07, 9.3441e-08, 6.5475e-08,\n 1.1213e-08, 8.1260e-13, 8.7371e-08, 4.4065e-07, 5.5418e-08, 3.3828e-08,\n 5.9799e-09, 3.4402e-08, 2.0088e-07, 1.4905e-07, 8.4285e-08, 4.9317e-08,\n 1.2425e-08, 2.1404e-07, 2.4001e-09, 3.8688e-07, 2.6816e-07, 6.1156e-08,\n 6.2057e-09, 1.1212e-09, 1.4712e-07, 4.6775e-09, 6.7174e-08, 1.3544e-08,\n 3.1611e-08, 3.6250e-08, 1.9285e-07, 1.0236e-07, 4.6707e-09, 1.2085e-08,\n 7.2131e-08, 1.2864e-08, 1.0100e-07, 2.4284e-09, 3.5674e-12, 4.1442e-08,\n 7.6216e-08, 1.3389e-09, 3.2324e-08, 3.9503e-09, 1.1202e-08, 2.1500e-07,\n 3.1608e-09, 1.3512e-09, 1.7273e-08, 1.5148e-07, 4.3203e-12, 1.1227e-08,\n 3.1457e-09, 1.1411e-07, 4.5921e-08, 2.5731e-08, 4.5399e-08, 6.6497e-07,\n 8.9078e-09, 1.4993e-07, 1.9121e-07, 2.8918e-08, 3.6283e-07, 2.7110e-08,\n 3.2101e-08, 2.0381e-07, 6.8492e-08, 7.9866e-08, 1.7742e-07, 9.9885e-09,\n 1.9240e-09, 7.1680e-08, 2.6564e-09, 3.8260e-07, 2.1553e-08, 4.3626e-08,\n 1.3326e-08, 6.1623e-11, 3.1288e-07, 2.2388e-08, 3.1074e-07, 9.4857e-08,\n 1.0378e-08, 1.7263e-08, 2.0567e-08, 1.7479e-08, 1.3188e-08, 3.2052e-08,\n 9.5942e-08, 5.6108e-08, 9.5329e-09, 7.6042e-12, 5.6424e-11, 3.4526e-08,\n 1.9210e-07, 1.2456e-07, 1.0079e-08, 1.6735e-07, 1.3131e-10, 8.2704e-07,\n 3.7626e-08, 6.9943e-08, 1.1999e-07, 3.2216e-07, 1.0221e-07, 3.2478e-07,\n 8.8593e-08, 4.0415e-11, 1.1589e-09, 6.0510e-09, 3.8893e-08, 3.5021e-07,\n 6.1064e-08, 1.6536e-08, 1.4633e-08, 5.6715e-09, 3.4812e-07, 7.4751e-09,\n 6.1049e-09, 2.9080e-08, 3.1776e-08, 1.2647e-07, 4.3114e-08, 5.7744e-09,\n 3.0440e-08, 3.2329e-07, 9.1400e-09, 4.4511e-07, 1.1313e-07, 1.6446e-07,\n 1.7169e-08, 1.4966e-07, 1.1448e-07, 9.4322e-08, 9.0880e-07, 1.2785e-07,\n 1.4061e-07, 1.8184e-08, 5.8004e-09, 2.4458e-08, 1.6856e-07, 5.1853e-08,\n 9.6708e-08, 1.0358e-09, 8.5195e-09, 5.6828e-09, 1.1536e-08, 4.0613e-08,\n 5.9660e-09, 3.2910e-08, 2.1719e-08, 2.9110e-08, 2.2458e-07, 7.6331e-10,\n 2.2748e-08, 6.9649e-08, 1.3376e-08, 3.6105e-07, 3.8068e-08, 5.4390e-08,\n 5.8740e-08, 4.6417e-09, 1.9747e-11, 1.0330e-06, 1.2280e-08, 1.2730e-08,\n 7.5995e-08, 1.9787e-07, 8.0226e-12, 5.6174e-08, 2.1287e-07, 2.5187e-09,\n 1.2794e-07, 4.2583e-07, 6.7104e-08, 1.3783e-08, 5.1484e-11, 5.8199e-07,\n 1.1964e-08, 1.9959e-07, 6.1330e-09, 2.7423e-08, 4.6404e-09, 5.5355e-08,\n 7.3181e-08, 2.5165e-10, 5.5686e-09, 7.2082e-09, 2.4073e-08, 9.3531e-08,\n 1.7328e-08, 3.3159e-07, 5.8447e-08, 2.3877e-09, 7.3785e-10, 3.5639e-09,\n 1.3246e-08, 3.0352e-08, 9.2102e-10, 3.2344e-09, 5.2881e-08, 4.2824e-08,\n 1.7203e-07, 5.3898e-08, 1.2598e-08, 9.6070e-08, 1.4370e-08, 2.5205e-08,\n 9.2141e-08, 1.9140e-10, 1.2050e-07, 1.4951e-07, 5.1921e-08, 1.1809e-10,\n 4.1236e-12, 5.1520e-07, 2.4874e-09, 1.8933e-07, 6.4955e-08, 1.1170e-08,\n 8.5287e-08, 8.9230e-08, 2.0711e-07, 5.8407e-09], device='cuda:0')" + "exp_avg_sq": "tensor([8.4225e-08, 6.4827e-09, 1.4318e-08, 1.0776e-08, 3.6744e-09, 1.9709e-09,\n 8.4779e-09, 5.0441e-12, 3.3806e-09, 5.7340e-10, 6.3655e-08, 1.2604e-08,\n 1.6958e-08, 1.1003e-08, 3.2199e-09, 7.9464e-09, 3.3301e-08, 2.1783e-09,\n 6.3045e-09, 6.7154e-08, 2.6508e-10, 2.5495e-08, 1.1098e-08, 6.4117e-09,\n 8.5802e-10, 2.6741e-08, 8.4472e-09, 1.3430e-09, 9.3909e-10, 7.4798e-09,\n 3.6947e-08, 7.6929e-09, 9.4371e-10, 2.7032e-07, 2.6701e-08, 1.8710e-08,\n 3.2041e-09, 2.3221e-13, 2.4967e-08, 1.2592e-07, 1.5836e-08, 9.6666e-09,\n 1.7088e-09, 9.8305e-09, 5.7402e-08, 4.2591e-08, 2.4085e-08, 1.4093e-08,\n 3.5506e-09, 6.1164e-08, 6.8584e-10, 1.1055e-07, 7.6628e-08, 1.7476e-08,\n 1.7733e-09, 3.2039e-10, 4.2040e-08, 1.3366e-09, 1.9196e-08, 3.8704e-09,\n 9.0331e-09, 1.0359e-08, 5.5109e-08, 2.9251e-08, 1.3347e-09, 3.4533e-09,\n 2.0612e-08, 3.6759e-09, 2.8863e-08, 6.9394e-10, 1.0194e-12, 1.1842e-08,\n 2.1779e-08, 3.8259e-10, 9.2368e-09, 1.1288e-09, 3.2011e-09, 6.1437e-08,\n 9.0324e-10, 3.8610e-10, 4.9360e-09, 4.3288e-08, 1.2346e-12, 3.2082e-09,\n 8.9891e-10, 3.2609e-08, 1.3122e-08, 7.3530e-09, 1.2973e-08, 1.9002e-07,\n 2.5455e-09, 4.2844e-08, 5.4640e-08, 8.2637e-09, 1.0368e-07, 7.7469e-09,\n 9.1732e-09, 5.8241e-08, 1.9572e-08, 2.2822e-08, 5.0699e-08, 2.8543e-09,\n 5.4980e-10, 2.0483e-08, 7.5908e-10, 1.0933e-07, 6.1589e-09, 1.2466e-08,\n 3.8080e-09, 1.7609e-11, 8.9409e-08, 6.3975e-09, 8.8797e-08, 2.7106e-08,\n 2.9657e-09, 4.9330e-09, 5.8773e-09, 4.9946e-09, 3.7687e-09, 9.1593e-09,\n 2.7416e-08, 1.6033e-08, 2.7241e-09, 2.1730e-12, 1.6124e-11, 9.8662e-09,\n 5.4893e-08, 3.5593e-08, 2.8801e-09, 4.7822e-08, 3.7524e-11, 2.3633e-07,\n 1.0752e-08, 1.9987e-08, 3.4288e-08, 9.2061e-08, 2.9207e-08, 9.2808e-08,\n 2.5316e-08, 1.1549e-11, 3.3116e-10, 1.7291e-09, 1.1114e-08, 1.0007e-07,\n 1.7450e-08, 4.7252e-09, 4.1815e-09, 1.6207e-09, 9.9479e-08, 2.1361e-09,\n 1.7445e-09, 8.3099e-09, 9.0801e-09, 3.6140e-08, 1.2320e-08, 1.6501e-09,\n 8.6985e-09, 9.2382e-08, 2.6118e-09, 1.2719e-07, 3.2328e-08, 4.6996e-08,\n 4.9061e-09, 4.2766e-08, 3.2712e-08, 2.6953e-08, 2.5970e-07, 3.6534e-08,\n 4.0179e-08, 5.1962e-09, 1.6575e-09, 6.9891e-09, 4.8166e-08, 1.4817e-08,\n 2.7635e-08, 2.9599e-10, 2.4345e-09, 1.6239e-09, 3.2966e-09, 1.1606e-08,\n 1.7048e-09, 9.4042e-09, 6.2063e-09, 8.3183e-09, 6.4176e-08, 2.1812e-10,\n 6.5004e-09, 1.9903e-08, 3.8224e-09, 1.0317e-07, 1.0878e-08, 1.5542e-08,\n 1.6785e-08, 1.3264e-09, 5.6429e-12, 2.9518e-07, 3.5091e-09, 3.6378e-09,\n 2.1716e-08, 5.6544e-08, 2.2925e-12, 1.6052e-08, 6.0830e-08, 7.1972e-10,\n 3.6561e-08, 1.2169e-07, 1.9176e-08, 3.9387e-09, 1.4712e-11, 1.6631e-07,\n 3.4187e-09, 5.7036e-08, 1.7526e-09, 7.8365e-09, 1.3260e-09, 1.5818e-08,\n 2.0912e-08, 7.1910e-11, 1.5913e-09, 2.0598e-09, 6.8790e-09, 2.6727e-08,\n 4.9516e-09, 9.4755e-08, 1.6702e-08, 6.8230e-10, 2.1085e-10, 1.0184e-09,\n 3.7851e-09, 8.6734e-09, 2.6319e-10, 9.2426e-10, 1.5111e-08, 1.2237e-08,\n 4.9158e-08, 1.5402e-08, 3.6000e-09, 2.7453e-08, 4.1064e-09, 7.2027e-09,\n 2.6330e-08, 5.4694e-11, 3.4435e-08, 4.2725e-08, 1.4837e-08, 3.3744e-11,\n 1.1783e-12, 1.4722e-07, 7.1078e-10, 5.4102e-08, 1.8561e-08, 3.1920e-09,\n 2.4371e-08, 2.5498e-08, 5.9184e-08, 1.6690e-09], device='cuda:0')" }, "24": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([8.2612e-10, 6.9797e-11, 7.9150e-11, 1.7127e-10, 2.0033e-11, 2.7050e-11,\n 6.9550e-11, 2.3277e-13, 1.4232e-11, 1.3653e-12, 7.9440e-10, 1.0835e-10,\n 1.2343e-10, 9.1859e-11, 3.1564e-11, 7.5724e-11, 2.8067e-10, 1.5511e-11,\n 4.8410e-11, 5.2248e-10, 1.1799e-14, 3.0615e-10, 1.1892e-10, 6.1920e-11,\n 1.2655e-11, 1.7204e-10, 6.5976e-11, 1.6421e-11, 1.2893e-11, 6.4635e-11,\n 4.9488e-10, 4.9684e-11, 8.1668e-12, 3.7454e-09, 2.1519e-10, 1.3792e-10,\n 2.7260e-11, 2.3898e-13, 2.4058e-10, 9.7890e-10, 1.0745e-10, 1.1315e-10,\n 1.3654e-11, 2.8474e-10, 5.9959e-10, 2.9310e-10, 3.8357e-10, 1.2607e-10,\n 1.9827e-11, 8.2034e-10, 2.1097e-12, 1.1653e-09, 6.6732e-10, 1.0390e-10,\n 5.3012e-12, 3.7413e-12, 4.8290e-10, 3.0252e-12, 1.8740e-10, 3.3773e-11,\n 7.0932e-11, 1.0439e-10, 8.3128e-10, 2.8110e-10, 3.3044e-12, 1.6952e-11,\n 1.9237e-10, 1.4188e-11, 2.0806e-10, 1.2100e-11, 4.2280e-12, 8.3802e-11,\n 1.5542e-10, 7.9986e-12, 5.7957e-11, 2.7080e-11, 3.5834e-11, 5.5401e-10,\n 5.0666e-12, 1.4397e-12, 4.1067e-11, 3.9865e-10, 3.8381e-13, 8.9683e-11,\n 1.4516e-11, 2.7733e-10, 2.0637e-10, 5.8891e-11, 8.2275e-11, 2.3611e-09,\n 2.4414e-11, 3.5772e-10, 6.6908e-10, 4.2858e-11, 2.3372e-09, 5.2052e-11,\n 4.1421e-11, 4.0951e-10, 1.7816e-10, 2.0543e-10, 7.5159e-10, 7.5319e-11,\n 9.6491e-12, 1.8628e-10, 1.1114e-12, 7.1087e-10, 4.2132e-11, 1.0953e-10,\n 2.0889e-11, 2.6540e-12, 6.8090e-10, 8.5004e-11, 1.9432e-09, 1.9374e-10,\n 9.0086e-12, 3.0378e-11, 5.7022e-11, 2.6548e-11, 4.7555e-11, 1.0733e-10,\n 2.7553e-10, 1.4278e-10, 1.0633e-11, 3.3883e-14, 1.3292e-12, 5.7899e-11,\n 4.2269e-10, 7.3981e-10, 2.2493e-11, 5.6314e-10, 1.6733e-13, 2.6796e-09,\n 9.3895e-11, 1.6311e-10, 1.7799e-10, 1.1703e-09, 2.3199e-10, 9.4290e-10,\n 2.1100e-10, 3.0059e-13, 1.1897e-11, 2.2251e-11, 6.5992e-11, 1.0933e-09,\n 1.2879e-10, 5.6042e-11, 4.5457e-11, 1.6160e-11, 1.2133e-09, 1.0681e-11,\n 2.6389e-11, 4.8747e-11, 9.2612e-11, 4.3438e-10, 7.1270e-11, 2.8235e-11,\n 4.5153e-11, 9.6071e-10, 2.0510e-11, 1.3653e-09, 2.3767e-10, 6.3244e-10,\n 2.4106e-11, 5.0682e-10, 6.9612e-10, 2.1103e-10, 3.1157e-09, 2.4233e-10,\n 6.0842e-10, 6.7829e-11, 4.3028e-12, 3.5035e-11, 3.1766e-10, 1.5817e-10,\n 1.7841e-10, 3.4103e-12, 9.7969e-12, 1.3504e-11, 2.0562e-11, 7.0302e-11,\n 1.0836e-11, 7.6948e-11, 3.6773e-11, 5.0001e-11, 4.8102e-10, 4.9036e-15,\n 4.1201e-11, 1.6944e-10, 1.7651e-11, 9.8086e-10, 4.7578e-11, 1.4045e-10,\n 2.6945e-10, 1.6784e-11, 4.8811e-13, 4.0792e-09, 2.2470e-11, 1.9693e-11,\n 3.0242e-10, 2.8434e-10, 1.3406e-12, 9.5291e-11, 7.2720e-10, 2.9892e-12,\n 3.9213e-10, 7.9938e-10, 1.1182e-10, 2.1689e-11, 1.6768e-14, 1.8527e-09,\n 2.4983e-11, 4.4739e-10, 2.9107e-11, 6.0082e-11, 5.4002e-12, 1.0642e-10,\n 1.3709e-10, 1.0233e-13, 1.7110e-11, 8.8138e-12, 5.8607e-11, 2.6989e-10,\n 7.9037e-11, 1.0662e-09, 2.0298e-10, 9.3388e-12, 3.3574e-13, 3.7366e-11,\n 2.0928e-11, 8.7254e-11, 4.7843e-12, 5.2616e-12, 9.0630e-11, 1.7217e-10,\n 4.3375e-10, 1.7589e-10, 1.9741e-11, 2.1085e-10, 4.4226e-11, 3.7795e-11,\n 2.3403e-10, 8.1175e-14, 1.0188e-09, 3.6493e-10, 8.5008e-11, 8.0401e-14,\n 1.4643e-12, 2.4305e-09, 2.6274e-12, 5.8661e-10, 1.2394e-10, 1.6736e-11,\n 1.9532e-10, 1.8500e-10, 4.5163e-10, 2.1905e-11], device='cuda:0')" + "exp_avg_sq": "tensor([2.3607e-10, 1.9945e-11, 2.2618e-11, 4.8943e-11, 5.7245e-12, 7.7296e-12,\n 1.9874e-11, 6.6517e-14, 4.0669e-12, 3.9016e-13, 2.2701e-10, 3.0962e-11,\n 3.5272e-11, 2.6249e-11, 9.0196e-12, 2.1639e-11, 8.0203e-11, 4.4325e-12,\n 1.3834e-11, 1.4930e-10, 3.3716e-15, 8.7484e-11, 3.3982e-11, 1.7694e-11,\n 3.6163e-12, 4.9162e-11, 1.8853e-11, 4.6925e-12, 3.6842e-12, 1.8470e-11,\n 1.4142e-10, 1.4198e-11, 2.3337e-12, 1.0703e-09, 6.1493e-11, 3.9413e-11,\n 7.7897e-12, 6.8290e-14, 6.8748e-11, 2.7973e-10, 3.0704e-11, 3.2334e-11,\n 3.9016e-12, 8.1365e-11, 1.7134e-10, 8.3755e-11, 1.0961e-10, 3.6025e-11,\n 5.6657e-12, 2.3442e-10, 6.0286e-13, 3.3300e-10, 1.9069e-10, 2.9691e-11,\n 1.5149e-12, 1.0691e-12, 1.3799e-10, 8.6448e-13, 5.3552e-11, 9.6509e-12,\n 2.0269e-11, 2.9830e-11, 2.3755e-10, 8.0327e-11, 9.4425e-13, 4.8443e-12,\n 5.4972e-11, 4.0544e-12, 5.9456e-11, 3.4577e-12, 1.2082e-12, 2.3947e-11,\n 4.4412e-11, 2.2857e-12, 1.6562e-11, 7.7383e-12, 1.0240e-11, 1.5831e-10,\n 1.4478e-12, 4.1140e-13, 1.1735e-11, 1.1392e-10, 1.0968e-13, 2.5628e-11,\n 4.1481e-12, 7.9250e-11, 5.8972e-11, 1.6828e-11, 2.3511e-11, 6.7470e-10,\n 6.9766e-12, 1.0222e-10, 1.9119e-10, 1.2247e-11, 6.6787e-10, 1.4874e-11,\n 1.1836e-11, 1.1702e-10, 5.0910e-11, 5.8704e-11, 2.1477e-10, 2.1523e-11,\n 2.7573e-12, 5.3230e-11, 3.1759e-13, 2.0314e-10, 1.2040e-11, 3.1298e-11,\n 5.9693e-12, 7.5840e-13, 1.9457e-10, 2.4290e-11, 5.5529e-10, 5.5362e-11,\n 2.5743e-12, 8.6809e-12, 1.6295e-11, 7.5862e-12, 1.3589e-11, 3.0671e-11,\n 7.8734e-11, 4.0800e-11, 3.0384e-12, 9.6824e-15, 3.7982e-13, 1.6545e-11,\n 1.2079e-10, 2.1141e-10, 6.4276e-12, 1.6092e-10, 4.7817e-14, 7.6573e-10,\n 2.6831e-11, 4.6611e-11, 5.0863e-11, 3.3441e-10, 6.6292e-11, 2.6944e-10,\n 6.0296e-11, 8.5896e-14, 3.3998e-12, 6.3583e-12, 1.8858e-11, 3.1241e-10,\n 3.6802e-11, 1.6015e-11, 1.2990e-11, 4.6177e-12, 3.4670e-10, 3.0521e-12,\n 7.5410e-12, 1.3930e-11, 2.6464e-11, 1.2413e-10, 2.0366e-11, 8.0685e-12,\n 1.2903e-11, 2.7453e-10, 5.8608e-12, 3.9014e-10, 6.7917e-11, 1.8072e-10,\n 6.8885e-12, 1.4483e-10, 1.9892e-10, 6.0304e-11, 8.9032e-10, 6.9247e-11,\n 1.7386e-10, 1.9383e-11, 1.2296e-12, 1.0011e-11, 9.0774e-11, 4.5199e-11,\n 5.0982e-11, 9.7453e-13, 2.7995e-12, 3.8589e-12, 5.8758e-12, 2.0089e-11,\n 3.0966e-12, 2.1989e-11, 1.0508e-11, 1.4288e-11, 1.3746e-10, 1.4013e-15,\n 1.1774e-11, 4.8419e-11, 5.0439e-12, 2.8029e-10, 1.3596e-11, 4.0133e-11,\n 7.6996e-11, 4.7961e-12, 1.3948e-13, 1.1657e-09, 6.4210e-12, 5.6273e-12,\n 8.6420e-11, 8.1251e-11, 3.8309e-13, 2.7230e-11, 2.0780e-10, 8.5418e-13,\n 1.1205e-10, 2.2843e-10, 3.1953e-11, 6.1977e-12, 4.7916e-15, 5.2943e-10,\n 7.1391e-12, 1.2784e-10, 8.3176e-12, 1.7169e-11, 1.5431e-12, 3.0409e-11,\n 3.9176e-11, 2.9241e-14, 4.8892e-12, 2.5186e-12, 1.6747e-11, 7.7124e-11,\n 2.2586e-11, 3.0467e-10, 5.8003e-11, 2.6686e-12, 9.5941e-14, 1.0678e-11,\n 5.9802e-12, 2.4934e-11, 1.3672e-12, 1.5035e-12, 2.5898e-11, 4.9198e-11,\n 1.2395e-10, 5.0263e-11, 5.6412e-12, 6.0251e-11, 1.2638e-11, 1.0800e-11,\n 6.6876e-11, 2.3196e-14, 2.9113e-10, 1.0428e-10, 2.4292e-11, 2.2975e-14,\n 4.1844e-13, 6.9453e-10, 7.5081e-13, 1.6763e-10, 3.5416e-11, 4.7824e-12,\n 5.5815e-11, 5.2864e-11, 1.2906e-10, 6.2596e-12], device='cuda:0')" }, "25": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.1991e-09, 1.0572e-10, 1.4266e-10, 1.9348e-10, 3.0017e-11, 4.2972e-11,\n 8.7004e-11, 2.7324e-13, 2.5743e-11, 2.3797e-12, 7.1062e-10, 1.5985e-10,\n 1.7857e-10, 1.7840e-10, 5.7107e-11, 1.3907e-10, 5.0809e-10, 2.1596e-11,\n 7.0621e-11, 1.0157e-09, 1.8527e-13, 4.0668e-10, 1.9365e-10, 1.0496e-10,\n 1.5933e-11, 3.0554e-10, 1.4021e-10, 2.8844e-11, 2.3078e-11, 7.7503e-11,\n 5.8586e-10, 1.1981e-10, 2.0451e-11, 3.3347e-09, 4.0876e-10, 2.8497e-10,\n 2.1204e-11, 1.8071e-13, 3.9220e-10, 1.5491e-09, 1.9407e-10, 1.6347e-10,\n 3.4039e-11, 1.8930e-10, 6.5687e-10, 5.2109e-10, 3.8737e-10, 2.3811e-10,\n 3.2190e-11, 6.8966e-10, 2.1360e-12, 1.5798e-09, 8.7115e-10, 1.9459e-10,\n 1.1192e-11, 9.1193e-12, 6.5612e-10, 8.3707e-12, 2.8860e-10, 7.5488e-11,\n 8.1449e-11, 1.7006e-10, 8.2869e-10, 4.4465e-10, 5.7348e-12, 3.2166e-11,\n 3.2065e-10, 2.9924e-11, 4.2928e-10, 1.4945e-11, 3.4936e-12, 1.9772e-10,\n 2.4509e-10, 8.4458e-12, 9.2488e-11, 3.0541e-11, 6.5815e-11, 8.9119e-10,\n 8.3103e-12, 1.2761e-12, 9.1461e-11, 6.7872e-10, 1.0244e-12, 7.5936e-11,\n 1.9041e-11, 4.8538e-10, 2.3060e-10, 1.1634e-10, 1.4143e-10, 2.6723e-09,\n 4.4000e-11, 6.4974e-10, 6.0892e-10, 8.9904e-11, 1.5549e-09, 8.3573e-11,\n 1.0139e-10, 7.2037e-10, 3.0528e-10, 3.7339e-10, 7.6367e-10, 6.7124e-11,\n 1.8289e-11, 2.0840e-10, 3.2203e-12, 1.3071e-09, 5.2690e-11, 2.1118e-10,\n 3.8233e-11, 4.7360e-12, 1.0743e-09, 1.1504e-10, 1.3261e-09, 3.2677e-10,\n 2.7414e-11, 5.6093e-11, 1.0599e-10, 5.2924e-11, 6.4400e-11, 1.6574e-10,\n 2.8372e-10, 1.7094e-10, 2.1399e-11, 2.2187e-14, 1.4066e-12, 1.0553e-10,\n 8.1893e-10, 5.6675e-10, 5.0968e-11, 5.0969e-10, 2.2208e-13, 3.3535e-09,\n 1.7052e-10, 3.0045e-10, 3.8258e-10, 1.0793e-09, 4.5523e-10, 1.1130e-09,\n 3.8140e-10, 6.5415e-13, 1.5090e-11, 4.0560e-11, 1.2106e-10, 1.1785e-09,\n 2.5912e-10, 8.9330e-11, 7.3604e-11, 2.9705e-11, 1.1801e-09, 1.2077e-11,\n 4.2920e-11, 9.6323e-11, 1.5386e-10, 3.7455e-10, 1.2430e-10, 4.1111e-11,\n 8.1850e-11, 1.0952e-09, 2.7214e-11, 1.8147e-09, 3.7600e-10, 4.9534e-10,\n 4.0664e-11, 6.6865e-10, 5.3135e-10, 2.9634e-10, 3.1721e-09, 4.3411e-10,\n 5.9806e-10, 8.8899e-11, 1.1098e-11, 6.9434e-11, 5.8382e-10, 2.4338e-10,\n 3.1419e-10, 8.0057e-12, 1.8930e-11, 2.7636e-11, 2.6618e-11, 1.8629e-10,\n 1.5895e-11, 8.1691e-11, 5.9509e-11, 9.0105e-11, 9.3842e-10, 8.2597e-14,\n 7.3887e-11, 2.0644e-10, 3.5127e-11, 1.5004e-09, 1.2786e-10, 1.5815e-10,\n 2.7894e-10, 2.6667e-11, 1.2857e-12, 4.1442e-09, 2.9291e-11, 3.3736e-11,\n 3.5331e-10, 6.7891e-10, 3.2436e-12, 2.4949e-10, 6.6971e-10, 5.6499e-12,\n 5.2977e-10, 1.4950e-09, 2.2074e-10, 3.5542e-11, 2.9742e-14, 2.0979e-09,\n 2.3286e-11, 8.3823e-10, 4.3327e-11, 1.3367e-10, 7.2954e-12, 1.8562e-10,\n 2.4408e-10, 1.0046e-12, 3.3909e-11, 1.6341e-11, 4.8991e-11, 4.0902e-10,\n 9.2506e-11, 1.3889e-09, 2.7647e-10, 1.9190e-11, 2.6634e-13, 3.2972e-11,\n 2.7787e-11, 1.4231e-10, 1.0226e-11, 5.6675e-12, 2.3780e-10, 2.1820e-10,\n 7.3911e-10, 2.5406e-10, 2.9269e-11, 4.0628e-10, 8.3612e-11, 7.1657e-11,\n 4.0865e-10, 1.7812e-13, 5.6721e-10, 4.9729e-10, 1.5191e-10, 6.0491e-13,\n 3.0285e-12, 1.7531e-09, 4.2234e-12, 7.6834e-10, 1.9844e-10, 2.5766e-11,\n 2.6376e-10, 2.6655e-10, 6.7354e-10, 3.9561e-11], device='cuda:0')" + "exp_avg_sq": "tensor([3.4265e-10, 3.0212e-11, 4.0765e-11, 5.5289e-11, 8.5776e-12, 1.2280e-11,\n 2.4862e-11, 7.8080e-14, 7.3564e-12, 6.8002e-13, 2.0306e-10, 4.5677e-11,\n 5.1027e-11, 5.0980e-11, 1.6319e-11, 3.9740e-11, 1.4519e-10, 6.1712e-12,\n 2.0181e-11, 2.9025e-10, 5.2942e-14, 1.1621e-10, 5.5338e-11, 2.9994e-11,\n 4.5531e-12, 8.7310e-11, 4.0065e-11, 8.2425e-12, 6.5946e-12, 2.2147e-11,\n 1.6741e-10, 3.4238e-11, 5.8441e-12, 9.5292e-10, 1.1681e-10, 8.1433e-11,\n 6.0592e-12, 5.1639e-14, 1.1207e-10, 4.4267e-10, 5.5458e-11, 4.6713e-11,\n 9.7270e-12, 5.4094e-11, 1.8771e-10, 1.4890e-10, 1.1069e-10, 6.8041e-11,\n 9.1984e-12, 1.9708e-10, 6.1039e-13, 4.5144e-10, 2.4894e-10, 5.5605e-11,\n 3.1983e-12, 2.6059e-12, 1.8749e-10, 2.3920e-12, 8.2471e-11, 2.1571e-11,\n 2.3275e-11, 4.8595e-11, 2.3680e-10, 1.2706e-10, 1.6388e-12, 9.1917e-12,\n 9.1630e-11, 8.5510e-12, 1.2267e-10, 4.2707e-12, 9.9833e-13, 5.6501e-11,\n 7.0037e-11, 2.4135e-12, 2.6429e-11, 8.7272e-12, 1.8807e-11, 2.5466e-10,\n 2.3747e-12, 3.6465e-13, 2.6136e-11, 1.9395e-10, 2.9274e-13, 2.1699e-11,\n 5.4411e-12, 1.3870e-10, 6.5897e-11, 3.3245e-11, 4.0413e-11, 7.6364e-10,\n 1.2573e-11, 1.8567e-10, 1.7401e-10, 2.5691e-11, 4.4432e-10, 2.3881e-11,\n 2.8973e-11, 2.0585e-10, 8.7235e-11, 1.0670e-10, 2.1822e-10, 1.9181e-11,\n 5.2261e-12, 5.9551e-11, 9.2023e-13, 3.7351e-10, 1.5057e-11, 6.0347e-11,\n 1.0925e-11, 1.3533e-12, 3.0698e-10, 3.2875e-11, 3.7894e-10, 9.3377e-11,\n 7.8337e-12, 1.6029e-11, 3.0287e-11, 1.5123e-11, 1.8403e-11, 4.7361e-11,\n 8.1076e-11, 4.8846e-11, 6.1148e-12, 6.3400e-15, 4.0194e-13, 3.0156e-11,\n 2.3402e-10, 1.6195e-10, 1.4564e-11, 1.4565e-10, 6.3462e-14, 9.5830e-10,\n 4.8729e-11, 8.5856e-11, 1.0933e-10, 3.0841e-10, 1.3009e-10, 3.1804e-10,\n 1.0899e-10, 1.8693e-13, 4.3121e-12, 1.1590e-11, 3.4595e-11, 3.3676e-10,\n 7.4044e-11, 2.5527e-11, 2.1033e-11, 8.4884e-12, 3.3721e-10, 3.4511e-12,\n 1.2265e-11, 2.7525e-11, 4.3966e-11, 1.0703e-10, 3.5519e-11, 1.1748e-11,\n 2.3389e-11, 3.1297e-10, 7.7766e-12, 5.1856e-10, 1.0745e-10, 1.4155e-10,\n 1.1620e-11, 1.9107e-10, 1.5184e-10, 8.4683e-11, 9.0645e-10, 1.2405e-10,\n 1.7090e-10, 2.5404e-11, 3.1714e-12, 1.9841e-11, 1.6683e-10, 6.9547e-11,\n 8.9782e-11, 2.2877e-12, 5.4094e-12, 7.8971e-12, 7.6062e-12, 5.3234e-11,\n 4.5422e-12, 2.3344e-11, 1.7005e-11, 2.5748e-11, 2.6816e-10, 2.3603e-14,\n 2.1114e-11, 5.8993e-11, 1.0038e-11, 4.2875e-10, 3.6536e-11, 4.5192e-11,\n 7.9710e-11, 7.6203e-12, 3.6740e-13, 1.1842e-09, 8.3702e-12, 9.6403e-12,\n 1.0096e-10, 1.9400e-10, 9.2688e-13, 7.1295e-11, 1.9138e-10, 1.6145e-12,\n 1.5139e-10, 4.2722e-10, 6.3078e-11, 1.0156e-11, 8.4990e-15, 5.9950e-10,\n 6.6542e-12, 2.3953e-10, 1.2381e-11, 3.8197e-11, 2.0847e-12, 5.3041e-11,\n 6.9748e-11, 2.8708e-13, 9.6897e-12, 4.6694e-12, 1.3999e-11, 1.1688e-10,\n 2.6434e-11, 3.9690e-10, 7.9003e-11, 5.4838e-12, 7.6108e-14, 9.4221e-12,\n 7.9402e-12, 4.0666e-11, 2.9221e-12, 1.6195e-12, 6.7952e-11, 6.2351e-11,\n 2.1121e-10, 7.2599e-11, 8.3638e-12, 1.1610e-10, 2.3893e-11, 2.0477e-11,\n 1.1678e-10, 5.0899e-14, 1.6209e-10, 1.4210e-10, 4.3411e-11, 1.7286e-13,\n 8.6543e-13, 5.0095e-10, 1.2069e-12, 2.1956e-10, 5.6707e-11, 7.3629e-12,\n 7.5372e-11, 7.6170e-11, 1.9247e-10, 1.1305e-11], device='cuda:0')" }, "26": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.7820e-11, 7.9925e-11, 0.0000e+00, ..., 1.6917e-10, 2.7280e-10,\n 3.3323e-11],\n [5.3454e-12, 7.4315e-12, 0.0000e+00, ..., 2.0769e-13, 6.6953e-12,\n 2.1381e-13],\n [4.3205e-11, 3.3764e-11, 0.0000e+00, ..., 3.4753e-11, 3.7534e-11,\n 2.4413e-11],\n ...,\n [2.1515e-11, 1.7681e-11, 0.0000e+00, ..., 1.2472e-11, 1.2181e-10,\n 3.5156e-12],\n [1.2626e-10, 3.9072e-11, 0.0000e+00, ..., 3.3672e-11, 2.7596e-10,\n 6.1115e-11],\n [2.5532e-11, 4.4888e-12, 0.0000e+00, ..., 6.1932e-12, 5.3201e-11,\n 5.5933e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.6523e-11, 2.2839e-11, 0.0000e+00, ..., 4.8343e-11, 7.7953e-11,\n 9.5223e-12],\n [1.5275e-12, 2.1236e-12, 0.0000e+00, ..., 5.9349e-14, 1.9132e-12,\n 6.1098e-14],\n [1.2346e-11, 9.6482e-12, 0.0000e+00, ..., 9.9309e-12, 1.0726e-11,\n 6.9763e-12],\n ...,\n [6.1480e-12, 5.0524e-12, 0.0000e+00, ..., 3.5641e-12, 3.4809e-11,\n 1.0046e-12],\n [3.6078e-11, 1.1165e-11, 0.0000e+00, ..., 9.6220e-12, 7.8859e-11,\n 1.7464e-11],\n [7.2961e-12, 1.2827e-12, 0.0000e+00, ..., 1.7698e-12, 1.5203e-11,\n 1.5983e-13]], device='cuda:0')" }, "27": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.7991e-08, 4.3947e-09, 3.4882e-08, 4.5325e-08, 1.3802e-08, 3.4790e-09,\n 4.5782e-08, 1.1142e-09, 2.0546e-08, 3.1357e-09, 1.0417e-07, 3.6052e-07,\n 6.1376e-08, 7.2178e-08, 7.5306e-08, 3.1171e-07, 3.4935e-08, 3.4634e-08,\n 1.1690e-07, 1.3592e-07, 4.3586e-10, 1.4603e-08, 2.9453e-08, 3.6517e-08,\n 3.3821e-08, 6.2698e-08, 4.9844e-08, 9.7901e-09, 1.4306e-09, 1.6295e-08,\n 8.6594e-08, 5.7211e-08, 8.9708e-09, 4.4002e-07, 1.5938e-07, 9.7684e-08,\n 2.9059e-09, 5.5076e-11, 4.0976e-08, 1.3232e-06, 1.9739e-07, 2.7751e-08,\n 8.6989e-09, 5.4445e-09, 2.2821e-07, 4.0188e-07, 6.2600e-08, 1.3868e-07,\n 1.7277e-08, 1.1102e-07, 2.1410e-09, 2.3636e-07, 6.0264e-08, 5.4292e-08,\n 8.6282e-09, 7.5263e-10, 1.0292e-07, 3.8194e-09, 6.8404e-08, 5.9862e-09,\n 3.5152e-08, 6.7296e-08, 1.6965e-07, 9.0413e-08, 2.5500e-09, 6.3095e-09,\n 6.7832e-08, 4.4160e-09, 4.3013e-08, 8.1728e-09, 1.4921e-10, 4.1684e-08,\n 1.7618e-07, 1.2940e-08, 3.5669e-08, 2.3844e-10, 2.3164e-09, 1.4390e-07,\n 9.2864e-09, 3.0665e-12, 8.5176e-08, 1.0251e-07, 1.4679e-10, 8.7012e-09,\n 6.0731e-08, 2.5940e-07, 2.0852e-08, 8.9567e-08, 1.7743e-08, 1.4795e-07,\n 1.0478e-07, 6.4446e-08, 1.6381e-07, 2.9888e-08, 5.2780e-08, 8.0896e-09,\n 2.7517e-08, 1.8839e-07, 4.5710e-08, 3.8046e-07, 3.3703e-07, 8.2463e-09,\n 1.4658e-10, 4.5265e-08, 2.5439e-09, 2.9846e-07, 2.3920e-08, 2.1846e-08,\n 6.5094e-08, 4.0592e-10, 3.2265e-08, 2.1055e-10, 2.1029e-07, 2.0599e-07,\n 1.6238e-08, 1.8007e-07, 1.5376e-08, 2.9369e-08, 4.3512e-08, 4.4692e-08,\n 1.5575e-08, 3.5438e-09, 6.3904e-09, 8.4773e-12, 8.3041e-10, 3.9997e-08,\n 3.3991e-07, 5.6140e-08, 1.4196e-07, 6.4549e-08, 3.5647e-10, 4.0962e-07,\n 6.6040e-08, 6.1042e-08, 1.4907e-07, 8.6978e-08, 9.6932e-08, 1.3007e-07,\n 8.8351e-08, 3.4040e-10, 2.5783e-09, 4.7506e-09, 3.4979e-08, 4.4511e-07,\n 8.5044e-08, 5.1952e-09, 8.0203e-08, 9.6887e-09, 8.1886e-08, 2.0256e-09,\n 3.9463e-09, 1.3876e-07, 1.0123e-07, 1.2668e-07, 2.6900e-08, 4.5351e-09,\n 8.0664e-08, 2.3880e-07, 4.1894e-08, 5.2787e-08, 6.5218e-08, 7.4877e-08,\n 4.0651e-08, 5.4278e-08, 5.0819e-08, 4.7833e-08, 3.4336e-07, 2.6446e-07,\n 3.8604e-08, 1.4565e-07, 1.4501e-08, 1.0464e-08, 2.1611e-07, 2.0561e-08,\n 1.5434e-07, 1.1480e-08, 4.4169e-09, 1.3936e-08, 1.4903e-08, 1.6571e-08,\n 1.9533e-08, 1.6813e-08, 6.1242e-09, 4.3627e-08, 6.7132e-07, 1.0255e-10,\n 3.3282e-08, 2.7908e-08, 1.9702e-08, 2.0135e-07, 5.9192e-08, 7.2591e-09,\n 2.9935e-08, 4.1273e-08, 3.1348e-10, 3.6569e-07, 1.9242e-08, 2.9304e-09,\n 9.1396e-08, 4.0555e-07, 3.3960e-13, 3.9658e-08, 1.9684e-07, 4.3807e-08,\n 3.6580e-08, 2.8559e-07, 7.0631e-08, 1.5217e-08, 1.5168e-11, 3.4251e-07,\n 6.9574e-09, 3.4320e-07, 2.1086e-10, 7.6103e-08, 1.0923e-09, 9.1440e-08,\n 1.2871e-07, 2.1880e-12, 1.0881e-08, 1.5129e-08, 1.2879e-08, 4.1884e-07,\n 2.2143e-08, 8.7854e-08, 1.9780e-08, 8.1388e-10, 4.5310e-10, 1.6142e-09,\n 1.4799e-08, 2.9487e-08, 2.4721e-09, 3.6273e-10, 1.5429e-07, 1.1012e-08,\n 8.0952e-09, 5.6452e-08, 4.8020e-08, 7.0656e-08, 2.9222e-08, 2.2674e-08,\n 7.6623e-09, 1.7357e-11, 5.8176e-08, 3.7723e-08, 1.6877e-08, 5.3911e-10,\n 3.4832e-10, 2.8819e-07, 1.3390e-08, 3.3227e-07, 1.0563e-07, 1.1530e-08,\n 1.5859e-07, 4.4823e-08, 9.3635e-08, 1.7796e-08], device='cuda:0')" + "exp_avg_sq": "tensor([2.8002e-08, 1.2558e-09, 9.9677e-09, 1.2952e-08, 3.9440e-09, 9.9414e-10,\n 1.3083e-08, 3.1839e-10, 5.8711e-09, 8.9604e-10, 2.9766e-08, 1.0302e-07,\n 1.7539e-08, 2.0625e-08, 2.1519e-08, 8.9075e-08, 9.9828e-09, 9.8969e-09,\n 3.3406e-08, 3.8840e-08, 1.2455e-10, 4.1729e-09, 8.4165e-09, 1.0435e-08,\n 9.6646e-09, 1.7917e-08, 1.4243e-08, 2.7976e-09, 4.0881e-10, 4.6564e-09,\n 2.4745e-08, 1.6348e-08, 2.5635e-09, 1.2574e-07, 4.5543e-08, 2.7914e-08,\n 8.3039e-10, 1.5738e-11, 1.1709e-08, 3.7811e-07, 5.6406e-08, 7.9301e-09,\n 2.4858e-09, 1.5558e-09, 6.5213e-08, 1.1484e-07, 1.7889e-08, 3.9629e-08,\n 4.9369e-09, 3.1725e-08, 6.1180e-10, 6.7542e-08, 1.7221e-08, 1.5514e-08,\n 2.4656e-09, 2.1507e-10, 2.9410e-08, 1.0914e-09, 1.9547e-08, 1.7106e-09,\n 1.0045e-08, 1.9230e-08, 4.8478e-08, 2.5836e-08, 7.2870e-10, 1.8030e-09,\n 1.9384e-08, 1.2619e-09, 1.2291e-08, 2.3355e-09, 4.2637e-11, 1.1912e-08,\n 5.0345e-08, 3.6976e-09, 1.0193e-08, 6.8137e-11, 6.6193e-10, 4.1121e-08,\n 2.6537e-09, 8.7627e-13, 2.4340e-08, 2.9292e-08, 4.1947e-11, 2.4865e-09,\n 1.7354e-08, 7.4127e-08, 5.9587e-09, 2.5594e-08, 5.0701e-09, 4.2279e-08,\n 2.9941e-08, 1.8416e-08, 4.6809e-08, 8.5408e-09, 1.5082e-08, 2.3117e-09,\n 7.8632e-09, 5.3833e-08, 1.3062e-08, 1.0872e-07, 9.6308e-08, 2.3564e-09,\n 4.1886e-11, 1.2935e-08, 7.2694e-10, 8.5288e-08, 6.8354e-09, 6.2425e-09,\n 1.8601e-08, 1.1600e-10, 9.2201e-09, 6.0165e-11, 6.0091e-08, 5.8863e-08,\n 4.6401e-09, 5.1457e-08, 4.3939e-09, 8.3924e-09, 1.2434e-08, 1.2771e-08,\n 4.4508e-09, 1.0127e-09, 1.8261e-09, 2.4225e-12, 2.3730e-10, 1.1429e-08,\n 9.7132e-08, 1.6043e-08, 4.0566e-08, 1.8446e-08, 1.0186e-10, 1.1705e-07,\n 1.8872e-08, 1.7443e-08, 4.2597e-08, 2.4855e-08, 2.7699e-08, 3.7168e-08,\n 2.5247e-08, 9.7272e-11, 7.3678e-10, 1.3575e-09, 9.9954e-09, 1.2719e-07,\n 2.4302e-08, 1.4846e-09, 2.2919e-08, 2.7686e-09, 2.3400e-08, 5.7885e-10,\n 1.1277e-09, 3.9652e-08, 2.8927e-08, 3.6201e-08, 7.6870e-09, 1.2959e-09,\n 2.3050e-08, 6.8239e-08, 1.1971e-08, 1.5084e-08, 1.8636e-08, 2.1397e-08,\n 1.1616e-08, 1.5510e-08, 1.4522e-08, 1.3669e-08, 9.8119e-08, 7.5573e-08,\n 1.1031e-08, 4.1622e-08, 4.1439e-09, 2.9903e-09, 6.1755e-08, 5.8756e-09,\n 4.4103e-08, 3.2804e-09, 1.2622e-09, 3.9822e-09, 4.2587e-09, 4.7354e-09,\n 5.5817e-09, 4.8043e-09, 1.7500e-09, 1.2467e-08, 1.9183e-07, 2.9304e-11,\n 9.5105e-09, 7.9750e-09, 5.6301e-09, 5.7538e-08, 1.6915e-08, 2.0743e-09,\n 8.5543e-09, 1.1794e-08, 8.9580e-11, 1.0450e-07, 5.4986e-09, 8.3739e-10,\n 2.6117e-08, 1.1589e-07, 9.7045e-14, 1.1333e-08, 5.6249e-08, 1.2518e-08,\n 1.0453e-08, 8.1609e-08, 2.0183e-08, 4.3484e-09, 4.3344e-12, 9.7874e-08,\n 1.9881e-09, 9.8071e-08, 6.0255e-11, 2.1747e-08, 3.1212e-10, 2.6130e-08,\n 3.6780e-08, 6.2525e-13, 3.1092e-09, 4.3233e-09, 3.6802e-09, 1.1969e-07,\n 6.3275e-09, 2.5105e-08, 5.6523e-09, 2.3257e-10, 1.2948e-10, 4.6126e-10,\n 4.2289e-09, 8.4262e-09, 7.0643e-10, 1.0365e-10, 4.4089e-08, 3.1469e-09,\n 2.3133e-09, 1.6132e-08, 1.3722e-08, 2.0191e-08, 8.3504e-09, 6.4792e-09,\n 2.1896e-09, 4.9600e-12, 1.6624e-08, 1.0780e-08, 4.8227e-09, 1.5405e-10,\n 9.9534e-11, 8.2353e-08, 3.8264e-09, 9.4948e-08, 3.0183e-08, 3.2947e-09,\n 4.5319e-08, 1.2809e-08, 2.6757e-08, 5.0855e-09], device='cuda:0')" }, "28": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7533e-10, 2.1751e-11, 6.3863e-11, 1.4810e-10, 2.6418e-11, 1.6324e-11,\n 9.4243e-11, 6.2449e-12, 3.1948e-11, 3.8874e-12, 1.9537e-10, 9.5306e-10,\n 1.3398e-10, 1.8055e-10, 2.8960e-10, 1.2923e-09, 1.0541e-10, 8.5149e-11,\n 3.9413e-10, 2.6950e-10, 4.7665e-14, 3.8671e-11, 1.0962e-10, 7.4320e-11,\n 1.2060e-10, 1.5122e-10, 1.0376e-10, 3.0885e-11, 3.6758e-12, 2.8525e-11,\n 2.3440e-10, 1.3613e-10, 2.1113e-11, 1.1041e-09, 4.1692e-10, 2.9489e-10,\n 6.2254e-12, 2.3520e-14, 9.9415e-11, 8.0888e-09, 4.4292e-10, 8.8774e-11,\n 2.9050e-11, 1.6246e-11, 7.7254e-10, 1.7734e-09, 1.9327e-10, 6.3171e-10,\n 1.8763e-11, 2.3026e-10, 2.1333e-12, 5.9697e-10, 1.1567e-10, 1.1997e-10,\n 1.2211e-11, 2.8525e-12, 3.2983e-10, 3.2465e-12, 2.7946e-10, 2.0792e-11,\n 8.1425e-11, 4.8319e-10, 8.0888e-10, 3.1666e-10, 1.1931e-12, 7.3003e-12,\n 1.8366e-10, 3.8991e-12, 7.0424e-11, 2.6517e-11, 5.8474e-12, 1.2572e-10,\n 4.0965e-10, 3.0533e-11, 8.5512e-11, 2.5024e-12, 8.3254e-12, 4.1272e-10,\n 1.3170e-11, 2.3453e-13, 3.1652e-10, 2.2058e-10, 2.5620e-13, 4.2271e-11,\n 2.4066e-10, 7.5482e-10, 6.0379e-11, 3.8222e-10, 4.3421e-11, 3.3902e-10,\n 5.9375e-10, 1.6141e-10, 4.0471e-10, 4.3768e-11, 1.1416e-10, 7.9044e-12,\n 5.5549e-11, 3.3462e-10, 1.0758e-10, 1.0647e-09, 2.2066e-09, 3.7626e-11,\n 8.4164e-13, 1.0319e-10, 1.3483e-12, 6.6003e-10, 7.2937e-11, 5.9506e-11,\n 1.9477e-10, 1.6831e-15, 8.4814e-11, 4.2568e-12, 6.4342e-10, 4.0598e-10,\n 2.2681e-11, 4.7411e-10, 5.1443e-11, 5.6337e-11, 1.0915e-10, 1.1758e-10,\n 2.0433e-11, 1.4545e-11, 1.1830e-11, 5.0162e-14, 3.3071e-12, 7.5412e-11,\n 8.6566e-10, 1.7435e-10, 7.2243e-10, 1.4632e-10, 7.6911e-14, 1.0696e-09,\n 3.0647e-10, 1.1863e-10, 4.6470e-10, 1.5787e-10, 1.6588e-10, 2.2096e-10,\n 1.9039e-10, 1.8710e-14, 1.2854e-11, 2.8932e-11, 4.1608e-11, 1.2712e-09,\n 2.1834e-10, 1.5362e-11, 3.6379e-10, 2.5487e-11, 1.4483e-10, 4.1026e-13,\n 1.0800e-11, 3.6055e-10, 6.2421e-10, 5.1333e-10, 2.7092e-11, 3.8915e-11,\n 3.8204e-10, 6.3669e-10, 9.2102e-11, 1.1352e-10, 8.8809e-11, 1.2518e-10,\n 1.9936e-10, 1.0008e-10, 1.5809e-10, 1.1194e-10, 6.2194e-10, 8.7227e-10,\n 7.9425e-11, 3.4301e-10, 2.6732e-11, 1.5230e-11, 4.7424e-10, 4.0453e-11,\n 4.5135e-10, 6.9089e-11, 9.5759e-12, 3.2379e-11, 2.9066e-11, 5.8234e-11,\n 2.6314e-11, 3.6669e-11, 6.9043e-12, 8.1080e-11, 2.0643e-09, 6.9788e-13,\n 5.2862e-11, 5.0726e-11, 2.1944e-11, 4.4169e-10, 8.7444e-11, 1.2067e-11,\n 1.0519e-10, 1.8985e-10, 1.3120e-12, 8.1215e-10, 6.5068e-11, 4.5002e-12,\n 4.2424e-10, 1.0047e-09, 1.3411e-13, 9.0344e-11, 7.1792e-10, 1.4416e-10,\n 1.3527e-10, 5.6792e-10, 1.3748e-10, 3.9515e-11, 5.5446e-14, 7.3230e-10,\n 1.0966e-11, 9.3127e-10, 2.4294e-12, 2.0357e-10, 1.1431e-12, 2.0632e-10,\n 1.8137e-10, 9.8445e-13, 3.6474e-11, 2.6630e-11, 3.2565e-11, 1.8969e-09,\n 8.0300e-11, 2.2993e-10, 5.4943e-11, 8.8130e-12, 1.2521e-14, 9.2584e-12,\n 2.0378e-11, 6.7298e-11, 9.5200e-12, 6.9121e-15, 5.7492e-10, 3.7194e-11,\n 2.9416e-11, 2.0396e-10, 1.7451e-10, 1.4608e-10, 1.3170e-10, 5.0818e-11,\n 3.9549e-11, 3.5435e-13, 1.9527e-10, 6.3153e-11, 3.0781e-11, 5.4188e-13,\n 6.2762e-13, 9.5813e-10, 3.0663e-11, 1.0849e-09, 1.7981e-10, 1.9878e-11,\n 6.4541e-10, 8.1277e-11, 1.6891e-10, 1.1504e-10], device='cuda:0')" + "exp_avg_sq": "tensor([5.0103e-11, 6.2154e-12, 1.8249e-11, 4.2320e-11, 7.5490e-12, 4.6646e-12,\n 2.6931e-11, 1.7845e-12, 9.1293e-12, 1.1108e-12, 5.5828e-11, 2.7234e-10,\n 3.8285e-11, 5.1594e-11, 8.2755e-11, 3.6930e-10, 3.0121e-11, 2.4332e-11,\n 1.1263e-10, 7.7013e-11, 1.3621e-14, 1.1050e-11, 3.1324e-11, 2.1238e-11,\n 3.4462e-11, 4.3214e-11, 2.9650e-11, 8.8257e-12, 1.0504e-12, 8.1513e-12,\n 6.6982e-11, 3.8900e-11, 6.0331e-12, 3.1551e-10, 1.1914e-10, 8.4267e-11,\n 1.7790e-12, 6.7210e-15, 2.8409e-11, 2.3114e-09, 1.2657e-10, 2.5368e-11,\n 8.3013e-12, 4.6423e-12, 2.2076e-10, 5.0676e-10, 5.5228e-11, 1.8052e-10,\n 5.3616e-12, 6.5799e-11, 6.0960e-13, 1.7059e-10, 3.3054e-11, 3.4281e-11,\n 3.4894e-12, 8.1512e-13, 9.4252e-11, 9.2770e-13, 7.9857e-11, 5.9414e-12,\n 2.3268e-11, 1.3808e-10, 2.3114e-10, 9.0488e-11, 3.4094e-13, 2.0861e-12,\n 5.2482e-11, 1.1142e-12, 2.0124e-11, 7.5775e-12, 1.6710e-12, 3.5926e-11,\n 1.1706e-10, 8.7250e-12, 2.4436e-11, 7.1509e-13, 2.3790e-12, 1.1794e-10,\n 3.7635e-12, 6.7018e-14, 9.0448e-11, 6.3031e-11, 7.3212e-14, 1.2079e-11,\n 6.8769e-11, 2.1570e-10, 1.7254e-11, 1.0922e-10, 1.2408e-11, 9.6877e-11,\n 1.6967e-10, 4.6125e-11, 1.1565e-10, 1.2507e-11, 3.2623e-11, 2.2587e-12,\n 1.5874e-11, 9.5619e-11, 3.0741e-11, 3.0424e-10, 6.3055e-10, 1.0752e-11,\n 2.4050e-13, 2.9488e-11, 3.8529e-13, 1.8861e-10, 2.0842e-11, 1.7004e-11,\n 5.5658e-11, 4.8095e-16, 2.4236e-11, 1.2164e-12, 1.8386e-10, 1.1601e-10,\n 6.4813e-12, 1.3548e-10, 1.4700e-11, 1.6099e-11, 3.1189e-11, 3.3599e-11,\n 5.8390e-12, 4.1564e-12, 3.3805e-12, 1.4334e-14, 9.4504e-13, 2.1549e-11,\n 2.4737e-10, 4.9821e-11, 2.0644e-10, 4.1812e-11, 2.1978e-14, 3.0566e-10,\n 8.7576e-11, 3.3899e-11, 1.3279e-10, 4.5112e-11, 4.7403e-11, 6.3141e-11,\n 5.4405e-11, 5.3466e-15, 3.6731e-12, 8.2674e-12, 1.1890e-11, 3.6325e-10,\n 6.2392e-11, 4.3899e-12, 1.0395e-10, 7.2831e-12, 4.1386e-11, 1.1723e-13,\n 3.0863e-12, 1.0303e-10, 1.7837e-10, 1.4669e-10, 7.7417e-12, 1.1120e-11,\n 1.0917e-10, 1.8194e-10, 2.6319e-11, 3.2440e-11, 2.5378e-11, 3.5772e-11,\n 5.6969e-11, 2.8598e-11, 4.5177e-11, 3.1989e-11, 1.7773e-10, 2.4926e-10,\n 2.2696e-11, 9.8017e-11, 7.6389e-12, 4.3522e-12, 1.3552e-10, 1.1560e-11,\n 1.2898e-10, 1.9743e-11, 2.7364e-12, 9.2525e-12, 8.3060e-12, 1.6641e-11,\n 7.5195e-12, 1.0479e-11, 1.9730e-12, 2.3169e-11, 5.8990e-10, 1.9942e-13,\n 1.5106e-11, 1.4495e-11, 6.2707e-12, 1.2622e-10, 2.4988e-11, 3.4482e-12,\n 3.0059e-11, 5.4250e-11, 3.7490e-13, 2.3208e-10, 1.8594e-11, 1.2860e-12,\n 1.2123e-10, 2.8710e-10, 3.8324e-14, 2.5816e-11, 2.0515e-10, 4.1194e-11,\n 3.8656e-11, 1.6229e-10, 3.9285e-11, 1.1292e-11, 1.5844e-14, 2.0926e-10,\n 3.1335e-12, 2.6612e-10, 6.9423e-13, 5.8171e-11, 3.2665e-13, 5.8957e-11,\n 5.1828e-11, 2.8132e-13, 1.0423e-11, 7.6097e-12, 9.3058e-12, 5.4207e-10,\n 2.2946e-11, 6.5704e-11, 1.5700e-11, 2.5184e-12, 3.5781e-15, 2.6457e-12,\n 5.8231e-12, 1.9231e-11, 2.7204e-12, 1.9752e-15, 1.6429e-10, 1.0628e-11,\n 8.4059e-12, 5.8283e-11, 4.9868e-11, 4.1743e-11, 3.7634e-11, 1.4522e-11,\n 1.1301e-11, 1.0126e-13, 5.5800e-11, 1.8047e-11, 8.7960e-12, 1.5485e-13,\n 1.7935e-13, 2.7379e-10, 8.7622e-12, 3.1001e-10, 5.1382e-11, 5.6802e-12,\n 1.8443e-10, 2.3225e-11, 4.8267e-11, 3.2875e-11], device='cuda:0')" }, "29": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.9107e-10, 2.1489e-11, 1.1681e-10, 1.9892e-10, 3.1992e-11, 2.2221e-11,\n 1.4112e-10, 1.0110e-11, 5.8261e-11, 6.3689e-12, 3.4380e-10, 1.3157e-09,\n 2.0495e-10, 2.9375e-10, 3.4255e-10, 1.3389e-09, 1.7995e-10, 1.1687e-10,\n 3.8436e-10, 5.8645e-10, 6.6082e-15, 6.8883e-11, 1.4271e-10, 1.6875e-10,\n 1.6469e-10, 2.0815e-10, 2.2256e-10, 5.2443e-11, 9.0644e-12, 5.0827e-11,\n 3.9149e-10, 2.4608e-10, 4.7242e-11, 1.5972e-09, 6.8931e-10, 4.4208e-10,\n 4.9947e-12, 1.0806e-13, 1.8228e-10, 4.8146e-09, 6.8878e-10, 1.3994e-10,\n 4.7230e-11, 3.2067e-11, 7.9386e-10, 1.4093e-09, 2.8766e-10, 6.2692e-10,\n 4.6860e-11, 3.8741e-10, 3.7245e-12, 1.0048e-09, 1.9829e-10, 1.8628e-10,\n 2.2228e-11, 7.3375e-12, 4.4806e-10, 9.9401e-12, 3.2223e-10, 3.3751e-11,\n 1.0289e-10, 3.3036e-10, 7.3511e-10, 4.0253e-10, 4.2051e-12, 1.8124e-11,\n 3.0645e-10, 9.5670e-12, 1.8949e-10, 4.0356e-11, 3.7761e-12, 1.9970e-10,\n 5.8850e-10, 7.0120e-11, 1.0956e-10, 2.1026e-12, 1.3455e-11, 6.2804e-10,\n 1.9453e-11, 2.7286e-13, 3.7710e-10, 4.5879e-10, 9.8373e-13, 4.9239e-11,\n 2.8980e-10, 1.1226e-09, 9.4663e-11, 3.9038e-10, 6.1023e-11, 6.5240e-10,\n 4.8355e-10, 2.8729e-10, 5.7393e-10, 9.6852e-11, 2.3152e-10, 2.3661e-11,\n 9.0070e-11, 6.9434e-10, 2.1799e-10, 1.5328e-09, 1.4552e-09, 4.9502e-11,\n 1.4456e-12, 1.3942e-10, 2.0147e-12, 1.0904e-09, 6.1354e-11, 1.0520e-10,\n 2.0758e-10, 4.0779e-14, 1.1493e-10, 1.1590e-12, 8.7763e-10, 7.3468e-10,\n 5.1819e-11, 6.2825e-10, 7.4795e-11, 9.2910e-11, 1.9769e-10, 1.8870e-10,\n 4.9525e-11, 1.1766e-11, 1.3631e-11, 1.3081e-13, 7.8030e-12, 1.2645e-10,\n 1.4422e-09, 2.5937e-10, 6.2489e-10, 2.0916e-10, 4.6773e-15, 1.6491e-09,\n 3.1307e-10, 2.7472e-10, 5.0778e-10, 2.9824e-10, 4.2208e-10, 4.7899e-10,\n 3.8760e-10, 4.0253e-14, 1.9156e-11, 3.3830e-11, 1.2037e-10, 1.5588e-09,\n 3.5803e-10, 2.9053e-11, 3.6645e-10, 5.1045e-11, 2.7602e-10, 3.3461e-12,\n 2.3648e-11, 4.6885e-10, 4.5986e-10, 4.2052e-10, 8.2109e-11, 3.2967e-11,\n 2.4692e-10, 8.7211e-10, 1.2952e-10, 2.4349e-10, 2.3140e-10, 2.5403e-10,\n 1.1009e-10, 2.3218e-10, 2.2877e-10, 1.5595e-10, 1.2616e-09, 9.1557e-10,\n 1.8670e-10, 6.5040e-10, 3.6139e-11, 3.5636e-11, 7.7212e-10, 9.5116e-11,\n 5.1735e-10, 7.0657e-11, 1.2734e-11, 7.0225e-11, 3.8298e-11, 8.3636e-11,\n 5.8034e-11, 4.8960e-11, 1.6353e-11, 1.3568e-10, 2.6331e-09, 1.3082e-12,\n 1.1126e-10, 8.7550e-11, 5.6940e-11, 8.3099e-10, 2.1692e-10, 2.2494e-11,\n 1.4111e-10, 2.0744e-10, 4.8116e-12, 1.5835e-09, 5.2046e-11, 7.7132e-12,\n 4.2569e-10, 1.5158e-09, 1.2309e-12, 1.8172e-10, 6.7908e-10, 1.2049e-10,\n 1.7933e-10, 1.1135e-09, 2.3750e-10, 4.6452e-11, 4.7250e-13, 1.2477e-09,\n 1.5214e-11, 1.3817e-09, 2.2911e-12, 3.3467e-10, 9.0570e-13, 3.3876e-10,\n 4.4476e-10, 1.1570e-12, 6.0398e-11, 4.8304e-11, 2.7131e-11, 1.7380e-09,\n 1.1004e-10, 4.0659e-10, 1.0338e-10, 1.1586e-11, 1.9508e-14, 1.5040e-11,\n 3.8918e-11, 1.3127e-10, 1.6997e-11, 2.0579e-13, 6.8048e-10, 5.4863e-11,\n 3.9413e-11, 2.6136e-10, 1.3980e-10, 3.1998e-10, 1.4630e-10, 6.6744e-11,\n 3.1139e-11, 8.3092e-13, 2.6738e-10, 1.3123e-10, 5.6371e-11, 6.5920e-13,\n 2.7381e-13, 9.9844e-10, 3.1836e-11, 1.3720e-09, 3.5598e-10, 3.1293e-11,\n 5.2147e-10, 1.4426e-10, 3.5660e-10, 9.8846e-11], device='cuda:0')" + "exp_avg_sq": "tensor([1.1175e-10, 6.1407e-12, 3.3378e-11, 5.6844e-11, 9.1420e-12, 6.3499e-12,\n 4.0327e-11, 2.8890e-12, 1.6648e-11, 1.8200e-12, 9.8243e-11, 3.7597e-10,\n 5.8565e-11, 8.3940e-11, 9.7887e-11, 3.8261e-10, 5.1423e-11, 3.3398e-11,\n 1.0983e-10, 1.6758e-10, 1.8884e-15, 1.9684e-11, 4.0781e-11, 4.8221e-11,\n 4.7061e-11, 5.9480e-11, 6.3597e-11, 1.4986e-11, 2.5902e-12, 1.4524e-11,\n 1.1187e-10, 7.0319e-11, 1.3500e-11, 4.5642e-10, 1.9698e-10, 1.2633e-10,\n 1.4273e-12, 3.0880e-14, 5.2089e-11, 1.3758e-09, 1.9682e-10, 3.9989e-11,\n 1.3496e-11, 9.1634e-12, 2.2685e-10, 4.0273e-10, 8.2201e-11, 1.7915e-10,\n 1.3391e-11, 1.1071e-10, 1.0643e-12, 2.8714e-10, 5.6662e-11, 5.3231e-11,\n 6.3519e-12, 2.0968e-12, 1.2804e-10, 2.8405e-12, 9.2080e-11, 9.6447e-12,\n 2.9401e-11, 9.4403e-11, 2.1007e-10, 1.1503e-10, 1.2016e-12, 5.1792e-12,\n 8.7571e-11, 2.7338e-12, 5.4149e-11, 1.1532e-11, 1.0791e-12, 5.7065e-11,\n 1.6817e-10, 2.0037e-11, 3.1307e-11, 6.0083e-13, 3.8449e-12, 1.7947e-10,\n 5.5588e-12, 7.7973e-14, 1.0776e-10, 1.3110e-10, 2.8111e-13, 1.4070e-11,\n 8.2813e-11, 3.2079e-10, 2.7051e-11, 1.1155e-10, 1.7438e-11, 1.8643e-10,\n 1.3818e-10, 8.2095e-11, 1.6401e-10, 2.7676e-11, 6.6159e-11, 6.7613e-12,\n 2.5738e-11, 1.9841e-10, 6.2294e-11, 4.3802e-10, 4.1583e-10, 1.4146e-11,\n 4.1309e-13, 3.9840e-11, 5.7572e-13, 3.1159e-10, 1.7532e-11, 3.0061e-11,\n 5.9316e-11, 1.1653e-14, 3.2842e-11, 3.3119e-13, 2.5079e-10, 2.0994e-10,\n 1.4808e-11, 1.7953e-10, 2.1373e-11, 2.6550e-11, 5.6491e-11, 5.3923e-11,\n 1.4152e-11, 3.3621e-12, 3.8950e-12, 3.7379e-14, 2.2298e-12, 3.6134e-11,\n 4.1212e-10, 7.4117e-11, 1.7857e-10, 5.9771e-11, 1.3366e-15, 4.7123e-10,\n 8.9461e-11, 7.8504e-11, 1.4510e-10, 8.5226e-11, 1.2061e-10, 1.3687e-10,\n 1.1076e-10, 1.1503e-14, 5.4741e-12, 9.6672e-12, 3.4396e-11, 4.4545e-10,\n 1.0231e-10, 8.3022e-12, 1.0472e-10, 1.4587e-11, 7.8876e-11, 9.5617e-13,\n 6.7576e-12, 1.3398e-10, 1.3141e-10, 1.2017e-10, 2.3463e-11, 9.4206e-12,\n 7.0559e-11, 2.4921e-10, 3.7013e-11, 6.9580e-11, 6.6123e-11, 7.2590e-11,\n 3.1459e-11, 6.6347e-11, 6.5372e-11, 4.4564e-11, 3.6050e-10, 2.6163e-10,\n 5.3350e-11, 1.8586e-10, 1.0327e-11, 1.0183e-11, 2.2064e-10, 2.7180e-11,\n 1.4784e-10, 2.0191e-11, 3.6389e-12, 2.0067e-11, 1.0944e-11, 2.3900e-11,\n 1.6584e-11, 1.3991e-11, 4.6731e-12, 3.8772e-11, 7.5244e-10, 3.7384e-13,\n 3.1793e-11, 2.5018e-11, 1.6271e-11, 2.3746e-10, 6.1987e-11, 6.4278e-12,\n 4.0324e-11, 5.9276e-11, 1.3750e-12, 4.5249e-10, 1.4872e-11, 2.2041e-12,\n 1.2164e-10, 4.3315e-10, 3.5175e-13, 5.1927e-11, 1.9405e-10, 3.4431e-11,\n 5.1244e-11, 3.1820e-10, 6.7868e-11, 1.3274e-11, 1.3502e-13, 3.5655e-10,\n 4.3476e-12, 3.9484e-10, 6.5469e-13, 9.5634e-11, 2.5881e-13, 9.6803e-11,\n 1.2709e-10, 3.3063e-13, 1.7259e-11, 1.3803e-11, 7.7530e-12, 4.9663e-10,\n 3.1444e-11, 1.1619e-10, 2.9543e-11, 3.3107e-12, 5.5745e-15, 4.2977e-12,\n 1.1121e-11, 3.7511e-11, 4.8570e-12, 5.8807e-14, 1.9445e-10, 1.5678e-11,\n 1.1263e-11, 7.4685e-11, 3.9949e-11, 9.1436e-11, 4.1807e-11, 1.9073e-11,\n 8.8981e-12, 2.3744e-13, 7.6405e-11, 3.7500e-11, 1.6108e-11, 1.8837e-13,\n 7.8244e-14, 2.8531e-10, 9.0973e-12, 3.9205e-10, 1.0172e-10, 8.9423e-12,\n 1.4901e-10, 4.1223e-11, 1.0190e-10, 2.8246e-11], device='cuda:0')" }, "30": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.6009e-11, 1.2161e-10, 0.0000e+00, ..., 4.1834e-10, 2.3805e-10,\n 2.5449e-10],\n [5.7525e-11, 2.1486e-11, 0.0000e+00, ..., 1.7970e-11, 2.0815e-10,\n 9.3863e-12],\n [4.6852e-11, 8.2675e-11, 0.0000e+00, ..., 5.7510e-11, 3.9014e-10,\n 2.3066e-11],\n ...,\n [1.1327e-11, 3.0838e-11, 0.0000e+00, ..., 3.9222e-11, 5.8295e-11,\n 3.5988e-13],\n [2.5863e-10, 2.2991e-10, 0.0000e+00, ..., 3.3432e-10, 1.4261e-09,\n 2.9072e-10],\n [2.0879e-12, 2.7917e-12, 0.0000e+00, ..., 1.0768e-11, 2.5183e-11,\n 2.6205e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.0290e-11, 3.4752e-11, 0.0000e+00, ..., 1.1954e-10, 6.8025e-11,\n 7.2722e-11],\n [1.6438e-11, 6.1398e-12, 0.0000e+00, ..., 5.1351e-12, 5.9480e-11,\n 2.6822e-12],\n [1.3388e-11, 2.3625e-11, 0.0000e+00, ..., 1.6434e-11, 1.1148e-10,\n 6.5913e-12],\n ...,\n [3.2368e-12, 8.8122e-12, 0.0000e+00, ..., 1.1208e-11, 1.6658e-11,\n 1.0284e-13],\n [7.3905e-11, 6.5699e-11, 0.0000e+00, ..., 9.5535e-11, 4.0751e-10,\n 8.3076e-11],\n [5.9663e-13, 7.9775e-13, 0.0000e+00, ..., 3.0771e-12, 7.1962e-12,\n 7.4882e-14]], device='cuda:0')" }, "31": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.2314e-07, 6.9363e-08, 8.8195e-08, 2.7447e-09, 2.4939e-08, 2.6379e-09,\n 2.7056e-08, 2.3139e-09, 1.5810e-08, 1.0133e-08, 2.0817e-07, 2.0519e-07,\n 2.8161e-08, 2.0330e-08, 3.9778e-08, 5.3814e-09, 1.0103e-07, 3.0052e-08,\n 1.0091e-07, 2.8096e-07, 4.6215e-10, 8.0541e-08, 1.7747e-08, 1.6488e-07,\n 1.2161e-08, 2.6706e-08, 1.7658e-07, 2.8546e-09, 1.0026e-08, 3.0613e-08,\n 5.3106e-08, 1.7481e-07, 2.3102e-10, 3.1462e-07, 1.8102e-07, 2.1489e-07,\n 8.1508e-09, 2.9052e-10, 1.1677e-07, 7.7103e-07, 3.2405e-07, 9.1255e-09,\n 8.2935e-09, 8.3596e-09, 5.8928e-08, 1.3451e-07, 8.1981e-08, 3.7889e-08,\n 8.2526e-09, 1.1398e-07, 3.0550e-09, 2.8543e-07, 1.4306e-07, 9.5313e-08,\n 4.2193e-09, 8.7629e-09, 6.4150e-08, 7.5049e-09, 4.7560e-08, 1.5037e-08,\n 1.8375e-08, 4.7557e-08, 7.8280e-08, 1.9255e-07, 2.0172e-09, 4.7994e-08,\n 4.6225e-08, 1.1176e-08, 9.2082e-08, 1.3369e-08, 9.3757e-11, 3.7254e-08,\n 1.5289e-07, 1.4901e-08, 9.1244e-09, 1.1811e-10, 2.3985e-08, 2.6834e-07,\n 1.0499e-08, 7.5349e-11, 1.0934e-07, 3.4918e-07, 1.5606e-10, 2.5683e-09,\n 4.3367e-08, 2.5850e-07, 1.1671e-08, 5.3019e-08, 3.8787e-08, 9.9286e-09,\n 5.4925e-08, 1.9804e-07, 6.0327e-08, 6.9993e-08, 2.2446e-07, 5.5946e-08,\n 9.9201e-08, 3.3657e-07, 5.2463e-08, 4.0714e-07, 3.7926e-08, 7.7672e-09,\n 1.5132e-11, 4.0159e-08, 4.4083e-10, 4.8531e-07, 1.3892e-08, 5.4901e-08,\n 4.1745e-08, 2.9018e-10, 3.6798e-07, 1.6277e-08, 1.1430e-07, 1.7214e-07,\n 3.0077e-08, 2.3967e-07, 5.1661e-09, 1.9031e-08, 2.9775e-08, 1.5980e-07,\n 1.6664e-08, 1.2760e-08, 7.1039e-09, 1.1929e-10, 1.1809e-09, 8.5859e-08,\n 1.4368e-07, 2.1167e-08, 7.1201e-08, 1.4592e-07, 1.5932e-09, 3.2717e-07,\n 7.9899e-08, 1.1961e-08, 1.3850e-07, 5.2397e-08, 2.6218e-07, 5.0804e-08,\n 4.0036e-08, 2.8804e-11, 1.0210e-10, 9.8689e-10, 1.7955e-07, 6.6570e-07,\n 1.1657e-07, 2.2949e-08, 2.8162e-08, 4.4553e-08, 3.9345e-08, 3.3621e-09,\n 1.0293e-08, 9.1186e-08, 6.6127e-08, 1.6184e-08, 1.9807e-08, 1.1716e-08,\n 3.1507e-08, 3.5461e-07, 1.5691e-08, 1.6998e-08, 5.6174e-08, 1.7586e-08,\n 1.6745e-08, 5.0168e-08, 1.5922e-08, 7.4717e-08, 3.4260e-07, 2.4786e-08,\n 1.1701e-07, 2.1216e-08, 1.0754e-09, 3.8855e-08, 7.1811e-08, 1.1233e-08,\n 1.4535e-07, 2.4709e-08, 1.7157e-08, 1.6388e-08, 1.8267e-08, 4.9390e-08,\n 5.9638e-08, 1.7793e-08, 2.6622e-08, 4.4211e-08, 3.9065e-07, 2.2829e-10,\n 3.0629e-08, 1.1514e-07, 2.0445e-08, 3.7517e-07, 6.5176e-08, 3.8376e-08,\n 2.2064e-08, 7.6938e-09, 3.3755e-11, 1.0404e-06, 1.4537e-08, 3.1032e-09,\n 3.0495e-08, 1.4267e-07, 1.6374e-10, 4.2323e-07, 2.9174e-08, 3.6406e-08,\n 3.5767e-08, 9.8282e-08, 1.7843e-07, 3.6335e-09, 8.3099e-11, 6.8111e-08,\n 1.9676e-08, 5.0019e-08, 1.8641e-09, 1.2448e-07, 2.4652e-09, 2.7107e-07,\n 3.1453e-07, 1.2381e-09, 3.0975e-09, 3.1511e-08, 1.2835e-08, 1.7562e-07,\n 1.6332e-08, 2.1264e-07, 2.8680e-08, 3.9369e-12, 3.6470e-11, 2.8118e-09,\n 4.3642e-09, 4.5956e-08, 1.2096e-09, 4.0805e-09, 4.7478e-08, 2.7543e-08,\n 1.8796e-07, 1.4467e-09, 1.3210e-08, 8.7999e-08, 2.4585e-08, 2.8799e-08,\n 5.2223e-08, 2.1297e-10, 2.0679e-08, 7.3777e-08, 3.0317e-08, 7.3146e-10,\n 1.7047e-09, 1.4091e-07, 9.7664e-09, 3.1535e-08, 2.5361e-07, 3.8848e-09,\n 1.5817e-07, 1.3985e-08, 3.7656e-07, 9.0068e-09], device='cuda:0')" + "exp_avg_sq": "tensor([6.3764e-08, 1.9821e-08, 2.5203e-08, 7.8433e-10, 7.1265e-09, 7.5381e-10,\n 7.7316e-09, 6.6121e-10, 4.5179e-09, 2.8956e-09, 5.9486e-08, 5.8634e-08,\n 8.0472e-09, 5.8094e-09, 1.1367e-08, 1.5378e-09, 2.8869e-08, 8.5876e-09,\n 2.8837e-08, 8.0288e-08, 1.3206e-10, 2.3015e-08, 5.0714e-09, 4.7115e-08,\n 3.4751e-09, 7.6315e-09, 5.0460e-08, 8.1573e-10, 2.8650e-09, 8.7480e-09,\n 1.5175e-08, 4.9953e-08, 6.6016e-11, 8.9904e-08, 5.1727e-08, 6.1407e-08,\n 2.3292e-09, 8.3018e-11, 3.3369e-08, 2.2033e-07, 9.2600e-08, 2.6077e-09,\n 2.3699e-09, 2.3888e-09, 1.6839e-08, 3.8438e-08, 2.3427e-08, 1.0827e-08,\n 2.3583e-09, 3.2571e-08, 8.7300e-10, 8.1564e-08, 4.0880e-08, 2.7237e-08,\n 1.2057e-09, 2.5041e-09, 1.8331e-08, 2.1446e-09, 1.3591e-08, 4.2970e-09,\n 5.2508e-09, 1.3590e-08, 2.2369e-08, 5.5023e-08, 5.7644e-10, 1.3715e-08,\n 1.3209e-08, 3.1935e-09, 2.6313e-08, 3.8202e-09, 2.6792e-11, 1.0646e-08,\n 4.3691e-08, 4.2580e-09, 2.6074e-09, 3.3750e-11, 6.8538e-09, 7.6681e-08,\n 3.0003e-09, 2.1532e-11, 3.1244e-08, 9.9781e-08, 4.4596e-11, 7.3391e-10,\n 1.2392e-08, 7.3869e-08, 3.3352e-09, 1.5151e-08, 1.1084e-08, 2.8372e-09,\n 1.5695e-08, 5.6593e-08, 1.7239e-08, 2.0001e-08, 6.4141e-08, 1.5987e-08,\n 2.8347e-08, 9.6179e-08, 1.4992e-08, 1.1634e-07, 1.0838e-08, 2.2195e-09,\n 4.3241e-12, 1.1476e-08, 1.2597e-10, 1.3868e-07, 3.9696e-09, 1.5688e-08,\n 1.1929e-08, 8.2922e-11, 1.0515e-07, 4.6514e-09, 3.2663e-08, 4.9191e-08,\n 8.5946e-09, 6.8487e-08, 1.4763e-09, 5.4382e-09, 8.5083e-09, 4.5663e-08,\n 4.7620e-09, 3.6462e-09, 2.0300e-09, 3.4089e-11, 3.3745e-10, 2.4535e-08,\n 4.1057e-08, 6.0488e-09, 2.0346e-08, 4.1697e-08, 4.5526e-10, 9.3491e-08,\n 2.2832e-08, 3.4181e-09, 3.9576e-08, 1.4973e-08, 7.4919e-08, 1.4518e-08,\n 1.1441e-08, 8.2310e-12, 2.9175e-11, 2.8201e-10, 5.1309e-08, 1.9023e-07,\n 3.3310e-08, 6.5578e-09, 8.0474e-09, 1.2731e-08, 1.1243e-08, 9.6075e-10,\n 2.9414e-09, 2.6057e-08, 1.8896e-08, 4.6248e-09, 5.6601e-09, 3.3480e-09,\n 9.0034e-09, 1.0133e-07, 4.4839e-09, 4.8574e-09, 1.6052e-08, 5.0253e-09,\n 4.7850e-09, 1.4336e-08, 4.5499e-09, 2.1351e-08, 9.7902e-08, 7.0829e-09,\n 3.3436e-08, 6.0626e-09, 3.0731e-10, 1.1103e-08, 2.0520e-08, 3.2099e-09,\n 4.1534e-08, 7.0607e-09, 4.9028e-09, 4.6829e-09, 5.2200e-09, 1.4113e-08,\n 1.7042e-08, 5.0846e-09, 7.6073e-09, 1.2634e-08, 1.1163e-07, 6.5236e-11,\n 8.7523e-09, 3.2901e-08, 5.8424e-09, 1.0721e-07, 1.8625e-08, 1.0966e-08,\n 6.3051e-09, 2.1986e-09, 9.6458e-12, 2.9731e-07, 4.1541e-09, 8.8677e-10,\n 8.7141e-09, 4.0768e-08, 4.6791e-11, 1.2094e-07, 8.3366e-09, 1.0403e-08,\n 1.0221e-08, 2.8085e-08, 5.0987e-08, 1.0383e-09, 2.3746e-11, 1.9463e-08,\n 5.6225e-09, 1.4293e-08, 5.3268e-10, 3.5572e-08, 7.0444e-10, 7.7462e-08,\n 8.9879e-08, 3.5378e-10, 8.8513e-10, 9.0044e-09, 3.6677e-09, 5.0185e-08,\n 4.6670e-09, 6.0762e-08, 8.1954e-09, 1.1250e-12, 1.0422e-11, 8.0350e-10,\n 1.2471e-09, 1.3132e-08, 3.4566e-10, 1.1660e-09, 1.3567e-08, 7.8708e-09,\n 5.3712e-08, 4.1341e-10, 3.7748e-09, 2.5146e-08, 7.0253e-09, 8.2296e-09,\n 1.4923e-08, 6.0857e-11, 5.9091e-09, 2.1082e-08, 8.6632e-09, 2.0902e-10,\n 4.8714e-10, 4.0266e-08, 2.7908e-09, 9.0114e-09, 7.2471e-08, 1.1101e-09,\n 4.5198e-08, 3.9964e-09, 1.0761e-07, 2.5738e-09], device='cuda:0')" }, "32": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.8096e-10, 2.2232e-10, 2.4636e-10, 1.4581e-11, 5.5242e-11, 7.8308e-12,\n 3.9463e-11, 1.8831e-11, 2.1572e-11, 2.4322e-11, 3.8324e-10, 3.3736e-10,\n 5.8286e-11, 5.1517e-11, 1.3135e-10, 3.9075e-11, 2.4109e-10, 5.2299e-11,\n 2.4692e-10, 6.6808e-10, 2.2236e-13, 2.8275e-10, 8.4585e-11, 3.4729e-10,\n 4.0820e-11, 3.0905e-11, 8.4324e-10, 8.7893e-12, 3.4869e-11, 5.8758e-11,\n 8.0348e-11, 1.2733e-09, 2.1725e-12, 5.3793e-10, 5.3834e-10, 8.1841e-10,\n 1.0207e-11, 3.2913e-13, 3.4067e-10, 1.9517e-09, 9.5838e-10, 1.5177e-11,\n 2.2098e-11, 3.8056e-11, 7.6394e-11, 2.4126e-10, 3.3818e-10, 9.1236e-11,\n 1.0533e-11, 2.6470e-10, 3.3366e-12, 8.7444e-10, 2.9732e-10, 2.0762e-10,\n 4.9371e-12, 6.2487e-11, 1.3821e-10, 8.3051e-12, 2.4078e-10, 5.1849e-11,\n 4.2258e-11, 1.9505e-10, 2.4853e-10, 8.6468e-10, 2.4320e-12, 1.3967e-10,\n 1.2595e-10, 1.7920e-11, 2.1388e-10, 2.8984e-11, 2.7157e-14, 1.3587e-10,\n 3.9083e-10, 4.9271e-11, 1.3794e-11, 4.4663e-13, 1.7050e-10, 7.8002e-10,\n 2.1863e-11, 4.0358e-15, 6.3827e-10, 1.9218e-09, 9.4772e-13, 1.4425e-11,\n 1.9514e-10, 7.5489e-10, 4.4691e-11, 2.0689e-10, 8.2615e-11, 5.2139e-11,\n 1.4947e-10, 4.7456e-10, 8.7358e-11, 1.8484e-10, 7.6729e-10, 1.1163e-10,\n 2.6533e-10, 7.7035e-10, 1.3373e-10, 1.1793e-09, 8.3857e-11, 4.7067e-11,\n 2.1591e-12, 7.6928e-11, 2.2376e-13, 1.4942e-09, 2.7601e-11, 2.7821e-10,\n 6.7091e-11, 1.0674e-16, 1.1270e-09, 4.7928e-11, 2.8804e-10, 2.9959e-10,\n 5.3553e-11, 7.5291e-10, 1.6168e-11, 3.1139e-11, 5.9314e-11, 6.9067e-10,\n 2.0307e-11, 1.8282e-11, 7.5374e-12, 5.5813e-12, 1.0961e-11, 2.8424e-10,\n 2.5727e-10, 5.3257e-11, 2.1913e-10, 5.5014e-10, 8.3024e-14, 6.1402e-10,\n 3.0830e-10, 3.6615e-11, 2.2955e-10, 7.5403e-11, 8.9478e-10, 1.1461e-10,\n 1.0140e-10, 4.4354e-15, 4.1952e-14, 4.8571e-12, 4.3149e-10, 2.7551e-09,\n 4.0399e-10, 8.7249e-11, 6.9112e-11, 2.6605e-10, 4.6568e-11, 3.3508e-12,\n 6.7486e-11, 1.8618e-10, 2.6168e-10, 2.6401e-11, 2.6117e-11, 9.3626e-11,\n 5.5757e-11, 9.0092e-10, 2.3706e-11, 7.3997e-11, 1.2449e-10, 2.6146e-11,\n 2.0265e-11, 1.7097e-10, 3.8188e-11, 1.6223e-10, 6.4208e-10, 4.7495e-11,\n 5.6340e-10, 6.9881e-11, 3.3882e-13, 7.2924e-11, 1.1667e-10, 3.4287e-11,\n 4.1255e-10, 1.7129e-10, 2.5274e-11, 5.2893e-11, 3.8161e-11, 1.0560e-10,\n 1.3509e-10, 2.7332e-11, 5.6793e-11, 7.2448e-11, 9.0696e-10, 1.4730e-14,\n 5.1625e-11, 4.1997e-10, 2.3388e-11, 8.1853e-10, 1.0971e-10, 9.4477e-11,\n 8.5373e-11, 2.4182e-11, 8.7083e-14, 4.2624e-09, 1.5616e-11, 2.2995e-12,\n 6.3635e-11, 2.7343e-10, 5.6422e-12, 1.8585e-09, 5.1187e-11, 1.0049e-10,\n 7.1329e-11, 2.1937e-10, 4.6028e-10, 6.3850e-12, 5.9954e-15, 1.1853e-10,\n 4.0581e-11, 1.1332e-10, 5.8830e-12, 4.3595e-10, 1.7801e-12, 7.8570e-10,\n 7.4113e-10, 1.5001e-13, 8.6217e-12, 6.7543e-11, 2.5009e-11, 4.2693e-10,\n 5.5207e-11, 4.5218e-10, 7.8587e-11, 3.5702e-13, 2.6473e-13, 1.0287e-11,\n 6.2003e-12, 1.3875e-10, 3.6220e-12, 2.3449e-12, 1.6431e-10, 9.6357e-11,\n 5.9248e-10, 9.8125e-12, 1.9518e-11, 1.8795e-10, 1.0505e-10, 5.2746e-11,\n 1.0258e-10, 1.1594e-13, 5.8709e-11, 2.1032e-10, 4.9575e-11, 5.7768e-14,\n 7.3265e-13, 2.3055e-10, 1.5432e-11, 1.0774e-10, 7.4580e-10, 3.6314e-12,\n 5.6714e-10, 2.6355e-11, 1.0042e-09, 2.8388e-11], device='cuda:0')" + "exp_avg_sq": "tensor([1.3744e-10, 6.3530e-11, 7.0398e-11, 4.1665e-12, 1.5786e-11, 2.2377e-12,\n 1.1277e-11, 5.3811e-12, 6.1644e-12, 6.9503e-12, 1.0951e-10, 9.6403e-11,\n 1.6656e-11, 1.4721e-11, 3.7533e-11, 1.1166e-11, 6.8892e-11, 1.4945e-11,\n 7.0558e-11, 1.9091e-10, 6.3541e-14, 8.0797e-11, 2.4171e-11, 9.9242e-11,\n 1.1665e-11, 8.8314e-12, 2.4096e-10, 2.5116e-12, 9.9642e-12, 1.6790e-11,\n 2.2960e-11, 3.6385e-10, 6.2080e-13, 1.5372e-10, 1.5384e-10, 2.3387e-10,\n 2.9168e-12, 9.4051e-14, 9.7350e-11, 5.5770e-10, 2.7387e-10, 4.3368e-12,\n 6.3147e-12, 1.0875e-11, 2.1830e-11, 6.8942e-11, 9.6639e-11, 2.6071e-11,\n 3.0100e-12, 7.5640e-11, 9.5345e-13, 2.4988e-10, 8.4960e-11, 5.9330e-11,\n 1.4108e-12, 1.7856e-11, 3.9493e-11, 2.3733e-12, 6.8804e-11, 1.4816e-11,\n 1.2075e-11, 5.5737e-11, 7.1020e-11, 2.4709e-10, 6.9496e-13, 3.9911e-11,\n 3.5991e-11, 5.1208e-12, 6.1118e-11, 8.2823e-12, 7.7604e-15, 3.8825e-11,\n 1.1168e-10, 1.4080e-11, 3.9419e-12, 1.2763e-13, 4.8721e-11, 2.2290e-10,\n 6.2475e-12, 1.1533e-15, 1.8239e-10, 5.4918e-10, 2.7082e-13, 4.1221e-12,\n 5.5762e-11, 2.1572e-10, 1.2771e-11, 5.9119e-11, 2.3608e-11, 1.4899e-11,\n 4.2714e-11, 1.3561e-10, 2.4963e-11, 5.2821e-11, 2.1926e-10, 3.1899e-11,\n 7.5819e-11, 2.2013e-10, 3.8214e-11, 3.3698e-10, 2.3963e-11, 1.3450e-11,\n 6.1698e-13, 2.1983e-11, 6.3942e-14, 4.2697e-10, 7.8871e-12, 7.9499e-11,\n 1.9172e-11, 3.0502e-17, 3.2205e-10, 1.3696e-11, 8.2310e-11, 8.5610e-11,\n 1.5303e-11, 2.1515e-10, 4.6201e-12, 8.8981e-12, 1.6949e-11, 1.9737e-10,\n 5.8030e-12, 5.2242e-12, 2.1539e-12, 1.5949e-12, 3.1322e-12, 8.1223e-11,\n 7.3516e-11, 1.5219e-11, 6.2618e-11, 1.5721e-10, 2.3725e-14, 1.7546e-10,\n 8.8099e-11, 1.0463e-11, 6.5597e-11, 2.1547e-11, 2.5569e-10, 3.2752e-11,\n 2.8977e-11, 1.2675e-15, 1.1988e-14, 1.3880e-12, 1.2330e-10, 7.8728e-10,\n 1.1544e-10, 2.4932e-11, 1.9749e-11, 7.6027e-11, 1.3307e-11, 9.5751e-13,\n 1.9285e-11, 5.3204e-11, 7.4776e-11, 7.5444e-12, 7.4633e-12, 2.6754e-11,\n 1.5933e-11, 2.5745e-10, 6.7742e-12, 2.1145e-11, 3.5574e-11, 7.4715e-12,\n 5.7909e-12, 4.8856e-11, 1.0913e-11, 4.6359e-11, 1.8348e-10, 1.3572e-11,\n 1.6100e-10, 1.9969e-11, 9.6820e-14, 2.0839e-11, 3.3341e-11, 9.7979e-12,\n 1.1789e-10, 4.8948e-11, 7.2222e-12, 1.5115e-11, 1.0905e-11, 3.0175e-11,\n 3.8603e-11, 7.8104e-12, 1.6229e-11, 2.0702e-11, 2.5917e-10, 4.2093e-15,\n 1.4752e-11, 1.2001e-10, 6.6833e-12, 2.3390e-10, 3.1349e-11, 2.6997e-11,\n 2.4396e-11, 6.9103e-12, 2.4885e-14, 1.2180e-09, 4.4625e-12, 6.5711e-13,\n 1.8184e-11, 7.8135e-11, 1.6123e-12, 5.3109e-10, 1.4627e-11, 2.8714e-11,\n 2.0383e-11, 6.2688e-11, 1.3153e-10, 1.8246e-12, 1.7132e-15, 3.3872e-11,\n 1.1596e-11, 3.2383e-11, 1.6811e-12, 1.2458e-10, 5.0868e-13, 2.2452e-10,\n 2.1178e-10, 4.2867e-14, 2.4637e-12, 1.9301e-11, 7.1466e-12, 1.2200e-10,\n 1.5776e-11, 1.2922e-10, 2.2457e-11, 1.0202e-13, 7.5649e-14, 2.9396e-12,\n 1.7718e-12, 3.9649e-11, 1.0350e-12, 6.7008e-13, 4.6952e-11, 2.7535e-11,\n 1.6931e-10, 2.8040e-12, 5.5776e-12, 5.3708e-11, 3.0018e-11, 1.5073e-11,\n 2.9314e-11, 3.3129e-14, 1.6777e-11, 6.0101e-11, 1.4166e-11, 1.6508e-14,\n 2.0936e-13, 6.5881e-11, 4.4100e-12, 3.0788e-11, 2.1312e-10, 1.0377e-12,\n 1.6207e-10, 7.5313e-12, 2.8696e-10, 8.1122e-12], device='cuda:0')" }, "33": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.2247e-10, 3.2524e-10, 2.5845e-10, 1.6792e-11, 5.3938e-11, 2.0971e-11,\n 7.4263e-11, 2.4319e-11, 3.6377e-11, 1.6920e-11, 6.7963e-10, 7.3020e-10,\n 8.7671e-11, 9.7171e-11, 2.0661e-10, 2.2366e-11, 4.5275e-10, 7.7025e-11,\n 3.2452e-10, 1.1715e-09, 1.8328e-13, 3.7351e-10, 1.0441e-10, 6.9282e-10,\n 6.7651e-11, 8.4423e-11, 7.9257e-10, 1.6737e-11, 6.0716e-11, 8.0264e-11,\n 2.5215e-10, 8.1843e-10, 2.4433e-12, 1.0774e-09, 7.6761e-10, 9.5022e-10,\n 1.7155e-11, 5.9077e-13, 5.2043e-10, 2.7001e-09, 1.0849e-09, 5.3741e-11,\n 4.9168e-11, 5.2129e-11, 1.9872e-10, 4.4898e-10, 3.9583e-10, 1.8491e-10,\n 2.1890e-11, 3.8588e-10, 2.8729e-12, 1.2137e-09, 4.8623e-10, 2.8915e-10,\n 8.3939e-12, 6.6989e-11, 2.8484e-10, 1.8444e-11, 2.4565e-10, 8.2402e-11,\n 4.0900e-11, 2.4292e-10, 3.7312e-10, 8.5706e-10, 3.3551e-12, 1.2557e-10,\n 2.2987e-10, 2.3951e-11, 3.9821e-10, 6.8106e-11, 6.7940e-16, 1.8667e-10,\n 4.9329e-10, 8.4646e-11, 2.5322e-11, 2.0528e-12, 1.3896e-10, 1.1091e-09,\n 1.6014e-11, 1.0998e-13, 5.1784e-10, 1.5173e-09, 1.2941e-12, 2.3148e-11,\n 2.2018e-10, 1.0803e-09, 6.6747e-11, 2.6252e-10, 1.2638e-10, 4.3763e-11,\n 2.5269e-10, 8.7501e-10, 1.9638e-10, 2.2317e-10, 9.7914e-10, 1.6108e-10,\n 3.0327e-10, 1.1499e-09, 2.6207e-10, 1.7105e-09, 1.8287e-10, 5.0912e-11,\n 4.5757e-12, 1.1224e-10, 1.3560e-13, 1.6376e-09, 2.8473e-11, 2.7333e-10,\n 1.2210e-10, 1.4139e-13, 1.2387e-09, 8.8496e-11, 5.1492e-10, 5.9186e-10,\n 7.4779e-11, 7.9536e-10, 2.7616e-11, 5.3777e-11, 1.3446e-10, 7.3476e-10,\n 5.0653e-11, 3.6500e-11, 1.3382e-11, 2.9540e-12, 1.3621e-11, 2.5242e-10,\n 6.0453e-10, 1.0666e-10, 3.2968e-10, 4.3975e-10, 4.4811e-13, 1.3405e-09,\n 3.7532e-10, 6.2703e-11, 4.5465e-10, 1.6935e-10, 1.1091e-09, 1.7527e-10,\n 1.8818e-10, 1.8062e-13, 3.9109e-13, 8.5775e-12, 5.5105e-10, 2.2436e-09,\n 5.1212e-10, 1.2136e-10, 1.3422e-10, 2.3851e-10, 1.3067e-10, 3.5650e-12,\n 6.9440e-11, 2.8994e-10, 3.2169e-10, 4.8930e-11, 6.0736e-11, 8.4750e-11,\n 8.8537e-11, 1.2112e-09, 4.4764e-11, 7.6266e-11, 1.8769e-10, 5.1584e-11,\n 4.1172e-11, 2.3055e-10, 8.1861e-11, 2.3406e-10, 1.2064e-09, 8.1967e-11,\n 5.5593e-10, 1.0245e-10, 1.8700e-12, 1.1080e-10, 2.5331e-10, 6.1340e-11,\n 4.7040e-10, 1.5100e-10, 4.3178e-11, 9.3402e-11, 3.8407e-11, 2.2440e-10,\n 1.6339e-10, 4.6842e-11, 6.3804e-11, 1.2703e-10, 1.6499e-09, 1.2189e-13,\n 9.6375e-11, 3.3185e-10, 5.6225e-11, 1.5214e-09, 2.1972e-10, 1.1140e-10,\n 1.1329e-10, 4.9250e-11, 8.2635e-14, 4.2029e-09, 3.4192e-11, 5.7051e-12,\n 1.5113e-10, 5.1168e-10, 9.9092e-12, 1.8221e-09, 1.0341e-10, 9.1843e-11,\n 1.6998e-10, 3.7070e-10, 5.4846e-10, 1.0385e-11, 2.8420e-13, 2.4418e-10,\n 4.2640e-11, 2.0952e-10, 1.4129e-11, 5.4804e-10, 1.9984e-12, 8.9452e-10,\n 1.0646e-09, 1.8354e-13, 1.7575e-11, 7.8756e-11, 1.9652e-11, 7.6592e-10,\n 9.1917e-11, 9.1079e-10, 1.3748e-10, 9.4617e-13, 2.1675e-13, 2.1719e-11,\n 1.0371e-11, 2.2238e-10, 8.0438e-12, 4.7735e-12, 2.1561e-10, 1.4227e-10,\n 8.1629e-10, 7.4152e-12, 3.1786e-11, 3.9488e-10, 1.3445e-10, 7.6245e-11,\n 2.3468e-10, 1.2106e-13, 1.1043e-10, 2.6046e-10, 9.0226e-11, 2.6233e-15,\n 1.0185e-12, 4.5760e-10, 2.1778e-11, 1.4501e-10, 8.5250e-10, 7.9020e-12,\n 4.7242e-10, 4.3412e-11, 1.3091e-09, 5.5445e-11], device='cuda:0')" + "exp_avg_sq": "tensor([2.6360e-10, 9.2939e-11, 7.3853e-11, 4.7984e-12, 1.5413e-11, 5.9927e-12,\n 2.1221e-11, 6.9494e-12, 1.0395e-11, 4.8349e-12, 1.9421e-10, 2.0866e-10,\n 2.5053e-11, 2.7767e-11, 5.9042e-11, 6.3912e-12, 1.2938e-10, 2.2011e-11,\n 9.2735e-11, 3.3475e-10, 5.2375e-14, 1.0673e-10, 2.9837e-11, 1.9798e-10,\n 1.9332e-11, 2.4125e-11, 2.2648e-10, 4.7826e-12, 1.7350e-11, 2.2936e-11,\n 7.2053e-11, 2.3387e-10, 6.9818e-13, 3.0788e-10, 2.1935e-10, 2.7153e-10,\n 4.9021e-12, 1.6882e-13, 1.4872e-10, 7.7158e-10, 3.1002e-10, 1.5357e-11,\n 1.4050e-11, 1.4896e-11, 5.6786e-11, 1.2830e-10, 1.1311e-10, 5.2841e-11,\n 6.2552e-12, 1.1027e-10, 8.2097e-13, 3.4682e-10, 1.3895e-10, 8.2626e-11,\n 2.3986e-12, 1.9143e-11, 8.1395e-11, 5.2705e-12, 7.0198e-11, 2.3547e-11,\n 1.1688e-11, 6.9415e-11, 1.0662e-10, 2.4491e-10, 9.5876e-13, 3.5882e-11,\n 6.5687e-11, 6.8441e-12, 1.1379e-10, 1.9462e-11, 1.9415e-16, 5.3342e-11,\n 1.4096e-10, 2.4188e-11, 7.2361e-12, 5.8660e-13, 3.9710e-11, 3.1694e-10,\n 4.5761e-12, 3.1429e-14, 1.4798e-10, 4.3358e-10, 3.6980e-13, 6.6146e-12,\n 6.2917e-11, 3.0869e-10, 1.9074e-11, 7.5018e-11, 3.6114e-11, 1.2506e-11,\n 7.2208e-11, 2.5004e-10, 5.6117e-11, 6.3773e-11, 2.7980e-10, 4.6031e-11,\n 8.6662e-11, 3.2859e-10, 7.4888e-11, 4.8878e-10, 5.2257e-11, 1.4549e-11,\n 1.3075e-12, 3.2073e-11, 3.8748e-14, 4.6796e-10, 8.1365e-12, 7.8105e-11,\n 3.4891e-11, 4.0404e-14, 3.5398e-10, 2.5288e-11, 1.4714e-10, 1.6913e-10,\n 2.1369e-11, 2.2728e-10, 7.8915e-12, 1.5367e-11, 3.8422e-11, 2.0996e-10,\n 1.4474e-11, 1.0430e-11, 3.8241e-12, 8.4413e-13, 3.8923e-12, 7.2130e-11,\n 1.7275e-10, 3.0480e-11, 9.4208e-11, 1.2566e-10, 1.2805e-13, 3.8305e-10,\n 1.0725e-10, 1.7918e-11, 1.2992e-10, 4.8392e-11, 3.1695e-10, 5.0084e-11,\n 5.3773e-11, 5.1615e-14, 1.1176e-13, 2.4511e-12, 1.5747e-10, 6.4113e-10,\n 1.4634e-10, 3.4679e-11, 3.8355e-11, 6.8156e-11, 3.7340e-11, 1.0187e-12,\n 1.9843e-11, 8.2853e-11, 9.1927e-11, 1.3982e-11, 1.7356e-11, 2.4218e-11,\n 2.5300e-11, 3.4611e-10, 1.2792e-11, 2.1794e-11, 5.3633e-11, 1.4741e-11,\n 1.1765e-11, 6.5881e-11, 2.3392e-11, 6.6885e-11, 3.4475e-10, 2.3423e-11,\n 1.5886e-10, 2.9277e-11, 5.3438e-13, 3.1662e-11, 7.2386e-11, 1.7528e-11,\n 1.3442e-10, 4.3150e-11, 1.2338e-11, 2.6690e-11, 1.0975e-11, 6.4125e-11,\n 4.6690e-11, 1.3386e-11, 1.8233e-11, 3.6301e-11, 4.7146e-10, 3.4831e-14,\n 2.7540e-11, 9.4828e-11, 1.6067e-11, 4.3476e-10, 6.2788e-11, 3.1833e-11,\n 3.2373e-11, 1.4074e-11, 2.3614e-14, 1.2010e-09, 9.7706e-12, 1.6303e-12,\n 4.3188e-11, 1.4622e-10, 2.8316e-12, 5.2067e-10, 2.9550e-11, 2.6245e-11,\n 4.8572e-11, 1.0593e-10, 1.5673e-10, 2.9676e-12, 8.1213e-14, 6.9776e-11,\n 1.2185e-11, 5.9873e-11, 4.0374e-12, 1.5661e-10, 5.7107e-13, 2.5561e-10,\n 3.0422e-10, 5.2449e-14, 5.0222e-12, 2.2505e-11, 5.6157e-12, 2.1887e-10,\n 2.6266e-11, 2.6026e-10, 3.9286e-11, 2.7037e-13, 6.1939e-14, 6.2063e-12,\n 2.9635e-12, 6.3547e-11, 2.2986e-12, 1.3641e-12, 6.1611e-11, 4.0654e-11,\n 2.3326e-10, 2.1189e-12, 9.0830e-12, 1.1284e-10, 3.8420e-11, 2.1788e-11,\n 6.7063e-11, 3.4594e-14, 3.1557e-11, 7.4428e-11, 2.5783e-11, 7.4963e-16,\n 2.9105e-13, 1.3076e-10, 6.2234e-12, 4.1436e-11, 2.4361e-10, 2.2581e-12,\n 1.3500e-10, 1.2405e-11, 3.7407e-10, 1.5844e-11], device='cuda:0')" }, "34": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.2880e-12, 1.1873e-11, 7.2428e-12, ..., 1.6393e-12, 8.9753e-12,\n 5.3934e-12],\n [2.3402e-13, 3.5010e-14, 3.8158e-13, ..., 1.0183e-12, 1.1838e-12,\n 4.2191e-14],\n [1.1407e-12, 5.0168e-12, 1.5819e-12, ..., 8.0945e-13, 4.8744e-12,\n 9.3270e-13],\n ...,\n [2.8544e-11, 6.7706e-11, 7.0167e-11, ..., 1.2468e-11, 1.0527e-10,\n 1.5236e-10],\n [1.9060e-10, 3.0147e-10, 4.6530e-10, ..., 1.0333e-10, 4.5527e-10,\n 8.3589e-10],\n [2.6151e-09, 5.8004e-09, 6.9314e-09, ..., 1.4756e-09, 8.7304e-09,\n 1.3697e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[9.3956e-13, 3.3927e-12, 2.0697e-12, ..., 4.6844e-13, 2.5647e-12,\n 1.5412e-12],\n [6.6873e-14, 1.0004e-14, 1.0904e-13, ..., 2.9099e-13, 3.3827e-13,\n 1.2056e-14],\n [3.2596e-13, 1.4336e-12, 4.5203e-13, ..., 2.3131e-13, 1.3929e-12,\n 2.6653e-13],\n ...,\n [8.1566e-12, 1.9348e-11, 2.0051e-11, ..., 3.5628e-12, 3.0083e-11,\n 4.3538e-11],\n [5.4466e-11, 8.6148e-11, 1.3296e-10, ..., 2.9526e-11, 1.3010e-10,\n 2.3886e-10],\n [7.4728e-10, 1.6575e-09, 1.9807e-09, ..., 4.2166e-10, 2.4948e-09,\n 3.9141e-09]], device='cuda:0')" }, "35": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.6292e-11, 2.1305e-12, 1.9019e-11, 1.2516e-12, 8.3804e-12, 1.2389e-11,\n 1.8295e-12, 1.5826e-12, 7.7098e-14, 1.2917e-11, 1.0477e-11, 1.8120e-11,\n 4.5005e-13, 3.8100e-13, 4.6356e-12, 3.0403e-12, 1.0029e-12, 3.9695e-11,\n 1.3547e-11, 1.7350e-13, 6.8309e-13, 5.5430e-12, 1.2799e-11, 4.9370e-13,\n 2.3911e-11, 2.8043e-12, 3.9706e-12, 4.0486e-13, 9.2279e-13, 1.8876e-12,\n 1.8840e-12, 3.1335e-11, 4.5411e-12, 2.4308e-13, 9.1695e-15, 9.3683e-12,\n 3.3337e-12, 1.3431e-12, 4.0159e-14, 1.7907e-15, 5.4407e-12, 6.8084e-13,\n 3.3245e-12, 1.5697e-12, 2.6853e-13, 3.0570e-12, 3.8709e-12, 3.9274e-12,\n 2.0737e-12, 8.8116e-12, 3.4195e-12, 9.7259e-13, 1.2691e-12, 7.4734e-13,\n 2.8035e-14, 3.8117e-14, 8.7241e-13, 7.0024e-14, 2.8070e-12, 1.1938e-13,\n 9.9138e-12, 3.3064e-12, 7.8024e-15, 6.0150e-12, 1.1633e-11, 3.9621e-12,\n 5.9405e-12, 1.1763e-12, 1.5065e-13, 9.2859e-14, 6.0865e-12, 6.4294e-13,\n 5.1159e-14, 2.5135e-12, 1.4646e-12, 3.9478e-12, 2.9036e-12, 7.5196e-13,\n 1.2454e-17, 2.7269e-12, 2.0767e-12, 1.8279e-11, 2.7971e-13, 2.5842e-12,\n 3.5155e-13, 1.7738e-12, 1.0340e-12, 2.8612e-15, 1.7594e-12, 2.2835e-13,\n 8.1674e-15, 1.3251e-12, 1.1849e-12, 3.3769e-11, 1.1126e-11, 7.7374e-12,\n 8.4295e-13, 3.9777e-11, 2.7476e-12, 5.0955e-12, 4.7358e-13, 5.4478e-13,\n 2.6832e-13, 2.9142e-12, 3.5422e-14, 1.0163e-11, 1.6688e-11, 1.8549e-12,\n 2.1063e-12, 3.1665e-13, 3.2031e-12, 3.6374e-11, 2.7516e-13, 4.1672e-11,\n 5.7802e-12, 2.5750e-11, 2.5269e-18, 2.3621e-11, 1.7206e-11, 8.5157e-12,\n 3.2590e-11, 1.4580e-13, 1.7014e-11, 6.4156e-14, 9.2857e-12, 1.5466e-11,\n 1.4017e-11, 1.0152e-11, 1.8740e-11, 2.8359e-11, 3.2614e-12, 6.3610e-12,\n 1.9157e-13, 1.0931e-11, 3.1405e-12, 2.8724e-12, 6.6417e-12, 8.9929e-12,\n 7.5560e-15, 1.7545e-12, 2.2059e-11, 8.1178e-14, 4.6396e-12, 1.7118e-12,\n 1.0716e-11, 4.4576e-11, 8.8635e-12, 1.1384e-13, 2.8113e-11, 4.2633e-13,\n 9.0670e-13, 7.3658e-12, 3.0584e-11, 2.5138e-12, 1.0800e-11, 1.6203e-11,\n 1.1932e-13, 3.0090e-14, 1.1030e-12, 5.4225e-12, 5.1732e-12, 1.4347e-12,\n 2.1598e-11, 1.1172e-11, 1.2440e-12, 1.2004e-14, 2.3971e-13, 3.3961e-14,\n 6.7812e-13, 3.0537e-12, 6.1748e-13, 3.7459e-13, 2.5860e-12, 3.1327e-12,\n 2.8515e-13, 8.0004e-14, 2.9977e-12, 6.9211e-13, 4.2983e-13, 6.1260e-14,\n 4.9947e-12, 1.4528e-14, 1.0803e-12, 4.6635e-12, 1.3277e-12, 6.1907e-13,\n 4.6262e-12, 1.5531e-12, 1.5584e-11, 5.0881e-13, 4.3458e-12, 3.5820e-13,\n 3.9362e-12, 3.9086e-11, 5.5167e-12, 1.1756e-11, 6.0132e-12, 1.7577e-11,\n 2.0103e-15, 2.1345e-13, 1.0224e-12, 1.5180e-12, 2.6171e-12, 2.1559e-11,\n 5.4123e-13, 1.6205e-12, 1.2118e-13, 1.8252e-12, 2.1024e-13, 4.8822e-12,\n 8.3547e-16, 3.2830e-12, 2.2181e-13, 2.4181e-12, 2.2249e-11, 1.1279e-11,\n 1.6569e-11, 4.4809e-12, 5.6704e-12, 1.7640e-11, 4.4797e-12, 8.5543e-14,\n 2.2681e-12, 2.0175e-11, 1.2905e-13, 2.6634e-12, 1.6128e-11, 5.7342e-12,\n 5.0584e-12, 4.8939e-12, 4.2356e-12, 1.2611e-13, 1.8602e-12, 9.8265e-13,\n 4.5586e-12, 1.2512e-12, 3.1506e-14, 2.8903e-13, 1.1671e-12, 1.2291e-13,\n 1.1306e-11, 3.1185e-12, 4.7742e-13, 3.7793e-12, 1.1575e-13, 4.3222e-12,\n 1.1609e-11, 1.4896e-12, 4.9357e-12, 1.5386e-12, 1.1738e-12, 1.2071e-11,\n 4.6883e-12, 1.3358e-11, 1.2495e-11, 2.7085e-12, 4.2971e-28, 1.8996e-29,\n 7.7498e-27, 5.4489e-28, 8.6767e-27, 3.4101e-28, 2.2369e-28, 4.7386e-27,\n 3.1053e-27, 4.5199e-27, 1.3645e-28, 1.6901e-28, 4.7399e-27, 4.2599e-27,\n 2.3484e-28, 5.1748e-28, 1.9064e-27, 1.2145e-30, 1.2051e-26, 1.9075e-27,\n 6.2769e-28, 4.2876e-27, 1.9069e-27, 1.9575e-27, 1.4313e-27, 4.5716e-28,\n 1.1232e-27, 1.2854e-28, 9.7128e-27, 1.9963e-27, 1.7341e-27, 1.5445e-27,\n 8.3598e-27, 2.5323e-27, 2.7317e-28, 9.6797e-28, 1.5382e-27, 2.1949e-29,\n 2.1128e-28, 6.6276e-29, 4.5629e-30, 2.0393e-28, 1.0098e-27, 1.4497e-29,\n 1.6562e-27, 4.8692e-28, 2.5730e-27, 1.3295e-29, 2.1044e-27, 1.8129e-29,\n 5.5476e-28, 6.3368e-32, 7.7907e-28, 3.7087e-27, 1.0761e-27, 2.0654e-28,\n 8.0572e-27, 1.0365e-27, 1.4972e-26, 1.2047e-27, 3.4610e-29, 1.9252e-28,\n 1.7477e-27, 1.0362e-27, 2.6698e-28, 1.3914e-27, 5.0923e-27, 6.6727e-27,\n 3.5535e-27, 1.4602e-28, 1.2433e-26, 9.0627e-27, 9.5241e-27, 1.5896e-26,\n 3.2887e-26, 1.0341e-27, 2.0494e-26, 4.3585e-28, 1.5094e-27, 3.0101e-28,\n 4.4360e-27, 1.7571e-26, 1.0498e-26, 6.8405e-27, 1.8200e-27, 3.2918e-27,\n 1.6019e-26, 3.6028e-26, 2.0177e-27, 5.5263e-27, 3.1811e-27, 1.6658e-26,\n 1.7478e-27, 1.0213e-28, 1.1590e-27, 1.1065e-27, 1.0291e-26, 3.4986e-29,\n 2.0981e-27, 1.5172e-29, 1.1929e-27, 5.6327e-27, 8.2812e-27, 7.0971e-28,\n 1.8722e-28, 8.1782e-28, 1.4151e-27, 8.1155e-29, 4.9848e-29, 2.3301e-28,\n 2.6936e-28, 4.6060e-29, 1.0661e-27, 1.3325e-27, 1.8849e-27, 1.7095e-27,\n 1.2158e-27, 1.6055e-29, 5.4228e-31, 3.1242e-29, 2.2127e-29, 2.2986e-27,\n 3.1870e-27, 2.3327e-27, 1.1014e-27, 2.9883e-27, 1.4812e-29, 5.0079e-27,\n 5.3317e-29, 6.1239e-28, 2.4945e-28, 7.9488e-27, 3.1772e-27, 4.9677e-27,\n 4.1329e-27, 8.7538e-29, 7.7862e-29, 9.1911e-28, 1.8401e-27, 5.6954e-27,\n 6.0203e-28, 2.8561e-28, 1.6388e-28, 2.4822e-29, 9.3432e-27, 8.7879e-28,\n 2.6412e-27, 1.9900e-28, 3.1291e-28, 2.0239e-26, 3.4405e-27, 3.9119e-29,\n 2.7922e-27, 3.3145e-27, 9.6203e-27, 5.7187e-27, 7.3250e-27, 3.5042e-27,\n 5.2882e-29, 4.1237e-26, 9.3429e-27, 1.1458e-28, 1.1301e-26, 2.8710e-27,\n 2.4987e-27, 1.4714e-28, 1.6964e-27, 8.0862e-28, 8.4230e-28, 3.3106e-27,\n 2.6477e-27, 3.1508e-31, 2.4566e-27, 2.4831e-28, 3.8229e-29, 5.8427e-28,\n 5.9826e-27, 4.2336e-27, 2.1890e-29, 3.3346e-27, 9.6800e-28, 6.9525e-28,\n 1.1651e-26, 1.1669e-26, 1.2351e-27, 6.3473e-29, 1.1924e-26, 1.1048e-26,\n 5.1515e-27, 6.5521e-28, 6.6861e-28, 2.6554e-27, 3.1139e-27, 3.4873e-27,\n 1.7161e-26, 1.2079e-27, 3.7838e-28, 5.9741e-27, 1.1531e-27, 5.7633e-27,\n 9.0955e-28, 9.8591e-28, 9.3502e-28, 4.4538e-27, 8.6901e-27, 6.2667e-27,\n 4.1744e-29, 1.5829e-28, 2.0340e-27, 8.4039e-31, 6.1687e-28, 3.8704e-27,\n 4.1310e-28, 1.1987e-27, 1.7685e-27, 3.5299e-27, 1.1029e-27, 1.7547e-27,\n 3.6318e-27, 1.9875e-27, 1.7162e-28, 3.9793e-28, 1.2677e-28, 2.3038e-28,\n 1.5658e-27, 1.0384e-27, 2.9288e-27, 1.0059e-26, 6.3423e-27, 4.3524e-27,\n 3.1149e-27, 3.7202e-27, 1.3436e-28, 7.7065e-27, 2.3323e-28, 3.5146e-28,\n 1.6579e-27, 9.6135e-28, 1.7516e-27, 3.5384e-28, 2.7147e-28, 5.4490e-32,\n 1.6444e-28, 1.3607e-28, 2.5312e-27, 4.4998e-27, 3.5006e-28, 3.0244e-27,\n 1.3307e-27, 3.9770e-27, 3.2862e-29, 1.1168e-27, 3.1830e-28, 4.7117e-27,\n 7.3069e-28, 4.2133e-28, 8.7437e-09, 1.3688e-07, 2.4094e-08, 2.2977e-08,\n 1.8180e-07, 1.6153e-07, 2.2035e-09, 1.1745e-09, 2.9444e-09, 2.9470e-08,\n 3.9033e-11, 7.8566e-09, 1.4262e-07, 4.7973e-09, 7.8937e-09, 3.6144e-08,\n 5.3062e-08, 1.0899e-08, 9.7153e-08, 3.6656e-08, 4.0655e-11, 1.1417e-10,\n 1.9717e-08, 1.2990e-07, 1.0943e-07, 3.8125e-10, 4.9293e-08, 2.6511e-09,\n 1.3870e-08, 2.8138e-09, 6.1340e-08, 5.1220e-08, 1.9852e-07, 2.1310e-09,\n 4.0615e-08, 5.8303e-08, 3.4394e-08, 3.4810e-08, 3.0701e-08, 1.2134e-09,\n 1.8801e-07, 6.1570e-10, 3.0415e-09, 5.3551e-09, 9.1049e-08, 7.7861e-09,\n 1.7027e-08, 7.1051e-09, 1.3666e-09, 1.3013e-08, 2.6085e-08, 4.6595e-09,\n 7.6086e-08, 7.1810e-08, 7.7391e-09, 5.4233e-09, 8.3919e-09, 6.0823e-10,\n 4.9138e-08, 1.8640e-08, 3.9732e-08, 3.9322e-08, 4.1401e-09, 1.0721e-09,\n 2.2134e-12, 1.0791e-10, 3.3331e-10, 9.9681e-10, 1.4376e-08, 1.0245e-09,\n 1.2802e-08, 3.3385e-08, 4.8047e-08, 7.6586e-09, 1.7356e-08, 2.1914e-10,\n 9.7875e-09, 2.8983e-07, 1.5207e-09, 1.4611e-08, 1.9616e-08, 1.3415e-07,\n 4.9236e-11, 3.8050e-08, 1.0964e-08, 5.2952e-08, 3.3807e-08, 3.9257e-08,\n 3.2077e-12, 1.2522e-07, 8.4175e-08, 7.6630e-11, 3.7698e-09, 9.0680e-08,\n 4.2118e-09, 1.7591e-08, 2.4863e-10, 6.2481e-08, 1.2030e-08, 5.8472e-08,\n 3.0477e-08, 1.0568e-07, 1.3310e-09, 2.5554e-08, 5.2393e-08, 1.9092e-07,\n 8.4427e-08, 1.4901e-10, 3.2011e-09, 3.2056e-08, 2.0036e-08, 3.7000e-09,\n 5.6451e-09, 6.6735e-09, 1.9792e-08, 2.9435e-09, 2.4938e-08, 4.9493e-10,\n 7.6599e-08, 9.6813e-10, 7.5052e-08, 8.5979e-08, 2.6492e-08, 7.8070e-10,\n 9.0539e-09, 1.0599e-09, 2.4786e-08, 5.8745e-08, 5.8657e-09, 1.5217e-09,\n 6.1077e-08, 3.0306e-09, 3.5671e-08, 2.0100e-08, 1.0355e-08, 7.0525e-11,\n 1.4389e-09, 1.5240e-07, 1.9640e-09, 1.0573e-07, 1.8257e-11, 5.2335e-08,\n 1.9830e-08, 1.9809e-08, 5.8465e-09, 7.1232e-08, 2.6796e-08, 1.0407e-08,\n 4.3197e-08, 1.3482e-10, 9.8179e-08, 6.1840e-09, 7.6138e-10, 1.0209e-08,\n 2.0185e-08, 7.9166e-08, 1.5256e-08, 3.6262e-08, 5.6247e-10, 9.1207e-08,\n 2.6667e-08, 2.7253e-08, 2.1386e-08, 1.6726e-07, 1.5233e-10, 4.7321e-09,\n 1.4513e-07, 1.1327e-08, 1.3382e-08, 6.9615e-10, 2.3189e-08, 5.9993e-08,\n 5.2130e-08, 7.9279e-08, 1.9237e-09, 2.3595e-08, 1.3769e-08, 1.2322e-08,\n 1.2593e-08, 7.9328e-09, 2.0073e-08, 5.0358e-08, 3.8677e-09, 8.5873e-08,\n 9.5899e-08, 2.3302e-08, 2.2886e-08, 2.1782e-08, 1.5754e-10, 4.6612e-09,\n 4.7472e-09, 1.0238e-08, 4.6314e-08, 5.8662e-09, 2.5525e-09, 3.5307e-08,\n 9.7430e-12, 8.5833e-08, 7.2580e-10, 9.4045e-08, 4.0011e-08, 1.1247e-07,\n 2.9031e-09, 1.9237e-08, 7.5542e-08, 2.8425e-08, 2.7598e-09, 7.2913e-08,\n 5.5552e-09, 3.5342e-08, 1.7953e-10, 5.4452e-09, 1.5904e-07, 3.4697e-08,\n 1.5962e-07, 1.7960e-08, 2.5644e-08, 6.3518e-11, 8.8698e-09, 2.2368e-08,\n 2.0568e-10, 1.5056e-09, 1.9322e-08, 6.1867e-09, 8.8891e-09, 5.0632e-09,\n 5.7367e-08, 5.7464e-09, 1.4511e-09, 4.7901e-10, 1.5861e-10, 2.7624e-10,\n 3.4613e-09, 4.7442e-08, 3.3098e-09, 6.7676e-09, 8.4584e-10, 8.0770e-08,\n 3.9607e-08, 7.6802e-08, 1.7475e-07, 3.6194e-09, 9.4587e-08, 3.8602e-08,\n 8.5721e-09, 3.2106e-07, 6.9286e-09, 3.3040e-08, 1.1078e-08, 7.9492e-09,\n 4.1353e-09, 3.5522e-08, 1.9136e-08, 9.2426e-10, 4.7868e-09, 8.2253e-08],\n device='cuda:0')" + "exp_avg_sq": "tensor([1.8943e-11, 6.0880e-13, 5.4349e-12, 3.5765e-13, 2.3948e-12, 3.5404e-12,\n 5.2278e-13, 4.5224e-13, 2.2031e-14, 3.6910e-12, 2.9938e-12, 5.1779e-12,\n 1.2861e-13, 1.0887e-13, 1.3246e-12, 8.6880e-13, 2.8659e-13, 1.1343e-11,\n 3.8710e-12, 4.9580e-14, 1.9520e-13, 1.5840e-12, 3.6573e-12, 1.4108e-13,\n 6.8328e-12, 8.0135e-13, 1.1346e-12, 1.1569e-13, 2.6369e-13, 5.3940e-13,\n 5.3837e-13, 8.9543e-12, 1.2976e-12, 6.9462e-14, 2.6203e-15, 2.6771e-12,\n 9.5264e-13, 3.8380e-13, 1.1476e-14, 5.1169e-16, 1.5547e-12, 1.9455e-13,\n 9.5000e-13, 4.4856e-13, 7.6735e-14, 8.7357e-13, 1.1061e-12, 1.1223e-12,\n 5.9256e-13, 2.5180e-12, 9.7715e-13, 2.7792e-13, 3.6265e-13, 2.1356e-13,\n 8.0111e-15, 1.0892e-14, 2.4930e-13, 2.0010e-14, 8.0212e-13, 3.4114e-14,\n 2.8329e-12, 9.4482e-13, 2.2296e-15, 1.7188e-12, 3.3241e-12, 1.1322e-12,\n 1.6975e-12, 3.3612e-13, 4.3049e-14, 2.6535e-14, 1.7393e-12, 1.8373e-13,\n 1.4619e-14, 7.1826e-13, 4.1851e-13, 1.1281e-12, 8.2974e-13, 2.1488e-13,\n 3.5587e-18, 7.7924e-13, 5.9344e-13, 5.2234e-12, 7.9930e-14, 7.3847e-13,\n 1.0046e-13, 5.0687e-13, 2.9547e-13, 8.1760e-16, 5.0275e-13, 6.5252e-14,\n 2.3339e-15, 3.7867e-13, 3.3861e-13, 9.6499e-12, 3.1794e-12, 2.2110e-12,\n 2.4088e-13, 1.1366e-11, 7.8515e-13, 1.4561e-12, 1.3533e-13, 1.5568e-13,\n 7.6674e-14, 8.3274e-13, 1.0122e-14, 2.9040e-12, 4.7688e-12, 5.3007e-13,\n 6.0189e-13, 9.0486e-14, 9.1532e-13, 1.0394e-11, 7.8628e-14, 1.1908e-11,\n 1.6517e-12, 7.3584e-12, 7.2209e-19, 6.7498e-12, 4.9169e-12, 2.4334e-12,\n 9.3127e-12, 4.1663e-14, 4.8620e-12, 1.8333e-14, 2.6535e-12, 4.4197e-12,\n 4.0054e-12, 2.9011e-12, 5.3551e-12, 8.1039e-12, 9.3198e-13, 1.8177e-12,\n 5.4742e-14, 3.1236e-12, 8.9741e-13, 8.2082e-13, 1.8979e-12, 2.5698e-12,\n 2.1592e-15, 5.0137e-13, 6.3034e-12, 2.3197e-14, 1.3258e-12, 4.8916e-13,\n 3.0621e-12, 1.2738e-11, 2.5328e-12, 3.2529e-14, 8.0335e-12, 1.2183e-13,\n 2.5910e-13, 2.1048e-12, 8.7397e-12, 7.1834e-13, 3.0863e-12, 4.6303e-12,\n 3.4096e-14, 8.5983e-15, 3.1518e-13, 1.5495e-12, 1.4783e-12, 4.0998e-13,\n 6.1718e-12, 3.1924e-12, 3.5549e-13, 3.4301e-15, 6.8498e-14, 9.7045e-15,\n 1.9378e-13, 8.7261e-13, 1.7645e-13, 1.0704e-13, 7.3898e-13, 8.9519e-13,\n 8.1484e-14, 2.2862e-14, 8.5662e-13, 1.9778e-13, 1.2283e-13, 1.7506e-14,\n 1.4273e-12, 4.1516e-15, 3.0870e-13, 1.3326e-12, 3.7939e-13, 1.7690e-13,\n 1.3220e-12, 4.4381e-13, 4.4533e-12, 1.4540e-13, 1.2418e-12, 1.0236e-13,\n 1.1248e-12, 1.1169e-11, 1.5764e-12, 3.3593e-12, 1.7183e-12, 5.0227e-12,\n 5.7447e-16, 6.0995e-14, 2.9216e-13, 4.3378e-13, 7.4785e-13, 6.1606e-12,\n 1.5466e-13, 4.6308e-13, 3.4629e-14, 5.2157e-13, 6.0078e-14, 1.3951e-12,\n 2.3874e-16, 9.3814e-13, 6.3384e-14, 6.9099e-13, 6.3577e-12, 3.2230e-12,\n 4.7347e-12, 1.2805e-12, 1.6204e-12, 5.0408e-12, 1.2801e-12, 2.4445e-14,\n 6.4812e-13, 5.7651e-12, 3.6876e-14, 7.6110e-13, 4.6088e-12, 1.6386e-12,\n 1.4455e-12, 1.3985e-12, 1.2104e-12, 3.6037e-14, 5.3157e-13, 2.8080e-13,\n 1.3027e-12, 3.5753e-13, 9.0030e-15, 8.2592e-14, 3.3352e-13, 3.5124e-14,\n 3.2309e-12, 8.9114e-13, 1.3643e-13, 1.0800e-12, 3.3078e-14, 1.2351e-12,\n 3.3174e-12, 4.2567e-13, 1.4104e-12, 4.3967e-13, 3.3542e-13, 3.4495e-12,\n 1.3397e-12, 3.8170e-12, 3.5707e-12, 7.7399e-13, 1.2279e-28, 5.4281e-30,\n 2.2146e-27, 1.5571e-28, 2.4794e-27, 9.7447e-29, 6.3920e-29, 1.3541e-27,\n 8.8735e-28, 1.2916e-27, 3.8992e-29, 4.8296e-29, 1.3545e-27, 1.2173e-27,\n 6.7107e-29, 1.4787e-28, 5.4476e-28, 3.4706e-31, 3.4437e-27, 5.4508e-28,\n 1.7937e-28, 1.2252e-27, 5.4490e-28, 5.5938e-28, 4.0901e-28, 1.3064e-28,\n 3.2096e-28, 3.6733e-29, 2.7755e-27, 5.7045e-28, 4.9553e-28, 4.4134e-28,\n 2.3889e-27, 7.2364e-28, 7.8060e-29, 2.7661e-28, 4.3956e-28, 6.2721e-30,\n 6.0376e-29, 1.8939e-29, 1.3039e-30, 5.8274e-29, 2.8857e-28, 4.1426e-30,\n 4.7328e-28, 1.3914e-28, 7.3526e-28, 3.7991e-30, 6.0136e-28, 5.1804e-30,\n 1.5853e-28, 1.8108e-32, 2.2263e-28, 1.0598e-27, 3.0751e-28, 5.9020e-29,\n 2.3024e-27, 2.9620e-28, 4.2783e-27, 3.4426e-28, 9.8902e-30, 5.5013e-29,\n 4.9942e-28, 2.9611e-28, 7.6291e-29, 3.9760e-28, 1.4552e-27, 1.9068e-27,\n 1.0155e-27, 4.1728e-29, 3.5528e-27, 2.5897e-27, 2.7216e-27, 4.5425e-27,\n 9.3976e-27, 2.9550e-28, 5.8564e-27, 1.2455e-28, 4.3133e-28, 8.6016e-29,\n 1.2676e-27, 5.0211e-27, 3.0000e-27, 1.9547e-27, 5.2008e-28, 9.4065e-28,\n 4.5777e-27, 1.0295e-26, 5.7657e-28, 1.5792e-27, 9.0902e-28, 4.7601e-27,\n 4.9945e-28, 2.9185e-29, 3.3120e-28, 3.1620e-28, 2.9406e-27, 9.9974e-30,\n 5.9955e-28, 4.3356e-30, 3.4088e-28, 1.6096e-27, 2.3664e-27, 2.0281e-28,\n 5.3500e-29, 2.3370e-28, 4.0437e-28, 2.3191e-29, 1.4244e-29, 6.6585e-29,\n 7.6972e-29, 1.3162e-29, 3.0464e-28, 3.8078e-28, 5.3862e-28, 4.8851e-28,\n 3.4742e-28, 4.5877e-30, 1.5496e-31, 8.9275e-30, 6.3231e-30, 6.5683e-28,\n 9.1070e-28, 6.6657e-28, 3.1473e-28, 8.5394e-28, 4.2325e-30, 1.4310e-27,\n 1.5236e-29, 1.7500e-28, 7.1282e-29, 2.2714e-27, 9.0791e-28, 1.4196e-27,\n 1.1810e-27, 2.5015e-29, 2.2250e-29, 2.6264e-28, 5.2583e-28, 1.6275e-27,\n 1.7203e-28, 8.1616e-29, 4.6830e-29, 7.0932e-30, 2.6699e-27, 2.5112e-28,\n 7.5474e-28, 5.6867e-29, 8.9415e-29, 5.7835e-27, 9.8314e-28, 1.1179e-29,\n 7.9791e-28, 9.4715e-28, 2.7491e-27, 1.6342e-27, 2.0932e-27, 1.0013e-27,\n 1.5111e-29, 1.1784e-26, 2.6698e-27, 3.2743e-29, 3.2294e-27, 8.2041e-28,\n 7.1402e-28, 4.2047e-29, 4.8475e-28, 2.3107e-28, 2.4069e-28, 9.4603e-28,\n 7.5661e-28, 9.0036e-32, 7.0199e-28, 7.0955e-29, 1.0924e-29, 1.6696e-28,\n 1.7096e-27, 1.2098e-27, 6.2551e-30, 9.5288e-28, 2.7661e-28, 1.9867e-28,\n 3.3293e-27, 3.3345e-27, 3.5294e-28, 1.8138e-29, 3.4073e-27, 3.1570e-27,\n 1.4721e-27, 1.8723e-28, 1.9106e-28, 7.5879e-28, 8.8983e-28, 9.9653e-28,\n 4.9038e-27, 3.4517e-28, 1.0812e-28, 1.7071e-27, 3.2952e-28, 1.6469e-27,\n 2.5991e-28, 2.8173e-28, 2.6719e-28, 1.2727e-27, 2.4833e-27, 1.7908e-27,\n 1.1929e-29, 4.5231e-29, 5.8124e-28, 2.4015e-31, 1.7628e-28, 1.1060e-27,\n 1.1805e-28, 3.4253e-28, 5.0537e-28, 1.0087e-27, 3.1515e-28, 5.0143e-28,\n 1.0378e-27, 5.6795e-28, 4.9043e-29, 1.1371e-28, 3.6224e-29, 6.5833e-29,\n 4.4744e-28, 2.9672e-28, 8.3693e-28, 2.8744e-27, 1.8124e-27, 1.2437e-27,\n 8.9012e-28, 1.0631e-27, 3.8396e-29, 2.2022e-27, 6.6647e-29, 1.0043e-28,\n 4.7376e-28, 2.7471e-28, 5.0053e-28, 1.0111e-28, 7.7574e-29, 1.5571e-32,\n 4.6990e-29, 3.8884e-29, 7.2331e-28, 1.2859e-27, 1.0003e-28, 8.6424e-28,\n 3.8025e-28, 1.1365e-27, 9.3906e-30, 3.1913e-28, 9.0955e-29, 1.3464e-27,\n 2.0880e-28, 1.2040e-28, 2.4986e-09, 3.9114e-08, 6.8851e-09, 6.5659e-09,\n 5.1950e-08, 4.6159e-08, 6.2966e-10, 3.3561e-10, 8.4139e-10, 8.4212e-09,\n 1.1154e-11, 2.2451e-09, 4.0754e-08, 1.3709e-09, 2.2557e-09, 1.0328e-08,\n 1.5163e-08, 3.1146e-09, 2.7762e-08, 1.0475e-08, 1.1618e-11, 3.2626e-11,\n 5.6344e-09, 3.7119e-08, 3.1271e-08, 1.0894e-10, 1.4086e-08, 7.5759e-10,\n 3.9633e-09, 8.0407e-10, 1.7528e-08, 1.4636e-08, 5.6730e-08, 6.0895e-10,\n 1.1606e-08, 1.6661e-08, 9.8282e-09, 9.9473e-09, 8.7729e-09, 3.4675e-10,\n 5.3726e-08, 1.7594e-10, 8.6912e-10, 1.5302e-09, 2.6018e-08, 2.2249e-09,\n 4.8656e-09, 2.0304e-09, 3.9053e-10, 3.7186e-09, 7.4541e-09, 1.3315e-09,\n 2.1742e-08, 2.0520e-08, 2.2115e-09, 1.5498e-09, 2.3981e-09, 1.7381e-10,\n 1.4041e-08, 5.3266e-09, 1.1354e-08, 1.1237e-08, 1.1831e-09, 3.0636e-10,\n 6.3250e-13, 3.0836e-11, 9.5246e-11, 2.8485e-10, 4.1080e-09, 2.9276e-10,\n 3.6582e-09, 9.5400e-09, 1.3730e-08, 2.1885e-09, 4.9595e-09, 6.2621e-11,\n 2.7969e-09, 8.2821e-08, 4.3456e-10, 4.1751e-09, 5.6054e-09, 3.8335e-08,\n 1.4070e-11, 1.0873e-08, 3.1329e-09, 1.5132e-08, 9.6606e-09, 1.1218e-08,\n 9.1663e-13, 3.5782e-08, 2.4054e-08, 2.1898e-11, 1.0773e-09, 2.5912e-08,\n 1.2036e-09, 5.0268e-09, 7.1047e-11, 1.7855e-08, 3.4377e-09, 1.6709e-08,\n 8.7089e-09, 3.0200e-08, 3.8035e-10, 7.3023e-09, 1.4972e-08, 5.4558e-08,\n 2.4126e-08, 4.2581e-11, 9.1474e-10, 9.1602e-09, 5.7255e-09, 1.0573e-09,\n 1.6131e-09, 1.9070e-09, 5.6556e-09, 8.4113e-10, 7.1263e-09, 1.4143e-10,\n 2.1889e-08, 2.7665e-10, 2.1447e-08, 2.4569e-08, 7.5702e-09, 2.2309e-10,\n 2.5872e-09, 3.0288e-10, 7.0827e-09, 1.6787e-08, 1.6762e-09, 4.3483e-10,\n 1.7453e-08, 8.6602e-10, 1.0193e-08, 5.7436e-09, 2.9590e-09, 2.0153e-11,\n 4.1118e-10, 4.3550e-08, 5.6122e-10, 3.0214e-08, 5.2171e-12, 1.4955e-08,\n 5.6666e-09, 5.6606e-09, 1.6707e-09, 2.0355e-08, 7.6573e-09, 2.9739e-09,\n 1.2344e-08, 3.8526e-11, 2.8055e-08, 1.7671e-09, 2.1757e-10, 2.9173e-09,\n 5.7679e-09, 2.2622e-08, 4.3595e-09, 1.0362e-08, 1.6073e-10, 2.6063e-08,\n 7.6202e-09, 7.7877e-09, 6.1113e-09, 4.7795e-08, 4.3530e-11, 1.3522e-09,\n 4.1473e-08, 3.2369e-09, 3.8239e-09, 1.9893e-10, 6.6263e-09, 1.7144e-08,\n 1.4897e-08, 2.2655e-08, 5.4972e-10, 6.7426e-09, 3.9347e-09, 3.5210e-09,\n 3.5985e-09, 2.2669e-09, 5.7359e-09, 1.4390e-08, 1.1052e-09, 2.4539e-08,\n 2.7404e-08, 6.6588e-09, 6.5398e-09, 6.2245e-09, 4.5019e-11, 1.3320e-09,\n 1.3565e-09, 2.9256e-09, 1.3235e-08, 1.6763e-09, 7.2938e-10, 1.0089e-08,\n 2.7841e-12, 2.4527e-08, 2.0740e-10, 2.6874e-08, 1.1433e-08, 3.2139e-08,\n 8.2959e-10, 5.4973e-09, 2.1587e-08, 8.1227e-09, 7.8862e-10, 2.0835e-08,\n 1.5874e-09, 1.0099e-08, 5.1302e-11, 1.5560e-09, 4.5448e-08, 9.9150e-09,\n 4.5614e-08, 5.1323e-09, 7.3280e-09, 1.8151e-11, 2.5346e-09, 6.3917e-09,\n 5.8774e-11, 4.3025e-10, 5.5214e-09, 1.7679e-09, 2.5401e-09, 1.4469e-09,\n 1.6393e-08, 1.6421e-09, 4.1467e-10, 1.3688e-10, 4.5325e-11, 7.8938e-11,\n 9.8910e-10, 1.3557e-08, 9.4581e-10, 1.9339e-09, 2.4170e-10, 2.3081e-08,\n 1.1318e-08, 2.1947e-08, 4.9937e-08, 1.0343e-09, 2.7029e-08, 1.1031e-08,\n 2.4495e-09, 9.1746e-08, 1.9799e-09, 9.4413e-09, 3.1655e-09, 2.2715e-09,\n 1.1817e-09, 1.0151e-08, 5.4682e-09, 2.6411e-10, 1.3679e-09, 2.3504e-08],\n device='cuda:0')" }, "36": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.9288e-09, 1.6355e-12, 1.6355e-08, ..., 4.2355e-11, 4.2639e-10,\n 1.1787e-09],\n [3.9699e-09, 4.1374e-13, 2.3198e-08, ..., 6.6393e-11, 6.3007e-10,\n 1.6640e-09],\n [2.2417e-09, 2.8515e-13, 1.3111e-08, ..., 3.4642e-11, 3.7315e-10,\n 8.4782e-10],\n ...,\n [1.6963e-09, 2.4212e-13, 1.0288e-08, ..., 1.5912e-11, 2.5667e-10,\n 6.8239e-10],\n [2.2285e-10, 5.9601e-13, 1.2275e-09, ..., 4.2694e-12, 2.4129e-11,\n 9.2513e-11],\n [1.1791e-09, 1.4208e-12, 6.9190e-09, ..., 2.0137e-11, 1.6114e-10,\n 4.7349e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[8.3694e-10, 4.6735e-13, 4.6736e-09, ..., 1.2103e-11, 1.2184e-10,\n 3.3681e-10],\n [1.1344e-09, 1.1823e-13, 6.6290e-09, ..., 1.8972e-11, 1.8005e-10,\n 4.7550e-10],\n [6.4060e-10, 8.1485e-14, 3.7466e-09, ..., 9.8993e-12, 1.0663e-10,\n 2.4227e-10],\n ...,\n [4.8473e-10, 6.9188e-14, 2.9398e-09, ..., 4.5469e-12, 7.3345e-11,\n 1.9500e-10],\n [6.3681e-11, 1.7031e-13, 3.5078e-10, ..., 1.2200e-12, 6.8951e-12,\n 2.6436e-11],\n [3.3694e-10, 4.0601e-13, 1.9772e-09, ..., 5.7542e-12, 4.6046e-11,\n 1.3530e-10]], device='cuda:0')" }, "37": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0596e-07, 1.4741e-07, 7.8830e-08, 4.1853e-07, 1.2702e-09, 1.2139e-08,\n 2.2231e-07, 4.1743e-07, 3.3763e-07, 1.6813e-07, 7.2874e-09, 3.5106e-07,\n 1.7343e-08, 8.6488e-10, 4.5652e-08, 4.3618e-10, 1.9068e-07, 3.6110e-07,\n 1.0099e-07, 2.1574e-08, 3.9319e-08, 2.8939e-08, 1.3776e-07, 1.7660e-09,\n 4.5904e-09, 3.8951e-08, 4.0163e-09, 5.8721e-08, 2.7828e-08, 8.7366e-08,\n 1.9104e-10, 4.6882e-08, 4.7438e-08, 1.6522e-08, 2.1271e-09, 9.3008e-08,\n 3.1847e-09, 5.3480e-07, 8.9682e-08, 6.3826e-09, 2.0147e-07, 1.3124e-08,\n 3.8563e-08, 3.4871e-09, 7.0367e-08, 1.2068e-07, 1.5154e-09, 1.5765e-10,\n 9.8998e-08, 8.9946e-08, 1.2849e-07, 5.3552e-09, 5.5843e-08, 2.0931e-07,\n 2.6739e-09, 2.3388e-08, 2.1157e-07, 8.3936e-10, 9.7088e-09, 1.7430e-08,\n 7.3168e-08, 5.7269e-08, 1.1782e-08, 1.1194e-07, 7.0376e-10, 1.7645e-07,\n 2.4034e-08, 4.5948e-08, 5.1739e-08, 1.8945e-08, 8.9005e-09, 1.7437e-07,\n 2.1365e-07, 4.4761e-07, 2.8885e-09, 3.4947e-10, 1.7626e-08, 1.5049e-07,\n 2.2804e-09, 4.3280e-10, 3.5377e-08, 2.2432e-09, 8.9692e-08, 1.5407e-08,\n 1.6953e-08, 8.7634e-09, 1.7124e-08, 2.4619e-08, 2.5380e-08, 2.9609e-07,\n 2.5804e-10, 1.7603e-07, 7.2999e-08, 1.9614e-07, 4.7801e-07, 3.0467e-07,\n 3.0939e-08, 4.1712e-07, 1.4994e-08, 5.2718e-09, 1.8566e-07, 6.3825e-11,\n 8.0936e-08, 4.8897e-08, 4.6095e-09, 1.2937e-07, 7.7751e-11, 7.6186e-07,\n 1.5784e-07, 1.6375e-07, 5.7869e-10, 4.7365e-09, 1.4668e-08, 1.4632e-07,\n 4.6125e-08, 1.2296e-07, 2.6002e-10, 1.5198e-07, 1.9111e-07, 7.6615e-08,\n 2.5551e-07, 1.0094e-07, 7.8895e-08, 1.1307e-08, 5.3453e-07, 2.8031e-07,\n 7.1245e-08, 2.9443e-08, 6.1040e-08, 2.1726e-10, 6.3563e-07, 6.1775e-08,\n 8.0248e-10, 2.3698e-10, 4.1287e-07, 1.5926e-07, 2.3981e-08, 1.0538e-08,\n 9.7446e-10, 3.1907e-10, 2.6321e-07, 5.4747e-10, 3.9494e-07, 7.6848e-09,\n 8.9159e-09, 1.8900e-08, 1.5727e-07, 1.3191e-07, 3.8310e-08, 4.6134e-10,\n 7.5958e-09, 2.4237e-09, 2.8762e-10, 2.6005e-07, 2.3010e-08, 3.4115e-07,\n 8.1853e-09, 1.9124e-07, 2.5875e-08, 2.5916e-07, 6.7748e-08, 1.1027e-08,\n 1.1634e-07, 1.0121e-07, 1.4043e-07, 3.0949e-07, 6.5025e-07, 3.0722e-10,\n 1.2201e-09, 5.9169e-08, 3.4215e-08, 5.2092e-10, 2.0982e-07, 2.7008e-08,\n 3.0335e-07, 3.7829e-10, 1.4176e-07, 1.5819e-08, 6.8252e-08, 6.8496e-08,\n 7.3026e-07, 7.2711e-08, 4.6951e-08, 6.9802e-11, 1.3680e-08, 2.1204e-07,\n 8.6614e-10, 7.7468e-08, 1.9736e-07, 1.7585e-08, 2.0165e-07, 1.7534e-08,\n 5.0833e-08, 4.6529e-08, 3.0745e-07, 1.6453e-07, 1.9358e-07, 1.4210e-09,\n 6.1668e-08, 3.9495e-07, 4.2227e-10, 3.6633e-08, 1.8884e-08, 2.7691e-07,\n 2.7054e-07, 3.8696e-08, 1.2799e-07, 2.6403e-08, 1.8913e-09, 7.2143e-10,\n 8.5678e-10, 4.3086e-08, 2.2200e-07, 1.9759e-07, 7.1302e-08, 2.4804e-07,\n 3.0127e-08, 1.4903e-07, 1.0856e-09, 3.9105e-09, 7.4674e-11, 2.1048e-08,\n 2.3347e-07, 1.7399e-08, 5.8677e-09, 1.2017e-07, 3.3827e-08, 1.0563e-07,\n 7.5115e-09, 1.4900e-07, 2.0852e-09, 8.1531e-08, 2.5342e-07, 1.0690e-09,\n 2.1102e-07, 4.7351e-09, 5.0106e-08, 1.8808e-10, 3.9786e-07, 8.0190e-09,\n 3.6470e-08, 8.2328e-09, 9.6320e-09, 5.6418e-08, 8.3028e-08, 5.7536e-08,\n 2.0799e-07, 7.9092e-08, 3.3670e-07, 3.2353e-08, 1.7512e-07, 3.7963e-12,\n 1.3803e-08, 6.5032e-08, 8.4063e-09, 4.4746e-08], device='cuda:0')" + "exp_avg_sq": "tensor([3.0278e-08, 4.2122e-08, 2.2526e-08, 1.1960e-07, 3.6297e-10, 3.4689e-09,\n 6.3526e-08, 1.1928e-07, 9.6481e-08, 4.8045e-08, 2.0824e-09, 1.0032e-07,\n 4.9560e-09, 2.4715e-10, 1.3045e-08, 1.2464e-10, 5.4488e-08, 1.0319e-07,\n 2.8858e-08, 6.1650e-09, 1.1236e-08, 8.2696e-09, 3.9366e-08, 5.0464e-10,\n 1.3117e-09, 1.1131e-08, 1.1477e-09, 1.6780e-08, 7.9520e-09, 2.4966e-08,\n 5.4590e-11, 1.3397e-08, 1.3556e-08, 4.7212e-09, 6.0783e-10, 2.6578e-08,\n 9.1005e-10, 1.5282e-07, 2.5627e-08, 1.8239e-09, 5.7572e-08, 3.7503e-09,\n 1.1020e-08, 9.9646e-10, 2.0108e-08, 3.4486e-08, 4.3303e-10, 4.5049e-11,\n 2.8289e-08, 2.5703e-08, 3.6717e-08, 1.5303e-09, 1.5958e-08, 5.9812e-08,\n 7.6409e-10, 6.6833e-09, 6.0457e-08, 2.3985e-10, 2.7744e-09, 4.9809e-09,\n 2.0908e-08, 1.6365e-08, 3.3667e-09, 3.1989e-08, 2.0111e-10, 5.0423e-08,\n 6.8678e-09, 1.3130e-08, 1.4785e-08, 5.4138e-09, 2.5434e-09, 4.9828e-08,\n 6.1051e-08, 1.2791e-07, 8.2542e-10, 9.9865e-11, 5.0368e-09, 4.3005e-08,\n 6.5166e-10, 1.2368e-10, 1.0109e-08, 6.4100e-10, 2.5630e-08, 4.4027e-09,\n 4.8446e-09, 2.5042e-09, 4.8932e-09, 7.0352e-09, 7.2524e-09, 8.4610e-08,\n 7.3736e-11, 5.0303e-08, 2.0860e-08, 5.6048e-08, 1.3659e-07, 8.7062e-08,\n 8.8410e-09, 1.1919e-07, 4.2847e-09, 1.5065e-09, 5.3054e-08, 1.8238e-11,\n 2.3128e-08, 1.3973e-08, 1.3172e-09, 3.6967e-08, 2.2218e-11, 2.1771e-07,\n 4.5104e-08, 4.6792e-08, 1.6537e-10, 1.3535e-09, 4.1915e-09, 4.1814e-08,\n 1.3181e-08, 3.5137e-08, 7.4303e-11, 4.3430e-08, 5.4611e-08, 2.1893e-08,\n 7.3014e-08, 2.8844e-08, 2.2545e-08, 3.2309e-09, 1.5275e-07, 8.0100e-08,\n 2.0359e-08, 8.4137e-09, 1.7443e-08, 6.2083e-11, 1.8164e-07, 1.7653e-08,\n 2.2931e-10, 6.7719e-11, 1.1798e-07, 4.5510e-08, 6.8527e-09, 3.0112e-09,\n 2.7846e-10, 9.1177e-11, 7.5215e-08, 1.5644e-10, 1.1286e-07, 2.1960e-09,\n 2.5478e-09, 5.4009e-09, 4.4940e-08, 3.7695e-08, 1.0947e-08, 1.3183e-10,\n 2.1706e-09, 6.9260e-10, 8.2188e-11, 7.4310e-08, 6.5752e-09, 9.7488e-08,\n 2.3390e-09, 5.4649e-08, 7.3940e-09, 7.4058e-08, 1.9360e-08, 3.1512e-09,\n 3.3246e-08, 2.8923e-08, 4.0128e-08, 8.8438e-08, 1.8581e-07, 8.7791e-11,\n 3.4864e-10, 1.6908e-08, 9.7773e-09, 1.4886e-10, 5.9959e-08, 7.7176e-09,\n 8.6685e-08, 1.0810e-10, 4.0510e-08, 4.5204e-09, 1.9503e-08, 1.9573e-08,\n 2.0868e-07, 2.0778e-08, 1.3417e-08, 1.9946e-11, 3.9092e-09, 6.0593e-08,\n 2.4750e-10, 2.2137e-08, 5.6398e-08, 5.0250e-09, 5.7622e-08, 5.0104e-09,\n 1.4526e-08, 1.3296e-08, 8.7857e-08, 4.7014e-08, 5.5317e-08, 4.0607e-10,\n 1.7622e-08, 1.1286e-07, 1.2067e-10, 1.0468e-08, 5.3961e-09, 7.9129e-08,\n 7.7309e-08, 1.1058e-08, 3.6575e-08, 7.5450e-09, 5.4045e-10, 2.0616e-10,\n 2.4483e-10, 1.2312e-08, 6.3438e-08, 5.6463e-08, 2.0375e-08, 7.0879e-08,\n 8.6092e-09, 4.2586e-08, 3.1023e-10, 1.1175e-09, 2.1339e-11, 6.0146e-09,\n 6.6716e-08, 4.9719e-09, 1.6767e-09, 3.4338e-08, 9.6663e-09, 3.0184e-08,\n 2.1465e-09, 4.2577e-08, 5.9586e-10, 2.3298e-08, 7.2417e-08, 3.0546e-10,\n 6.0301e-08, 1.3531e-09, 1.4318e-08, 5.3745e-11, 1.1369e-07, 2.2915e-09,\n 1.0421e-08, 2.3526e-09, 2.7524e-09, 1.6122e-08, 2.3726e-08, 1.6441e-08,\n 5.9434e-08, 2.2601e-08, 9.6215e-08, 9.2450e-09, 5.0041e-08, 1.0848e-12,\n 3.9442e-09, 1.8584e-08, 2.4022e-09, 1.2787e-08], device='cuda:0')" }, "38": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.4134e-07, 1.9624e-07, 2.8089e-07, ..., 3.5777e-08, 2.1838e-07,\n 4.0326e-07],\n [7.6546e-08, 1.9897e-08, 2.9788e-08, ..., 3.9668e-09, 2.2049e-08,\n 3.9978e-08],\n [8.3387e-08, 2.1498e-08, 3.1037e-08, ..., 3.8195e-09, 2.4751e-08,\n 4.6237e-08],\n [8.7365e-08, 2.4120e-08, 3.2844e-08, ..., 4.1428e-09, 2.6082e-08,\n 4.8425e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.1184e-07, 5.6076e-08, 8.0267e-08, ..., 1.0224e-08, 6.2404e-08,\n 1.1523e-07],\n [2.1874e-08, 5.6856e-09, 8.5121e-09, ..., 1.1335e-09, 6.3006e-09,\n 1.1424e-08],\n [2.3828e-08, 6.1433e-09, 8.8690e-09, ..., 1.0915e-09, 7.0728e-09,\n 1.3213e-08],\n [2.4965e-08, 6.8926e-09, 9.3854e-09, ..., 1.1838e-09, 7.4531e-09,\n 1.3838e-08]], device='cuda:0')" }, "39": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8590e-05, 1.9155e-06, 2.0726e-06, 2.2140e-06], device='cuda:0')" + "exp_avg_sq": "tensor([5.3123e-06, 5.4737e-07, 5.9227e-07, 6.3267e-07], device='cuda:0')" }, "40": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.4134e-07, 1.9624e-07, 2.8089e-07, ..., 3.5777e-08, 2.1838e-07,\n 4.0326e-07],\n [7.6546e-08, 1.9897e-08, 2.9788e-08, ..., 3.9668e-09, 2.2049e-08,\n 3.9978e-08],\n [8.3387e-08, 2.1498e-08, 3.1037e-08, ..., 3.8195e-09, 2.4751e-08,\n 4.6237e-08],\n [8.7365e-08, 2.4120e-08, 3.2844e-08, ..., 4.1428e-09, 2.6082e-08,\n 4.8425e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.1184e-07, 5.6076e-08, 8.0267e-08, ..., 1.0224e-08, 6.2404e-08,\n 1.1523e-07],\n [2.1874e-08, 5.6856e-09, 8.5121e-09, ..., 1.1335e-09, 6.3006e-09,\n 1.1424e-08],\n [2.3828e-08, 6.1433e-09, 8.8690e-09, ..., 1.0915e-09, 7.0728e-09,\n 1.3213e-08],\n [2.4965e-08, 6.8926e-09, 9.3854e-09, ..., 1.1839e-09, 7.4531e-09,\n 1.3838e-08]], device='cuda:0')" }, "41": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8590e-05, 1.9155e-06, 2.0726e-06, 2.2140e-06], device='cuda:0')" + "exp_avg_sq": "tensor([5.3123e-06, 5.4737e-07, 5.9227e-07, 6.3267e-07], device='cuda:0')" }, "42": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.4134e-07, 1.9624e-07, 2.8089e-07, ..., 3.5777e-08, 2.1838e-07,\n 4.0326e-07],\n [7.6546e-08, 1.9897e-08, 2.9788e-08, ..., 3.9668e-09, 2.2049e-08,\n 3.9978e-08],\n [8.3387e-08, 2.1498e-08, 3.1037e-08, ..., 3.8195e-09, 2.4751e-08,\n 4.6237e-08],\n [8.7365e-08, 2.4120e-08, 3.2844e-08, ..., 4.1428e-09, 2.6082e-08,\n 4.8425e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.1184e-07, 5.6076e-08, 8.0267e-08, ..., 1.0224e-08, 6.2404e-08,\n 1.1523e-07],\n [2.1874e-08, 5.6856e-09, 8.5121e-09, ..., 1.1335e-09, 6.3006e-09,\n 1.1424e-08],\n [2.3828e-08, 6.1433e-09, 8.8690e-09, ..., 1.0915e-09, 7.0728e-09,\n 1.3213e-08],\n [2.4965e-08, 6.8926e-09, 9.3854e-09, ..., 1.1838e-09, 7.4531e-09,\n 1.3838e-08]], device='cuda:0')" }, "43": { - "step": "tensor(2504.)", + "step": "tensor(3756.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8590e-05, 1.9155e-06, 2.0726e-06, 2.2140e-06], device='cuda:0')" + "exp_avg_sq": "tensor([5.3123e-06, 5.4737e-07, 5.9227e-07, 6.3267e-07], device='cuda:0')" }, "8": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 9.5150e-07, -3.8557e-06, 0.0000e+00, ..., -4.2832e-06,\n 4.4121e-06, 1.9494e-06],\n [ 7.1172e-07, -1.6450e-06, 0.0000e+00, ..., -8.8052e-07,\n -3.8274e-06, -1.6891e-06],\n [-1.8110e-06, 3.2072e-07, 0.0000e+00, ..., -2.7184e-07,\n -4.4957e-06, -1.4556e-06],\n ...,\n [ 6.7636e-08, -2.5984e-07, 0.0000e+00, ..., 3.8079e-07,\n 2.7812e-07, 1.9362e-06],\n [ 1.1224e-06, 4.5980e-08, 0.0000e+00, ..., -5.3150e-07,\n 7.8061e-07, -7.7626e-07],\n [-2.8625e-06, 4.3572e-07, 0.0000e+00, ..., 7.9463e-07,\n 3.0544e-08, 1.9235e-09]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.1393e-11, 3.1440e-11, 0.0000e+00, ..., 1.2101e-10, 1.8385e-10,\n 6.7095e-11],\n [1.0556e-10, 1.4296e-10, 0.0000e+00, ..., 2.0345e-10, 1.3139e-09,\n 3.1743e-11],\n [4.6596e-10, 5.7350e-11, 0.0000e+00, ..., 1.3298e-10, 4.8543e-10,\n 1.8508e-10],\n ...,\n [1.4537e-10, 5.4636e-11, 0.0000e+00, ..., 5.9882e-11, 2.3956e-10,\n 7.1686e-11],\n [2.3094e-10, 2.3896e-10, 0.0000e+00, ..., 3.0046e-10, 1.6661e-10,\n 1.9870e-11],\n [6.5843e-11, 2.6893e-11, 0.0000e+00, ..., 7.8340e-11, 8.1009e-11,\n 1.3380e-10]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-4.4523e-06, 1.1523e-06, 0.0000e+00, ..., 3.5463e-07,\n -1.4896e-06, 6.8977e-07],\n [-2.0151e-07, 1.6960e-06, 0.0000e+00, ..., -2.3096e-07,\n -6.8775e-07, -3.3926e-07],\n [ 1.2007e-06, 7.4930e-07, 0.0000e+00, ..., -7.8574e-09,\n -5.8041e-08, 1.2709e-06],\n ...,\n [ 6.1018e-07, 1.3851e-06, 0.0000e+00, ..., -1.8327e-07,\n -1.4359e-06, 1.8392e-09],\n [-5.3767e-07, 9.5174e-07, 0.0000e+00, ..., 1.5572e-07,\n -1.4112e-06, 1.0046e-07],\n [-1.2861e-07, 1.6370e-07, 0.0000e+00, ..., -1.8408e-07,\n 2.3764e-07, 3.4351e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.2666e-11, 2.4534e-11, 0.0000e+00, ..., 1.0180e-10, 1.3348e-10,\n 9.1413e-11],\n [5.9084e-11, 7.7246e-11, 0.0000e+00, ..., 8.7205e-11, 8.4473e-10,\n 1.9648e-11],\n [1.9848e-10, 2.7334e-11, 0.0000e+00, ..., 5.7313e-11, 3.1346e-10,\n 1.1452e-10],\n ...,\n [9.5413e-11, 5.8716e-11, 0.0000e+00, ..., 3.6664e-11, 1.8609e-10,\n 2.3802e-11],\n [1.1595e-10, 1.2614e-10, 0.0000e+00, ..., 1.6966e-10, 2.0447e-10,\n 7.5163e-12],\n [6.5990e-11, 2.3642e-11, 0.0000e+00, ..., 4.4316e-11, 3.2325e-11,\n 3.8416e-11]], device='cuda:0')" }, "9": { - "step": "tensor(1252.)", - "exp_avg": "tensor([ 3.5455e-05, 1.1664e-05, -7.1505e-05, ..., -8.4824e-06,\n -1.8059e-05, 4.3417e-07], device='cuda:0')", - "exp_avg_sq": "tensor([1.4199e-08, 3.8053e-08, 2.7986e-08, ..., 1.7082e-08, 1.9369e-08,\n 6.2494e-09], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([-1.9333e-06, -4.2817e-06, 1.0080e-05, ..., -4.0209e-06,\n -9.4793e-06, 1.8024e-06], device='cuda:0')", + "exp_avg_sq": "tensor([8.8601e-09, 2.0776e-08, 1.4356e-08, ..., 1.3059e-08, 1.3017e-08,\n 4.9354e-09], device='cuda:0')" }, "10": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-6.4523e-07, 3.1645e-07, -4.4169e-07, ..., 3.6505e-08,\n 5.2030e-07, 5.2288e-10],\n [-7.5371e-08, 5.2285e-07, 1.9666e-07, ..., -1.6403e-06,\n -6.7216e-07, 3.5034e-08],\n [-7.4451e-07, -8.2593e-07, -4.5041e-07, ..., -3.8111e-07,\n -2.1297e-07, -4.1838e-07],\n ...,\n [-1.9955e-06, -1.9980e-08, 6.7451e-07, ..., 6.9857e-07,\n 1.1548e-06, 2.4766e-07],\n [ 1.5569e-06, 5.5988e-07, 2.6527e-07, ..., 1.7804e-06,\n -7.9711e-08, 5.6187e-07],\n [ 2.0127e-07, 1.6426e-06, -8.8116e-07, ..., 1.2192e-06,\n -3.6353e-07, 4.3646e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2961e-11, 2.8509e-11, 2.0315e-11, ..., 9.4200e-12, 1.9704e-11,\n 3.5367e-11],\n [1.0941e-11, 5.4378e-11, 2.0592e-11, ..., 2.0660e-11, 1.7938e-11,\n 1.1181e-11],\n [1.6481e-11, 4.8725e-11, 2.3653e-11, ..., 2.5206e-11, 2.5710e-11,\n 2.2670e-11],\n ...,\n [3.8730e-11, 5.8155e-11, 2.8541e-11, ..., 2.6765e-11, 2.1327e-11,\n 7.1015e-11],\n [2.3826e-11, 4.7132e-11, 2.2704e-11, ..., 2.4364e-11, 1.8158e-11,\n 1.4312e-11],\n [1.7663e-11, 4.7892e-11, 2.9680e-11, ..., 4.1836e-11, 2.4641e-11,\n 1.7300e-11]], device='cuda:0')" + }, + "11": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-9.5452e-07, -1.3101e-06, 0.0000e+00, ..., -3.3857e-06,\n 5.9020e-06, -1.4618e-07],\n [ 3.7597e-06, -3.0803e-06, 6.4780e-21, ..., -9.9085e-07,\n 5.6761e-07, 1.9089e-06],\n [-1.7598e-06, 1.0675e-07, 0.0000e+00, ..., 2.9983e-07,\n 7.6658e-07, -2.6469e-08],\n ...,\n [-1.4907e-05, -2.7848e-08, 5.6052e-45, ..., -1.9234e-07,\n 4.3957e-07, -3.8273e-07],\n [-2.3764e-06, 3.1821e-07, 0.0000e+00, ..., 1.0096e-06,\n -9.1933e-07, -3.8665e-08],\n [ 3.4229e-07, 1.3795e-06, -2.6979e-20, ..., -3.6340e-06,\n 3.2684e-07, 1.6937e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3583e-10, 2.2402e-10, 0.0000e+00, ..., 1.3020e-09, 5.5486e-10,\n 1.4219e-10],\n [4.4909e-10, 3.4460e-11, 1.0823e-16, ..., 6.0535e-11, 2.2124e-10,\n 2.3886e-10],\n [1.6617e-10, 8.5684e-11, 0.0000e+00, ..., 4.4212e-11, 2.7147e-10,\n 8.9163e-12],\n ...,\n [1.2742e-10, 7.2834e-12, 5.0207e-17, ..., 1.2851e-11, 6.7411e-11,\n 1.8696e-11],\n [1.6373e-10, 4.2294e-11, 0.0000e+00, ..., 4.6947e-10, 1.2421e-09,\n 2.3724e-11],\n [3.5465e-11, 1.3497e-10, 8.1546e-15, ..., 7.5636e-10, 1.3995e-10,\n 1.6871e-11]], device='cuda:0')" + }, + "12": { + "step": "tensor(1252.)", + "exp_avg": "tensor([ 7.3165e-07, -1.4954e-05, 1.8879e-05, ..., -5.3266e-05,\n -5.9604e-05, -4.2511e-06], device='cuda:0')", + "exp_avg_sq": "tensor([2.4369e-08, 1.8732e-08, 1.3634e-08, ..., 8.3645e-09, 1.9563e-08,\n 1.7500e-08], device='cuda:0')" + }, + "13": { "step": "tensor(1252.)", - "exp_avg": "tensor([[-6.0034e-08, 8.3175e-07, 5.6886e-07, ..., -1.3715e-07,\n 3.5541e-07, -2.4610e-07],\n [-2.7396e-07, 5.3619e-07, -2.5902e-06, ..., -1.7347e-06,\n 8.8987e-07, -5.4779e-07],\n [-4.0385e-07, -4.3323e-07, 8.1043e-07, ..., 2.3942e-07,\n 1.1977e-06, 7.2821e-07],\n ...,\n [-1.1566e-06, 7.1615e-07, -3.2375e-06, ..., -3.3065e-07,\n 1.0924e-06, 5.3783e-07],\n [ 2.4937e-06, 1.2662e-06, 2.0117e-07, ..., 7.8634e-08,\n 4.8693e-07, -1.5626e-07],\n [ 9.2190e-07, 5.1389e-07, -1.2902e-06, ..., -4.0980e-07,\n 1.0450e-06, -1.0818e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.9890e-11, 7.0728e-11, 4.1142e-11, ..., 2.0425e-11, 4.8474e-11,\n 1.0066e-10],\n [1.9791e-11, 1.2321e-10, 3.9717e-11, ..., 3.1398e-11, 3.0796e-11,\n 1.6469e-11],\n [3.0407e-11, 1.0305e-10, 4.5896e-11, ..., 4.8042e-11, 5.0357e-11,\n 4.0974e-11],\n ...,\n [1.0168e-10, 1.1815e-10, 5.8931e-11, ..., 5.4174e-11, 3.8480e-11,\n 2.0247e-10],\n [5.3361e-11, 1.0527e-10, 5.0470e-11, ..., 3.9453e-11, 3.5202e-11,\n 2.4136e-11],\n [4.3872e-11, 9.0027e-11, 6.3022e-11, ..., 6.6265e-11, 4.6989e-11,\n 3.8512e-11]], device='cuda:0')" + "exp_avg": "tensor([[-3.2940e-07, -2.4490e-07, -1.0957e-06, ..., 1.3607e-07,\n -1.1163e-07, -5.4119e-07],\n [ 1.5030e-08, 4.0016e-07, 3.9324e-07, ..., 4.1304e-07,\n 6.4674e-07, 1.1241e-07],\n [-1.4720e-06, 2.2739e-06, -4.3752e-07, ..., 4.0518e-06,\n -1.0429e-08, -4.5754e-07],\n ...,\n [-5.5502e-07, 1.6260e-08, -9.2345e-07, ..., 7.5728e-07,\n 8.5722e-07, 5.5773e-09],\n [-1.7185e-06, 6.2725e-07, 8.6740e-07, ..., -6.7087e-07,\n -6.7863e-08, 8.3310e-07],\n [ 3.6407e-07, -2.9836e-07, -3.4236e-08, ..., -3.5914e-07,\n 5.4213e-07, -9.2315e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.2573e-11, 1.4459e-11, 4.4655e-11, ..., 1.2465e-11, 8.6706e-12,\n 1.8555e-11],\n [3.4603e-11, 2.2091e-11, 5.5515e-11, ..., 2.2348e-11, 1.1782e-11,\n 4.8839e-11],\n [3.4585e-11, 2.2773e-11, 5.4309e-11, ..., 3.2049e-11, 1.9993e-11,\n 2.8135e-11],\n ...,\n [4.5234e-11, 5.5801e-11, 4.9056e-11, ..., 1.5769e-11, 1.7477e-11,\n 4.8252e-11],\n [5.2270e-11, 4.6982e-11, 2.3132e-11, ..., 2.4825e-11, 1.5397e-11,\n 3.4846e-11],\n [3.6778e-11, 3.0157e-11, 1.4188e-11, ..., 1.4417e-11, 1.2249e-11,\n 7.6116e-11]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00793913236883622, + "lr": 0.00654543046337755, "name": "shared", "betas": [ 0.9, @@ -227,7 +242,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00654543046337755, "name": "scale_384", "betas": [ 0.9, @@ -250,7 +265,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00654543046337755, "name": "scale_768", "betas": [ 0.9, @@ -273,7 +288,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00654543046337755, "name": "scale_1024", "betas": [ 0.9, @@ -296,7 +311,7 @@ ] }, { - "lr": 0.00793913236883622, + "lr": 0.00654543046337755, "name": "scale_1280", "betas": [ 0.9, @@ -319,7 +334,7 @@ ] }, { - "lr": 0.003969669238105037, + "lr": 0.0032728879774401812, "name": "fusion", "betas": [ 0.9, @@ -375,7 +390,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 3, + "T_cur": 4, "base_lrs": [ 0.01, 0.01, @@ -384,26 +399,27 @@ 0.01, 0.005 ], - "last_epoch": 3, + "last_epoch": 4, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00793913236883622, - 0.00793913236883622, - 0.00793913236883622, - 0.00793913236883622, - 0.00793913236883622, - 0.003969669238105037 + 0.00654543046337755, + 0.00654543046337755, + 0.00654543046337755, + 0.00654543046337755, + 0.00654543046337755, + 0.0032728879774401812 ] }, "metrics": { - "best_val_acc": 81.482, - "best_epoch": 2, + "best_val_acc": 81.84, + "best_epoch": 3, "scale_accuracies": { - "384": 81.482, - "768": 81.574, - "1024": 81.072 + "384": 81.84, + "768": 81.864, + "1024": 81.866, + "1280": 81.398 } }, "train_config": {