diff --git "a/weights/checkpoint_epoch_5_metadata.json" "b/weights/checkpoint_epoch_5_metadata.json" --- "a/weights/checkpoint_epoch_5_metadata.json" +++ "b/weights/checkpoint_epoch_5_metadata.json" @@ -3,205 +3,225 @@ "optimizer_state_dict": { "state": { "0": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[ 9.8365e-05, -3.9530e-05, 3.2748e-06, ..., 1.5921e-05,\n 3.7428e-05, -1.2425e-06],\n [ 1.2975e-04, -7.8826e-05, -5.1688e-05, ..., 8.4008e-05,\n -5.7494e-05, 4.5213e-05],\n [ 1.7282e-04, -2.2142e-04, -4.6699e-05, ..., 1.0785e-04,\n 6.8168e-05, 9.4126e-05],\n ...,\n [-3.2087e-05, 8.4764e-05, -3.5756e-05, ..., 9.9390e-05,\n -1.1274e-05, 6.8323e-06],\n [-3.9967e-09, 4.1533e-09, -5.4748e-10, ..., 1.8102e-09,\n 1.3380e-09, -4.0356e-09],\n [-1.4352e-04, 1.0789e-04, 3.1936e-05, ..., -4.3365e-05,\n 1.0860e-05, 1.5493e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.5664e-08, 6.8376e-08, 8.3553e-09, ..., 1.8848e-08, 1.9269e-08,\n 5.1668e-09],\n [1.2595e-07, 9.0480e-08, 3.5098e-08, ..., 6.3703e-08, 2.8370e-08,\n 4.4021e-08],\n [8.6265e-08, 1.1261e-07, 3.7418e-08, ..., 3.2862e-08, 2.1234e-08,\n 2.3994e-08],\n ...,\n [2.2266e-07, 1.3423e-07, 2.5279e-08, ..., 2.8063e-08, 2.2872e-08,\n 1.5268e-08],\n [9.3641e-11, 3.2087e-10, 4.3172e-11, ..., 9.9154e-11, 9.0230e-11,\n 6.9915e-11],\n [1.5499e-07, 9.2860e-08, 1.4243e-08, ..., 1.9437e-08, 3.4265e-08,\n 1.6971e-08]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 7.8277e-05, 4.9578e-06, 2.3352e-05, ..., -1.4571e-05,\n -4.8797e-05, 3.8431e-05],\n [ 8.8573e-07, 2.5922e-05, -5.2547e-05, ..., 2.1645e-05,\n -4.8459e-05, 3.4791e-05],\n [-4.6154e-08, 4.1890e-08, 1.3520e-08, ..., -2.1759e-08,\n 5.8983e-09, 6.1123e-09],\n ...,\n [-5.7400e-05, -1.7486e-05, -1.8404e-05, ..., -6.6152e-06,\n 1.5364e-05, -1.8938e-05],\n [-2.3509e-05, 4.3610e-05, -2.1072e-05, ..., -6.2180e-05,\n -1.5057e-05, 3.1203e-05],\n [ 1.3180e-05, -9.1399e-06, 2.6388e-05, ..., -1.5124e-05,\n -1.3041e-05, 3.6407e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3581e-08, 1.6482e-08, 7.6694e-09, ..., 9.3753e-09, 9.0039e-09,\n 5.7178e-09],\n [1.3218e-08, 1.1571e-08, 1.1668e-08, ..., 9.4941e-09, 7.0953e-09,\n 5.6897e-09],\n [7.1313e-13, 6.7727e-13, 5.5255e-13, ..., 8.2565e-13, 1.6906e-13,\n 4.4071e-13],\n ...,\n [1.3172e-08, 1.1464e-08, 9.6745e-09, ..., 7.9186e-09, 7.1973e-09,\n 5.6510e-09],\n [1.5851e-08, 1.3371e-08, 9.1543e-09, ..., 1.0205e-08, 8.3490e-09,\n 6.9020e-09],\n [3.4848e-09, 5.3274e-09, 3.8560e-09, ..., 2.3512e-09, 2.4853e-09,\n 2.4110e-09]], device='cuda:0')" }, "1": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 1.0100e-03, 4.3379e-03, 3.8078e-03, -6.8617e-04, -2.3703e-05,\n -1.6201e-03, -6.1781e-04, 1.3134e-03, -1.3918e-03, -5.8697e-04,\n 9.8567e-05, -2.0752e-03, 5.6052e-45, -1.2030e-03, -2.4545e-04,\n -2.3046e-03, 1.6347e-03, 2.1588e-03, -3.1311e-04, 5.6052e-45,\n 8.9469e-04, 3.0753e-03, -1.2493e-04, -1.3703e-03, 5.6052e-45,\n -1.6960e-03, -5.1683e-04, 7.4499e-04, 1.0409e-03, -1.6920e-03,\n 2.1064e-03, 4.6151e-03, 3.3147e-20, 5.6052e-45, 1.7645e-04,\n -3.7787e-04, -8.6520e-04, -6.4018e-04, 1.3967e-04, -1.4665e-03,\n 9.5046e-04, 1.4998e-03, -2.7317e-04, -2.1101e-03, 3.5007e-04,\n -4.1675e-03, -1.9003e-03, 6.9841e-04, 1.5728e-04, -2.1979e-05,\n -1.0592e-03, 5.6052e-45, -2.1912e-39, 5.6052e-45, -2.9434e-03,\n -2.0338e-03, 3.7159e-04, -9.1424e-04, -1.6186e-18, -9.4037e-04,\n -1.7348e-03, -8.5306e-04, 4.5903e-04, -9.8386e-04, 2.2164e-03,\n 4.1495e-04, -6.5090e-03, 5.6052e-45, 1.7661e-04, -7.5188e-04,\n -5.0380e-04, 5.6052e-45, -7.0589e-05, -1.6036e-03, -3.4066e-04,\n 2.7951e-03, 2.1685e-03, -3.1408e-03, -6.2691e-03, 1.9335e-03,\n 1.1323e-03, -1.5969e-03, -1.0306e-04, 2.8081e-04, 3.3097e-04,\n 1.6313e-03, 8.5051e-05, -1.2763e-03, 9.9258e-04, 9.0070e-04,\n 5.6052e-45, 1.9071e-03, -2.1552e-04, 1.6449e-03, 3.0969e-04,\n -8.7793e-04, 3.1738e-03, 2.1918e-04, 7.9928e-04, -4.3610e-03,\n -5.8235e-06, -1.3455e-03, -2.5091e-03, 2.0328e-03, 1.2953e-03,\n -1.2261e-04, 8.3383e-04, -2.0773e-03, 1.2066e-03, 2.8728e-04,\n 2.4194e-03, 2.5031e-03, 5.6052e-45, 4.1482e-03, -6.7992e-04,\n -4.1152e-04, -1.8527e-03, 4.2581e-04, 1.1210e-44, 2.4570e-08,\n 5.6052e-45, -3.1715e-04, 5.6052e-45, 5.8409e-04, -7.2493e-06,\n -1.2793e-03, -4.5642e-03, 5.6052e-45, 6.4749e-04, -5.5242e-04,\n 5.4763e-04, -6.0022e-04, 6.4328e-04, 1.7319e-27, -1.6059e-04,\n 3.0520e-03, -2.7362e-03, -1.0627e-03, -9.6242e-04, -1.7206e-03,\n -2.7262e-03, 8.4477e-04, 2.0411e-03, 2.1349e-03, 8.2899e-04,\n 1.4842e-03, -1.0239e-17, 1.3936e-03, -3.2412e-04, 2.1297e-03,\n 4.0267e-09, -5.8235e-04, -3.7435e-04, 5.6052e-45, 1.3363e-03,\n 7.1109e-04, 9.3358e-11, 4.5252e-04, 3.4241e-03, 1.3508e-03,\n 1.7445e-03, 9.2164e-04, -1.5054e-03, 1.4935e-03, -3.3573e-05,\n -5.2704e-04, -6.4840e-04, 1.1677e-03, 1.4617e-04, 2.6853e-11,\n 5.6052e-45, 2.9181e-03, -1.4769e-03, -3.5561e-03, -1.1103e-03,\n 1.9168e-03, 3.3116e-04, 2.1760e-03, 5.6052e-45, 2.8412e-03,\n -6.4284e-04, 4.4530e-04, 6.9378e-04, 5.6052e-45, -1.6709e-03,\n -1.2339e-03, 1.5068e-03, 1.1144e-04, -1.3644e-03, 5.6052e-45,\n 5.6052e-45, -5.9326e-04, 5.2712e-04, 3.1336e-04, 1.1670e-04,\n -7.9362e-05, -7.7011e-04, 1.7574e-03, 5.6052e-45, -1.1470e-03,\n 3.8509e-04, 5.5671e-04, -1.2742e-03, 6.5528e-04, -5.7701e-04,\n 5.6052e-45, -2.8140e-03, -2.8507e-04, -8.3316e-05, 3.9338e-04,\n 2.1675e-04, 8.2024e-04, 5.5736e-04, 3.1526e-03, -9.2956e-04,\n 2.4685e-03, 6.5738e-04, 3.6477e-03, 1.6885e-03, 1.1099e-03,\n 5.6318e-05, -7.2135e-04, -4.7470e-04, -6.0446e-04, -1.3518e-03,\n -1.6788e-03, -2.8578e-03, -1.3504e-03, 4.8137e-04, 1.4038e-03,\n 1.4515e-03, 1.3761e-03, 1.3620e-03, 5.6052e-45, -1.0080e-03,\n -5.3508e-04, 2.4061e-03, 1.5772e-03, 2.6878e-04, 1.5413e-03,\n 3.9742e-04, 2.0431e-03, 1.7856e-03, 2.3080e-03, 5.3345e-04,\n -1.8303e-03, 7.7386e-04, -8.1739e-04, 1.1217e-03, -5.1945e-04,\n -2.2008e-03, -2.5892e-03, 8.7250e-04, -1.4888e-04, -5.6389e-04,\n -1.4506e-03, 5.6052e-45, -8.0094e-04, -8.1137e-04, -6.8996e-04,\n 1.5796e-03, -6.0676e-04, -1.2699e-03, 1.7975e-04, 2.0091e-03,\n 2.7672e-03, 4.2234e-03, 5.6052e-45, -3.0253e-04, -2.5950e-03,\n -1.3132e-03, 6.0431e-04, 7.2586e-04, -1.6223e-03, -8.1025e-04,\n -2.2194e-12, -1.6881e-03, 5.6052e-45, 6.5257e-04, 3.4828e-04,\n -2.5354e-03, -3.0247e-03, 2.7225e-03, -7.6471e-04, -1.8036e-04,\n 3.2882e-04, -4.6440e-05, -2.6052e-03, -1.9783e-03, -4.0878e-04,\n -5.7524e-04, 2.3200e-03, 1.6883e-03, -6.1475e-04, 3.5326e-03,\n -1.6054e-03, -1.0662e-03, -4.6572e-03, 1.1825e-03, 3.1807e-03,\n 3.5462e-03, 1.3842e-04, -1.8833e-03, 1.8477e-03, -3.8198e-04,\n 4.2530e-04, -2.6637e-03, 5.6052e-45, 5.6052e-45, -1.9224e-03,\n -2.5075e-03, -7.6981e-04, -1.0257e-03, 1.4294e-03, 3.0726e-05,\n 3.2058e-03, 1.1552e-03, 9.2449e-04, -7.9117e-04, -6.2546e-04,\n -3.7968e-03, -7.8026e-04, 1.3298e-03, -4.2407e-04, 1.0376e-03,\n 1.9453e-03, -1.2831e-03, 1.5317e-03, -1.8528e-03, -3.5110e-03,\n -6.1829e-04, -6.2017e-04, -9.1996e-05, 1.6102e-03, 5.6052e-45,\n 1.0226e-03, -1.6305e-03, 8.3258e-04, -8.5450e-04, 9.0009e-04,\n 2.6298e-04, 2.9721e-03, 1.5471e-03, -1.7813e-03, 8.4583e-04,\n 5.0357e-04, 5.6052e-45, 2.6301e-03, -5.4768e-03, -1.8239e-04,\n -3.3442e-05, -9.8636e-04, -2.4847e-03, -1.8759e-03, -6.0489e-04,\n -1.3040e-03, -3.7023e-04, 1.5317e-03, 5.2927e-03, -1.5579e-03,\n -1.3381e-03, -1.9522e-03, -5.6323e-04, 1.3188e-03, -7.4051e-03,\n -1.7235e-03, -1.3227e-04, 1.0739e-03, -1.5837e-03, -1.7710e-03,\n 2.0112e-04, -3.7127e-03, 7.0203e-04, -2.3372e-04, 1.9161e-03,\n 1.6225e-03, -1.2839e-03, -3.2253e-04, 5.6052e-45, 4.9533e-03,\n -4.1573e-04, 5.6052e-45, -7.7195e-04, 1.4882e-03, 9.1290e-04,\n 7.2711e-05, 1.5004e-03, -3.6042e-05, -1.7492e-03, -1.3944e-03,\n -1.4387e-03, 1.9572e-03, 2.8628e-03, -1.7983e-34, 5.6052e-45,\n -2.1790e-03, -6.0765e-04, -1.5476e-03, -1.0954e-03, 3.1369e-04,\n 1.9816e-03, 1.6125e-03, -5.0271e-04, 3.4762e-03, 1.0068e-03,\n 6.9574e-04, 6.8149e-04, 5.6052e-45, 9.7559e-04, -6.7203e-05,\n 5.6052e-45, 1.5239e-04, 5.6052e-45, -2.9132e-03, -2.3410e-04,\n -7.3499e-04, -1.7396e-03, 5.6052e-45, -4.4139e-04, -7.6219e-04,\n 1.7692e-03, 6.7959e-04, -9.4247e-04, 2.6277e-05, -1.3771e-04,\n -1.6579e-04, 4.3898e-03, -1.1718e-04, -1.3533e-03, 5.6052e-45,\n -1.4949e-03, 5.1438e-04, 7.7745e-04, 5.6052e-45, 1.8780e-03,\n 2.3024e-04, 5.4866e-05, -1.6454e-04, -3.7131e-03, -1.8146e-03,\n 2.1925e-04, -9.1816e-04, 5.9034e-03, -1.2992e-03, 6.2794e-04,\n 9.2899e-04, -3.4211e-03, 5.6052e-45, 1.0105e-03, -3.6787e-04,\n 5.6052e-45, 3.2439e-04, -6.1816e-04, 7.5364e-04, -1.5462e-03,\n 3.0423e-08, -1.3518e-03, 6.2005e-04, 3.6658e-04, 5.6052e-45,\n 1.4886e-03, 2.1781e-03, 5.6052e-45, 5.8315e-04, 4.6295e-04,\n 1.3821e-03, 4.3556e-04, 5.6052e-45, 7.9198e-04, -5.5912e-04,\n 1.3256e-03, 1.1152e-03, -3.2480e-04, -8.2101e-04, 2.3535e-03,\n -4.5450e-04, 5.6052e-45, 2.9865e-04, 5.6052e-45, -1.8530e-04,\n 3.8849e-04, -2.4578e-04, -9.3907e-04, 1.4681e-03, 1.7482e-03,\n -1.9137e-03, 2.8748e-03, -2.6741e-04, 6.6680e-04, -1.7837e-03,\n 1.7951e-17, -1.6469e-03, 5.9936e-04, 1.7286e-03, 1.9009e-04,\n -5.1925e-04, 1.2606e-03, -2.4088e-04, 1.6288e-03, 5.2623e-04,\n 1.1008e-03, 5.6052e-45, 1.6066e-03, 6.6441e-04, 9.7904e-04,\n 1.3164e-03, 1.9549e-03, 1.5221e-03, 9.9715e-04, 7.2656e-04,\n 8.0884e-04, 3.7079e-03, 1.1459e-03, -1.7992e-03, 5.6052e-45,\n 5.6052e-45, 2.9923e-03, -2.7719e-03, 2.6777e-04, -5.8328e-04,\n -3.0689e-04, 3.0124e-03, 1.6872e-03, 9.2770e-04, 2.9581e-05,\n 1.7367e-03, 1.3459e-03, 2.9990e-03, 4.6529e-04, 1.8867e-03,\n 4.7433e-06, 1.7393e-05, -3.1907e-03, 5.2344e-04, 2.5924e-43,\n -1.2843e-04, 1.2594e-04, 5.6052e-45, 5.6052e-45, -2.3324e-03,\n 1.9488e-03, 2.9439e-04, -1.8544e-04, 1.0520e-03, 2.9453e-04,\n -5.0162e-03, 2.0348e-03, 4.0897e-04, -4.4827e-04, 1.9506e-03,\n -1.3328e-03, -2.6488e-03, -1.1633e-03, -3.6916e-04, -3.5345e-03,\n -1.9869e-04, 7.4746e-04, -2.0632e-03, 4.4074e-04, 7.8711e-04,\n -2.2224e-04, -1.0304e-03, -1.9457e-03, -2.9624e-04, -4.3302e-04,\n -4.3016e-04, 1.5030e-03, -6.5417e-04, 3.5795e-03, -1.1246e-03,\n -7.0597e-04, 6.3308e-04, -2.0187e-04, 8.3933e-05, -3.2152e-05,\n -5.6052e-45, 5.6052e-45, 8.9804e-04, 6.2419e-03, -4.5985e-03,\n -2.6252e-03, 5.6052e-45, -7.8141e-04, -1.0365e-03, 1.8883e-03,\n -1.5764e-03, 2.0519e-03, -7.0695e-04, -2.1459e-04, 1.5949e-04,\n -2.7336e-03, 3.4447e-04, 1.5788e-03, 1.2167e-04, 3.1616e-03,\n 1.0144e-03, 6.5112e-04, -8.3236e-04, 2.8446e-03, 9.1909e-05,\n -1.1754e-03, 4.0495e-04, 9.0915e-04, 4.7872e-04, -1.7729e-03,\n -8.6823e-04, 1.1698e-03, -1.6438e-03, 2.4684e-03, 1.4474e-03,\n 6.7911e-04, 3.4398e-03, 1.4354e-03, 3.3163e-04, 1.1224e-03,\n 1.2695e-03, 7.6487e-04, -8.0364e-04, -1.7695e-03, 7.4380e-29,\n 1.0577e-06, 7.2533e-04, -1.8876e-03, -5.4142e-04, 3.6442e-04,\n -1.4422e-03, 1.3930e-04, -2.0757e-03, -1.4255e-03, -4.7789e-04,\n 1.4461e-03, -5.6052e-45, -1.7851e-03, 1.9392e-03, 5.4515e-04,\n 4.4558e-05, 1.4193e-04, 5.6929e-04, -7.6766e-08, -2.0821e-03],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.5471e-05, 6.4076e-05, 3.3357e-05, 2.5068e-05, 4.5139e-08, 6.0154e-06,\n 4.2440e-05, 4.3535e-05, 3.0885e-05, 4.1081e-05, 4.0602e-05, 8.2067e-05,\n 1.5077e-07, 3.6482e-05, 4.7473e-05, 4.8319e-05, 7.2269e-05, 4.8481e-05,\n 2.3484e-05, 3.9256e-08, 3.9267e-05, 3.0448e-05, 3.0843e-05, 4.5522e-05,\n 1.4329e-07, 5.9980e-05, 4.2434e-05, 5.7961e-05, 4.8435e-05, 4.8824e-05,\n 5.2305e-05, 4.7203e-05, 2.8849e-07, 2.5308e-08, 4.3962e-05, 3.8334e-05,\n 4.6049e-05, 2.9303e-05, 2.8096e-05, 4.7343e-05, 4.5750e-05, 2.5210e-05,\n 4.2427e-05, 3.5225e-05, 3.5363e-05, 5.0365e-05, 4.1504e-05, 7.0064e-05,\n 7.0876e-05, 2.8045e-05, 4.8813e-05, 1.5033e-08, 2.4393e-08, 2.2829e-08,\n 5.6857e-05, 9.4807e-05, 3.3830e-05, 3.3389e-05, 2.9407e-07, 5.2148e-05,\n 4.4678e-05, 3.8734e-05, 7.6580e-05, 2.4787e-05, 4.9735e-05, 5.0104e-05,\n 4.3969e-05, 2.7941e-08, 6.2892e-05, 4.4560e-05, 1.1054e-05, 1.1413e-07,\n 5.1395e-05, 3.7822e-05, 1.4224e-05, 4.8789e-05, 5.3800e-05, 5.8071e-05,\n 3.4194e-05, 5.2288e-05, 1.2852e-04, 4.0109e-05, 5.5946e-05, 3.6911e-05,\n 4.4675e-05, 4.2686e-05, 4.5438e-05, 2.4481e-05, 4.3064e-05, 4.2094e-05,\n 1.7817e-07, 5.5198e-05, 3.8448e-05, 3.1626e-05, 7.4515e-05, 1.7346e-05,\n 4.5794e-05, 3.7357e-05, 3.6593e-05, 3.0351e-05, 2.6467e-05, 9.7158e-06,\n 5.7541e-05, 6.1389e-05, 9.9899e-05, 2.7195e-05, 3.9837e-05, 6.0316e-05,\n 1.4775e-05, 2.2642e-05, 6.7569e-05, 6.8426e-05, 1.2046e-08, 3.1963e-05,\n 5.7610e-06, 3.2757e-05, 4.4288e-05, 6.8741e-05, 5.3711e-09, 5.4133e-08,\n 1.9538e-08, 4.0175e-05, 1.5558e-08, 8.3613e-05, 4.1608e-05, 5.9230e-05,\n 5.4852e-05, 2.9267e-09, 1.8832e-05, 5.6011e-05, 3.5929e-05, 4.3218e-05,\n 4.9762e-05, 5.2210e-09, 5.1671e-05, 4.2105e-05, 2.6792e-05, 3.6953e-05,\n 3.3827e-05, 3.7374e-05, 8.2348e-05, 4.6328e-05, 5.2392e-05, 4.5294e-05,\n 5.0083e-05, 4.7611e-05, 2.3040e-07, 4.6107e-05, 4.4986e-05, 3.5930e-05,\n 1.9773e-07, 2.7350e-05, 3.9997e-05, 1.0602e-07, 3.6958e-05, 5.3994e-05,\n 2.1000e-07, 6.4021e-05, 3.5006e-05, 4.9037e-05, 2.5777e-05, 4.2837e-05,\n 1.0786e-04, 4.4976e-05, 4.2360e-05, 3.1971e-05, 7.5261e-05, 2.7413e-05,\n 4.5072e-05, 2.4950e-08, 3.8450e-08, 4.4696e-05, 6.3628e-05, 3.3728e-05,\n 3.9320e-05, 6.2878e-05, 2.0522e-05, 4.9315e-05, 6.9780e-09, 2.3722e-05,\n 4.4038e-05, 5.4736e-05, 8.3897e-06, 5.1343e-08, 4.7321e-05, 4.4493e-05,\n 6.6636e-05, 2.7097e-05, 3.6464e-05, 1.4909e-08, 1.7082e-07, 4.1329e-05,\n 2.0306e-05, 5.1909e-05, 4.3613e-05, 4.4031e-05, 3.0118e-05, 5.3227e-05,\n 2.6925e-08, 3.7160e-05, 3.1823e-05, 2.0221e-05, 6.2383e-05, 2.0509e-05,\n 9.0029e-05, 1.3280e-09, 3.8402e-05, 4.6113e-05, 3.4872e-05, 4.4204e-05,\n 2.3849e-05, 4.7509e-05, 4.8742e-05, 4.4992e-05, 2.8454e-05, 3.7261e-05,\n 3.2369e-05, 5.6639e-05, 5.2394e-05, 2.8891e-05, 2.9369e-05, 4.8403e-05,\n 3.3956e-05, 3.8979e-05, 4.0484e-05, 4.4008e-05, 6.2775e-05, 6.7298e-05,\n 2.8964e-05, 5.9275e-05, 4.7100e-05, 8.2170e-05, 5.0334e-05, 3.3011e-08,\n 4.9641e-05, 2.3867e-05, 3.7830e-05, 2.8722e-05, 1.0081e-04, 4.6842e-05,\n 3.1110e-05, 4.7810e-05, 4.1731e-05, 4.5337e-05, 6.2854e-05, 2.8358e-05,\n 2.3485e-05, 5.5194e-05, 2.9655e-05, 3.7237e-05, 4.5966e-05, 4.0572e-05,\n 3.2543e-05, 3.8830e-05, 4.6753e-05, 3.9609e-05, 1.4224e-07, 3.3866e-05,\n 2.6186e-05, 1.2490e-05, 4.3658e-05, 5.7079e-05, 5.3233e-05, 4.5710e-05,\n 4.3161e-05, 4.0960e-05, 3.4566e-05, 2.1084e-08, 4.9863e-05, 4.6781e-05,\n 3.2154e-05, 6.2497e-05, 3.4876e-05, 2.3246e-05, 5.5890e-05, 3.6793e-08,\n 4.3361e-05, 1.0397e-07, 2.9562e-05, 1.9245e-05, 4.3136e-05, 3.5374e-05,\n 6.5276e-05, 5.2394e-05, 4.6613e-05, 4.8715e-05, 8.4422e-08, 5.9592e-05,\n 6.1927e-05, 2.9575e-05, 8.6889e-06, 4.5818e-05, 5.5537e-05, 4.7967e-05,\n 3.6788e-05, 7.1107e-05, 7.6536e-05, 4.7009e-05, 4.1536e-05, 9.1709e-05,\n 4.6170e-05, 8.7964e-06, 6.4300e-06, 3.6398e-05, 4.1899e-05, 7.6441e-06,\n 6.7151e-05, 4.3765e-09, 8.6220e-08, 4.3169e-05, 6.2967e-05, 2.9643e-05,\n 4.4238e-05, 5.4220e-05, 5.1227e-05, 4.2333e-05, 4.8903e-05, 3.6519e-05,\n 3.1887e-05, 5.2849e-05, 5.8537e-05, 7.3943e-05, 6.1819e-05, 2.8871e-05,\n 1.6279e-05, 4.5091e-05, 3.6271e-05, 9.5212e-06, 3.2089e-05, 5.0530e-05,\n 9.5944e-06, 2.7550e-05, 5.0638e-05, 6.1794e-05, 1.0286e-11, 4.1570e-05,\n 2.8491e-05, 6.6689e-05, 2.6466e-05, 3.8255e-05, 7.6978e-06, 4.7571e-05,\n 3.3616e-05, 3.6401e-05, 4.0317e-05, 5.9655e-05, 1.5053e-07, 9.6399e-06,\n 4.6553e-05, 4.6551e-05, 3.3759e-05, 4.7260e-05, 3.6972e-05, 3.5313e-05,\n 7.1451e-05, 4.1493e-05, 4.0888e-05, 3.6120e-05, 5.9170e-05, 6.0958e-06,\n 2.2157e-05, 2.7656e-05, 4.7594e-05, 6.9860e-05, 5.6604e-05, 1.1206e-04,\n 1.5597e-05, 5.3454e-05, 6.0671e-05, 5.0776e-05, 5.4602e-05, 3.1172e-05,\n 3.7242e-05, 1.1501e-05, 3.9346e-05, 6.8974e-05, 9.4734e-05, 1.4374e-05,\n 6.5530e-08, 4.7837e-05, 2.3755e-05, 3.5754e-07, 3.8114e-05, 7.8584e-05,\n 4.5895e-05, 3.3532e-05, 4.4310e-05, 5.2872e-05, 2.7890e-05, 2.7272e-05,\n 6.0952e-05, 5.5481e-05, 3.6020e-05, 3.5536e-08, 1.1791e-07, 4.1462e-05,\n 4.5635e-05, 4.2247e-05, 7.1493e-05, 3.6246e-05, 4.1408e-05, 2.8786e-05,\n 1.6079e-05, 3.4269e-05, 3.9221e-05, 6.7002e-05, 3.0166e-05, 4.3424e-08,\n 1.1647e-05, 1.2419e-05, 1.2509e-08, 1.0394e-05, 2.1824e-08, 5.7851e-05,\n 5.1559e-05, 1.8121e-05, 3.6099e-05, 5.8922e-08, 2.0794e-05, 4.8529e-05,\n 2.9892e-05, 4.9308e-05, 5.5888e-05, 6.1415e-05, 3.9056e-05, 4.7653e-06,\n 3.4928e-05, 3.4899e-06, 2.4447e-05, 2.9617e-08, 2.7710e-05, 4.9795e-05,\n 4.3736e-05, 3.7848e-08, 2.6230e-05, 3.9066e-05, 6.2294e-05, 4.7004e-05,\n 4.0768e-05, 2.8438e-05, 5.5390e-05, 2.2288e-05, 4.5816e-05, 5.9187e-05,\n 3.3778e-05, 7.5553e-06, 4.2178e-05, 1.2774e-08, 3.8121e-05, 2.9007e-05,\n 4.2383e-07, 2.5315e-05, 3.3929e-05, 3.9244e-06, 3.8300e-05, 2.4240e-08,\n 2.6116e-05, 4.8966e-06, 3.6977e-05, 5.7217e-07, 4.0459e-05, 7.1547e-05,\n 3.7200e-09, 3.5580e-05, 3.4433e-06, 4.5802e-05, 3.5310e-05, 3.0382e-08,\n 5.4145e-05, 2.7690e-05, 8.0106e-05, 5.6639e-05, 3.4566e-05, 2.0206e-05,\n 2.8322e-05, 4.0039e-05, 2.1118e-07, 5.2947e-05, 6.7049e-08, 9.7945e-06,\n 7.0130e-06, 8.3251e-06, 4.1676e-05, 4.6595e-05, 8.8474e-05, 3.1435e-05,\n 4.7031e-05, 3.4636e-05, 5.1517e-05, 3.6346e-05, 2.8132e-08, 4.4653e-05,\n 4.3771e-05, 8.1040e-05, 7.1904e-06, 2.2715e-05, 3.5739e-05, 2.8812e-05,\n 4.3698e-05, 1.6248e-05, 3.5188e-05, 2.0875e-07, 1.6840e-05, 5.9010e-05,\n 3.9746e-05, 7.2859e-05, 4.8329e-05, 3.8923e-05, 5.6740e-05, 4.1708e-05,\n 4.3219e-05, 9.2567e-05, 3.2320e-05, 4.8412e-05, 1.5668e-07, 1.7292e-07,\n 5.3705e-05, 1.0544e-04, 5.9495e-05, 3.7916e-05, 4.7539e-05, 4.7210e-05,\n 4.0458e-05, 5.2002e-05, 3.9407e-05, 7.7961e-06, 3.7438e-05, 5.1940e-05,\n 3.3141e-05, 3.2486e-05, 8.9524e-08, 3.2571e-05, 5.2228e-05, 2.6336e-05,\n 1.5742e-07, 3.4957e-05, 4.4431e-05, 1.2752e-08, 3.8367e-08, 4.4677e-05,\n 4.8358e-05, 5.2858e-05, 2.7801e-05, 5.3029e-05, 2.4797e-05, 4.5741e-05,\n 3.7552e-05, 2.6059e-05, 3.0109e-05, 4.6073e-05, 4.2974e-05, 3.5412e-05,\n 6.0530e-05, 5.9085e-05, 4.8788e-05, 2.7272e-05, 4.1195e-05, 1.2267e-04,\n 3.3755e-05, 4.0985e-05, 2.1409e-05, 4.0675e-05, 4.6180e-05, 8.2109e-05,\n 4.9221e-06, 3.6088e-05, 5.9531e-05, 2.8684e-05, 5.2169e-05, 4.0875e-05,\n 5.0955e-05, 3.6219e-05, 4.3339e-05, 5.8242e-05, 3.9667e-06, 2.0001e-08,\n 2.2892e-07, 3.6641e-05, 3.8901e-05, 5.2510e-05, 4.1900e-05, 4.3570e-07,\n 4.0382e-05, 4.4372e-05, 2.2940e-05, 4.3935e-05, 3.2343e-05, 3.3750e-05,\n 5.9446e-05, 4.8774e-05, 3.8940e-05, 7.4539e-05, 5.2468e-05, 6.5181e-05,\n 4.0822e-05, 4.8215e-05, 4.4684e-05, 4.8099e-05, 6.4226e-05, 7.8774e-05,\n 6.1564e-05, 2.7928e-05, 3.8759e-05, 3.8469e-05, 4.5286e-05, 3.3635e-05,\n 3.5771e-05, 4.2228e-05, 9.0291e-05, 7.3985e-05, 5.3636e-05, 5.5798e-05,\n 4.1357e-05, 1.8346e-05, 2.2589e-05, 4.3875e-05, 4.2595e-05, 4.3447e-05,\n 2.5827e-05, 1.0024e-08, 2.8626e-07, 5.0575e-06, 4.7845e-05, 3.9193e-05,\n 4.3923e-05, 4.9661e-05, 3.8185e-05, 3.1916e-05, 3.2082e-05, 6.3147e-05,\n 2.9611e-05, 7.3380e-08, 4.7759e-06, 5.4644e-05, 3.8585e-05, 7.2799e-05,\n 4.7572e-05, 4.0752e-05, 6.6774e-08, 3.1248e-05], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([ 2.4973e-03, -1.4545e-03, 8.9858e-07, ..., -3.1942e-04,\n 8.1322e-04, -1.3398e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.7700e-05, 1.6147e-05, 1.1544e-09, ..., 1.6169e-05, 1.7467e-05,\n 6.0526e-06], device='cuda:0')" }, "2": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[ 4.2789e-06, -3.0149e-05, -3.8667e-05, ..., 6.3569e-06,\n -3.7737e-09, 2.0149e-05],\n [ 6.1900e-06, -2.6529e-05, 1.1074e-05, ..., -4.3185e-05,\n 3.6260e-09, -4.1065e-05],\n [ 2.0111e-05, 4.3233e-06, -1.1692e-05, ..., -1.3265e-06,\n -1.6374e-09, -7.2251e-05],\n ...,\n [ 1.2849e-05, 3.8253e-05, -1.0706e-04, ..., -3.2491e-05,\n -6.1890e-09, -8.1976e-05],\n [-1.1560e-05, 2.0703e-05, 3.6330e-05, ..., -2.6455e-05,\n -5.8501e-09, -1.7039e-04],\n [-1.0429e-05, 5.4415e-05, 1.6383e-04, ..., 2.3547e-05,\n 7.9381e-10, 3.9757e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1180e-09, 8.2080e-09, 8.6843e-09, ..., 8.8746e-09, 5.5765e-11,\n 7.3709e-09],\n [4.7196e-09, 2.4112e-08, 1.4612e-08, ..., 1.8684e-08, 3.5202e-11,\n 1.2070e-08],\n [3.0408e-09, 1.6331e-08, 1.5068e-08, ..., 1.2581e-08, 8.3955e-11,\n 1.8929e-08],\n ...,\n [4.4023e-09, 1.6173e-08, 1.8797e-08, ..., 1.6050e-08, 7.5485e-11,\n 2.2187e-08],\n [4.8252e-09, 1.7487e-08, 1.6493e-08, ..., 1.7246e-08, 6.3169e-11,\n 4.8849e-08],\n [4.7145e-09, 1.8057e-08, 2.0528e-08, ..., 2.4369e-08, 1.2699e-10,\n 1.1611e-08]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[-1.9668e-06, 1.8664e-06, 5.6052e-45, ..., -9.4905e-06,\n -2.9698e-07, 1.2132e-06],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-7.2952e-08, -2.9029e-06, 0.0000e+00, ..., -9.9339e-07,\n -8.3676e-06, -1.6951e-08],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 2.0413e-06, -9.1185e-07, -5.6052e-45, ..., 4.2819e-06,\n -1.9084e-06, 4.0816e-06],\n [ 9.7527e-07, 2.1144e-06, -5.6052e-45, ..., -4.1452e-07,\n 1.3256e-05, -5.2286e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7336e-09, 4.6745e-10, 2.2801e-13, ..., 2.4491e-09, 1.2078e-09,\n 3.7316e-10],\n [2.4385e-13, 4.4660e-12, 0.0000e+00, ..., 8.3785e-13, 3.0998e-16,\n 3.0858e-12],\n [3.2643e-10, 5.8905e-10, 0.0000e+00, ..., 4.1421e-10, 1.6597e-09,\n 3.9613e-11],\n ...,\n [0.0000e+00, 1.0455e-18, 0.0000e+00, ..., 3.0629e-20, 3.6581e-20,\n 0.0000e+00],\n [5.6132e-09, 9.2022e-10, 3.2857e-14, ..., 2.3175e-09, 6.9180e-10,\n 1.4418e-09],\n [1.7004e-09, 1.2668e-09, 1.7515e-13, ..., 6.6202e-10, 5.6460e-09,\n 6.5041e-10]], device='cuda:0')" }, "3": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 6.5050e-06, -1.5764e-06, -1.3506e-07, ..., -4.3790e-07,\n 1.0749e-06, 8.4761e-07],\n [ 4.0638e-44, -3.2230e-44, -5.6052e-45, ..., 1.1210e-44,\n 3.9236e-44, 5.6052e-45],\n ...,\n [-2.7930e-05, 5.2181e-06, 1.1321e-05, ..., -7.3311e-06,\n -1.8518e-05, -1.8514e-05],\n [-7.0764e-06, 1.3086e-05, 2.6873e-06, ..., 1.7399e-06,\n -6.6194e-06, -3.4864e-06],\n [-1.8533e-08, -1.6899e-09, -2.6012e-08, ..., 1.5782e-08,\n 4.2157e-09, -1.0098e-08]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.6708e-10, 2.1343e-10, 1.7283e-12, ..., 2.2451e-16, 8.0663e-12,\n 6.8884e-12],\n [1.0155e-09, 3.5066e-09, 2.6989e-10, ..., 3.7253e-10, 3.7959e-10,\n 3.9067e-10],\n [1.3857e-11, 2.0897e-11, 7.7137e-12, ..., 1.3363e-12, 1.8957e-12,\n 5.7241e-12],\n ...,\n [6.3381e-09, 9.0081e-09, 1.2811e-09, ..., 1.5458e-09, 2.8394e-09,\n 2.6534e-09],\n [5.0212e-09, 3.8868e-09, 9.6041e-10, ..., 1.7011e-09, 1.1332e-09,\n 1.0099e-09],\n [5.6227e-11, 5.1122e-11, 2.7827e-11, ..., 1.8283e-12, 6.6583e-12,\n 8.0268e-12]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([ 3.1521e-05, -1.2311e-23, 1.9869e-04, -7.6081e-05, 9.2364e-05,\n 3.6461e-21, 1.5613e-04, -2.0859e-04, -3.8042e-04, 3.2588e-04,\n 1.8848e-04, 1.0349e-36, 9.5781e-05, 2.4403e-05, 7.2596e-05,\n 2.0570e-04, 1.0269e-04, -2.7391e-05, 9.5326e-06, 5.0980e-05,\n -5.2700e-11, 8.5553e-05, -1.4996e-06, -3.7008e-06, -9.6250e-06,\n 9.8987e-05, 7.0346e-05, 5.6052e-45, -8.5273e-05, -1.5279e-05,\n 4.9009e-05, -3.8952e-06, -9.9135e-05, -3.3720e-06, 2.0738e-04,\n 5.9665e-05, 2.4778e-12, 2.9097e-05, -2.2584e-04, -1.2372e-04,\n -2.7847e-04, 7.7207e-05, -1.4023e-04, -1.9990e-04, 3.2947e-05,\n 9.3529e-05, -5.7796e-04, -8.3135e-05, -1.1395e-04, 1.1283e-05,\n 5.6052e-45, 4.5786e-06, 6.0989e-05, 7.4132e-05, 1.6638e-05,\n 1.0488e-04, 3.6284e-06, -4.1525e-05, 6.5765e-05, 3.4452e-05,\n -1.2250e-04, -1.6437e-04, -1.7052e-04, -1.3674e-04, -2.7126e-05,\n 2.8745e-05, 2.6645e-05, 2.6426e-04, -9.0528e-05, -2.3231e-05,\n 1.5187e-04, -9.4069e-27, 1.4530e-04, -1.8770e-05, 3.1416e-05,\n 3.7059e-06, 5.6052e-45, 5.8168e-04, -1.5514e-04, -2.7781e-05,\n -6.9010e-05, -1.8606e-04, -5.1562e-05, -1.3361e-33, -4.8092e-05,\n 4.0571e-05, 3.9434e-05, -3.3354e-04, -6.4586e-05, -6.3396e-05,\n 1.4266e-05, 3.2508e-05, -7.4531e-06, 2.6662e-05, 6.6810e-05,\n -3.4404e-04, -4.5648e-05, -1.1256e-05, -5.0714e-04, 1.7754e-04,\n 4.2586e-05, -9.2744e-05, 2.0982e-04, 3.1810e-05, 1.5493e-04,\n -4.9194e-04, -9.5192e-06, -4.9092e-05, 1.8848e-04, 1.5955e-04,\n -1.8737e-04, -1.4154e-04, 1.8291e-05, -5.5272e-06, 2.6245e-05,\n -2.0715e-04, -4.3237e-04, 1.2184e-04, -3.0999e-05, 5.6052e-45,\n -7.0565e-06, -1.2552e-04, -6.1019e-06, 5.6052e-45, -7.7955e-05,\n -1.7411e-04, 2.6180e-05, -2.3926e-14, -1.5691e-05, -2.9317e-05,\n -7.4496e-05, -2.7922e-05, 5.6052e-45, -4.2639e-05, -6.4004e-08,\n 1.6341e-04, 1.2682e-36, 4.4140e-05, -4.8538e-05, 5.6490e-05,\n 5.6052e-45, 5.6052e-45, -1.1404e-04, -1.7011e-09, 5.6052e-45,\n 5.6052e-45, 1.4518e-05, -1.0252e-04, 5.6052e-45, 7.7463e-05,\n -2.3733e-05, -9.5883e-06, 4.6006e-05, 1.8538e-05, 4.7356e-05,\n -8.8006e-06, 1.7081e-05, 7.8186e-05, 1.9898e-17, -1.0368e-04,\n -4.6848e-05, 8.1403e-05, 1.4906e-04, 3.2709e-05, 9.1180e-07,\n 1.8322e-04, -1.4427e-08, 5.6052e-45, 3.2181e-05, -1.4238e-04,\n 2.7206e-04, 3.6773e-04, 9.1961e-05, -8.8061e-05, 2.8030e-04,\n 2.4392e-05, -1.8160e-05, 2.8750e-05, 3.0516e-04, -2.0983e-04,\n 1.3941e-04, 7.4210e-05, -5.1194e-05, -5.7053e-05, 5.6052e-45,\n 1.2510e-40, 1.5681e-04, 1.4842e-04, 3.7542e-05, 9.3564e-05,\n 2.6983e-05, -7.0105e-05, -8.6893e-05, 5.4492e-05, -6.9348e-05,\n -1.6202e-05, 2.3960e-04, 1.0463e-04, -3.4105e-05, 5.6052e-45,\n 1.9414e-05, 6.7005e-05, 5.8286e-05, 1.7512e-04, 1.6003e-04,\n 1.6040e-05, -3.8507e-04, 4.6186e-05, -8.6852e-07, 5.6052e-45,\n -1.1705e-04, 6.1573e-28, -6.7348e-05, 9.9849e-05, -1.7887e-04,\n 8.5411e-05, -3.1872e-04, 5.7193e-05, -8.4574e-05, 1.1582e-05,\n -7.0984e-05, -4.8982e-05, -2.1179e-05, 6.4510e-05, 1.3626e-04,\n -5.6052e-45, -1.6633e-04, 1.4200e-04, 4.6956e-05, 2.9281e-05,\n -5.5499e-05, 6.6239e-06, -1.2351e-04, -1.0753e-04, 1.0038e-30,\n 2.1582e-04, 3.4709e-04, 5.6052e-45, 1.4857e-04, 1.9096e-04,\n 4.9188e-05, 5.6052e-45, 1.7566e-04, 8.0078e-05, -3.0343e-05,\n 1.8349e-04, -4.2999e-05, -2.2348e-04, -1.4158e-04, 9.6752e-36,\n -1.9542e-04, -2.5392e-04, -1.0916e-04, -1.3869e-04, -2.5880e-05,\n 1.1429e-04, 4.1407e-40, 9.7374e-05, 1.9185e-06, 1.3247e-04,\n 5.6052e-45, 3.0778e-04, 3.9682e-05, 3.3542e-04, -1.8589e-05,\n -4.2212e-05, -1.8472e-04, 9.9252e-06, 3.6182e-04, -4.3417e-05,\n -4.5407e-05, 3.5957e-05, 2.6875e-05, -8.2606e-05, -2.1872e-04,\n -1.3050e-04, -4.2799e-05, 1.2146e-11, 6.1240e-05, -9.0383e-05,\n 1.6296e-23, 1.2110e-05, 4.9330e-05, 9.5203e-05, 1.5581e-04,\n -1.1960e-04, -5.1113e-06, 9.0011e-05, -9.0278e-05, 2.4280e-05,\n 5.2265e-05, -4.0891e-05, 2.2373e-05, -3.3726e-05, -2.3673e-04,\n 4.1865e-05, -2.5736e-04, -1.1135e-04, 2.0706e-04, 7.1923e-05,\n -8.1276e-05, -1.7075e-04, 3.2565e-05, -2.4586e-05, -2.6026e-05,\n 5.6052e-45, -2.0990e-09, -1.1199e-04, -1.8375e-04, 5.6440e-05,\n -1.1075e-04, 5.6052e-45, 1.2897e-04, 1.0463e-04, 3.4039e-04,\n 5.6052e-45, 8.2677e-05, 2.9165e-05, 8.2419e-06, -1.9874e-05,\n 5.6985e-05, -2.3537e-06, 2.9127e-05, -6.5033e-05, -6.7711e-05,\n 5.6052e-45, -2.3083e-04, 1.8713e-04, 2.6040e-04, 1.0097e-04,\n 1.4297e-04, -4.5225e-05, 8.7781e-05, 6.1370e-05, 4.8244e-05,\n -5.0370e-05, -2.7739e-05, -8.9205e-06, 5.6052e-45, -5.1096e-08,\n -1.4312e-04, 1.5349e-04, -1.2869e-05, 6.4124e-05, 8.5775e-07,\n -4.4252e-05, 1.0077e-04, 9.8495e-05, 1.6101e-05, 1.3111e-04,\n -1.2295e-05, -3.1138e-05, 2.0143e-06, 5.6052e-45, -5.1130e-05,\n -1.6264e-04, -8.6253e-06, -6.4645e-05, 2.3816e-05, -1.9372e-05,\n 1.7761e-04, 1.8192e-04, 5.6052e-45, 7.8429e-05, -5.5876e-05,\n -1.8691e-04, -1.0389e-06, -2.8513e-04, 3.8670e-05, -1.4340e-06,\n 4.6800e-05, 5.1832e-05, 4.5576e-06, 1.4786e-05, 5.1367e-05,\n 8.2881e-05, -1.5044e-18, -1.1618e-04, 5.6052e-45, 1.6986e-04,\n 5.3990e-05, 2.2211e-04, -1.2810e-05, 1.3811e-04, 5.6052e-45,\n -8.7641e-05, 8.9677e-06, 4.6456e-05, 1.3708e-04, 9.9589e-05,\n 1.8055e-05, 1.0672e-04, -1.6417e-04, -2.4381e-05, 6.0763e-05,\n 8.5640e-05, 8.0096e-05, -5.8416e-14, 5.6052e-45, 7.8898e-05,\n -4.5476e-05, -1.2014e-04, -3.8826e-04, 9.1430e-05, 6.2766e-05,\n 1.1925e-04, -6.9678e-31, -2.9612e-05, 1.4610e-04, 1.6963e-04,\n -2.6295e-04, 5.6052e-45, 4.7689e-05, -1.7575e-04, 3.6474e-05,\n 5.3510e-05, -5.1009e-06, 9.4082e-05, 1.4956e-04, 6.5515e-05,\n -4.2547e-05, -1.0017e-04, 3.7764e-05, 1.7355e-28, 5.6052e-45,\n -2.5037e-05, 3.7096e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.5992e-05, 2.3257e-06, 3.9828e-05, -4.1754e-05, 3.3585e-05,\n 5.6052e-45, 8.1778e-06, -3.5955e-06, 2.8306e-05, 5.6052e-45,\n -8.1166e-05, 1.1623e-04, -4.8540e-05, 5.6052e-45, -2.2035e-05,\n -3.7871e-05, 1.2515e-04, 9.4850e-05, -5.8377e-05, -5.0248e-05,\n -7.7020e-04, -4.0442e-05, 3.1163e-04, 5.8285e-05, 3.5595e-05,\n -1.1196e-04, 8.8795e-09, -2.3106e-04, 4.9203e-19, 7.4549e-06,\n -2.9382e-05, 1.2463e-04, 5.6052e-45, 6.8643e-05, 5.7257e-06,\n 2.4418e-04, 4.5223e-05, 4.7212e-05, -1.0576e-04, 9.7705e-05,\n -1.0929e-04, 5.6052e-45, 6.1244e-05, 2.1550e-05, -7.7412e-05,\n 5.6052e-45, -1.7830e-04, 1.4335e-04, -1.1816e-04, 1.2455e-04,\n 3.5963e-04, -5.2330e-05, 2.0618e-04, -2.8834e-04, 1.4231e-05,\n 9.5560e-05, -2.3463e-04, 1.3726e-04, -1.9363e-04, -1.1542e-04,\n 5.9719e-05, 1.0063e-04, -1.3939e-05, -5.4109e-05, 5.1683e-05,\n 1.1323e-04, 1.1217e-04, 8.4128e-05, -1.4960e-04, 1.0453e-04,\n 5.6052e-45, 5.4710e-05, -1.4260e-05, 4.4808e-05, 4.0193e-06,\n 6.7333e-05, -1.0266e-04, 2.2544e-04, -2.5572e-04, 1.5214e-04,\n -1.0401e-04, -9.3180e-05, -1.8644e-04, -1.3265e-05, 5.6218e-05,\n -6.8698e-05, -6.4009e-05, -7.5259e-05, 5.6052e-45, 5.5681e-05,\n -6.3020e-05, 7.3126e-05, -2.7347e-05, -2.0996e-04, 7.8351e-05,\n -7.5844e-05, 5.6052e-45, -2.9850e-05, -4.5902e-05, -9.4897e-05,\n 1.4031e-04, -1.8612e-04, 1.1664e-04, -1.2641e-05, -4.4997e-06,\n 1.2655e-04, 1.8093e-05, 5.6052e-45, 2.4961e-05, -4.5604e-04,\n -1.1351e-04, -2.6668e-04, 4.3833e-05, -2.9797e-05, -1.0482e-04,\n -1.7394e-18, -1.0907e-04, -4.1047e-05, 2.0687e-04, 6.5075e-05,\n -1.1219e-04, -6.0677e-05, -6.6832e-05, -9.9832e-05, 1.9359e-04,\n 4.2535e-05, -1.1121e-04, 1.2293e-04, 6.2709e-05, 1.7709e-05,\n 1.2128e-11, -3.2536e-05, 3.2604e-05, -3.6219e-05, 1.8014e-04,\n 1.2130e-04, 7.4951e-05, 5.6052e-45, 1.2233e-04, 5.6052e-45,\n 9.9826e-06, 3.7471e-05, 5.0985e-05, 9.5499e-31, 1.5749e-04,\n -1.2691e-04, -2.4912e-05, 1.2256e-05, -2.9885e-04, -5.6501e-05,\n 1.6152e-04, 5.6052e-45, 4.7886e-05, -3.1392e-05, 5.6052e-45,\n -3.0492e-04, 1.2449e-05, 1.7495e-04, -1.2557e-04, -6.6562e-05,\n -1.2466e-04, 5.3562e-41, 7.5010e-05, 1.0619e-04, -6.9164e-06,\n 7.9408e-05, 2.1803e-05, 2.1552e-05, -5.3166e-05, -1.1018e-05,\n -1.0263e-04, 6.3220e-05, -4.5029e-04, -1.7432e-05, -3.5489e-04,\n 1.9478e-04, -3.9741e-05, -3.7136e-06, 2.1685e-04, 2.1252e-05,\n 3.0014e-04, 1.7813e-04, 1.5587e-04, 2.4100e-19, -1.4856e-05,\n 5.6052e-45, 2.1316e-04, -2.5933e-35, -6.4867e-05, 8.8672e-05,\n 2.7863e-05, -1.9007e-04, -1.1425e-05, 1.5760e-04, 3.1228e-04,\n -2.7241e-04, 5.6052e-45, 6.6249e-05, 1.4112e-05, 1.8199e-05,\n 9.0675e-06, 5.6052e-45, 5.6052e-45, 5.6052e-45, 1.0783e-04,\n -2.0323e-05, 5.6052e-45, 1.3441e-04, 2.0043e-04, 6.8588e-05,\n 7.1324e-05, 5.6052e-45, -2.0448e-04, -6.6880e-05, 1.1991e-05,\n -2.2671e-04, -2.2429e-04, -4.4375e-04, -2.0082e-06, -1.5322e-05,\n 8.8474e-05, -1.5098e-05, -9.4342e-05, -9.2000e-05, 1.1421e-04,\n 8.1657e-05, 9.4784e-05, -2.4266e-05, -1.2188e-05, -7.4300e-06,\n -4.2374e-04, -3.5490e-05, -2.5278e-05, 2.5053e-05, -3.2132e-04,\n 3.1103e-04, -4.0482e-05, -1.7681e-05, 1.2131e-05, -1.0499e-17,\n 1.4120e-04, 5.6052e-45, -2.2712e-04, -2.8662e-04, -2.7610e-04,\n 1.4849e-05, 1.9036e-04, -5.6577e-08, -5.6052e-45, 5.6052e-45,\n 3.5628e-05, -2.1885e-04, -1.8366e-04, 5.6052e-45, -2.8512e-06,\n 8.1273e-27, 1.3676e-04, 1.9340e-04, -6.2535e-05, -2.0510e-05,\n -3.6166e-05, -1.4578e-04, 5.6052e-45, -8.7900e-05, -4.6426e-05,\n -1.0444e-04, -5.7292e-05, -1.2850e-05, 4.7803e-37, -2.4854e-05,\n -1.0326e-04, -1.1522e-04, -1.5457e-06, 1.5733e-04, -3.5736e-05,\n 4.3578e-06, 5.6052e-45, 3.1331e-04, -1.4163e-05, 5.6052e-45,\n 1.8118e-05, -1.6546e-04, -1.0439e-04, 6.6036e-05, 9.8913e-05,\n 5.7397e-05, 8.3186e-05, 6.0364e-05, -1.2176e-04, 1.7343e-04,\n 5.6052e-45, -2.2644e-04, -9.2222e-05, 8.9380e-05, -1.3487e-04,\n 5.0878e-05, -7.2112e-05, 7.9752e-05, 3.3456e-05, -3.7188e-05,\n 2.7403e-05, 2.0137e-04, 1.7045e-04, 1.6360e-04, 5.6052e-45,\n 4.5360e-05, 5.2170e-04, -2.3817e-04, -6.5981e-05, -4.7639e-05,\n 8.2872e-05, 2.8165e-05, -6.6102e-06, -3.9644e-05, 4.8232e-05,\n 6.3479e-05, -8.7849e-05, 6.2002e-13, 8.0774e-05, -5.6052e-45,\n -1.1994e-04, -5.1305e-05, 7.6211e-05, 2.3491e-05, 2.3746e-04,\n 4.0031e-06, -3.8844e-05, 2.6814e-04, -9.4837e-06, -2.1366e-04,\n -3.0659e-05, -9.7009e-05, 1.2654e-04, -9.0477e-05, 5.6052e-45,\n 5.6052e-45, -2.6142e-04, 8.2354e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.9787e-07, 5.4960e-09, 1.1300e-07, 1.6804e-07, 4.0614e-07, 5.1216e-08,\n 4.6980e-07, 3.4866e-07, 1.7439e-07, 4.0015e-07, 2.8661e-07, 2.8287e-08,\n 2.2284e-07, 1.1822e-07, 2.2500e-07, 2.4259e-07, 4.1483e-07, 6.1545e-08,\n 9.5374e-08, 2.7792e-07, 6.4184e-10, 1.1385e-07, 1.5269e-07, 2.8641e-07,\n 4.0428e-07, 3.1632e-07, 1.2460e-07, 1.1631e-10, 2.8061e-07, 2.8552e-07,\n 1.0643e-07, 2.1704e-07, 2.0715e-07, 2.9785e-07, 1.7895e-07, 1.7890e-07,\n 6.2461e-08, 2.7321e-07, 2.5917e-07, 2.4279e-07, 4.0728e-07, 1.9662e-07,\n 3.7759e-07, 3.2161e-07, 3.0701e-07, 1.6605e-07, 2.5025e-07, 2.1285e-07,\n 6.8579e-07, 2.5682e-07, 9.9758e-08, 1.1159e-06, 2.2410e-07, 1.8671e-07,\n 2.5009e-07, 1.5559e-07, 2.9956e-07, 1.1061e-07, 1.0357e-07, 2.4951e-07,\n 7.8470e-08, 1.8998e-07, 4.8784e-07, 2.3819e-07, 8.7685e-08, 1.6258e-07,\n 3.9505e-07, 2.9135e-07, 6.3024e-08, 1.7952e-08, 2.5887e-07, 4.5384e-08,\n 1.6177e-07, 6.0441e-07, 2.4969e-07, 2.8148e-07, 3.1500e-10, 4.1651e-07,\n 1.4168e-07, 1.9136e-07, 1.7970e-07, 2.5519e-07, 3.7545e-07, 8.6969e-10,\n 4.1920e-07, 3.2175e-07, 2.9942e-07, 4.1721e-07, 2.1455e-07, 4.0023e-07,\n 2.3185e-07, 1.6560e-07, 1.7416e-07, 2.8483e-07, 1.4649e-07, 4.3482e-07,\n 3.2429e-07, 4.0879e-07, 2.6879e-07, 3.0304e-07, 2.8274e-07, 3.0158e-07,\n 6.6636e-07, 4.3836e-07, 2.4077e-07, 3.1853e-07, 1.7897e-07, 2.2143e-07,\n 1.9495e-07, 1.1852e-07, 2.2477e-07, 4.4604e-07, 2.4669e-07, 7.5318e-08,\n 4.2339e-07, 2.7609e-07, 2.2352e-07, 1.4964e-07, 1.1498e-07, 2.3810e-08,\n 2.5876e-07, 4.5794e-07, 1.5655e-07, 5.9664e-08, 2.2381e-07, 3.1641e-07,\n 2.5708e-07, 1.9063e-09, 3.5785e-07, 2.9290e-07, 2.3824e-07, 1.9994e-07,\n 2.6063e-09, 2.8204e-07, 2.8045e-08, 2.2149e-07, 1.0561e-11, 2.9762e-07,\n 2.1885e-07, 8.3143e-08, 2.6277e-08, 1.4450e-08, 1.6451e-07, 4.1534e-12,\n 5.5984e-11, 7.4466e-08, 3.3294e-07, 3.0760e-07, 2.0007e-07, 1.2725e-07,\n 1.8955e-07, 3.1000e-07, 1.6888e-07, 2.2580e-07, 2.4992e-08, 2.7353e-07,\n 1.2937e-07, 2.6569e-07, 8.0852e-08, 2.2294e-07, 2.7319e-07, 1.6697e-07,\n 3.2299e-07, 4.1591e-07, 1.7182e-07, 2.3193e-07, 3.1699e-08, 1.0526e-13,\n 2.4305e-07, 1.8340e-07, 3.5148e-07, 3.2496e-07, 2.3349e-07, 2.9714e-07,\n 2.8921e-07, 2.4326e-07, 6.3664e-08, 2.9252e-07, 3.6861e-07, 4.8608e-07,\n 4.1012e-07, 2.6704e-07, 2.9031e-07, 8.4731e-08, 4.4213e-12, 6.1502e-17,\n 4.0849e-07, 2.4099e-07, 8.6675e-08, 8.9670e-08, 1.8568e-07, 1.7040e-07,\n 3.1860e-07, 4.0422e-07, 3.1660e-07, 1.5315e-07, 3.8357e-07, 3.3134e-07,\n 2.2169e-07, 6.7367e-19, 1.1606e-07, 1.4955e-07, 3.5342e-07, 3.3610e-07,\n 4.2231e-07, 1.9406e-07, 3.4893e-07, 2.1355e-07, 3.3644e-08, 5.0495e-10,\n 4.1043e-07, 7.0846e-10, 1.5706e-07, 4.4913e-07, 2.6942e-07, 2.5480e-07,\n 2.8717e-07, 1.4769e-07, 2.6766e-07, 2.1021e-07, 1.0581e-07, 3.8328e-07,\n 3.3786e-07, 2.0272e-07, 1.9837e-07, 9.9742e-08, 3.9717e-07, 4.3938e-07,\n 3.9772e-07, 3.0652e-08, 2.3608e-07, 2.4613e-07, 2.4020e-07, 4.3484e-07,\n 6.3114e-08, 3.4000e-07, 4.3882e-07, 1.1968e-11, 1.8443e-07, 2.7445e-07,\n 7.3912e-08, 3.2830e-08, 3.6227e-07, 2.3162e-07, 1.9343e-07, 2.6808e-07,\n 2.6612e-07, 3.7563e-07, 2.0068e-07, 4.1878e-08, 4.2593e-07, 5.0508e-07,\n 2.9274e-07, 3.9036e-07, 2.0392e-07, 3.9488e-07, 1.2804e-11, 1.4010e-07,\n 3.2694e-08, 5.5286e-07, 7.5396e-17, 3.0049e-07, 2.5423e-07, 1.8087e-07,\n 4.6354e-07, 2.0251e-07, 1.9328e-07, 1.8850e-07, 3.8694e-07, 2.1267e-07,\n 3.3415e-07, 2.4537e-07, 1.8378e-07, 3.1171e-07, 4.4294e-07, 2.1939e-07,\n 3.5608e-07, 6.5279e-11, 1.0816e-07, 4.7690e-07, 1.8614e-07, 4.9006e-07,\n 1.6984e-07, 2.8415e-07, 3.9908e-07, 1.7342e-07, 3.7931e-07, 3.0388e-07,\n 1.1356e-07, 4.8912e-07, 1.6712e-07, 2.7119e-07, 2.7078e-07, 1.1328e-07,\n 3.2003e-07, 3.2696e-07, 2.2132e-07, 4.2784e-07, 2.1075e-07, 9.7866e-08,\n 3.1647e-07, 4.4262e-07, 1.1758e-07, 1.2251e-07, 8.0221e-08, 1.5031e-10,\n 1.8498e-09, 1.2419e-07, 2.7385e-07, 2.9803e-07, 2.8875e-07, 9.7804e-12,\n 2.4710e-07, 1.7429e-07, 2.8125e-07, 2.1467e-11, 4.8201e-08, 6.4668e-07,\n 1.2869e-07, 1.4276e-07, 2.8334e-07, 6.6329e-08, 1.0736e-07, 2.8329e-07,\n 1.4484e-07, 7.7883e-08, 3.6745e-07, 2.8226e-07, 2.5990e-07, 2.6925e-07,\n 3.1118e-07, 1.3543e-07, 2.8249e-07, 1.7772e-07, 1.0112e-07, 4.9530e-07,\n 2.1232e-07, 1.0651e-07, 5.8496e-17, 3.0583e-10, 5.2431e-07, 2.2759e-07,\n 4.9460e-07, 1.0667e-07, 4.1261e-07, 2.1008e-07, 2.6537e-07, 6.6856e-07,\n 2.7858e-07, 3.2998e-07, 3.2988e-07, 2.8305e-07, 1.0520e-07, 1.5925e-08,\n 2.0489e-07, 3.9704e-07, 7.3340e-08, 4.6286e-08, 2.1087e-07, 4.2721e-07,\n 2.6884e-07, 2.6139e-07, 6.6044e-08, 4.0949e-07, 1.9711e-07, 7.6486e-08,\n 1.4768e-07, 3.6390e-07, 1.7253e-07, 9.1067e-08, 1.5973e-07, 5.7074e-08,\n 1.6770e-07, 1.8363e-07, 2.1989e-07, 2.0705e-07, 1.1498e-08, 6.0450e-08,\n 3.1357e-08, 1.4296e-07, 4.5719e-07, 3.2743e-07, 1.3067e-07, 2.8867e-07,\n 6.5530e-08, 3.4019e-07, 2.5523e-08, 2.9716e-07, 5.2069e-07, 1.5840e-07,\n 2.9442e-07, 3.4228e-07, 1.7393e-07, 8.7288e-08, 3.4933e-07, 4.2848e-07,\n 4.7862e-08, 2.7427e-08, 2.2537e-07, 7.1154e-08, 2.7043e-07, 3.2720e-07,\n 3.6741e-07, 5.1447e-07, 2.8477e-07, 3.1218e-07, 1.9029e-07, 3.8031e-07,\n 2.9843e-07, 3.6367e-07, 3.2553e-07, 4.0569e-13, 2.0285e-07, 2.8167e-07,\n 9.3794e-08, 2.2244e-07, 9.6574e-11, 3.5288e-07, 2.2628e-07, 1.8566e-07,\n 2.5817e-07, 3.7823e-07, 3.7120e-07, 2.7439e-11, 1.7998e-18, 1.3752e-07,\n 1.3430e-07, 1.5136e-08, 4.1611e-11, 8.7024e-11, 3.0947e-07, 2.2442e-07,\n 2.5766e-07, 2.1140e-07, 4.1888e-07, 9.6453e-09, 3.1538e-07, 2.1064e-07,\n 3.0039e-07, 2.2691e-10, 1.1761e-07, 2.0779e-07, 1.7273e-07, 2.7516e-10,\n 1.4389e-07, 4.6454e-07, 1.8539e-07, 2.1644e-07, 9.7542e-08, 9.3139e-08,\n 4.5987e-07, 1.4690e-07, 2.8715e-07, 2.8575e-07, 3.3087e-07, 2.8683e-07,\n 1.5031e-08, 5.1000e-07, 8.5275e-09, 6.4609e-08, 1.4915e-07, 1.7750e-07,\n 2.0776e-12, 2.5730e-07, 1.8606e-08, 2.5418e-07, 1.4350e-07, 1.4683e-07,\n 3.2795e-07, 1.3224e-07, 1.3860e-07, 8.4104e-10, 4.8989e-08, 3.5556e-07,\n 9.8230e-08, 3.7350e-11, 2.1002e-07, 2.1275e-07, 2.4015e-07, 4.3612e-07,\n 2.7251e-07, 3.6574e-07, 2.8081e-07, 4.2960e-07, 2.4329e-07, 2.5733e-07,\n 4.6028e-07, 3.6361e-07, 2.7757e-07, 2.4876e-07, 2.6837e-07, 1.1946e-07,\n 3.8260e-07, 1.8528e-07, 2.7602e-07, 1.5977e-07, 2.6357e-07, 3.0740e-07,\n 3.3309e-07, 1.2267e-07, 4.5022e-07, 5.1684e-07, 2.0089e-07, 1.6249e-07,\n 3.2634e-07, 1.4522e-07, 2.8576e-07, 3.2658e-07, 2.3547e-07, 3.4754e-07,\n 2.0625e-07, 3.2378e-07, 4.1414e-07, 3.8249e-07, 1.2767e-07, 5.3386e-08,\n 1.8244e-07, 1.3534e-07, 6.2360e-08, 3.2263e-07, 2.4196e-07, 1.1394e-07,\n 2.4923e-07, 2.3806e-07, 1.9702e-07, 2.6968e-07, 1.7869e-12, 3.7659e-07,\n 1.8706e-07, 2.2615e-07, 3.1584e-07, 4.0406e-07, 2.8103e-07, 3.2139e-07,\n 2.0145e-07, 5.7128e-08, 2.7118e-07, 2.0823e-07, 1.6825e-07, 2.7683e-07,\n 3.9637e-07, 3.0334e-07, 4.4156e-07, 5.2473e-07, 5.2753e-07, 7.4925e-10,\n 2.3900e-07, 2.9518e-07, 3.5120e-07, 3.4552e-07, 4.1989e-07, 3.6406e-07,\n 2.5464e-07, 2.6091e-07, 1.9751e-07, 4.8832e-07, 2.3650e-07, 3.2886e-07,\n 1.7686e-07, 2.6034e-07, 1.4182e-11, 4.1228e-07, 6.4464e-08, 1.8793e-07,\n 2.7504e-07, 2.5095e-07, 1.6190e-07, 2.9690e-10, 3.5443e-07, 1.4481e-08,\n 2.5138e-07, 1.3949e-07, 2.7057e-07, 1.2396e-08, 1.9369e-07, 3.1879e-07,\n 2.9602e-07, 8.4515e-08, 1.9143e-07, 2.0396e-07, 3.3760e-07, 3.7658e-11,\n 1.1979e-07, 9.7749e-08, 4.9991e-15, 3.7056e-07, 2.7733e-07, 2.6132e-07,\n 1.9219e-07, 1.8769e-07, 2.0875e-07, 5.8114e-12, 2.0218e-07, 2.8349e-07,\n 1.5433e-07, 1.7063e-07, 1.3770e-07, 2.0156e-07, 1.6915e-07, 1.5519e-07,\n 2.6530e-07, 1.9047e-07, 3.4263e-07, 1.8360e-07, 2.3873e-07, 3.8015e-07,\n 3.5195e-07, 4.6418e-08, 2.7560e-07, 2.2705e-07, 3.1265e-07, 2.1213e-07,\n 2.2522e-07, 1.0381e-07, 3.1899e-07, 2.6277e-11, 2.9310e-07, 3.0489e-08,\n 2.5398e-07, 4.7390e-07, 3.9462e-07, 3.5780e-07, 4.4137e-07, 4.4500e-07,\n 2.2057e-07, 2.8115e-07, 2.6071e-12, 2.9250e-07, 2.7853e-07, 5.4884e-08,\n 8.6325e-08, 9.8180e-09, 6.8446e-10, 2.1333e-11, 2.2386e-07, 4.4458e-07,\n 8.0758e-14, 1.9700e-07, 2.9701e-07, 3.4827e-07, 4.6211e-07, 8.6561e-12,\n 3.3383e-07, 2.4823e-07, 8.7855e-08, 2.2781e-07, 2.4956e-07, 2.6614e-07,\n 4.8229e-07, 1.1767e-07, 2.5167e-07, 2.5303e-07, 3.5467e-07, 3.1926e-07,\n 1.8272e-07, 2.1954e-07, 4.5158e-07, 1.1523e-07, 3.1622e-07, 9.1926e-09,\n 6.0841e-07, 2.4912e-07, 3.9583e-07, 2.9193e-07, 1.7358e-07, 5.4395e-07,\n 3.4455e-07, 1.0588e-07, 3.2158e-07, 3.1075e-09, 2.2421e-07, 7.6082e-13,\n 1.4163e-07, 1.9026e-07, 2.2687e-07, 2.6986e-07, 4.8505e-07, 3.8383e-09,\n 3.4211e-08, 8.1914e-11, 4.3863e-07, 2.9797e-07, 2.9739e-07, 1.9379e-07,\n 2.5495e-07, 3.5784e-08, 3.6520e-07, 1.6319e-07, 2.2808e-07, 3.7866e-07,\n 3.4859e-08, 1.1638e-07, 2.9372e-07, 1.0222e-07, 2.6341e-08, 1.7009e-07,\n 3.9296e-07, 2.4202e-07, 2.3250e-11, 1.9694e-07, 2.9199e-07, 2.6650e-07,\n 3.2000e-07, 3.9770e-07, 3.7168e-07, 2.1164e-07, 1.8510e-10, 8.0523e-07,\n 3.5398e-07, 1.4589e-12, 3.5360e-07, 1.7886e-07, 1.3013e-07, 3.3589e-07,\n 3.0434e-07, 1.0719e-07, 2.5082e-07, 2.9619e-07, 1.5261e-07, 3.7090e-07,\n 1.3019e-07, 5.0792e-07, 2.3075e-07, 3.0127e-07, 2.9691e-07, 1.0198e-07,\n 2.2873e-07, 4.0267e-07, 5.7454e-07, 4.4278e-07, 1.1760e-07, 4.0341e-07,\n 1.6072e-07, 3.4070e-07, 1.7479e-07, 1.0532e-07, 3.6764e-07, 2.6853e-07,\n 1.9878e-07, 1.5118e-07, 1.9043e-07, 4.3375e-07, 4.2423e-07, 2.2043e-07,\n 1.2792e-07, 2.2036e-07, 3.2919e-07, 1.1646e-07, 2.3857e-07, 3.4992e-07,\n 2.0766e-07, 9.7829e-08, 1.3398e-07, 1.9801e-07, 2.5945e-07, 3.9670e-07,\n 9.5537e-08, 1.7171e-07, 2.4698e-07, 2.7714e-07, 1.1461e-07, 1.6431e-07,\n 3.7028e-07, 1.9785e-07, 1.8528e-10, 2.8006e-17, 3.5061e-07, 1.6847e-07],\n device='cuda:0')" }, "4": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.6052e-45, 7.9874e-05, 9.6970e-43, ..., -2.6811e-04,\n -1.5187e-04, 5.9570e-07], device='cuda:0')", - "exp_avg_sq": "tensor([3.7229e-08, 5.4027e-07, 7.1286e-09, ..., 2.3510e-06, 1.7445e-06,\n 6.1113e-09], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[-5.8735e-07, 4.8569e-25, 2.0672e-05, ..., 5.6052e-45,\n -4.0880e-05, 7.9777e-06],\n [ 9.2971e-06, 1.5598e-25, 1.5837e-06, ..., -5.6052e-45,\n -2.5973e-06, -5.9872e-06],\n [-1.1278e-05, 1.6732e-25, 2.5069e-05, ..., -5.6052e-45,\n 5.0401e-05, 2.4138e-05],\n ...,\n [ 1.6577e-06, 1.8463e-25, -2.1244e-05, ..., -5.6052e-45,\n 5.0692e-05, -7.5169e-06],\n [-8.4020e-06, 4.3994e-25, 1.0613e-05, ..., -5.6052e-45,\n -8.8597e-06, 1.5104e-05],\n [ 2.0186e-06, 3.7173e-25, -2.4462e-05, ..., -5.6052e-45,\n 5.2665e-05, 1.3639e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5131e-09, 6.8519e-14, 2.4536e-10, ..., 7.8498e-20, 1.7754e-09,\n 3.8327e-10],\n [2.9172e-09, 1.2635e-12, 5.1498e-10, ..., 4.0118e-18, 2.8007e-09,\n 1.8120e-09],\n [4.2391e-09, 7.6922e-15, 9.4877e-10, ..., 1.7497e-18, 3.0958e-09,\n 1.9701e-09],\n ...,\n [3.9652e-09, 1.2506e-12, 7.4609e-10, ..., 1.5308e-18, 1.7783e-09,\n 1.1084e-09],\n [4.7277e-09, 1.0482e-13, 6.8378e-10, ..., 2.2813e-20, 2.5905e-09,\n 2.7353e-09],\n [3.8885e-09, 1.6098e-13, 1.2032e-09, ..., 1.4620e-18, 2.5534e-09,\n 2.0446e-09]], device='cuda:0')" }, "5": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, -9.5282e-07, -2.2421e-44, ..., -5.3672e-06,\n 1.4303e-06, 1.4593e-09],\n [ 5.6052e-45, 1.1093e-06, -2.3822e-44, ..., 2.6065e-06,\n -3.9888e-06, 1.4066e-10],\n [ 5.6052e-45, -1.1360e-06, 2.9427e-44, ..., -6.3705e-06,\n -5.0734e-07, 1.0122e-09],\n ...,\n [-5.6052e-45, -3.5286e-09, 5.4651e-44, ..., -8.6115e-07,\n 2.9393e-06, 2.3031e-09],\n [ 5.6052e-45, 5.6537e-06, 5.6052e-45, ..., -3.2853e-06,\n -9.1894e-07, 9.9749e-10],\n [-5.6052e-45, 7.1426e-07, 2.2421e-44, ..., 4.6902e-06,\n -4.2619e-06, -2.9763e-09]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.2588e-13, 8.0932e-11, 1.2150e-12, ..., 3.2871e-10, 1.9746e-10,\n 4.3390e-11],\n [3.5978e-12, 1.8005e-10, 3.2323e-12, ..., 2.7853e-10, 2.8891e-10,\n 8.4262e-11],\n [2.4029e-12, 1.3067e-10, 1.9927e-11, ..., 5.7953e-10, 3.3359e-10,\n 5.1162e-11],\n ...,\n [5.8343e-13, 1.2083e-10, 4.8009e-11, ..., 7.8236e-10, 3.5566e-10,\n 7.8672e-11],\n [1.2049e-11, 3.9570e-10, 4.5527e-12, ..., 1.0374e-09, 4.5623e-10,\n 9.0805e-11],\n [3.7272e-13, 1.3071e-10, 4.6136e-12, ..., 9.2254e-10, 3.5708e-10,\n 4.4778e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 4.1077e-08, -1.3984e-08, 0.0000e+00, ..., 1.0988e-07,\n 4.0628e-09, 9.9676e-13],\n [-5.4944e-06, -6.3427e-06, -5.6052e-45, ..., 1.8983e-06,\n 2.2179e-07, 6.9046e-09],\n [ 1.2553e-07, -8.2448e-07, -2.8306e-43, ..., -8.5227e-08,\n 2.0630e-06, -1.0564e-08],\n ...,\n [ 7.8341e-07, 6.8223e-07, 5.6052e-45, ..., -4.3376e-07,\n 9.5179e-07, -3.3700e-06],\n [ 1.0092e-07, -8.9138e-07, -5.6052e-45, ..., 9.4612e-08,\n 1.6499e-06, 3.1060e-08],\n [ 1.8421e-06, 1.3908e-06, 5.6052e-45, ..., -4.0586e-06,\n -2.3236e-06, -2.7607e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.5349e-13, 7.0549e-13, 0.0000e+00, ..., 9.0973e-13, 1.8718e-12,\n 4.9006e-14],\n [2.7152e-10, 7.4804e-11, 8.4360e-14, ..., 3.1960e-10, 1.2772e-10,\n 1.9254e-11],\n [6.6905e-12, 8.3255e-12, 5.5893e-15, ..., 1.2933e-11, 7.6728e-11,\n 1.2273e-11],\n ...,\n [4.4578e-10, 1.3013e-10, 1.2235e-13, ..., 2.7417e-11, 8.1413e-11,\n 2.1616e-09],\n [1.7065e-11, 1.6678e-10, 1.1750e-15, ..., 1.4255e-10, 1.2765e-10,\n 7.0486e-12],\n [7.8672e-11, 3.3206e-11, 2.1205e-14, ..., 2.0737e-10, 4.8531e-11,\n 6.5964e-10]], device='cuda:0')" }, - "15": { - "step": "tensor(2504.)", + "6": { + "step": "tensor(5008.)", + "exp_avg": "tensor([-7.2008e-07, -3.0324e-05, -1.2121e-05, ..., 4.6873e-06,\n 5.6040e-06, 1.8653e-05], device='cuda:0')", + "exp_avg_sq": "tensor([6.4102e-11, 1.6039e-08, 4.9925e-09, ..., 1.4547e-08, 6.3299e-09,\n 9.7342e-09], device='cuda:0')" + }, + "7": { + "step": "tensor(5008.)", + "exp_avg": "tensor([[-4.4023e-08, 7.2592e-08, 4.2675e-07, ..., -1.4111e-07,\n 3.2388e-08, -3.2449e-07],\n [ 3.0744e-08, -9.8427e-07, 2.1796e-07, ..., -2.7479e-07,\n 1.4039e-06, -6.8012e-08],\n [-2.4845e-08, 6.9495e-07, 3.8541e-07, ..., 2.7620e-07,\n 2.3047e-07, 1.5663e-06],\n ...,\n [ 1.5190e-10, -6.3821e-07, -6.1442e-07, ..., 7.7054e-07,\n -1.7749e-07, 1.1047e-07],\n [-1.8115e-08, 1.3022e-06, -3.2499e-07, ..., -1.0946e-07,\n 1.2228e-06, 6.6345e-07],\n [-1.7501e-07, -5.5627e-07, -2.9534e-08, ..., -1.1809e-07,\n -1.7901e-07, 4.4734e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6072e-12, 9.8176e-12, 2.4876e-11, ..., 1.0221e-11, 8.3117e-12,\n 9.9785e-12],\n [7.3777e-12, 2.5521e-11, 2.1757e-11, ..., 1.8811e-11, 1.0008e-11,\n 1.2684e-11],\n [9.2885e-13, 2.0288e-11, 1.8688e-11, ..., 1.8455e-11, 8.5347e-12,\n 2.0158e-11],\n ...,\n [1.8921e-12, 2.1504e-11, 1.4157e-11, ..., 2.1163e-11, 1.1271e-11,\n 2.5519e-11],\n [2.2871e-12, 2.9846e-11, 4.8118e-11, ..., 2.3456e-11, 9.3664e-12,\n 2.0419e-11],\n [1.6263e-12, 1.9157e-11, 1.0400e-10, ..., 1.7849e-11, 1.0994e-11,\n 1.5045e-11]], device='cuda:0')" + }, + "14": { + "step": "tensor(5008.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.3121e-05], device='cuda:0')" + "exp_avg_sq": "tensor([1.7587e-07], device='cuda:0')" }, - "16": { - "step": "tensor(2504.)", + "15": { + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.4604e-08, 8.3762e-08, 3.0760e-08], device='cuda:0')" + "exp_avg_sq": "tensor([2.2688e-10, 1.2843e-08, 9.6561e-09], device='cuda:0')" }, - "17": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.0446e-04, 7.3763e-06, 4.3291e-06, 8.2829e-06, 6.5167e-06],\n device='cuda:0')" + "16": { + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.3662e-05, 1.4077e-06, 1.5232e-06, 1.6271e-06], device='cuda:0')" + }, + "18": { + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4566e-11, 9.0929e-12, 0.0000e+00, ..., 5.9999e-11, 2.1578e-11,\n 6.9262e-13],\n [3.1077e-12, 7.9886e-12, 0.0000e+00, ..., 7.3445e-12, 2.5826e-11,\n 7.2784e-12],\n [9.3398e-13, 3.2961e-12, 0.0000e+00, ..., 1.1015e-12, 7.1412e-12,\n 2.3388e-12],\n ...,\n [1.1834e-13, 6.3651e-13, 0.0000e+00, ..., 9.0771e-13, 1.2570e-11,\n 7.0816e-14],\n [2.2796e-11, 2.3965e-11, 0.0000e+00, ..., 2.9006e-11, 1.2158e-10,\n 7.2961e-12],\n [7.1204e-14, 5.5338e-14, 0.0000e+00, ..., 1.0953e-13, 1.0864e-12,\n 2.5732e-13]], device='cuda:0')" }, "19": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.1509e-14, 3.9211e-13, 6.9879e-14, ..., 1.5807e-14, 1.8716e-13,\n 1.0091e-13],\n [4.3964e-11, 4.4169e-11, 3.3803e-14, ..., 8.1305e-12, 1.6260e-12,\n 3.4645e-12],\n [4.5848e-10, 5.2492e-10, 5.2173e-14, ..., 4.2327e-11, 5.7284e-11,\n 1.7387e-11],\n ...,\n [1.3774e-11, 6.2094e-12, 1.1894e-12, ..., 3.9597e-13, 3.6764e-12,\n 4.5228e-13],\n [1.7424e-12, 1.4222e-12, 3.6278e-14, ..., 3.2481e-14, 1.8038e-13,\n 2.0890e-14],\n [1.5746e-09, 1.8440e-09, 2.8113e-13, ..., 1.4220e-10, 2.0210e-10,\n 6.9021e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.0606e-08, 9.3954e-09, 2.1467e-09, 3.8972e-09, 1.4179e-09, 1.3740e-11,\n 2.4395e-10, 3.8693e-10, 9.8885e-10, 5.5979e-11, 3.1361e-09, 1.5948e-08,\n 6.5719e-09, 7.9609e-09, 1.2019e-09, 7.4954e-09, 1.7865e-08, 1.4808e-09,\n 9.9049e-09, 1.6225e-08, 1.9368e-10, 6.8170e-09, 1.4737e-09, 1.4571e-08,\n 2.9480e-09, 5.9659e-09, 2.6589e-09, 1.6540e-09, 4.5040e-10, 2.7903e-09,\n 8.3472e-09, 4.1833e-09, 1.5200e-10, 3.8372e-08, 1.9896e-08, 9.6730e-09,\n 1.0030e-09, 1.1061e-10, 7.6025e-09, 7.4092e-09, 3.6351e-08, 8.9915e-10,\n 1.6045e-10, 5.0813e-10, 1.2828e-09, 9.3198e-09, 4.0073e-09, 2.6122e-09,\n 2.5418e-09, 2.9797e-09, 1.2189e-10, 4.2907e-09, 1.0898e-08, 6.6486e-09,\n 1.4099e-09, 7.3521e-10, 5.0104e-09, 6.3822e-11, 1.9272e-09, 6.9291e-10,\n 2.1583e-09, 4.7684e-09, 3.0751e-09, 1.7823e-09, 1.7468e-10, 1.1375e-09,\n 4.1364e-09, 2.4332e-10, 5.2138e-09, 4.9074e-09, 4.6936e-11, 1.5843e-09,\n 4.1910e-09, 1.9052e-10, 1.6814e-09, 1.4406e-11, 1.1148e-09, 2.8467e-08,\n 5.6600e-10, 5.2312e-11, 8.7266e-09, 1.1680e-08, 1.9142e-12, 6.5134e-11,\n 2.0808e-09, 7.2274e-09, 2.1746e-10, 3.6989e-09, 4.7418e-10, 8.2244e-09,\n 1.3060e-09, 1.1929e-08, 1.5205e-08, 1.0759e-09, 1.5064e-09, 5.9965e-09,\n 3.9531e-10, 2.3574e-08, 7.8634e-09, 2.8165e-08, 8.0385e-09, 5.5699e-10,\n 1.1115e-10, 3.4015e-09, 2.8269e-10, 2.8079e-08, 7.0484e-10, 1.9970e-09,\n 4.4999e-09, 3.4717e-11, 3.6227e-08, 6.7111e-10, 1.0774e-08, 4.5758e-09,\n 1.5522e-09, 2.1353e-08, 5.3199e-10, 2.5884e-09, 7.1466e-09, 3.4193e-09,\n 3.7299e-09, 6.9018e-09, 6.8537e-10, 1.1521e-10, 4.4927e-11, 2.5097e-09,\n 5.9853e-08, 4.6670e-10, 3.9286e-09, 7.7916e-10, 1.0485e-10, 1.9248e-08,\n 1.6177e-09, 2.7093e-09, 1.0965e-08, 7.8298e-09, 6.6174e-09, 2.0289e-08,\n 8.7483e-09, 1.9246e-12, 7.9800e-11, 1.2948e-10, 7.0455e-09, 2.8913e-08,\n 1.6194e-08, 1.0639e-09, 1.0882e-09, 1.4047e-09, 8.6660e-09, 1.9625e-11,\n 4.4909e-10, 9.0218e-09, 3.1145e-09, 3.4686e-09, 7.7597e-09, 2.6695e-10,\n 2.4374e-09, 6.4040e-09, 1.2441e-09, 2.0743e-08, 9.8351e-09, 1.6139e-09,\n 3.8283e-09, 1.0394e-08, 6.5708e-10, 4.7012e-09, 4.6280e-08, 7.0317e-09,\n 4.8587e-09, 2.2404e-09, 3.2405e-10, 7.7298e-09, 3.1598e-08, 7.9476e-09,\n 1.0445e-08, 7.6081e-11, 1.0531e-09, 3.6363e-09, 4.1439e-09, 4.4426e-09,\n 5.4522e-09, 2.5457e-09, 5.1116e-10, 3.0694e-09, 5.8425e-09, 1.6212e-11,\n 1.5302e-09, 6.2847e-09, 1.5941e-09, 2.5334e-08, 1.8937e-08, 1.9640e-09,\n 1.3001e-09, 5.6315e-10, 3.7583e-12, 9.0749e-08, 1.0635e-09, 1.8311e-09,\n 2.0217e-09, 4.4324e-08, 4.4160e-13, 4.1393e-08, 7.7860e-09, 1.1846e-09,\n 7.5630e-09, 9.2929e-08, 8.5440e-09, 2.6665e-09, 3.8699e-11, 1.1994e-08,\n 9.6838e-10, 1.7504e-08, 2.7255e-09, 5.5532e-09, 4.2509e-11, 7.0713e-09,\n 5.4377e-08, 3.0782e-11, 5.9934e-10, 1.7578e-09, 1.2941e-09, 3.6175e-08,\n 4.5558e-09, 2.6011e-08, 3.6060e-09, 4.3788e-12, 9.2904e-11, 5.3223e-10,\n 1.9726e-10, 7.5946e-09, 6.4931e-11, 6.9400e-11, 1.3183e-08, 5.1555e-09,\n 4.0208e-09, 8.9585e-10, 1.0723e-09, 8.1170e-09, 1.0163e-09, 8.9298e-10,\n 1.0359e-08, 4.2160e-11, 2.2807e-09, 1.3985e-08, 2.4378e-09, 6.9836e-11,\n 1.3051e-10, 5.8687e-09, 6.6259e-10, 2.1107e-08, 1.1004e-08, 1.1057e-09,\n 9.5054e-09, 1.1223e-09, 3.1745e-08, 5.6132e-10], device='cuda:0')" }, "20": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.2944e-11, 4.0958e-08, 2.8493e-07, 7.1843e-09, 4.8972e-08, 1.9028e-07,\n 2.1526e-09, 2.9251e-07, 2.2852e-08, 5.3104e-10, 1.3194e-06, 4.2043e-09,\n 1.3313e-09, 3.0221e-08, 3.1236e-08, 2.4767e-07, 8.3586e-08, 8.3206e-09,\n 4.1425e-07, 1.2442e-07, 1.2459e-07, 1.1979e-08, 4.2682e-08, 8.6941e-08,\n 5.1852e-08, 8.4256e-09, 3.8480e-09, 5.7498e-07, 3.8638e-10, 1.2156e-08,\n 2.3425e-08, 1.1495e-07, 1.9044e-07, 7.2045e-09, 1.0103e-08, 2.7294e-08,\n 5.3535e-10, 3.4550e-08, 2.0808e-07, 1.0964e-09, 4.4234e-09, 1.4070e-08,\n 3.7733e-07, 6.4537e-08, 4.4611e-09, 3.4947e-10, 1.5487e-09, 1.2382e-08,\n 3.0406e-09, 6.7533e-11, 2.3559e-10, 2.7345e-09, 1.4618e-08, 7.8860e-08,\n 1.0330e-09, 1.1704e-07, 2.0602e-07, 9.2605e-07, 1.0542e-07, 1.9798e-10,\n 1.5107e-06, 5.1156e-11, 5.0641e-09, 7.8229e-08, 2.2932e-07, 3.3659e-07,\n 5.4807e-09, 5.7875e-09, 1.2160e-06, 1.1369e-08, 1.7563e-07, 4.8266e-08,\n 9.0201e-08, 1.5705e-08, 2.3731e-10, 2.2609e-08, 1.6363e-07, 2.6635e-09,\n 2.2916e-12, 9.2094e-11, 2.4144e-10, 3.5959e-09, 3.7951e-07, 1.5983e-07,\n 2.4730e-08, 5.9653e-09, 1.2932e-07, 2.2169e-08, 3.4262e-08, 8.5584e-07,\n 4.9158e-09, 3.2035e-08, 5.9044e-09, 2.4493e-07, 3.8451e-08, 6.9542e-09,\n 3.2337e-11, 9.6115e-09, 3.1050e-07, 6.1676e-08, 1.6803e-10, 2.2802e-07,\n 1.8376e-09, 1.1451e-10, 9.7054e-08, 1.7144e-08, 3.1495e-10, 4.1418e-09,\n 1.2880e-08, 1.6875e-08, 1.6142e-10, 3.4645e-09, 1.3664e-09, 3.8951e-10,\n 1.9713e-08, 8.3387e-09, 2.4385e-07, 6.7469e-09, 1.4747e-07, 6.4572e-08,\n 3.1071e-09, 5.5429e-08, 8.3802e-08, 6.4990e-09, 1.1008e-07, 1.2545e-08,\n 4.8781e-07, 8.3148e-09, 1.9251e-08, 1.3294e-09, 5.6816e-10, 8.9081e-09,\n 3.1326e-09, 4.1369e-08, 8.1907e-09, 1.3614e-07, 4.9259e-10, 3.1582e-09,\n 8.6024e-08, 1.2168e-08, 2.5826e-10, 7.0613e-10, 1.2711e-06, 3.2392e-06,\n 7.8820e-08, 3.4514e-10, 1.0364e-07, 8.8152e-08, 6.8407e-08, 1.8208e-09,\n 3.4275e-10, 6.1538e-08, 2.2458e-07, 6.5488e-08, 1.1072e-07, 3.3207e-08,\n 6.3740e-10, 3.1896e-09, 4.9460e-09, 3.3921e-07, 1.7344e-09, 5.0832e-08,\n 6.3747e-08, 1.6188e-09, 1.6423e-09, 2.3161e-06, 2.1296e-07, 5.2220e-10,\n 6.5832e-07, 4.7859e-11, 6.1705e-08, 3.3256e-09, 8.1809e-08, 1.7659e-08,\n 3.2816e-08, 4.5427e-07, 7.6673e-08, 3.5976e-08, 7.8306e-09, 2.0474e-07,\n 3.1516e-08, 7.5639e-08, 1.9129e-06, 2.4427e-08, 1.2307e-09, 1.9205e-11,\n 3.1831e-09, 2.9404e-07, 1.5204e-08, 6.4266e-08, 3.2246e-07, 1.2291e-07,\n 2.5757e-09, 1.4638e-09, 2.3656e-07, 4.1147e-08, 4.2504e-10, 9.2114e-08,\n 6.7926e-10, 7.8771e-08, 1.7408e-07, 9.7666e-09, 4.3017e-08, 1.2156e-09,\n 2.0221e-08, 6.2318e-07, 9.6644e-09, 7.0683e-08, 2.1837e-10, 5.3516e-07,\n 1.0144e-07, 6.2384e-10, 4.8834e-09, 4.0165e-10, 3.7621e-07, 3.7809e-09,\n 2.6809e-08, 1.5916e-07, 8.2601e-09, 7.0449e-08, 5.6668e-08, 4.3850e-08,\n 2.5415e-09, 1.4341e-07, 1.0845e-08, 5.0242e-08, 5.8482e-07, 5.2523e-09,\n 2.2636e-10, 1.8405e-09, 2.2206e-08, 3.2338e-08, 1.2661e-09, 4.2055e-08,\n 1.7565e-08, 2.0179e-07, 2.4357e-08, 1.4054e-07, 1.4883e-08, 2.4768e-09,\n 8.9290e-09, 1.5006e-09, 4.0633e-07, 2.6903e-08, 3.7546e-09, 7.9998e-08,\n 2.9122e-09, 8.4035e-09, 2.6431e-07, 7.2188e-09, 9.1326e-08, 5.2122e-10,\n 5.8531e-08, 3.9832e-09, 5.0394e-10, 9.6826e-07], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.0837e-11, 4.6489e-11, 3.1264e-12, 2.0402e-11, 2.3935e-12, 1.9740e-13,\n 4.3405e-13, 2.0156e-12, 1.1969e-12, 7.0310e-14, 5.7767e-12, 2.8928e-11,\n 1.6655e-11, 3.7466e-11, 3.8567e-12, 1.7655e-11, 5.7906e-11, 2.5684e-12,\n 3.2040e-11, 4.6163e-11, 9.5665e-14, 2.8674e-11, 3.2421e-12, 5.9469e-11,\n 1.1559e-11, 1.1579e-11, 6.7244e-12, 5.4956e-12, 1.6345e-12, 4.9386e-12,\n 2.5492e-11, 1.2411e-11, 9.6807e-13, 1.0331e-10, 7.7087e-11, 2.4123e-11,\n 1.9913e-12, 2.0152e-15, 1.5798e-11, 1.3394e-11, 1.5376e-10, 2.1449e-12,\n 6.8263e-13, 2.2293e-12, 2.5628e-12, 2.0965e-11, 8.7288e-12, 5.5107e-12,\n 3.8874e-12, 4.6039e-12, 9.5507e-14, 1.1724e-11, 2.2626e-11, 1.4493e-11,\n 1.9313e-12, 3.4671e-12, 1.6209e-11, 1.3354e-13, 6.3606e-12, 1.9626e-12,\n 3.8969e-12, 3.5572e-11, 8.6256e-12, 4.2808e-12, 2.6434e-15, 1.8173e-12,\n 9.0315e-12, 1.3211e-13, 9.6269e-12, 1.5888e-11, 1.1045e-12, 3.6402e-12,\n 4.6533e-12, 6.3967e-13, 3.0752e-12, 9.3100e-13, 6.5003e-12, 1.4454e-10,\n 4.9540e-13, 1.4783e-16, 3.6552e-11, 3.0283e-11, 2.7830e-13, 3.2177e-13,\n 4.0930e-12, 1.6148e-11, 8.6660e-13, 1.7316e-11, 1.3897e-12, 1.5336e-11,\n 3.9559e-12, 2.7258e-11, 5.5273e-11, 1.3642e-12, 4.1675e-12, 2.1135e-11,\n 8.5597e-13, 5.0211e-11, 3.0602e-11, 7.9197e-11, 2.4092e-11, 2.5930e-12,\n 1.3592e-12, 6.0288e-12, 1.1792e-13, 6.5984e-11, 7.3680e-13, 4.7081e-12,\n 9.3694e-12, 4.9065e-14, 9.2464e-11, 2.0920e-12, 3.0535e-11, 9.6079e-12,\n 2.8324e-12, 8.6184e-11, 2.0960e-12, 5.5189e-12, 2.1548e-11, 8.2947e-12,\n 6.8629e-12, 1.4374e-11, 5.7599e-13, 1.1117e-14, 4.5848e-13, 3.8622e-12,\n 2.7065e-10, 1.7699e-12, 1.2548e-11, 1.1884e-12, 3.8420e-14, 5.7402e-11,\n 3.9586e-12, 8.0607e-12, 2.1048e-11, 1.7682e-11, 1.8587e-11, 5.2364e-11,\n 2.3933e-11, 3.2469e-13, 2.0464e-12, 7.4192e-13, 1.5481e-11, 8.4985e-11,\n 1.0176e-10, 4.2116e-12, 4.3713e-12, 3.2883e-12, 1.7952e-11, 4.9772e-15,\n 4.4954e-12, 1.9783e-11, 1.0056e-11, 3.9488e-12, 1.8204e-11, 1.6173e-12,\n 3.5971e-12, 1.0911e-11, 2.0467e-12, 8.2042e-11, 1.9179e-11, 1.8265e-12,\n 8.9190e-12, 3.4649e-11, 1.9094e-12, 6.1096e-12, 1.5909e-10, 1.3336e-11,\n 1.4868e-11, 5.7095e-12, 1.6607e-13, 3.0832e-11, 7.6113e-11, 2.9662e-11,\n 1.9661e-11, 2.9267e-13, 1.6457e-12, 1.6265e-11, 1.2693e-11, 1.1603e-11,\n 1.4080e-11, 4.9266e-12, 6.2074e-13, 4.6085e-12, 1.4626e-11, 8.7429e-18,\n 2.3574e-12, 1.3981e-11, 1.4039e-12, 6.3436e-11, 4.9428e-11, 2.4737e-12,\n 3.8664e-12, 3.0811e-12, 3.5427e-13, 3.5179e-10, 1.3539e-12, 2.9321e-12,\n 6.1185e-12, 1.2182e-10, 1.0495e-13, 1.1122e-10, 1.4050e-11, 1.6182e-12,\n 2.5157e-11, 2.3684e-10, 1.8507e-11, 6.7287e-12, 1.2685e-14, 1.8743e-11,\n 1.3265e-12, 4.3632e-11, 1.8506e-11, 1.6947e-11, 2.5405e-14, 1.2280e-11,\n 2.5870e-10, 1.0554e-14, 2.2501e-12, 2.1057e-12, 1.6530e-12, 1.5912e-10,\n 2.5351e-11, 6.3299e-11, 1.0661e-11, 9.3165e-14, 4.5158e-15, 1.5781e-12,\n 1.9538e-13, 2.1354e-11, 5.2482e-13, 4.8876e-14, 7.7762e-11, 2.2765e-11,\n 1.2169e-11, 2.0868e-12, 1.3716e-12, 2.3608e-11, 2.7892e-12, 8.9159e-13,\n 2.5651e-11, 9.8212e-15, 6.8431e-12, 3.6019e-11, 3.6967e-12, 1.1972e-14,\n 3.0740e-14, 1.1226e-11, 1.0451e-12, 5.6963e-11, 2.2360e-11, 1.5541e-12,\n 2.1291e-11, 2.2211e-12, 1.0166e-10, 2.9559e-12], device='cuda:0')" }, "21": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.7327e-12, 4.6902e-12, 6.5231e-10, 4.0715e-12, 2.0034e-11, 2.2924e-11,\n 1.0866e-11, 7.6030e-11, 4.8903e-13, 2.6232e-12, 8.1450e-10, 1.6119e-12,\n 2.3751e-13, 1.1175e-11, 6.1200e-11, 2.0981e-10, 9.0258e-11, 6.4702e-13,\n 1.5381e-09, 7.7164e-10, 6.9532e-10, 9.1417e-13, 1.4724e-12, 5.3274e-11,\n 4.3988e-10, 6.2814e-12, 1.0025e-11, 5.5462e-10, 1.8212e-13, 1.3887e-11,\n 4.7173e-12, 9.0395e-11, 6.6209e-10, 1.7745e-12, 8.3497e-12, 2.0514e-10,\n 2.9401e-12, 1.6593e-11, 4.0718e-10, 2.4947e-14, 1.9873e-12, 8.6204e-13,\n 4.1929e-10, 2.3159e-11, 3.5733e-12, 2.9215e-13, 2.0928e-12, 1.7151e-12,\n 1.0005e-12, 3.0663e-12, 3.1011e-13, 3.4026e-12, 2.3174e-11, 1.7966e-11,\n 1.5417e-14, 2.0947e-11, 1.1674e-11, 7.0589e-10, 5.1817e-11, 5.7061e-13,\n 3.2949e-09, 2.1022e-14, 1.4713e-13, 9.6662e-12, 4.4809e-10, 2.7335e-10,\n 3.6666e-12, 5.9145e-12, 4.2916e-09, 1.6058e-12, 3.1986e-10, 1.0115e-12,\n 1.2480e-11, 8.8802e-11, 9.7551e-13, 7.4531e-12, 1.6667e-10, 1.3153e-11,\n 2.8133e-12, 2.4982e-14, 1.3004e-12, 8.4495e-12, 9.4228e-11, 4.6883e-10,\n 2.3232e-11, 3.2398e-11, 1.7471e-11, 1.3498e-12, 8.4390e-12, 2.3335e-09,\n 1.1383e-11, 1.0063e-11, 2.8971e-12, 2.9613e-10, 5.3666e-12, 1.9430e-11,\n 5.9025e-14, 2.6773e-11, 5.3992e-10, 1.0917e-10, 3.8849e-13, 6.0639e-10,\n 2.8531e-13, 8.3677e-13, 2.7382e-11, 2.9964e-11, 1.1820e-12, 4.1353e-12,\n 2.8738e-12, 2.4424e-12, 4.0107e-14, 4.4632e-12, 7.4551e-12, 1.0580e-12,\n 4.8235e-12, 5.4668e-13, 6.4466e-10, 1.5862e-11, 2.0297e-10, 9.1397e-12,\n 8.1565e-12, 5.4636e-11, 6.8973e-11, 4.5762e-12, 2.2474e-10, 1.0053e-13,\n 5.0595e-10, 1.2316e-13, 5.8061e-12, 4.6577e-13, 4.3606e-13, 4.8768e-13,\n 6.6649e-13, 1.6442e-11, 1.7118e-12, 3.5128e-10, 8.0253e-13, 4.1634e-13,\n 5.2229e-11, 1.8337e-12, 7.7593e-13, 3.2173e-12, 5.6178e-09, 6.3765e-09,\n 1.2222e-11, 1.4552e-14, 1.1195e-10, 9.6051e-11, 1.6145e-10, 8.2741e-14,\n 6.3705e-13, 1.2370e-10, 2.3704e-10, 6.7335e-12, 2.0814e-11, 6.1241e-11,\n 4.8146e-15, 1.0386e-11, 2.3076e-12, 2.3371e-11, 3.1228e-13, 8.3313e-11,\n 3.1689e-12, 9.5868e-12, 3.0412e-12, 5.4178e-09, 1.2486e-10, 3.2589e-13,\n 8.6288e-10, 6.1913e-15, 6.7880e-12, 5.1660e-12, 5.5164e-10, 2.3537e-11,\n 1.4477e-11, 2.6630e-10, 2.4638e-11, 1.7592e-12, 1.1320e-11, 2.3196e-10,\n 6.0463e-13, 2.1951e-11, 5.0084e-09, 8.3006e-12, 4.5005e-13, 3.7942e-13,\n 1.4429e-13, 2.3585e-10, 3.4518e-11, 6.4076e-11, 2.0118e-10, 1.6262e-11,\n 1.0023e-12, 1.2389e-13, 1.3874e-10, 8.2211e-13, 6.7424e-14, 5.4224e-12,\n 9.8565e-13, 2.5517e-11, 4.6320e-11, 8.4768e-13, 4.4994e-12, 1.0563e-12,\n 1.3591e-12, 1.4759e-09, 1.0199e-11, 6.1815e-12, 3.4351e-12, 8.6390e-10,\n 3.2058e-11, 4.3923e-13, 3.6788e-13, 5.7062e-12, 3.6787e-11, 1.0877e-13,\n 9.4608e-13, 2.6590e-10, 3.8658e-11, 1.1256e-11, 1.0408e-10, 2.0665e-10,\n 3.2796e-13, 1.1583e-10, 5.8434e-12, 1.6796e-11, 1.1791e-10, 2.0854e-12,\n 6.6988e-14, 1.8011e-13, 2.6559e-12, 5.7292e-11, 1.8793e-13, 8.2889e-11,\n 2.0164e-12, 2.3593e-10, 2.4612e-10, 9.3090e-11, 1.4899e-11, 5.3082e-14,\n 1.3935e-12, 2.7556e-13, 1.5027e-10, 6.0682e-12, 1.4176e-12, 3.8776e-11,\n 3.3851e-13, 2.7566e-13, 1.3945e-09, 6.5608e-12, 9.6834e-12, 8.5278e-13,\n 4.2119e-11, 6.8411e-12, 9.0604e-13, 2.7084e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.7193e-11, 4.4508e-11, 6.1277e-12, 2.0029e-11, 3.1496e-12, 1.3380e-13,\n 6.0683e-13, 3.2961e-12, 2.0118e-12, 3.9426e-14, 9.5755e-12, 5.4047e-11,\n 1.9061e-11, 3.7143e-11, 6.4909e-12, 3.4239e-11, 7.7432e-11, 4.0941e-12,\n 2.8456e-11, 6.7186e-11, 1.0816e-13, 3.2105e-11, 7.4311e-12, 6.2731e-11,\n 1.5443e-11, 1.7882e-11, 1.2383e-11, 8.6888e-12, 3.3658e-12, 7.1992e-12,\n 3.7434e-11, 1.9476e-11, 1.5138e-12, 1.3206e-10, 8.5087e-11, 4.1555e-11,\n 1.4390e-12, 4.1827e-16, 3.3554e-11, 2.5484e-11, 1.1825e-10, 4.9894e-12,\n 1.6464e-12, 3.9380e-12, 4.3938e-12, 2.9960e-11, 1.8505e-11, 1.2898e-11,\n 6.3870e-12, 9.6134e-12, 1.5608e-13, 1.8970e-11, 3.5524e-11, 2.0838e-11,\n 3.5157e-12, 4.7561e-12, 2.3158e-11, 1.2040e-13, 1.0739e-11, 4.1243e-12,\n 5.1648e-12, 2.5052e-11, 1.4068e-11, 8.1531e-12, 4.3431e-14, 2.8430e-12,\n 1.9556e-11, 3.5698e-13, 2.3180e-11, 2.3782e-11, 1.2142e-12, 8.0418e-12,\n 1.4568e-11, 1.3804e-12, 3.9982e-12, 8.5327e-13, 6.9230e-12, 1.2580e-10,\n 7.4656e-13, 1.1864e-16, 4.2155e-11, 5.2598e-11, 3.0659e-13, 6.9611e-13,\n 1.0607e-11, 3.3656e-11, 1.5304e-12, 1.9584e-11, 1.5377e-12, 3.6193e-11,\n 6.8366e-12, 5.1342e-11, 4.8966e-11, 2.8534e-12, 6.7788e-12, 1.6196e-11,\n 1.1227e-12, 8.0495e-11, 3.6871e-11, 1.1796e-10, 3.5435e-11, 3.8705e-12,\n 1.5501e-12, 9.2719e-12, 1.7419e-13, 9.2677e-11, 1.2553e-12, 1.0451e-11,\n 1.2663e-11, 3.2559e-14, 1.2166e-10, 4.0837e-12, 4.8465e-11, 1.6859e-11,\n 3.7283e-12, 6.7451e-11, 2.8814e-12, 6.9796e-12, 3.2689e-11, 1.6531e-11,\n 1.0569e-11, 2.0359e-11, 1.0597e-12, 3.0656e-14, 8.5377e-13, 7.1995e-12,\n 2.4062e-10, 2.6192e-12, 1.9836e-11, 2.1940e-12, 4.4356e-14, 8.3359e-11,\n 8.2576e-12, 1.2836e-11, 3.4564e-11, 2.5553e-11, 2.9313e-11, 6.8423e-11,\n 3.8947e-11, 3.9599e-13, 1.5474e-12, 1.2136e-12, 2.2550e-11, 9.8913e-11,\n 7.5255e-11, 6.3054e-12, 6.2416e-12, 7.3417e-12, 2.8414e-11, 1.1112e-14,\n 4.1223e-12, 2.7142e-11, 1.5787e-11, 9.9493e-12, 2.1545e-11, 2.4589e-12,\n 6.1008e-12, 2.3217e-11, 3.2995e-12, 8.7918e-11, 3.2296e-11, 4.4906e-12,\n 9.0052e-12, 4.6745e-11, 3.5309e-12, 1.3427e-11, 1.6525e-10, 2.2414e-11,\n 2.2273e-11, 1.0944e-11, 4.5643e-13, 2.0129e-11, 1.0838e-10, 3.7671e-11,\n 3.1408e-11, 5.9795e-13, 2.4481e-12, 1.9439e-11, 9.4806e-12, 2.1240e-11,\n 1.4400e-11, 6.3610e-12, 1.0352e-12, 8.3857e-12, 2.4494e-11, 2.5134e-15,\n 4.6966e-12, 1.7934e-11, 4.2851e-12, 1.0400e-10, 6.3202e-11, 5.1856e-12,\n 7.0746e-12, 3.7169e-12, 4.6532e-13, 3.7237e-10, 2.1514e-12, 3.9536e-12,\n 9.8160e-12, 1.5619e-10, 2.6248e-13, 1.7233e-10, 2.5053e-11, 2.6300e-12,\n 3.5473e-11, 3.2130e-10, 2.5606e-11, 6.2672e-12, 3.2042e-14, 4.1699e-11,\n 1.6047e-12, 7.5488e-11, 1.5535e-11, 2.6827e-11, 3.0372e-14, 2.3587e-11,\n 1.8260e-10, 7.1149e-15, 3.7872e-12, 3.9228e-12, 2.4038e-12, 1.5170e-10,\n 2.3632e-11, 1.1067e-10, 1.8530e-11, 1.7153e-13, 3.0955e-15, 3.5857e-12,\n 4.9844e-13, 3.2643e-11, 1.0499e-12, 1.0557e-13, 5.8959e-11, 2.4372e-11,\n 1.8550e-11, 4.4858e-12, 2.7769e-12, 3.6414e-11, 5.7864e-12, 2.1648e-12,\n 4.6752e-11, 9.2814e-15, 1.1101e-11, 4.3758e-11, 6.9385e-12, 2.3144e-14,\n 2.8395e-14, 1.8693e-11, 1.0436e-12, 9.0881e-11, 3.5112e-11, 2.5757e-12,\n 2.8538e-11, 3.7685e-12, 1.0527e-10, 4.1190e-12], device='cuda:0')" }, "22": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.1409e-13, 5.7443e-11, 4.5522e-10, 6.3895e-12, 7.0120e-11, 2.6330e-10,\n 3.1203e-12, 3.9074e-10, 3.3766e-11, 4.5250e-12, 1.5956e-09, 2.2330e-11,\n 5.7384e-12, 5.2531e-11, 6.4562e-11, 2.5515e-10, 1.3148e-10, 1.4909e-11,\n 4.3940e-10, 2.3513e-10, 2.0697e-10, 2.0104e-11, 4.4236e-11, 1.7140e-10,\n 1.6493e-10, 1.1935e-11, 3.4158e-12, 6.5180e-10, 7.2762e-14, 2.3320e-11,\n 1.0108e-11, 1.5858e-10, 3.0472e-10, 2.2035e-11, 1.7756e-11, 1.1878e-10,\n 1.5433e-12, 2.2792e-11, 3.2522e-10, 2.0838e-13, 1.7459e-11, 2.2010e-11,\n 5.7280e-10, 9.6802e-11, 2.4715e-11, 2.3012e-12, 5.9818e-13, 2.5003e-12,\n 5.8924e-12, 2.4668e-13, 1.4222e-13, 4.7430e-12, 2.9597e-11, 7.1645e-11,\n 4.4420e-14, 1.2704e-10, 2.7938e-10, 1.2510e-09, 1.0839e-10, 2.6135e-12,\n 2.0255e-09, 7.1877e-13, 2.7884e-12, 9.9944e-11, 3.3521e-10, 4.7294e-10,\n 1.1126e-11, 1.6947e-11, 1.6564e-09, 2.0040e-11, 2.5162e-10, 4.9967e-11,\n 1.1481e-10, 8.1408e-11, 5.6783e-13, 1.5763e-11, 2.3973e-10, 9.4529e-12,\n 6.1354e-14, 6.0421e-13, 8.7568e-12, 2.9867e-12, 5.1621e-10, 2.9056e-10,\n 4.3973e-11, 4.2434e-11, 1.4061e-10, 3.4313e-11, 5.5743e-11, 1.0146e-09,\n 3.2940e-11, 5.1378e-11, 6.6347e-13, 3.4605e-10, 5.6343e-11, 7.3162e-12,\n 2.6545e-12, 9.6334e-12, 4.4960e-10, 1.1993e-10, 2.2237e-12, 3.3599e-10,\n 3.9153e-12, 1.4242e-12, 1.0296e-10, 7.0314e-11, 7.2907e-13, 2.7117e-11,\n 2.0261e-11, 2.7544e-11, 9.2933e-13, 1.2272e-12, 1.3450e-11, 3.7930e-13,\n 2.7263e-11, 1.4465e-11, 3.7118e-10, 4.7473e-12, 2.1362e-10, 9.3693e-11,\n 2.5969e-12, 7.1624e-11, 1.2945e-10, 1.4841e-11, 2.0333e-10, 8.7521e-13,\n 5.5078e-10, 1.4722e-12, 3.1821e-11, 2.1323e-13, 1.2696e-12, 1.2024e-12,\n 1.0331e-12, 5.2295e-11, 1.2377e-11, 2.2845e-10, 3.9602e-12, 1.8523e-12,\n 1.2796e-10, 1.8058e-11, 6.8399e-13, 8.1296e-14, 1.5822e-09, 4.0812e-09,\n 7.5280e-11, 5.4419e-13, 1.7030e-10, 1.3622e-10, 1.1260e-10, 3.6727e-12,\n 3.3935e-12, 1.4459e-10, 2.3488e-10, 9.5533e-11, 1.5726e-10, 5.1609e-11,\n 2.1277e-15, 2.4486e-12, 8.7045e-12, 4.1051e-10, 1.1839e-13, 8.2287e-11,\n 9.1205e-11, 2.7370e-12, 1.3914e-12, 3.0651e-09, 3.0473e-10, 1.2966e-12,\n 7.8538e-10, 1.8582e-13, 6.2972e-11, 1.8314e-12, 1.8376e-10, 3.5436e-11,\n 5.5618e-11, 5.3602e-10, 6.3746e-11, 3.6607e-11, 2.9572e-12, 2.9496e-10,\n 3.5601e-11, 7.9502e-11, 2.5547e-09, 1.9248e-11, 3.3755e-12, 1.1371e-13,\n 6.2147e-12, 3.1708e-10, 6.5384e-11, 1.0266e-10, 4.4556e-10, 1.3925e-10,\n 4.4919e-12, 2.9688e-12, 2.4932e-10, 5.7843e-11, 5.7983e-14, 1.2996e-10,\n 4.3464e-12, 6.8339e-11, 1.8162e-10, 1.7105e-11, 4.1759e-11, 1.1233e-11,\n 1.9804e-11, 8.6820e-10, 8.4014e-12, 1.0199e-10, 3.9726e-13, 7.3100e-10,\n 1.0587e-10, 2.1193e-15, 3.9993e-13, 1.0474e-12, 5.0715e-10, 1.6442e-12,\n 4.0330e-11, 2.3436e-10, 1.2259e-11, 1.0417e-10, 1.1436e-10, 9.4523e-11,\n 5.4876e-12, 1.6571e-10, 3.2169e-11, 5.7975e-11, 7.8132e-10, 1.7501e-11,\n 6.7234e-13, 7.9488e-13, 2.8908e-11, 5.7201e-11, 2.1158e-13, 9.9792e-11,\n 6.7415e-12, 3.1088e-10, 1.1282e-10, 2.0366e-10, 5.8284e-11, 1.7188e-14,\n 1.3532e-11, 3.4446e-12, 4.7243e-10, 5.0888e-11, 1.5175e-11, 1.2038e-10,\n 5.0011e-12, 1.4184e-11, 4.4540e-10, 1.4895e-11, 1.3031e-10, 3.0054e-13,\n 5.2213e-11, 5.4198e-12, 3.0348e-13, 1.3685e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.1609e-12, 2.3143e-11, 0.0000e+00, ..., 3.3698e-11, 5.5105e-11,\n 8.4312e-12],\n [2.8259e-12, 3.1539e-12, 0.0000e+00, ..., 5.2046e-12, 1.6054e-11,\n 1.0205e-14],\n [3.7940e-12, 2.0610e-12, 0.0000e+00, ..., 3.4659e-12, 7.4052e-12,\n 5.9638e-13],\n ...,\n [6.4804e-12, 7.9899e-12, 0.0000e+00, ..., 9.8687e-12, 1.8674e-11,\n 5.9402e-12],\n [1.9731e-11, 6.2648e-12, 0.0000e+00, ..., 6.2276e-12, 3.9516e-11,\n 3.7619e-12],\n [9.6579e-14, 1.3525e-13, 0.0000e+00, ..., 1.4584e-12, 2.3245e-12,\n 4.7443e-13]], device='cuda:0')" }, "23": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0080e-09, 1.0689e-09, 7.6922e-13, ..., 7.5358e-11, 1.3537e-10,\n 2.5077e-11],\n [7.9168e-10, 8.9608e-10, 5.6093e-15, ..., 4.9939e-11, 1.0455e-10,\n 2.1752e-11],\n [2.3635e-10, 2.7787e-10, 5.8448e-13, ..., 2.1280e-11, 2.6194e-11,\n 8.3350e-12],\n ...,\n [2.3290e-11, 1.4620e-11, 1.1367e-13, ..., 1.4435e-12, 4.0226e-12,\n 6.0904e-13],\n [7.9757e-11, 8.4841e-11, 1.0972e-13, ..., 8.1598e-12, 9.9326e-12,\n 3.9424e-12],\n [7.1743e-11, 1.0432e-10, 9.1855e-14, ..., 9.8559e-12, 9.3286e-12,\n 5.4949e-12]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.4068e-08, 1.8525e-09, 4.0916e-09, 3.0792e-09, 1.0500e-09, 5.6321e-10,\n 2.4226e-09, 1.4414e-12, 9.6605e-10, 1.6385e-10, 1.8190e-08, 3.6018e-09,\n 4.8459e-09, 3.1442e-09, 9.2011e-10, 2.2707e-09, 9.5160e-09, 6.2247e-10,\n 1.8016e-09, 1.9190e-08, 7.5750e-11, 7.2855e-09, 3.1714e-09, 1.8322e-09,\n 2.4519e-10, 7.6415e-09, 2.4138e-09, 3.8378e-10, 2.6835e-10, 2.1374e-09,\n 1.0558e-08, 2.1983e-09, 2.6967e-10, 7.7247e-08, 7.6302e-09, 5.3465e-09,\n 9.1561e-10, 6.6355e-14, 7.1345e-09, 3.5982e-08, 4.5253e-09, 2.7623e-09,\n 4.8831e-10, 2.8092e-09, 1.6403e-08, 1.2171e-08, 6.8825e-09, 4.0271e-09,\n 1.0146e-09, 1.7478e-08, 1.9599e-10, 3.1591e-08, 2.1897e-08, 4.9939e-09,\n 5.0674e-10, 9.1554e-11, 1.2013e-08, 3.8195e-10, 5.4853e-09, 1.1060e-09,\n 2.5813e-09, 2.9600e-09, 1.5748e-08, 8.3587e-09, 3.8140e-10, 9.8680e-10,\n 5.8901e-09, 1.0504e-09, 8.2478e-09, 1.9830e-10, 2.9131e-13, 3.3840e-09,\n 6.2236e-09, 1.0933e-10, 2.6395e-09, 3.2257e-10, 9.1474e-10, 1.7556e-08,\n 2.5811e-10, 1.1033e-10, 1.4105e-09, 1.2370e-08, 3.5278e-13, 9.1676e-10,\n 2.5687e-10, 9.3183e-09, 3.7498e-09, 2.1012e-09, 3.7071e-09, 5.4300e-08,\n 7.2739e-10, 1.2243e-08, 1.5614e-08, 2.3614e-09, 2.9628e-08, 2.2137e-09,\n 2.6213e-09, 1.6643e-08, 5.5929e-09, 6.5217e-09, 1.4488e-08, 8.1564e-10,\n 1.5711e-10, 5.8532e-09, 2.1691e-10, 3.1242e-08, 1.7600e-09, 3.5624e-09,\n 1.0882e-09, 5.0319e-12, 2.5549e-08, 1.8281e-09, 2.5374e-08, 7.7458e-09,\n 8.4746e-10, 1.4096e-09, 1.6795e-09, 1.4273e-09, 1.0769e-09, 2.6173e-09,\n 7.8344e-09, 4.5816e-09, 7.7843e-10, 6.2094e-13, 4.6075e-12, 2.8193e-09,\n 1.5686e-08, 1.0171e-08, 8.2302e-10, 1.3666e-08, 1.0723e-11, 6.7534e-08,\n 3.0725e-09, 5.7113e-09, 9.7981e-09, 2.6307e-08, 8.3463e-09, 2.6521e-08,\n 7.2343e-09, 3.3002e-12, 9.4631e-11, 4.9411e-10, 3.1760e-09, 2.8597e-08,\n 4.9864e-09, 1.3503e-09, 1.1949e-09, 4.6312e-10, 2.8427e-08, 6.1040e-10,\n 4.9852e-10, 2.3746e-09, 2.5947e-09, 1.0327e-08, 3.5206e-09, 4.7152e-10,\n 2.4857e-09, 2.6399e-08, 7.4635e-10, 3.6347e-08, 9.2380e-09, 1.3430e-08,\n 1.4019e-09, 1.2221e-08, 9.3478e-09, 7.7021e-09, 7.4211e-08, 1.0440e-08,\n 1.1482e-08, 1.4849e-09, 4.7364e-10, 1.9972e-09, 1.3764e-08, 4.2342e-09,\n 7.8969e-09, 8.4580e-11, 6.9569e-10, 4.6405e-10, 9.4203e-10, 3.3164e-09,\n 4.8717e-10, 2.6873e-09, 1.7735e-09, 2.3770e-09, 1.8339e-08, 6.2330e-11,\n 1.8575e-09, 5.6874e-09, 1.0923e-09, 2.9482e-08, 3.1086e-09, 4.4414e-09,\n 4.7966e-09, 3.7903e-10, 1.6125e-12, 8.4350e-08, 1.0028e-09, 1.0395e-09,\n 6.2056e-09, 1.6158e-08, 6.5511e-13, 4.5870e-09, 1.7383e-08, 2.0567e-10,\n 1.0448e-08, 3.4773e-08, 5.4796e-09, 1.1255e-09, 4.2040e-12, 4.7524e-08,\n 9.7694e-10, 1.6298e-08, 5.0081e-10, 2.2393e-09, 3.7892e-10, 4.5201e-09,\n 5.9757e-09, 2.0549e-11, 4.5472e-10, 5.8861e-10, 1.9657e-09, 7.6375e-09,\n 1.4150e-09, 2.7077e-08, 4.7726e-09, 1.9497e-10, 6.0251e-11, 2.9102e-10,\n 1.0816e-09, 2.4785e-09, 7.5208e-11, 2.6411e-10, 4.3182e-09, 3.4969e-09,\n 1.4047e-08, 4.4011e-09, 1.0287e-09, 7.8449e-09, 1.1734e-09, 2.0582e-09,\n 7.5240e-09, 1.5629e-11, 9.8400e-09, 1.2209e-08, 4.2397e-09, 9.6427e-12,\n 3.3672e-13, 4.2070e-08, 2.0311e-10, 1.5460e-08, 5.3040e-09, 9.1214e-10,\n 6.9643e-09, 7.2863e-09, 1.6912e-08, 4.7694e-10], device='cuda:0')" }, "24": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.3686e-07, 4.2068e-07, 1.3797e-07, 8.8219e-08, 8.3893e-09, 5.5624e-07,\n 4.6981e-09, 4.9822e-07, 4.8816e-07, 4.4509e-10, 6.0659e-07, 4.1396e-08,\n 5.5211e-08, 3.8804e-09, 6.0245e-10, 3.5314e-09, 8.5535e-12, 6.2099e-09,\n 1.2178e-07, 1.4445e-07, 3.2688e-08, 6.0177e-08, 7.2295e-08, 7.5157e-10,\n 4.3310e-08, 4.5846e-08, 1.7553e-08, 1.1367e-07, 1.4742e-08, 6.2125e-10,\n 7.4153e-09, 4.1897e-08, 6.7236e-08, 8.5715e-08, 5.1870e-11, 3.3608e-11,\n 4.5090e-08, 6.2973e-10, 4.6970e-08, 3.6415e-08, 9.7488e-11, 2.2894e-07,\n 5.8287e-07, 1.5222e-10, 1.5586e-08, 5.2700e-10, 7.4863e-08, 4.4223e-10,\n 1.6394e-08, 1.5150e-07, 8.1615e-10, 1.2052e-08, 2.4714e-10, 7.1423e-08,\n 4.0534e-09, 1.6776e-09, 1.0988e-06, 9.5886e-07, 3.3259e-09, 1.2131e-10,\n 1.4334e-06, 9.2984e-09, 1.1097e-07, 3.8760e-07, 3.0834e-08, 6.6689e-07,\n 1.5611e-09, 5.9951e-09, 7.0580e-08, 1.1627e-08, 1.4039e-07, 2.6273e-08,\n 2.3562e-08, 9.0000e-09, 5.3112e-09, 1.5004e-08, 7.3193e-08, 3.3824e-11,\n 4.6340e-08, 1.3835e-08, 6.8856e-08, 1.5858e-07, 5.9450e-09, 2.6985e-10,\n 8.0108e-10, 4.1366e-08, 1.6656e-07, 9.5074e-10, 2.5419e-07, 1.5571e-08,\n 5.6861e-09, 4.1813e-10, 7.5575e-09, 1.4262e-07, 3.1456e-07, 1.6693e-07,\n 1.0841e-09, 8.1457e-10, 1.3719e-07, 1.0708e-08, 7.0364e-08, 8.0327e-10,\n 1.7853e-08, 1.7319e-10, 9.6810e-09, 3.3223e-10, 6.4972e-08, 2.0889e-11,\n 7.6403e-08, 1.9701e-08, 3.6531e-09, 3.5443e-09, 3.3951e-08, 3.6629e-10,\n 8.4415e-08, 7.9144e-08, 5.9722e-09, 7.8870e-07, 2.7038e-11, 2.9590e-07,\n 1.8047e-08, 2.6878e-09, 4.7884e-08, 4.2561e-10, 8.7155e-10, 8.2358e-10,\n 5.1034e-07, 3.0715e-08, 1.4845e-10, 2.3301e-09, 1.0272e-08, 1.6358e-08,\n 3.9015e-08, 6.0066e-08, 1.9420e-09, 4.8374e-08, 1.0799e-07, 9.7419e-09,\n 2.3424e-10, 5.1317e-09, 4.4934e-08, 1.0740e-07, 1.3670e-07, 2.6989e-06,\n 1.8952e-09, 4.9763e-11, 7.8463e-08, 1.0316e-08, 5.4656e-10, 7.1564e-09,\n 1.7548e-08, 4.6498e-09, 7.9830e-09, 5.0672e-08, 4.0945e-09, 4.0132e-09,\n 1.9210e-08, 4.3744e-07, 3.7564e-09, 1.4376e-06, 2.2249e-08, 2.2922e-09,\n 9.9568e-11, 3.2868e-07, 8.2408e-08, 1.6410e-07, 1.6515e-09, 3.1250e-08,\n 5.7352e-07, 1.3040e-09, 3.0069e-07, 7.5690e-08, 4.4520e-08, 7.8953e-09,\n 6.4737e-08, 4.5494e-07, 1.6323e-07, 7.4679e-08, 1.3436e-08, 2.0235e-07,\n 5.0427e-07, 4.8973e-08, 5.6939e-07, 5.6151e-11, 6.4317e-08, 3.4683e-11,\n 8.6546e-08, 4.3469e-09, 1.3756e-08, 1.5197e-09, 3.4355e-07, 2.2272e-10,\n 5.4862e-08, 1.2437e-09, 2.0589e-08, 2.5701e-08, 1.6638e-08, 5.0881e-07,\n 2.7797e-08, 1.2584e-09, 4.8109e-08, 2.1821e-09, 2.7728e-09, 1.0471e-08,\n 1.9656e-08, 9.4476e-08, 9.1364e-09, 8.6568e-09, 6.7832e-09, 1.2682e-09,\n 3.1742e-09, 9.8418e-08, 1.7571e-10, 3.5521e-07, 3.0616e-08, 1.6478e-08,\n 1.5487e-07, 3.4155e-08, 3.3293e-07, 9.4788e-08, 3.3928e-10, 1.3263e-07,\n 3.4717e-09, 1.6771e-10, 1.4867e-08, 4.2947e-09, 7.0717e-08, 2.7624e-07,\n 1.1147e-10, 2.4346e-09, 3.9263e-08, 2.5557e-08, 4.2723e-10, 3.3077e-09,\n 7.9928e-09, 1.5997e-07, 1.8381e-08, 1.1952e-08, 4.3148e-08, 1.1137e-08,\n 2.3597e-07, 2.1530e-08, 2.5659e-07, 8.9658e-08, 9.1094e-09, 4.1341e-09,\n 8.9937e-09, 9.0299e-08, 1.4734e-07, 1.9092e-09, 1.5539e-07, 1.5721e-09,\n 1.2254e-07, 9.4017e-09, 5.2547e-08, 6.5910e-08], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.7459e-11, 5.6995e-12, 6.4632e-12, 1.3986e-11, 1.6358e-12, 2.2088e-12,\n 5.6793e-12, 1.9008e-14, 1.1622e-12, 1.1149e-13, 6.4869e-11, 8.8477e-12,\n 1.0079e-11, 7.5010e-12, 2.5774e-12, 6.1835e-12, 2.2919e-11, 1.2666e-12,\n 3.9531e-12, 4.2664e-11, 9.6346e-16, 2.4999e-11, 9.7106e-12, 5.0563e-12,\n 1.0334e-12, 1.4048e-11, 5.3874e-12, 1.3409e-12, 1.0528e-12, 5.2779e-12,\n 4.0411e-11, 4.0571e-12, 6.6688e-13, 3.0584e-10, 1.7572e-11, 1.1262e-11,\n 2.2260e-12, 1.9514e-14, 1.9645e-11, 7.9935e-11, 8.7740e-12, 9.2396e-12,\n 1.1149e-12, 2.3251e-11, 4.8961e-11, 2.3934e-11, 3.1321e-11, 1.0294e-11,\n 1.6190e-12, 6.6987e-11, 1.7227e-13, 9.5156e-11, 5.4492e-11, 8.4844e-12,\n 4.3288e-13, 3.0550e-13, 3.9433e-11, 2.4703e-13, 1.5303e-11, 2.7578e-12,\n 5.7921e-12, 8.5241e-12, 6.7881e-11, 2.2954e-11, 2.6983e-13, 1.3843e-12,\n 1.5709e-11, 1.1586e-12, 1.6990e-11, 9.8805e-13, 3.4525e-13, 6.8430e-12,\n 1.2691e-11, 6.5315e-13, 4.7326e-12, 2.2113e-12, 2.9261e-12, 4.5239e-11,\n 4.1373e-13, 1.1756e-13, 3.3534e-12, 3.2552e-11, 3.1341e-14, 7.3233e-12,\n 1.1854e-12, 2.2646e-11, 1.6852e-11, 4.8089e-12, 6.7183e-12, 1.9280e-10,\n 1.9936e-12, 2.9210e-11, 5.4635e-11, 3.4997e-12, 1.9085e-10, 4.2505e-12,\n 3.3823e-12, 3.3439e-11, 1.4548e-11, 1.6775e-11, 6.1373e-11, 6.1503e-12,\n 7.8793e-13, 1.5211e-11, 9.0753e-14, 5.8048e-11, 3.4404e-12, 8.9438e-12,\n 1.7058e-12, 2.1672e-13, 5.5600e-11, 6.9412e-12, 1.5868e-10, 1.5820e-11,\n 7.3562e-13, 2.4806e-12, 4.6563e-12, 2.1678e-12, 3.8832e-12, 8.7645e-12,\n 2.2499e-11, 1.1659e-11, 8.6824e-13, 2.7668e-15, 1.0854e-13, 4.7279e-12,\n 3.4516e-11, 6.0411e-11, 1.8367e-12, 4.5985e-11, 1.3664e-14, 2.1881e-10,\n 7.6672e-12, 1.3320e-11, 1.4535e-11, 9.5560e-11, 1.8944e-11, 7.6995e-11,\n 1.7230e-11, 2.4545e-14, 9.7151e-13, 1.8169e-12, 5.3888e-12, 8.9275e-11,\n 1.0517e-11, 4.5763e-12, 3.7119e-12, 1.3196e-12, 9.9073e-11, 8.7216e-13,\n 2.1549e-12, 3.9805e-12, 7.5624e-12, 3.5470e-11, 5.8198e-12, 2.3056e-12,\n 3.6871e-12, 7.8449e-11, 1.6748e-12, 1.1149e-10, 1.9408e-11, 5.1643e-11,\n 1.9684e-12, 4.1386e-11, 5.6843e-11, 1.7232e-11, 2.5442e-10, 1.9788e-11,\n 4.9682e-11, 5.5388e-12, 3.5136e-13, 2.8608e-12, 2.5939e-11, 1.2916e-11,\n 1.4569e-11, 2.7848e-13, 7.9999e-13, 1.1027e-12, 1.6791e-12, 5.7407e-12,\n 8.8488e-13, 6.2834e-12, 3.0028e-12, 4.0829e-12, 3.9279e-11, 4.0042e-16,\n 3.3644e-12, 1.3836e-11, 1.4413e-12, 8.0094e-11, 3.8851e-12, 1.1468e-11,\n 2.2002e-11, 1.3705e-12, 3.9858e-14, 3.3310e-10, 1.8349e-12, 1.6081e-12,\n 2.4695e-11, 2.3218e-11, 1.0947e-13, 7.7812e-12, 5.9381e-11, 2.4409e-13,\n 3.2020e-11, 6.5276e-11, 9.1309e-12, 1.7710e-12, 1.3692e-15, 1.5129e-10,\n 2.0401e-12, 3.6533e-11, 2.3768e-12, 4.9061e-12, 4.4097e-13, 8.6896e-12,\n 1.1195e-11, 8.3558e-15, 1.3971e-12, 7.1971e-13, 4.7857e-12, 2.2039e-11,\n 6.4540e-12, 8.7061e-11, 1.6575e-11, 7.6258e-13, 2.7416e-14, 3.0512e-12,\n 1.7089e-12, 7.1250e-12, 3.9067e-13, 4.2965e-13, 7.4007e-12, 1.4059e-11,\n 3.5419e-11, 1.4363e-11, 1.6120e-12, 1.7217e-11, 3.6114e-12, 3.0862e-12,\n 1.9110e-11, 6.6285e-15, 8.3193e-11, 2.9799e-11, 6.9415e-12, 6.5653e-15,\n 1.1957e-13, 1.9847e-10, 2.1455e-13, 4.7901e-11, 1.0120e-11, 1.3666e-12,\n 1.5950e-11, 1.5106e-11, 3.6879e-11, 1.7887e-12], device='cuda:0')" }, "25": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.0435e-10, 4.3119e-10, 8.5839e-10, 1.4747e-12, 1.2036e-11, 1.3834e-09,\n 1.3896e-11, 5.2806e-10, 2.2821e-10, 7.7004e-13, 1.8052e-10, 5.9359e-10,\n 6.7752e-11, 2.5892e-13, 6.0555e-13, 5.6135e-12, 1.7185e-12, 4.3783e-13,\n 4.1856e-11, 4.5078e-10, 2.6728e-10, 7.0767e-12, 6.3812e-12, 8.7493e-12,\n 4.2512e-10, 1.5368e-11, 8.2464e-12, 1.5093e-11, 1.1909e-11, 9.6577e-13,\n 2.3794e-13, 4.5802e-11, 8.6759e-12, 1.2498e-10, 2.7282e-13, 4.1902e-13,\n 1.7684e-11, 3.9138e-15, 2.6984e-11, 2.2745e-11, 3.2090e-13, 1.0836e-09,\n 3.0768e-09, 1.2145e-12, 1.2672e-10, 8.4836e-14, 3.2210e-11, 1.6430e-14,\n 4.3180e-12, 3.6392e-11, 1.4250e-13, 1.1352e-12, 3.1284e-14, 7.5079e-11,\n 2.9858e-12, 1.0132e-12, 5.2927e-10, 1.1976e-09, 1.4619e-12, 2.9598e-13,\n 1.9802e-09, 2.1054e-12, 3.3514e-10, 1.8852e-10, 1.1086e-11, 9.9000e-10,\n 1.2987e-12, 7.9423e-12, 1.0379e-12, 3.3569e-12, 4.1109e-11, 2.8252e-12,\n 3.7188e-12, 8.6005e-12, 5.6657e-12, 1.8702e-12, 2.6427e-11, 8.3437e-13,\n 1.2805e-11, 2.3993e-12, 1.8815e-10, 2.5362e-11, 1.6626e-11, 4.3741e-12,\n 7.9229e-13, 5.3473e-10, 7.9722e-11, 4.9889e-13, 3.4775e-10, 1.9231e-12,\n 1.1011e-12, 4.4943e-13, 2.0748e-13, 7.1150e-11, 9.0945e-10, 4.2152e-11,\n 9.3916e-13, 6.7558e-13, 7.7640e-11, 6.8863e-12, 1.1379e-10, 1.0043e-12,\n 7.8794e-12, 8.4380e-13, 3.8728e-13, 7.9135e-14, 5.9204e-11, 2.2353e-13,\n 2.0764e-11, 2.2153e-12, 2.2055e-13, 4.5656e-11, 9.1554e-11, 3.3855e-13,\n 6.0112e-11, 2.3982e-10, 5.9189e-13, 1.8246e-09, 2.9463e-14, 4.6353e-10,\n 6.9370e-13, 2.8738e-13, 2.1166e-11, 6.9889e-15, 8.0873e-13, 3.8022e-14,\n 6.4294e-10, 8.9135e-12, 5.4774e-16, 2.8602e-13, 4.8217e-13, 1.3507e-12,\n 1.7935e-11, 9.0722e-11, 9.1284e-13, 1.1163e-11, 3.9820e-10, 1.8164e-12,\n 2.5298e-13, 1.5718e-11, 4.2240e-12, 2.2698e-11, 2.2098e-11, 6.5769e-09,\n 3.7512e-12, 6.7722e-15, 5.1381e-11, 1.3576e-12, 1.8884e-13, 5.0813e-12,\n 3.9056e-11, 5.8724e-13, 7.1367e-14, 1.9596e-11, 9.1853e-13, 1.2064e-11,\n 3.8661e-12, 4.0001e-10, 5.4448e-13, 1.3661e-09, 5.3053e-12, 8.4339e-14,\n 2.0049e-12, 9.8057e-10, 1.6933e-10, 2.3671e-12, 3.7841e-12, 4.4246e-12,\n 1.6358e-09, 2.9345e-14, 4.6893e-10, 7.7117e-12, 4.1440e-11, 2.4439e-12,\n 3.0135e-11, 4.5558e-10, 7.0249e-11, 1.7728e-11, 3.0275e-12, 1.4779e-10,\n 3.5344e-10, 1.9889e-12, 1.9566e-10, 1.0710e-13, 3.8950e-11, 6.4325e-13,\n 6.4348e-11, 5.6687e-12, 1.8171e-11, 2.6900e-12, 9.8758e-11, 1.8328e-12,\n 5.1773e-12, 5.3102e-13, 1.1264e-12, 2.6615e-12, 9.4789e-12, 1.4616e-09,\n 4.5382e-12, 1.2172e-12, 7.8921e-12, 1.2430e-13, 5.3676e-13, 2.6154e-11,\n 4.8211e-11, 2.0223e-11, 7.7092e-12, 3.8639e-13, 1.3581e-12, 2.9121e-12,\n 2.5633e-12, 1.1855e-10, 2.1597e-13, 1.1991e-09, 1.3880e-12, 1.5668e-12,\n 5.0596e-11, 4.5093e-12, 6.5705e-11, 8.4558e-12, 2.4525e-12, 2.8185e-10,\n 2.3895e-12, 4.3631e-13, 4.9156e-11, 1.8711e-13, 1.3145e-12, 5.0186e-10,\n 6.2745e-14, 1.9107e-13, 4.7478e-12, 2.0541e-11, 4.4012e-14, 9.2595e-13,\n 2.4170e-13, 5.3392e-10, 1.0302e-11, 3.6885e-13, 5.3778e-10, 8.9606e-13,\n 3.4440e-10, 6.3033e-12, 1.2719e-10, 1.2652e-10, 3.8478e-12, 1.6366e-13,\n 8.9500e-14, 1.3899e-10, 1.6416e-10, 1.5804e-11, 4.1666e-11, 7.7304e-13,\n 2.0978e-10, 2.6258e-11, 6.0698e-12, 2.7372e-11], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.7916e-11, 8.6332e-12, 1.1649e-11, 1.5799e-11, 2.4511e-12, 3.5090e-12,\n 7.1045e-12, 2.2312e-14, 2.1021e-12, 1.9432e-13, 5.8027e-11, 1.3053e-11,\n 1.4581e-11, 1.4568e-11, 4.6632e-12, 1.1356e-11, 4.1490e-11, 1.7635e-12,\n 5.7668e-12, 8.2942e-11, 1.5129e-14, 3.3209e-11, 1.5813e-11, 8.5709e-12,\n 1.3011e-12, 2.4950e-11, 1.1449e-11, 2.3554e-12, 1.8845e-12, 6.3287e-12,\n 4.7840e-11, 9.7838e-12, 1.6700e-12, 2.7230e-10, 3.3379e-11, 2.3270e-11,\n 1.7315e-12, 1.4756e-14, 3.2026e-11, 1.2650e-10, 1.5848e-11, 1.3349e-11,\n 2.7796e-12, 1.5458e-11, 5.3639e-11, 4.2551e-11, 3.1631e-11, 1.9443e-11,\n 2.6285e-12, 5.6316e-11, 1.7442e-13, 1.2900e-10, 7.1136e-11, 1.5890e-11,\n 9.1393e-13, 7.4466e-13, 5.3577e-11, 6.8353e-13, 2.3567e-11, 6.1642e-12,\n 6.6510e-12, 1.3886e-11, 6.7668e-11, 3.6309e-11, 4.6829e-13, 2.6266e-12,\n 2.6184e-11, 2.4435e-12, 3.5054e-11, 1.2204e-12, 2.8528e-13, 1.6146e-11,\n 2.0014e-11, 6.8967e-13, 7.5523e-12, 2.4939e-12, 5.3743e-12, 7.2772e-11,\n 6.7860e-13, 1.0420e-13, 7.4685e-12, 5.5422e-11, 8.3653e-14, 6.2007e-12,\n 1.5548e-12, 3.9635e-11, 1.8831e-11, 9.5001e-12, 1.1548e-11, 2.1822e-10,\n 3.5930e-12, 5.3056e-11, 4.9723e-11, 7.3413e-12, 1.2697e-10, 6.8243e-12,\n 8.2792e-12, 5.8824e-11, 2.4928e-11, 3.0490e-11, 6.2359e-11, 5.4812e-12,\n 1.4934e-12, 1.7017e-11, 2.6296e-13, 1.0673e-10, 4.3026e-12, 1.7245e-11,\n 3.1220e-12, 3.8673e-13, 8.7722e-11, 9.3942e-12, 1.0829e-10, 2.6683e-11,\n 2.2385e-12, 4.5805e-12, 8.6547e-12, 4.3216e-12, 5.2588e-12, 1.3534e-11,\n 2.3168e-11, 1.3958e-11, 1.7474e-12, 1.8117e-15, 1.1486e-13, 8.6173e-12,\n 6.6872e-11, 4.6279e-11, 4.1619e-12, 4.1620e-11, 1.8135e-14, 2.7384e-10,\n 1.3925e-11, 2.4534e-11, 3.1241e-11, 8.8130e-11, 3.7173e-11, 9.0881e-11,\n 3.1144e-11, 5.3417e-14, 1.2322e-12, 3.3120e-12, 9.8857e-12, 9.6233e-11,\n 2.1159e-11, 7.2945e-12, 6.0104e-12, 2.4256e-12, 9.6361e-11, 9.8618e-13,\n 3.5047e-12, 7.8655e-12, 1.2564e-11, 3.0585e-11, 1.0150e-11, 3.3570e-12,\n 6.6837e-12, 8.9433e-11, 2.2222e-12, 1.4818e-10, 3.0704e-11, 4.0448e-11,\n 3.3205e-12, 5.4601e-11, 4.3388e-11, 2.4199e-11, 2.5903e-10, 3.5448e-11,\n 4.8836e-11, 7.2593e-12, 9.0626e-13, 5.6698e-12, 4.7673e-11, 1.9874e-11,\n 2.5656e-11, 6.5372e-13, 1.5458e-12, 2.2567e-12, 2.1735e-12, 1.5212e-11,\n 1.2980e-12, 6.6707e-12, 4.8594e-12, 7.3577e-12, 7.6629e-11, 6.7447e-15,\n 6.0334e-12, 1.6858e-11, 2.8684e-12, 1.2252e-10, 1.0440e-11, 1.2914e-11,\n 2.2778e-11, 2.1776e-12, 1.0499e-13, 3.3840e-10, 2.3918e-12, 2.7548e-12,\n 2.8851e-11, 5.5438e-11, 2.6486e-13, 2.0373e-11, 5.4687e-11, 4.6136e-13,\n 4.3260e-11, 1.2208e-10, 1.8025e-11, 2.9023e-12, 2.4287e-15, 1.7131e-10,\n 1.9015e-12, 6.8448e-11, 3.5380e-12, 1.0915e-11, 5.9572e-13, 1.5157e-11,\n 1.9931e-11, 8.2035e-14, 2.7689e-12, 1.3343e-12, 4.0005e-12, 3.3400e-11,\n 7.5538e-12, 1.1342e-10, 2.2576e-11, 1.5670e-12, 2.1748e-14, 2.6924e-12,\n 2.2690e-12, 1.1621e-11, 8.3502e-13, 4.6279e-13, 1.9418e-11, 1.7817e-11,\n 6.0354e-11, 2.0746e-11, 2.3900e-12, 3.3176e-11, 6.8275e-12, 5.8514e-12,\n 3.3369e-11, 1.4545e-14, 4.6317e-11, 4.0607e-11, 1.2405e-11, 4.9396e-14,\n 2.4730e-13, 1.4315e-10, 3.4487e-13, 6.2741e-11, 1.6204e-11, 2.1040e-12,\n 2.1538e-11, 2.1766e-11, 5.4999e-11, 3.2305e-12], device='cuda:0')" }, "26": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.8853e-10, 5.2495e-10, 2.9373e-10, 9.7211e-11, 2.3630e-11, 7.1744e-10,\n 4.5670e-12, 6.7997e-10, 6.1199e-10, 2.2455e-13, 7.5928e-10, 1.7659e-10,\n 1.0212e-10, 5.4638e-12, 2.4343e-13, 2.0946e-12, 7.7522e-14, 8.5141e-12,\n 1.4679e-10, 2.9810e-10, 1.1687e-10, 7.6657e-11, 8.1505e-11, 3.0259e-12,\n 1.6927e-10, 5.5883e-11, 4.4183e-11, 1.2549e-10, 1.8503e-11, 3.4715e-13,\n 2.1900e-12, 8.9626e-11, 8.5724e-11, 1.9617e-10, 2.9498e-14, 2.8452e-13,\n 5.9689e-11, 1.5007e-13, 6.2077e-11, 4.3347e-11, 1.6926e-12, 3.2629e-10,\n 8.8420e-10, 4.3529e-13, 8.7762e-11, 8.4715e-13, 6.4738e-11, 6.5941e-13,\n 2.0321e-11, 1.9065e-10, 1.3652e-12, 1.6246e-11, 3.7314e-13, 5.6044e-11,\n 5.5225e-12, 3.9146e-13, 1.3816e-09, 1.2119e-09, 4.8689e-13, 2.1464e-12,\n 1.7843e-09, 1.3792e-11, 1.3771e-10, 5.0075e-10, 3.8461e-11, 8.5470e-10,\n 5.0729e-13, 1.0166e-11, 8.9616e-11, 1.5715e-11, 1.7590e-10, 2.6324e-11,\n 3.8032e-11, 3.4104e-11, 7.0162e-12, 4.6708e-12, 9.2722e-11, 4.0142e-12,\n 7.3472e-11, 1.8233e-11, 9.9961e-11, 1.9866e-10, 5.7121e-12, 1.5458e-12,\n 5.5637e-12, 1.2216e-10, 2.0931e-10, 1.1602e-12, 3.4338e-10, 2.2995e-11,\n 8.2682e-12, 3.6683e-12, 3.6716e-12, 1.7596e-10, 3.9034e-10, 2.2700e-10,\n 1.0119e-11, 8.7103e-14, 2.0999e-10, 3.6535e-11, 1.0993e-10, 1.8254e-13,\n 3.0209e-11, 1.0647e-12, 5.2890e-12, 4.8152e-12, 8.4789e-11, 1.0216e-14,\n 1.1351e-10, 2.5462e-11, 5.0593e-12, 4.0700e-11, 6.3936e-11, 2.8509e-13,\n 1.1649e-10, 1.1471e-10, 8.5312e-12, 1.0552e-09, 1.4247e-13, 3.5786e-10,\n 1.3799e-11, 3.1033e-12, 5.9451e-11, 3.0225e-13, 7.8379e-12, 6.6302e-15,\n 6.1638e-10, 2.5647e-11, 3.8894e-15, 1.0637e-12, 1.3981e-11, 8.4046e-12,\n 3.0878e-11, 1.0548e-10, 1.3453e-12, 6.2698e-11, 2.0099e-10, 1.1399e-11,\n 2.4706e-13, 6.1336e-12, 5.8131e-11, 1.1114e-10, 2.0928e-10, 3.5046e-09,\n 7.1391e-13, 6.5134e-14, 1.1041e-10, 1.3774e-11, 2.2423e-14, 6.0567e-12,\n 7.1022e-11, 7.2145e-12, 5.4531e-12, 6.4006e-11, 5.4302e-12, 2.3525e-11,\n 4.8291e-12, 5.4270e-10, 4.3502e-12, 1.8089e-09, 9.4142e-12, 3.3962e-12,\n 3.2211e-13, 4.7215e-10, 9.6427e-11, 2.0885e-10, 2.4475e-12, 3.9924e-11,\n 7.2382e-10, 1.8905e-12, 3.6365e-10, 8.5414e-11, 1.0577e-10, 1.0093e-11,\n 9.0067e-11, 5.8256e-10, 1.7962e-10, 1.0047e-10, 1.0853e-11, 2.5140e-10,\n 6.2677e-10, 5.3729e-11, 7.1707e-10, 3.1931e-12, 8.0301e-11, 2.4620e-13,\n 1.1415e-10, 1.9736e-12, 4.4298e-11, 1.0854e-12, 4.3140e-10, 1.9378e-13,\n 6.9698e-11, 1.6189e-12, 1.5456e-11, 3.3567e-11, 1.3505e-11, 6.3817e-10,\n 3.6310e-11, 3.3233e-13, 4.0493e-11, 3.2131e-12, 1.5940e-12, 4.4423e-11,\n 1.6213e-11, 1.1922e-10, 1.3611e-11, 1.1474e-11, 1.5842e-11, 6.1645e-12,\n 9.4578e-13, 1.1317e-10, 2.5903e-13, 5.1765e-10, 3.9553e-11, 1.5062e-11,\n 1.9735e-10, 4.3070e-11, 4.1810e-10, 1.2179e-10, 9.1741e-13, 2.2931e-10,\n 2.0469e-12, 1.6694e-13, 6.4336e-11, 4.0745e-12, 9.0984e-11, 3.9892e-10,\n 3.3587e-14, 5.2941e-13, 5.9034e-11, 3.1613e-11, 7.4107e-14, 1.5948e-11,\n 1.8282e-12, 2.9865e-10, 3.4904e-11, 1.5730e-11, 1.6852e-10, 2.9973e-12,\n 2.9558e-10, 2.9607e-11, 2.9978e-10, 1.6448e-10, 2.6840e-11, 5.6896e-12,\n 1.2197e-11, 1.1447e-10, 1.9938e-10, 2.5783e-11, 1.9918e-10, 2.4989e-13,\n 1.4331e-10, 8.5346e-12, 6.6371e-11, 1.4699e-10], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.7214e-12, 6.5265e-12, 0.0000e+00, ..., 1.3814e-11, 2.2276e-11,\n 2.7211e-12],\n [4.3650e-13, 6.0684e-13, 0.0000e+00, ..., 1.6960e-14, 5.4672e-13,\n 1.7459e-14],\n [3.5281e-12, 2.7571e-12, 0.0000e+00, ..., 2.8378e-12, 3.0650e-12,\n 1.9935e-12],\n ...,\n [1.7568e-12, 1.4438e-12, 0.0000e+00, ..., 1.0185e-12, 9.9470e-12,\n 2.8708e-13],\n [1.0310e-11, 3.1905e-12, 0.0000e+00, ..., 2.7496e-12, 2.2534e-11,\n 4.9905e-12],\n [2.0849e-12, 3.6655e-13, 0.0000e+00, ..., 5.0572e-13, 4.3443e-12,\n 4.5673e-14]], device='cuda:0')" }, "27": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.7614e-12, 2.9950e-14, 6.0509e-13, ..., 4.6730e-14, 1.6853e-13,\n 8.2678e-15],\n [1.0360e-09, 1.0252e-09, 7.1307e-13, ..., 7.2263e-11, 1.3886e-10,\n 2.9059e-11],\n [8.3023e-11, 9.7354e-11, 3.9590e-14, ..., 7.6795e-12, 1.1391e-11,\n 4.7825e-12],\n ...,\n [8.5872e-10, 9.1487e-10, 6.2725e-13, ..., 4.9956e-11, 1.1841e-10,\n 2.0504e-11],\n [1.1936e-11, 1.6449e-11, 2.3556e-13, ..., 1.3390e-12, 1.0480e-12,\n 3.3467e-13],\n [1.5111e-09, 1.5657e-09, 4.2978e-13, ..., 1.1640e-10, 1.9363e-10,\n 4.6866e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.0017e-09, 3.5886e-10, 2.8484e-09, 3.7011e-09, 1.1270e-09, 2.8408e-10,\n 3.7384e-09, 9.0982e-11, 1.6777e-09, 2.5605e-10, 8.5059e-09, 2.9439e-08,\n 5.0118e-09, 5.8939e-09, 6.1493e-09, 2.5454e-08, 2.8527e-09, 2.8281e-09,\n 9.5461e-09, 1.1099e-08, 3.5591e-11, 1.1925e-09, 2.4051e-09, 2.9819e-09,\n 2.7617e-09, 5.1198e-09, 4.0701e-09, 7.9944e-10, 1.1682e-10, 1.3306e-09,\n 7.0711e-09, 4.6717e-09, 7.3253e-10, 3.5931e-08, 1.3014e-08, 7.9766e-09,\n 2.3729e-10, 4.4974e-12, 3.3460e-09, 1.0805e-07, 1.6118e-08, 2.2661e-09,\n 7.1033e-10, 4.4458e-10, 1.8635e-08, 3.2817e-08, 5.1118e-09, 1.1324e-08,\n 1.4108e-09, 9.0657e-09, 1.7483e-10, 1.9301e-08, 4.9210e-09, 4.4334e-09,\n 7.0456e-10, 6.1458e-11, 8.4043e-09, 3.1188e-10, 5.5857e-09, 4.8882e-10,\n 2.8705e-09, 5.4953e-09, 1.3853e-08, 7.3829e-09, 2.0823e-10, 5.1522e-10,\n 5.5390e-09, 3.6060e-10, 3.5124e-09, 6.6737e-10, 1.2184e-11, 3.4039e-09,\n 1.4386e-08, 1.0566e-09, 2.9127e-09, 1.9471e-11, 1.8915e-10, 1.1751e-08,\n 7.5831e-10, 2.5040e-13, 6.9552e-09, 8.3705e-09, 1.1987e-11, 7.1052e-10,\n 4.9591e-09, 2.1182e-08, 1.7027e-09, 7.3138e-09, 1.4488e-09, 1.2081e-08,\n 8.5557e-09, 5.2625e-09, 1.3376e-08, 2.4406e-09, 4.3098e-09, 6.6058e-10,\n 2.2470e-09, 1.5383e-08, 3.7325e-09, 3.1067e-08, 2.7521e-08, 6.7337e-10,\n 1.1969e-11, 3.6962e-09, 2.0773e-10, 2.4372e-08, 1.9533e-09, 1.7839e-09,\n 5.3154e-09, 3.3147e-11, 2.6347e-09, 1.7193e-11, 1.7172e-08, 1.6821e-08,\n 1.3259e-09, 1.4704e-08, 1.2556e-09, 2.3982e-09, 3.5531e-09, 3.6494e-09,\n 1.2719e-09, 2.8937e-10, 5.2182e-10, 6.9224e-13, 6.7809e-11, 3.2660e-09,\n 2.7756e-08, 4.5843e-09, 1.1592e-08, 5.2709e-09, 2.9109e-11, 3.3448e-08,\n 5.3927e-09, 4.9845e-09, 1.2172e-08, 7.1024e-09, 7.9153e-09, 1.0621e-08,\n 7.2145e-09, 2.7796e-11, 2.1054e-10, 3.8792e-10, 2.8563e-09, 3.6347e-08,\n 6.9445e-09, 4.2423e-10, 6.5492e-09, 7.9116e-10, 6.6866e-09, 1.6541e-10,\n 3.2224e-10, 1.1331e-08, 8.2662e-09, 1.0345e-08, 2.1966e-09, 3.7033e-10,\n 6.5868e-09, 1.9500e-08, 3.4209e-09, 4.3105e-09, 5.3255e-09, 6.1143e-09,\n 3.3194e-09, 4.4322e-09, 4.1498e-09, 3.9059e-09, 2.8038e-08, 2.1595e-08,\n 3.1523e-09, 1.1894e-08, 1.1842e-09, 8.5449e-10, 1.7647e-08, 1.6790e-09,\n 1.2603e-08, 9.3739e-10, 3.6067e-10, 1.1379e-09, 1.2170e-09, 1.3532e-09,\n 1.5950e-09, 1.3729e-09, 5.0009e-10, 3.5625e-09, 5.4818e-08, 8.3738e-12,\n 2.7177e-09, 2.2789e-09, 1.6088e-09, 1.6442e-08, 4.8335e-09, 5.9276e-10,\n 2.4445e-09, 3.3702e-09, 2.5598e-11, 2.9862e-08, 1.5713e-09, 2.3929e-10,\n 7.4631e-09, 3.3116e-08, 2.7731e-14, 3.2384e-09, 1.6074e-08, 3.5772e-09,\n 2.9871e-09, 2.3320e-08, 5.7675e-09, 1.2426e-09, 1.2386e-12, 2.7968e-08,\n 5.6812e-10, 2.8025e-08, 1.7218e-11, 6.2144e-09, 8.9191e-11, 7.4668e-09,\n 1.0510e-08, 1.7867e-13, 8.8848e-10, 1.2354e-09, 1.0516e-09, 3.4202e-08,\n 1.8081e-09, 7.1740e-09, 1.6152e-09, 6.6459e-11, 3.6999e-11, 1.3181e-10,\n 1.2085e-09, 2.4079e-09, 2.0187e-10, 2.9620e-11, 1.2599e-08, 8.9925e-10,\n 6.6103e-10, 4.6097e-09, 3.9212e-09, 5.7696e-09, 2.3862e-09, 1.8515e-09,\n 6.2569e-10, 1.4174e-12, 4.7505e-09, 3.0804e-09, 1.3781e-09, 4.4022e-11,\n 2.8443e-11, 2.3533e-08, 1.0934e-09, 2.7132e-08, 8.6251e-09, 9.4150e-10,\n 1.2950e-08, 3.6602e-09, 7.6460e-09, 1.4532e-09], device='cuda:0')" }, "28": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.0088e-10, 5.6202e-07, 5.7997e-08, 1.1961e-07, 2.2876e-08, 1.8974e-09,\n 5.8516e-08, 2.8985e-07, 1.6941e-07, 2.1078e-09, 6.6933e-09, 4.0223e-09,\n 9.0646e-09, 8.7126e-09, 1.7250e-09, 8.4865e-09, 3.0100e-10, 3.2343e-08,\n 2.5607e-08, 2.0676e-07, 3.5716e-09, 9.1180e-08, 5.6524e-07, 1.0526e-07,\n 1.5313e-07, 1.0884e-09, 3.2553e-09, 6.0824e-07, 1.3756e-11, 9.9465e-10,\n 3.1952e-09, 1.1756e-07, 6.7376e-08, 4.8288e-08, 1.0462e-08, 2.7889e-08,\n 1.9061e-08, 6.8813e-09, 4.6243e-08, 9.4420e-09, 8.6681e-09, 4.3960e-08,\n 4.0367e-07, 3.8439e-08, 6.8592e-09, 3.5914e-08, 1.3674e-08, 3.0033e-08,\n 6.2763e-10, 6.8729e-08, 4.1542e-08, 8.8730e-07, 6.9463e-09, 1.6261e-08,\n 2.6611e-09, 2.3449e-07, 2.0741e-06, 4.5043e-07, 3.4063e-08, 4.8276e-08,\n 3.9206e-07, 6.8467e-09, 7.7641e-08, 8.3000e-07, 1.8894e-08, 3.0337e-07,\n 2.0563e-08, 1.9559e-08, 7.3986e-07, 1.5117e-07, 1.0640e-10, 9.5022e-09,\n 6.4674e-09, 9.1840e-09, 7.1767e-11, 2.9824e-09, 2.8875e-10, 1.1372e-09,\n 5.9950e-08, 5.0930e-09, 5.4216e-10, 1.5510e-07, 2.3375e-08, 1.1429e-07,\n 2.3852e-10, 1.8419e-08, 1.2831e-07, 2.1163e-09, 4.5914e-07, 2.7480e-09,\n 9.1995e-08, 3.7891e-08, 1.2961e-07, 1.9041e-08, 8.6467e-08, 9.5874e-07,\n 1.4626e-09, 6.6027e-10, 5.7265e-08, 2.3413e-08, 6.8251e-10, 9.8989e-09,\n 4.4475e-09, 5.2453e-08, 1.1583e-09, 1.6277e-08, 7.2876e-08, 2.8551e-11,\n 1.1092e-08, 5.5289e-10, 4.3713e-11, 7.8193e-09, 6.2485e-11, 4.4751e-09,\n 7.5470e-08, 1.3007e-07, 8.0439e-08, 6.1431e-07, 6.0999e-10, 3.8277e-09,\n 2.5393e-07, 5.0585e-10, 9.7987e-10, 7.9880e-11, 1.0798e-09, 9.2064e-10,\n 6.5502e-08, 1.0185e-08, 1.1969e-08, 4.9515e-08, 2.3931e-08, 3.8925e-09,\n 9.7436e-09, 3.0252e-09, 6.5110e-09, 1.8678e-11, 1.6610e-08, 1.3876e-08,\n 5.2873e-08, 9.7140e-07, 6.7460e-08, 3.8882e-10, 3.3698e-08, 1.6534e-06,\n 4.2644e-08, 7.3150e-09, 4.5175e-09, 5.8717e-09, 4.6812e-09, 1.1790e-08,\n 2.2968e-09, 1.6014e-09, 1.2585e-09, 1.2206e-07, 1.0329e-09, 5.0613e-10,\n 5.1888e-10, 2.3481e-07, 6.1887e-09, 3.9800e-08, 3.6128e-08, 3.1646e-11,\n 9.9545e-10, 3.7613e-07, 2.3290e-09, 2.4357e-07, 2.2391e-10, 4.7313e-08,\n 7.0219e-07, 1.1232e-08, 3.6064e-07, 7.9252e-09, 1.6951e-08, 6.2660e-11,\n 8.6220e-10, 9.2329e-09, 3.6588e-08, 4.8087e-11, 3.0154e-07, 6.4297e-09,\n 1.4520e-07, 1.0319e-07, 5.0743e-09, 3.5111e-09, 4.5470e-10, 1.8702e-08,\n 5.2128e-09, 2.1422e-09, 7.6332e-09, 3.3749e-08, 1.8013e-07, 1.0709e-11,\n 5.9593e-08, 9.8958e-08, 7.7988e-08, 2.4422e-06, 7.8202e-10, 4.2524e-08,\n 7.5936e-09, 6.2826e-08, 2.1594e-08, 7.0417e-08, 2.2193e-08, 1.1831e-08,\n 2.4146e-08, 2.1747e-08, 1.2607e-10, 4.6344e-08, 1.1024e-10, 5.3024e-07,\n 4.0335e-09, 9.9194e-10, 8.1661e-10, 5.3442e-07, 1.6619e-07, 3.9734e-08,\n 2.9395e-08, 4.5921e-07, 1.5107e-06, 4.1276e-10, 1.2687e-09, 1.3612e-07,\n 6.0697e-10, 8.9449e-08, 6.9324e-09, 1.8854e-09, 4.2517e-07, 3.4301e-07,\n 3.7984e-10, 1.4393e-09, 3.1783e-07, 1.7634e-08, 2.4932e-08, 1.3325e-08,\n 1.6520e-08, 2.9571e-10, 1.0070e-08, 3.3166e-10, 5.0573e-08, 8.3477e-09,\n 2.4491e-10, 4.2091e-08, 3.5093e-07, 1.7345e-08, 5.4041e-08, 1.3921e-08,\n 4.0492e-07, 3.8851e-09, 1.6282e-07, 2.8430e-08, 4.9138e-09, 2.1640e-11,\n 1.2990e-08, 3.8670e-07, 7.3095e-09, 8.0196e-07], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4317e-11, 1.7761e-12, 5.2149e-12, 1.2093e-11, 2.1572e-12, 1.3330e-12,\n 7.6957e-12, 5.0994e-13, 2.6088e-12, 3.1743e-13, 1.5953e-11, 7.7825e-11,\n 1.0940e-11, 1.4743e-11, 2.3648e-11, 1.0553e-10, 8.6074e-12, 6.9530e-12,\n 3.2184e-11, 2.2007e-11, 3.8922e-15, 3.1577e-12, 8.9512e-12, 6.0688e-12,\n 9.8478e-12, 1.2349e-11, 8.4726e-12, 2.5220e-12, 3.0015e-13, 2.3293e-12,\n 1.9141e-11, 1.1116e-11, 1.7240e-12, 9.0160e-11, 3.4044e-11, 2.4080e-11,\n 5.0835e-13, 1.9206e-15, 8.1180e-12, 6.6052e-10, 3.6168e-11, 7.2491e-12,\n 2.3722e-12, 1.3266e-12, 6.3084e-11, 1.4481e-10, 1.5782e-11, 5.1584e-11,\n 1.5321e-12, 1.8803e-11, 1.7420e-13, 4.8747e-11, 9.4453e-12, 9.7962e-12,\n 9.9713e-13, 2.3293e-13, 2.6933e-11, 2.6510e-13, 2.2820e-11, 1.6978e-12,\n 6.6490e-12, 3.9456e-11, 6.6051e-11, 2.5858e-11, 9.7426e-14, 5.9612e-13,\n 1.4997e-11, 3.1839e-13, 5.7506e-12, 2.1653e-12, 4.7749e-13, 1.0266e-11,\n 3.3451e-11, 2.4932e-12, 6.9827e-12, 2.0434e-13, 6.7983e-13, 3.3701e-11,\n 1.0754e-12, 1.9151e-14, 2.5846e-11, 1.8012e-11, 2.0921e-14, 3.4518e-12,\n 1.9651e-11, 6.1637e-11, 4.9304e-12, 3.1211e-11, 3.5457e-12, 2.7683e-11,\n 4.8484e-11, 1.3181e-11, 3.3047e-11, 3.5740e-12, 9.3222e-12, 6.4545e-13,\n 4.5360e-12, 2.7324e-11, 8.7845e-12, 8.6939e-11, 1.8019e-10, 3.0724e-12,\n 6.8726e-14, 8.4265e-12, 1.1010e-13, 5.3897e-11, 5.9558e-12, 4.8591e-12,\n 1.5905e-11, 1.3743e-16, 6.9257e-12, 3.4760e-13, 5.2540e-11, 3.3151e-11,\n 1.8521e-12, 3.8715e-11, 4.2007e-12, 4.6004e-12, 8.9126e-12, 9.6011e-12,\n 1.6685e-12, 1.1877e-12, 9.6600e-13, 4.0961e-15, 2.7005e-13, 6.1579e-12,\n 7.0688e-11, 1.4237e-11, 5.8992e-11, 1.1948e-11, 6.2804e-15, 8.7344e-11,\n 2.5026e-11, 9.6870e-12, 3.7946e-11, 1.2891e-11, 1.3546e-11, 1.8043e-11,\n 1.5547e-11, 1.5278e-15, 1.0496e-12, 2.3625e-12, 3.3976e-12, 1.0380e-10,\n 1.7829e-11, 1.2544e-12, 2.9706e-11, 2.0812e-12, 1.1826e-11, 3.3501e-14,\n 8.8193e-13, 2.9442e-11, 5.0972e-11, 4.1918e-11, 2.2123e-12, 3.1777e-12,\n 3.1197e-11, 5.1991e-11, 7.5208e-12, 9.2699e-12, 7.2519e-12, 1.0222e-11,\n 1.6279e-11, 8.1721e-12, 1.2910e-11, 9.1411e-12, 5.0787e-11, 7.1228e-11,\n 6.4857e-12, 2.8009e-11, 2.1829e-12, 1.2437e-12, 3.8725e-11, 3.3033e-12,\n 3.6856e-11, 5.6416e-12, 7.8195e-13, 2.6440e-12, 2.3735e-12, 4.7552e-12,\n 2.1488e-12, 2.9943e-12, 5.6379e-13, 6.6208e-12, 1.6857e-10, 5.6987e-14,\n 4.3166e-12, 4.1422e-12, 1.7919e-12, 3.6067e-11, 7.1404e-12, 9.8535e-13,\n 8.5896e-12, 1.5502e-11, 1.0713e-13, 6.6319e-11, 5.3133e-12, 3.6748e-13,\n 3.4643e-11, 8.2040e-11, 1.0951e-14, 7.3772e-12, 5.8623e-11, 1.1771e-11,\n 1.1046e-11, 4.6375e-11, 1.1226e-11, 3.2267e-12, 4.5276e-15, 5.9797e-11,\n 8.9542e-13, 7.6046e-11, 1.9838e-13, 1.6623e-11, 9.3342e-14, 1.6848e-11,\n 1.4810e-11, 8.0388e-14, 2.9784e-12, 2.1745e-12, 2.6592e-12, 1.5490e-10,\n 6.5571e-12, 1.8776e-11, 4.4865e-12, 7.1965e-13, 1.0225e-15, 7.5602e-13,\n 1.6640e-12, 5.4954e-12, 7.7738e-13, 5.6442e-16, 4.6946e-11, 3.0372e-12,\n 2.4021e-12, 1.6655e-11, 1.4250e-11, 1.1929e-11, 1.0754e-11, 4.1497e-12,\n 3.2295e-12, 2.8935e-14, 1.5945e-11, 5.1569e-12, 2.5135e-12, 4.4248e-14,\n 5.1250e-14, 7.8238e-11, 2.5039e-12, 8.8587e-11, 1.4683e-11, 1.6232e-12,\n 5.2703e-11, 6.6369e-12, 1.3793e-11, 9.3942e-12], device='cuda:0')" }, "29": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.9710e-12, 9.4406e-10, 2.2831e-11, 2.5432e-12, 3.8308e-12, 9.8559e-12,\n 1.3337e-12, 1.0782e-10, 2.9673e-11, 6.1189e-12, 2.3286e-11, 1.1657e-12,\n 5.7418e-12, 4.9102e-11, 6.2403e-12, 1.4723e-11, 1.3855e-13, 5.5172e-11,\n 1.0955e-12, 3.3234e-10, 6.5985e-12, 2.3403e-11, 2.5954e-09, 4.3241e-11,\n 4.4185e-10, 1.8133e-13, 7.9672e-13, 3.9879e-10, 4.8711e-13, 2.6432e-14,\n 8.6399e-13, 9.2614e-11, 1.4945e-11, 8.2441e-11, 1.1782e-12, 2.5566e-11,\n 1.7255e-12, 1.1956e-12, 4.7014e-11, 8.2382e-13, 1.8193e-11, 9.8472e-12,\n 5.3455e-10, 4.5207e-11, 1.4266e-11, 1.0939e-10, 7.1458e-13, 1.2293e-11,\n 1.0182e-12, 8.2674e-11, 2.1895e-11, 2.6311e-09, 4.0510e-12, 1.1857e-12,\n 6.1389e-12, 1.8737e-10, 4.5707e-09, 2.0289e-10, 1.2760e-11, 2.7842e-11,\n 5.4099e-11, 1.2983e-11, 2.0971e-11, 4.1338e-10, 9.5309e-13, 1.4582e-10,\n 1.8651e-11, 1.1533e-10, 2.7703e-09, 2.4089e-10, 1.2422e-12, 1.9377e-11,\n 2.4925e-12, 2.3818e-11, 9.2394e-12, 4.0847e-13, 3.8255e-12, 1.3474e-13,\n 1.5902e-11, 4.2963e-12, 1.4014e-14, 9.7508e-11, 1.1550e-12, 1.0089e-09,\n 3.2650e-13, 2.2195e-10, 1.7998e-11, 2.3386e-12, 9.4269e-10, 8.1543e-12,\n 2.3973e-10, 2.8940e-12, 3.6197e-11, 1.2412e-12, 1.7862e-11, 1.0171e-09,\n 4.6226e-11, 1.8597e-13, 3.9677e-11, 1.5488e-11, 5.2089e-12, 6.9187e-13,\n 1.9874e-12, 7.5633e-12, 1.5509e-12, 5.6191e-11, 1.5490e-10, 3.9108e-14,\n 2.4422e-12, 2.3292e-12, 1.2070e-12, 2.0414e-11, 1.2256e-11, 2.7085e-13,\n 3.4926e-10, 1.5595e-10, 5.5133e-11, 1.3727e-09, 3.0384e-14, 1.1114e-12,\n 1.4477e-10, 1.2805e-12, 1.5749e-13, 3.2320e-13, 5.2146e-13, 1.1600e-13,\n 1.2258e-12, 5.4588e-13, 6.9107e-13, 3.9924e-12, 4.3239e-12, 8.0944e-14,\n 3.1482e-13, 1.6901e-12, 2.3718e-12, 5.5534e-13, 8.8565e-12, 1.4175e-11,\n 1.0875e-10, 3.0782e-09, 1.0056e-11, 2.4354e-12, 6.4314e-12, 7.2695e-10,\n 7.8711e-12, 8.8226e-12, 4.6482e-13, 1.3445e-13, 5.5222e-13, 4.0130e-12,\n 7.5548e-13, 2.3647e-13, 1.7695e-12, 4.0563e-10, 2.4307e-13, 1.1554e-12,\n 4.7431e-14, 1.5472e-10, 2.3075e-14, 2.7735e-12, 1.4886e-11, 3.2658e-13,\n 9.0693e-12, 1.3411e-09, 1.3464e-13, 2.2442e-11, 2.2144e-12, 2.5281e-11,\n 9.4095e-10, 1.7922e-11, 7.6362e-10, 1.5951e-13, 2.0607e-11, 3.6633e-13,\n 6.3896e-12, 1.5432e-11, 4.6134e-12, 1.3763e-12, 6.6505e-11, 2.0658e-13,\n 1.5244e-11, 1.0796e-10, 2.8101e-11, 2.1654e-11, 2.5720e-12, 1.1450e-11,\n 1.3190e-11, 1.0544e-12, 7.9193e-12, 1.9331e-11, 8.6766e-11, 2.7630e-12,\n 1.1183e-11, 7.8073e-11, 7.3585e-12, 7.0814e-09, 1.4615e-14, 3.0436e-12,\n 3.1391e-13, 9.2663e-12, 1.3037e-12, 9.3825e-11, 1.7407e-12, 2.9584e-11,\n 6.4383e-12, 3.7713e-13, 8.4747e-13, 8.4030e-12, 8.7577e-13, 1.0861e-09,\n 2.6807e-12, 2.9815e-13, 6.1490e-13, 1.8006e-09, 6.7273e-12, 6.3638e-12,\n 3.2082e-12, 1.0572e-09, 2.8872e-09, 4.1711e-12, 5.8750e-13, 3.9520e-10,\n 7.8761e-14, 3.5010e-11, 3.5017e-12, 1.2824e-13, 8.0309e-11, 2.5235e-10,\n 1.3155e-14, 1.2216e-12, 5.6777e-11, 1.3546e-11, 1.2340e-10, 3.4857e-10,\n 1.2782e-12, 1.7397e-12, 1.3765e-11, 1.1201e-12, 9.1406e-11, 7.4333e-14,\n 3.7240e-12, 5.3370e-11, 4.0692e-10, 1.2463e-11, 4.7632e-11, 2.2436e-12,\n 3.5613e-10, 1.2888e-13, 1.2093e-10, 6.7890e-11, 6.8570e-13, 1.3585e-14,\n 1.4589e-12, 2.5690e-11, 5.6167e-13, 1.5675e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.1934e-11, 1.7548e-12, 9.5380e-12, 1.6244e-11, 2.6124e-12, 1.8145e-12,\n 1.1524e-11, 8.2556e-13, 4.7574e-12, 5.2007e-13, 2.8074e-11, 1.0744e-10,\n 1.6735e-11, 2.3987e-11, 2.7972e-11, 1.0933e-10, 1.4694e-11, 9.5436e-12,\n 3.1386e-11, 4.7888e-11, 5.3961e-16, 5.6248e-12, 1.1653e-11, 1.3780e-11,\n 1.3448e-11, 1.6997e-11, 1.8173e-11, 4.2824e-12, 7.4018e-13, 4.1504e-12,\n 3.1968e-11, 2.0094e-11, 3.8576e-12, 1.3043e-10, 5.6288e-11, 3.6099e-11,\n 4.0786e-13, 8.8243e-15, 1.4885e-11, 3.9315e-10, 5.6244e-11, 1.1427e-11,\n 3.8567e-12, 2.6185e-12, 6.4825e-11, 1.1508e-10, 2.3489e-11, 5.1193e-11,\n 3.8265e-12, 3.1635e-11, 3.0413e-13, 8.2052e-11, 1.6192e-11, 1.5211e-11,\n 1.8151e-12, 5.9917e-13, 3.6588e-11, 8.1168e-13, 2.6313e-11, 2.7560e-12,\n 8.4016e-12, 2.6976e-11, 6.0028e-11, 3.2869e-11, 3.4338e-13, 1.4800e-12,\n 2.5024e-11, 7.8122e-13, 1.5474e-11, 3.2953e-12, 3.0835e-13, 1.6307e-11,\n 4.8055e-11, 5.7258e-12, 8.9462e-12, 1.7169e-13, 1.0987e-12, 5.1285e-11,\n 1.5885e-12, 2.2281e-14, 3.0793e-11, 3.7463e-11, 8.0329e-14, 4.0207e-12,\n 2.3664e-11, 9.1668e-11, 7.7300e-12, 3.1877e-11, 4.9830e-12, 5.3273e-11,\n 3.9486e-11, 2.3459e-11, 4.6866e-11, 7.9087e-12, 1.8906e-11, 1.9321e-12,\n 7.3549e-12, 5.6698e-11, 1.7801e-11, 1.2517e-10, 1.1883e-10, 4.0422e-12,\n 1.1804e-13, 1.1385e-11, 1.6452e-13, 8.9039e-11, 5.0101e-12, 8.5903e-12,\n 1.6950e-11, 3.3300e-15, 9.3849e-12, 9.4640e-14, 7.1665e-11, 5.9992e-11,\n 4.2314e-12, 5.1302e-11, 6.1076e-12, 7.5868e-12, 1.6143e-11, 1.5409e-11,\n 4.0441e-12, 9.6075e-13, 1.1130e-12, 1.0681e-14, 6.3718e-13, 1.0326e-11,\n 1.1777e-10, 2.1180e-11, 5.1027e-11, 1.7080e-11, 3.8194e-16, 1.3466e-10,\n 2.5564e-11, 2.2433e-11, 4.1464e-11, 2.4354e-11, 3.4466e-11, 3.9113e-11,\n 3.1651e-11, 3.2870e-15, 1.5643e-12, 2.7625e-12, 9.8289e-12, 1.2729e-10,\n 2.9236e-11, 2.3724e-12, 2.9924e-11, 4.1682e-12, 2.2539e-11, 2.7323e-13,\n 1.9310e-12, 3.8285e-11, 3.7551e-11, 3.4339e-11, 6.7048e-12, 2.6920e-12,\n 2.0163e-11, 7.1214e-11, 1.0577e-11, 1.9883e-11, 1.8895e-11, 2.0743e-11,\n 8.9897e-12, 1.8959e-11, 1.8681e-11, 1.2735e-11, 1.0302e-10, 7.4763e-11,\n 1.5245e-11, 5.3110e-11, 2.9510e-12, 2.9100e-12, 6.3050e-11, 7.7669e-12,\n 4.2246e-11, 5.7697e-12, 1.0398e-12, 5.7344e-12, 3.1274e-12, 6.8295e-12,\n 4.7389e-12, 3.9979e-12, 1.3354e-12, 1.1079e-11, 2.1501e-10, 1.0683e-13,\n 9.0852e-12, 7.1491e-12, 4.6496e-12, 6.7857e-11, 1.7713e-11, 1.8368e-12,\n 1.1523e-11, 1.6939e-11, 3.9290e-13, 1.2930e-10, 4.2499e-12, 6.2984e-13,\n 3.4761e-11, 1.2378e-10, 1.0052e-13, 1.4839e-11, 5.5452e-11, 9.8390e-12,\n 1.4643e-11, 9.0929e-11, 1.9394e-11, 3.7931e-12, 3.8584e-14, 1.0189e-10,\n 1.2424e-12, 1.1283e-10, 1.8708e-13, 2.7328e-11, 7.3957e-14, 2.7662e-11,\n 3.6318e-11, 9.4482e-14, 4.9319e-12, 3.9444e-12, 2.2155e-12, 1.4192e-10,\n 8.9853e-12, 3.3201e-11, 8.4421e-12, 9.4607e-13, 1.5929e-15, 1.2281e-12,\n 3.1780e-12, 1.0719e-11, 1.3879e-12, 1.6804e-14, 5.5566e-11, 4.4800e-12,\n 3.2184e-12, 2.1342e-11, 1.1416e-11, 2.6129e-11, 1.1947e-11, 5.4501e-12,\n 2.5427e-12, 6.7851e-14, 2.1833e-11, 1.0716e-11, 4.6031e-12, 5.3829e-14,\n 2.2359e-14, 8.1531e-11, 2.5996e-12, 1.1203e-10, 2.9068e-11, 2.5553e-12,\n 4.2582e-11, 1.1780e-11, 2.9119e-11, 8.0715e-12], device='cuda:0')" }, "30": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.5679e-13, 7.7381e-10, 9.0448e-11, 1.3735e-10, 3.5862e-11, 4.5921e-12,\n 8.0079e-11, 3.9836e-10, 2.3432e-10, 9.6894e-12, 6.6087e-12, 1.0699e-11,\n 3.1390e-11, 5.8652e-11, 1.3711e-11, 5.0782e-12, 5.1728e-13, 5.9256e-11,\n 3.0085e-11, 3.0562e-10, 2.4109e-12, 1.3251e-10, 5.6276e-10, 1.8348e-10,\n 2.5434e-10, 2.1142e-12, 7.7268e-12, 6.8749e-10, 1.4255e-14, 1.6703e-12,\n 3.0235e-12, 1.7734e-10, 9.8564e-11, 1.5420e-10, 2.4571e-12, 6.9425e-11,\n 2.9868e-11, 6.5999e-12, 8.1334e-11, 5.3777e-12, 3.1582e-11, 6.7408e-11,\n 5.7774e-10, 7.1165e-11, 3.6987e-11, 6.7668e-11, 1.0853e-11, 7.4708e-12,\n 3.6274e-13, 8.3502e-11, 6.2363e-11, 1.2755e-09, 1.4369e-11, 1.1377e-11,\n 7.7674e-12, 2.5199e-10, 2.7800e-09, 6.1350e-10, 3.4378e-11, 7.1678e-11,\n 5.1757e-10, 2.1137e-11, 7.6707e-11, 1.0074e-09, 2.8085e-11, 4.2094e-10,\n 3.2346e-11, 6.1959e-11, 1.0509e-09, 2.2785e-10, 1.1474e-13, 7.1254e-12,\n 1.4053e-11, 5.1673e-11, 1.1743e-11, 1.0792e-12, 1.2579e-12, 5.1990e-14,\n 9.0432e-11, 1.2573e-11, 3.5529e-13, 1.9520e-10, 3.1279e-11, 2.6276e-10,\n 1.9369e-12, 7.1856e-11, 1.4625e-10, 1.1868e-12, 6.8045e-10, 2.1481e-12,\n 1.6736e-10, 5.4331e-11, 1.3532e-10, 2.6175e-11, 1.2255e-10, 1.1656e-09,\n 3.0877e-11, 5.4327e-14, 9.9611e-11, 5.0540e-11, 3.0042e-12, 1.4887e-11,\n 1.3138e-11, 6.5783e-11, 5.7234e-13, 7.9803e-11, 1.2267e-10, 7.0184e-13,\n 3.1508e-11, 4.9815e-13, 5.4377e-13, 2.2890e-11, 1.1884e-11, 6.3850e-12,\n 8.3169e-11, 1.9813e-10, 1.2588e-10, 7.4695e-10, 1.1827e-12, 4.8535e-12,\n 2.7133e-10, 2.1033e-13, 1.6013e-12, 1.7214e-13, 6.7176e-12, 1.9083e-12,\n 6.9897e-11, 5.5233e-12, 6.0543e-12, 4.3349e-11, 3.6149e-11, 9.2443e-13,\n 8.1247e-12, 4.6240e-13, 6.4831e-12, 3.2149e-13, 3.5912e-11, 2.1996e-11,\n 8.9428e-11, 1.3574e-09, 9.4391e-11, 2.3494e-13, 6.6239e-11, 2.0772e-09,\n 3.7437e-11, 3.6852e-12, 9.1226e-12, 9.0568e-12, 8.4015e-12, 1.9864e-11,\n 1.7468e-13, 3.9628e-12, 5.8007e-13, 1.9640e-10, 1.2333e-12, 1.7536e-13,\n 5.7312e-15, 3.2776e-10, 4.3842e-12, 4.6730e-11, 1.6835e-11, 2.6751e-14,\n 3.7513e-12, 4.5616e-10, 4.2308e-12, 3.2334e-10, 1.5940e-12, 7.6432e-11,\n 8.0760e-10, 1.2757e-11, 3.7132e-10, 4.9911e-12, 5.4491e-11, 1.1084e-13,\n 3.2323e-12, 4.5166e-12, 2.9912e-11, 3.9109e-13, 3.3580e-10, 9.3944e-12,\n 1.7154e-10, 9.2195e-11, 7.1063e-12, 1.0369e-11, 8.5345e-13, 3.4441e-11,\n 2.7791e-11, 6.4111e-13, 3.6267e-11, 5.5786e-11, 2.5089e-10, 4.0897e-15,\n 8.7275e-11, 1.4315e-10, 7.5719e-11, 3.2605e-09, 6.4383e-14, 6.1137e-11,\n 1.1526e-11, 5.4864e-11, 1.7207e-11, 1.1596e-10, 1.8082e-11, 3.6122e-11,\n 2.4778e-11, 3.0927e-11, 6.1455e-13, 6.7318e-11, 1.0794e-12, 6.8525e-10,\n 9.7989e-13, 1.7113e-13, 1.7918e-13, 6.8613e-10, 2.2165e-10, 3.6994e-11,\n 4.2355e-11, 6.5241e-10, 2.0297e-09, 2.8797e-12, 2.4042e-12, 1.9467e-10,\n 1.0748e-12, 1.0485e-10, 2.6338e-11, 8.4466e-13, 5.6319e-10, 4.4623e-10,\n 4.0933e-13, 3.4687e-12, 3.9820e-10, 2.9952e-11, 3.9655e-11, 8.0860e-11,\n 7.1977e-12, 2.4045e-12, 4.5367e-11, 1.2348e-13, 1.3264e-10, 3.7663e-12,\n 3.2964e-13, 8.1226e-11, 3.9202e-10, 3.7618e-11, 7.8824e-11, 2.2974e-11,\n 5.6112e-10, 6.6549e-12, 2.4103e-10, 4.4465e-11, 7.2934e-12, 1.9672e-14,\n 1.5412e-11, 5.0994e-10, 1.0617e-11, 1.1123e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9404e-12, 9.9306e-12, 0.0000e+00, ..., 3.4161e-11, 1.9439e-11,\n 2.0781e-11],\n [4.6973e-12, 1.7545e-12, 0.0000e+00, ..., 1.4674e-12, 1.6997e-11,\n 7.6646e-13],\n [3.8258e-12, 6.7511e-12, 0.0000e+00, ..., 4.6961e-12, 3.1858e-11,\n 1.8835e-12],\n ...,\n [9.2494e-13, 2.5181e-12, 0.0000e+00, ..., 3.2028e-12, 4.7602e-12,\n 2.9387e-14],\n [2.1119e-11, 1.8774e-11, 0.0000e+00, ..., 2.7300e-11, 1.1645e-10,\n 2.3740e-11],\n [1.7049e-13, 2.2796e-13, 0.0000e+00, ..., 8.7930e-13, 2.0564e-12,\n 2.1398e-14]], device='cuda:0')" }, "31": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.3868e-10, 3.5277e-10, 1.7833e-13, ..., 2.4725e-11, 4.0442e-11,\n 1.1753e-11],\n [4.5666e-12, 5.8997e-12, 1.0605e-14, ..., 4.2893e-13, 5.3468e-13,\n 1.1553e-13],\n [2.9938e-10, 3.2953e-10, 1.3024e-13, ..., 2.6344e-11, 3.1763e-11,\n 1.1461e-11],\n ...,\n [2.1508e-09, 2.4148e-09, 4.8157e-14, ..., 1.9527e-10, 2.4156e-10,\n 1.0970e-10],\n [1.5896e-10, 1.8201e-10, 3.8877e-13, ..., 1.2758e-11, 2.1918e-11,\n 6.4550e-12],\n [1.8318e-12, 1.9308e-12, 3.3408e-16, ..., 1.2721e-13, 1.5415e-13,\n 3.5101e-14]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8221e-08, 5.6640e-09, 7.2018e-09, 2.2413e-10, 2.0364e-09, 2.1541e-10,\n 2.2094e-09, 1.8894e-10, 1.2910e-09, 8.2745e-10, 1.6998e-08, 1.6755e-08,\n 2.2996e-09, 1.6601e-09, 3.2482e-09, 4.3943e-10, 8.2496e-09, 2.4540e-09,\n 8.2403e-09, 2.2943e-08, 3.7738e-11, 6.5768e-09, 1.4492e-09, 1.3464e-08,\n 9.9305e-10, 2.1807e-09, 1.4419e-08, 2.3310e-10, 8.1870e-10, 2.4998e-09,\n 4.3365e-09, 1.4275e-08, 1.8865e-11, 2.5691e-08, 1.4782e-08, 1.7548e-08,\n 6.6557e-10, 2.3723e-11, 9.5354e-09, 6.2961e-08, 2.6461e-08, 7.4517e-10,\n 6.7723e-10, 6.8263e-10, 4.8119e-09, 1.0984e-08, 6.6944e-09, 3.0939e-09,\n 6.7389e-10, 9.3074e-09, 2.4947e-10, 2.3308e-08, 1.1682e-08, 7.7831e-09,\n 3.4453e-10, 7.1556e-10, 5.2383e-09, 6.1283e-10, 3.8836e-09, 1.2279e-09,\n 1.5005e-09, 3.8834e-09, 6.3922e-09, 1.5723e-08, 1.6472e-10, 3.9191e-09,\n 3.7746e-09, 9.1257e-10, 7.5192e-09, 1.0917e-09, 7.6560e-12, 3.0421e-09,\n 1.2485e-08, 1.2168e-09, 7.4507e-10, 9.6445e-12, 1.9585e-09, 2.1912e-08,\n 8.5736e-10, 6.1528e-12, 8.9283e-09, 2.8513e-08, 1.2744e-11, 2.0972e-10,\n 3.5412e-09, 2.1109e-08, 9.5305e-10, 4.3294e-09, 3.1672e-09, 8.1074e-10,\n 4.4851e-09, 1.6172e-08, 4.9262e-09, 5.7154e-09, 1.8329e-08, 4.5684e-09,\n 8.1005e-09, 2.7484e-08, 4.2840e-09, 3.3246e-08, 3.0969e-09, 6.3425e-10,\n 1.2356e-12, 3.2793e-09, 3.5997e-11, 3.9629e-08, 1.1344e-09, 4.4831e-09,\n 3.4088e-09, 2.3696e-11, 3.0048e-08, 1.3292e-09, 9.3338e-09, 1.4057e-08,\n 2.4560e-09, 1.9571e-08, 4.2185e-10, 1.5540e-09, 2.4313e-09, 1.3049e-08,\n 1.3608e-09, 1.0419e-09, 5.8009e-10, 9.7413e-12, 9.6428e-11, 7.0110e-09,\n 1.1732e-08, 1.7285e-09, 5.8141e-09, 1.1915e-08, 1.3010e-10, 2.6716e-08,\n 6.5243e-09, 9.7674e-10, 1.1309e-08, 4.2786e-09, 2.1409e-08, 4.1486e-09,\n 3.2693e-09, 2.3521e-12, 8.3370e-12, 8.0587e-11, 1.4662e-08, 5.4359e-08,\n 9.5185e-09, 1.8740e-09, 2.2996e-09, 3.6381e-09, 3.2128e-09, 2.7454e-10,\n 8.4052e-10, 7.4460e-09, 5.3998e-09, 1.3216e-09, 1.6174e-09, 9.5671e-10,\n 2.5728e-09, 2.8957e-08, 1.2813e-09, 1.3880e-09, 4.5871e-09, 1.4360e-09,\n 1.3674e-09, 4.0966e-09, 1.3002e-09, 6.1012e-09, 2.7976e-08, 2.0240e-09,\n 9.5547e-09, 1.7324e-09, 8.7816e-11, 3.1728e-09, 5.8639e-09, 9.1727e-10,\n 1.1869e-08, 2.0177e-09, 1.4010e-09, 1.3382e-09, 1.4917e-09, 4.0330e-09,\n 4.8699e-09, 1.4530e-09, 2.1739e-09, 3.6102e-09, 3.1899e-08, 1.8642e-11,\n 2.5011e-09, 9.4017e-09, 1.6695e-09, 3.0636e-08, 5.3221e-09, 3.1337e-09,\n 1.8017e-09, 6.2825e-10, 2.7564e-12, 8.4957e-08, 1.1871e-09, 2.5340e-10,\n 2.4901e-09, 1.1650e-08, 1.3371e-11, 3.4560e-08, 2.3823e-09, 2.9728e-09,\n 2.9207e-09, 8.0255e-09, 1.4570e-08, 2.9670e-10, 6.7857e-12, 5.5618e-09,\n 1.6067e-09, 4.0845e-09, 1.5222e-10, 1.0165e-08, 2.0130e-10, 2.2135e-08,\n 2.5684e-08, 1.0110e-10, 2.5293e-10, 2.5731e-09, 1.0481e-09, 1.4341e-08,\n 1.3336e-09, 1.7363e-08, 2.3419e-09, 3.2148e-13, 2.9780e-12, 2.2961e-10,\n 3.5637e-10, 3.7527e-09, 9.8776e-11, 3.3320e-10, 3.8770e-09, 2.2491e-09,\n 1.5349e-08, 1.1814e-10, 1.0787e-09, 7.1858e-09, 2.0075e-09, 2.3517e-09,\n 4.2644e-09, 1.7390e-11, 1.6886e-09, 6.0244e-09, 2.4756e-09, 5.9730e-11,\n 1.3920e-10, 1.1506e-08, 7.9750e-10, 2.5751e-09, 2.0709e-08, 3.1722e-10,\n 1.2916e-08, 1.1420e-09, 3.0749e-08, 7.3548e-10], device='cuda:0')" }, "32": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7689e-07, 2.1301e-09, 1.7006e-07, 6.8511e-09, 1.4572e-10, 2.3300e-09,\n 3.4235e-08, 3.3181e-07, 1.2361e-07, 6.9997e-09, 1.2275e-06, 9.5959e-09,\n 6.2678e-11, 5.9557e-11, 9.4955e-10, 2.2901e-08, 1.4273e-09, 1.1109e-07,\n 1.8432e-07, 6.9435e-08, 1.2492e-07, 3.3760e-09, 1.1177e-08, 4.8930e-07,\n 3.2402e-09, 4.3214e-08, 1.5368e-07, 2.7718e-09, 2.0918e-10, 5.9036e-08,\n 2.3743e-09, 2.0714e-07, 5.3659e-08, 4.4498e-08, 1.3157e-08, 1.3911e-07,\n 3.2991e-09, 1.7628e-08, 1.2649e-07, 9.4211e-09, 4.3875e-09, 3.6157e-08,\n 1.2163e-07, 3.0624e-09, 2.2139e-10, 1.1405e-09, 5.7367e-08, 9.2561e-10,\n 1.1788e-07, 1.0728e-07, 1.8157e-08, 1.0860e-07, 1.4352e-09, 5.3840e-08,\n 2.0206e-08, 1.5541e-07, 1.2260e-07, 4.5834e-07, 2.4438e-08, 8.6141e-08,\n 1.6400e-06, 1.1769e-09, 1.6341e-09, 1.5545e-07, 1.9545e-09, 3.2462e-07,\n 3.9040e-09, 3.2320e-09, 2.6659e-10, 6.2289e-08, 9.0254e-08, 1.3103e-07,\n 1.0821e-07, 2.9382e-08, 7.3722e-10, 7.6106e-10, 1.4101e-08, 1.0166e-08,\n 1.2457e-07, 4.1495e-09, 7.0022e-11, 5.3620e-07, 4.5311e-07, 1.3329e-10,\n 4.0047e-08, 1.1707e-08, 1.8637e-07, 5.6575e-08, 5.4173e-07, 2.2877e-09,\n 1.4209e-07, 6.5365e-08, 8.1955e-09, 2.6639e-09, 2.3266e-08, 4.0086e-07,\n 6.5208e-08, 8.3853e-10, 3.0124e-07, 2.2706e-08, 1.5952e-07, 4.2562e-10,\n 7.5400e-09, 3.7087e-08, 6.4499e-09, 3.2535e-08, 1.6391e-08, 2.0321e-10,\n 2.2004e-08, 1.3501e-09, 8.7207e-08, 8.4147e-13, 8.4525e-10, 4.0203e-10,\n 2.6617e-07, 4.2455e-08, 1.0028e-07, 2.2562e-07, 4.2438e-09, 5.2592e-07,\n 2.3477e-08, 2.1335e-08, 5.8049e-08, 2.6807e-10, 4.9142e-09, 1.9779e-09,\n 1.9835e-09, 3.0510e-08, 6.0014e-09, 2.3709e-09, 1.4575e-09, 4.1408e-08,\n 1.6591e-09, 2.7766e-11, 7.3035e-10, 4.3209e-09, 1.4383e-09, 5.9245e-09,\n 3.0718e-08, 5.1275e-07, 9.9189e-08, 5.9470e-09, 6.4859e-07, 3.5432e-06,\n 2.6454e-07, 9.0689e-09, 3.2400e-09, 1.3025e-07, 7.3879e-11, 1.2350e-08,\n 1.4295e-08, 5.8398e-10, 4.4776e-09, 1.2196e-07, 1.1221e-07, 4.3443e-08,\n 1.4423e-08, 2.9068e-07, 3.5344e-09, 2.0073e-08, 1.7907e-09, 2.9031e-10,\n 5.5040e-08, 1.2304e-07, 1.3222e-09, 8.1816e-08, 8.9917e-08, 2.9421e-09,\n 3.3353e-07, 1.0235e-09, 4.7488e-07, 1.2536e-07, 1.0978e-07, 2.4378e-10,\n 4.5166e-09, 1.2571e-08, 4.9911e-08, 1.9865e-07, 2.9978e-07, 6.1409e-10,\n 8.0195e-07, 6.4143e-10, 6.0315e-07, 6.3970e-09, 3.8708e-09, 3.9120e-08,\n 1.9069e-09, 3.4524e-09, 4.7023e-08, 1.0374e-08, 1.8108e-08, 5.9597e-09,\n 2.6453e-07, 1.2550e-07, 1.6378e-08, 2.3742e-06, 8.0659e-09, 6.9345e-10,\n 4.5639e-09, 6.0783e-09, 3.7714e-07, 1.9243e-09, 7.5443e-08, 9.5690e-10,\n 3.7070e-09, 7.2260e-08, 2.2219e-09, 2.5608e-07, 2.2523e-09, 9.2244e-07,\n 6.3234e-10, 8.8009e-08, 1.9293e-09, 2.3631e-07, 1.3611e-08, 4.1970e-09,\n 1.8281e-10, 9.8015e-09, 1.5772e-06, 1.3026e-07, 2.0937e-08, 1.3947e-09,\n 4.8839e-10, 2.4050e-08, 1.1630e-08, 3.1900e-08, 4.1069e-07, 1.7410e-08,\n 5.4455e-09, 4.4014e-09, 3.0622e-09, 1.4207e-08, 1.2031e-09, 7.0658e-08,\n 1.9009e-08, 9.7195e-08, 9.4809e-09, 5.3922e-10, 5.3477e-08, 1.4727e-07,\n 2.1984e-07, 1.5655e-09, 1.3082e-07, 9.1706e-08, 8.5733e-10, 1.3082e-09,\n 1.4644e-07, 4.1890e-08, 2.0546e-08, 2.2402e-09, 1.1325e-07, 4.1172e-10,\n 9.7228e-08, 1.3873e-06, 1.0239e-07, 9.2072e-10], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.9274e-11, 1.8154e-11, 2.0117e-11, 1.1906e-12, 4.5109e-12, 6.3944e-13,\n 3.2225e-12, 1.5377e-12, 1.7615e-12, 1.9861e-12, 3.1294e-11, 2.7548e-11,\n 4.7595e-12, 4.2067e-12, 1.0725e-11, 3.1908e-12, 1.9686e-11, 4.2706e-12,\n 2.0162e-11, 5.4554e-11, 1.8157e-14, 2.3088e-11, 6.9070e-12, 2.8359e-11,\n 3.3333e-12, 2.5237e-12, 6.8857e-11, 7.1771e-13, 2.8473e-12, 4.7980e-12,\n 6.5610e-12, 1.0397e-10, 1.7740e-13, 4.3926e-11, 4.3960e-11, 6.6829e-11,\n 8.3350e-13, 2.6876e-14, 2.7819e-11, 1.5937e-10, 7.8260e-11, 1.2393e-12,\n 1.8045e-12, 3.1076e-12, 6.2382e-12, 1.9701e-11, 2.7615e-11, 7.4501e-12,\n 8.6012e-13, 2.1615e-11, 2.7246e-13, 7.1405e-11, 2.4278e-11, 1.6954e-11,\n 4.0315e-13, 5.1025e-12, 1.1286e-11, 6.7818e-13, 1.9661e-11, 4.2338e-12,\n 3.4507e-12, 1.5927e-11, 2.0294e-11, 7.0608e-11, 1.9859e-13, 1.1405e-11,\n 1.0285e-11, 1.4633e-12, 1.7465e-11, 2.3667e-12, 2.2176e-15, 1.1095e-11,\n 3.1915e-11, 4.0233e-12, 1.1264e-12, 3.6471e-14, 1.3923e-11, 6.3695e-11,\n 1.7853e-12, 3.2955e-16, 5.2120e-11, 1.5693e-10, 7.7389e-14, 1.1779e-12,\n 1.5935e-11, 6.1642e-11, 3.6494e-12, 1.6894e-11, 6.7462e-12, 4.2576e-12,\n 1.2206e-11, 3.8752e-11, 7.1334e-12, 1.5094e-11, 6.2655e-11, 9.1153e-12,\n 2.1666e-11, 6.2905e-11, 1.0920e-11, 9.6296e-11, 6.8475e-12, 3.8434e-12,\n 1.7631e-13, 6.2818e-12, 1.8272e-14, 1.2201e-10, 2.2538e-12, 2.2718e-11,\n 5.4785e-12, 8.7161e-18, 9.2027e-11, 3.9136e-12, 2.3521e-11, 2.4464e-11,\n 4.3730e-12, 6.1481e-11, 1.3202e-12, 2.5427e-12, 4.8434e-12, 5.6399e-11,\n 1.6583e-12, 1.4929e-12, 6.1549e-13, 4.5575e-13, 8.9504e-13, 2.3210e-11,\n 2.1008e-11, 4.3488e-12, 1.7894e-11, 4.4923e-11, 6.7795e-15, 5.0140e-11,\n 2.5175e-11, 2.9899e-12, 1.8745e-11, 6.1572e-12, 7.3065e-11, 9.3592e-12,\n 8.2805e-12, 3.6218e-16, 3.4257e-15, 3.9662e-13, 3.5235e-11, 2.2497e-10,\n 3.2989e-11, 7.1245e-12, 5.6435e-12, 2.1725e-11, 3.8026e-12, 2.7362e-13,\n 5.5107e-12, 1.5203e-11, 2.1368e-11, 2.1559e-12, 2.1327e-12, 7.6453e-12,\n 4.5530e-12, 7.3567e-11, 1.9358e-12, 6.0424e-12, 1.0166e-11, 2.1350e-12,\n 1.6548e-12, 1.3961e-11, 3.1183e-12, 1.3247e-11, 5.2430e-11, 3.8783e-12,\n 4.6006e-11, 5.7063e-12, 2.7667e-14, 5.9548e-12, 9.5274e-12, 2.7998e-12,\n 3.3688e-11, 1.3987e-11, 2.0638e-12, 4.3191e-12, 3.1162e-12, 8.6227e-12,\n 1.1031e-11, 2.2319e-12, 4.6375e-12, 5.9159e-12, 7.4060e-11, 1.2028e-15,\n 4.2156e-12, 3.4294e-11, 1.9098e-12, 6.6839e-11, 8.9582e-12, 7.7147e-12,\n 6.9713e-12, 1.9747e-12, 7.1110e-15, 3.4806e-10, 1.2752e-12, 1.8777e-13,\n 5.1962e-12, 2.2328e-11, 4.6073e-13, 1.5176e-10, 4.1798e-12, 8.2054e-12,\n 5.8246e-12, 1.7914e-11, 3.7585e-11, 5.2139e-13, 4.8957e-16, 9.6792e-12,\n 3.3138e-12, 9.2538e-12, 4.8039e-13, 3.5599e-11, 1.4536e-13, 6.4158e-11,\n 6.0519e-11, 1.2249e-14, 7.0403e-13, 5.5154e-12, 2.0422e-12, 3.4862e-11,\n 4.5081e-12, 3.6924e-11, 6.4172e-12, 2.9153e-14, 2.1617e-14, 8.4001e-13,\n 5.0630e-13, 1.1330e-11, 2.9576e-13, 1.9148e-13, 1.3417e-11, 7.8683e-12,\n 4.8381e-11, 8.0127e-13, 1.5938e-12, 1.5348e-11, 8.5778e-12, 4.3071e-12,\n 8.3768e-12, 9.4670e-15, 4.7940e-12, 1.7174e-11, 4.0482e-12, 4.7172e-15,\n 5.9826e-14, 1.8826e-11, 1.2602e-12, 8.7980e-12, 6.0900e-11, 2.9653e-13,\n 4.6311e-11, 2.1521e-12, 8.2001e-11, 2.3181e-12], device='cuda:0')" }, "33": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.1658e-12, 7.5568e-12, 6.0718e-11, 4.4676e-12, 1.5958e-11, 1.0221e-11,\n 1.6237e-12, 3.0589e-10, 1.4266e-11, 9.6176e-12, 1.8478e-09, 4.1747e-12,\n 3.1966e-12, 1.8259e-13, 1.9633e-11, 6.4977e-13, 1.2315e-12, 3.1479e-10,\n 1.0136e-10, 4.3208e-11, 7.9330e-11, 1.2194e-11, 5.6938e-13, 9.4440e-10,\n 8.8215e-13, 1.2225e-11, 8.8909e-11, 1.6386e-12, 1.8472e-12, 8.2121e-11,\n 4.1051e-13, 3.9837e-10, 6.9143e-12, 5.1325e-11, 1.9046e-12, 7.9278e-10,\n 1.6389e-12, 3.1072e-12, 7.1747e-11, 2.8024e-13, 1.9233e-12, 6.5629e-12,\n 4.4897e-11, 5.9773e-11, 2.5731e-13, 8.9844e-14, 1.9447e-11, 1.5463e-14,\n 3.9316e-10, 3.4381e-11, 1.1144e-11, 4.9555e-12, 6.1958e-12, 1.3808e-11,\n 2.0668e-11, 3.2343e-11, 1.5183e-11, 1.9844e-10, 5.6252e-12, 4.3079e-11,\n 2.0602e-09, 3.5620e-13, 5.6171e-12, 1.1852e-11, 2.8418e-13, 1.8793e-10,\n 2.0571e-13, 3.3884e-13, 8.4672e-12, 1.5259e-11, 4.5646e-11, 5.9623e-12,\n 9.8090e-12, 7.8130e-11, 4.9064e-12, 1.9562e-13, 1.9043e-13, 1.5580e-12,\n 3.4689e-11, 7.7429e-11, 1.1907e-14, 1.5726e-09, 3.5719e-10, 2.9413e-12,\n 1.8653e-11, 1.2340e-11, 8.6397e-11, 2.2027e-11, 1.8884e-09, 1.5945e-12,\n 6.6815e-10, 4.4302e-10, 1.8103e-13, 1.4544e-11, 2.0026e-12, 9.3006e-11,\n 2.3057e-10, 1.7031e-12, 1.2766e-10, 1.0476e-11, 1.8030e-10, 1.2846e-12,\n 4.7761e-12, 4.7016e-12, 1.5393e-12, 2.8692e-10, 8.3647e-13, 1.8811e-12,\n 3.7373e-12, 5.6756e-13, 3.7110e-11, 1.7020e-13, 3.8145e-12, 2.2349e-12,\n 4.2566e-10, 2.1265e-10, 8.7967e-11, 7.8359e-11, 8.4368e-13, 7.1794e-10,\n 8.8877e-13, 7.2256e-13, 1.0733e-10, 7.0406e-14, 3.2650e-12, 4.1094e-13,\n 1.7750e-12, 6.0770e-12, 6.4893e-12, 3.7302e-12, 7.9213e-12, 2.2039e-11,\n 9.7275e-13, 2.4235e-14, 4.5300e-13, 8.3844e-13, 3.6585e-12, 1.9565e-12,\n 1.5567e-11, 4.2887e-10, 2.2588e-10, 2.0261e-12, 7.2695e-10, 1.4149e-08,\n 2.7067e-10, 2.8143e-12, 4.0177e-13, 8.4849e-11, 1.6720e-13, 2.0685e-11,\n 5.8138e-11, 7.9790e-13, 6.7495e-12, 9.5245e-11, 1.8282e-11, 2.8355e-10,\n 1.0712e-12, 1.2118e-10, 1.0530e-12, 5.6948e-11, 4.5642e-13, 5.8432e-13,\n 1.5080e-12, 1.2471e-10, 7.4980e-13, 7.5286e-13, 2.2084e-10, 2.4471e-13,\n 4.5389e-10, 7.9563e-13, 1.0475e-09, 3.8762e-11, 1.4746e-10, 1.3297e-13,\n 1.0687e-12, 5.1743e-12, 1.2997e-11, 7.1352e-11, 4.3967e-11, 2.3017e-12,\n 5.6027e-10, 1.3313e-12, 1.7177e-10, 1.6134e-11, 3.5865e-13, 1.2523e-11,\n 8.4405e-13, 5.2634e-12, 1.0850e-10, 4.5901e-13, 9.7215e-13, 1.4606e-12,\n 1.1408e-09, 1.7599e-10, 6.8856e-13, 5.8594e-09, 2.0819e-13, 2.3890e-12,\n 4.9548e-13, 7.5096e-13, 2.9711e-10, 9.1248e-12, 1.5162e-11, 3.6567e-12,\n 3.3916e-12, 1.5227e-11, 2.1532e-11, 1.3240e-10, 1.1906e-12, 2.8964e-09,\n 1.5856e-12, 2.4586e-11, 1.7727e-13, 3.4278e-10, 7.1443e-12, 7.5123e-12,\n 9.4749e-13, 6.2220e-13, 2.4371e-09, 3.5860e-10, 3.9040e-11, 3.4752e-13,\n 3.2208e-14, 4.2130e-11, 5.2296e-12, 6.3054e-12, 8.2424e-11, 2.0428e-12,\n 2.7616e-13, 1.0450e-12, 1.3572e-11, 4.3055e-13, 2.3325e-13, 1.1522e-10,\n 3.3149e-12, 2.9320e-11, 2.8645e-12, 1.0498e-13, 3.5062e-10, 1.5022e-10,\n 4.2587e-10, 2.8009e-13, 1.1990e-11, 4.4317e-11, 3.0120e-13, 5.2069e-14,\n 5.8538e-11, 1.9985e-11, 1.5424e-12, 2.9524e-12, 2.8859e-11, 1.3274e-13,\n 1.2234e-10, 1.0601e-09, 4.8970e-11, 9.6804e-12], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.5326e-11, 2.6558e-11, 2.1104e-11, 1.3712e-12, 4.4045e-12, 1.7125e-12,\n 6.0641e-12, 1.9858e-12, 2.9704e-12, 1.3816e-12, 5.5497e-11, 5.9627e-11,\n 7.1590e-12, 7.9347e-12, 1.6872e-11, 1.8263e-12, 3.6970e-11, 6.2897e-12,\n 2.6500e-11, 9.5658e-11, 1.4967e-14, 3.0500e-11, 8.5262e-12, 5.6574e-11,\n 5.5242e-12, 6.8938e-12, 6.4719e-11, 1.3667e-12, 4.9579e-12, 6.5542e-12,\n 2.0590e-11, 6.6831e-11, 1.9951e-13, 8.7980e-11, 6.2681e-11, 7.7593e-11,\n 1.4008e-12, 4.8241e-14, 4.2497e-11, 2.2049e-10, 8.8591e-11, 4.3884e-12,\n 4.0150e-12, 4.2567e-12, 1.6227e-11, 3.6663e-11, 3.2323e-11, 1.5100e-11,\n 1.7875e-12, 3.1510e-11, 2.3460e-13, 9.9106e-11, 3.9705e-11, 2.3611e-11,\n 6.8542e-13, 5.4702e-12, 2.3259e-11, 1.5061e-12, 2.0060e-11, 6.7288e-12,\n 3.3398e-12, 1.9836e-11, 3.0468e-11, 6.9986e-11, 2.7397e-13, 1.0254e-11,\n 1.8771e-11, 1.9558e-12, 3.2517e-11, 5.5613e-12, 5.5478e-17, 1.5243e-11,\n 4.0280e-11, 6.9120e-12, 2.0678e-12, 1.6763e-13, 1.1348e-11, 9.0567e-11,\n 1.3077e-12, 8.9810e-15, 4.2286e-11, 1.2390e-10, 1.0567e-13, 1.8902e-12,\n 1.7979e-11, 8.8212e-11, 5.4504e-12, 2.1437e-11, 1.0320e-11, 3.5736e-12,\n 2.0634e-11, 7.1451e-11, 1.6036e-11, 1.8224e-11, 7.9955e-11, 1.3154e-11,\n 2.4764e-11, 9.3898e-11, 2.1400e-11, 1.3967e-10, 1.4933e-11, 4.1574e-12,\n 3.7364e-13, 9.1650e-12, 1.1073e-14, 1.3372e-10, 2.3251e-12, 2.2319e-11,\n 9.9705e-12, 1.1546e-14, 1.0115e-10, 7.2264e-12, 4.2047e-11, 4.8330e-11,\n 6.1062e-12, 6.4947e-11, 2.2551e-12, 4.3913e-12, 1.0979e-11, 5.9999e-11,\n 4.1362e-12, 2.9805e-12, 1.0928e-12, 2.4122e-13, 1.1122e-12, 2.0612e-11,\n 4.9365e-11, 8.7100e-12, 2.6921e-11, 3.5909e-11, 3.6592e-14, 1.0946e-10,\n 3.0648e-11, 5.1202e-12, 3.7125e-11, 1.3828e-11, 9.0570e-11, 1.4312e-11,\n 1.5366e-11, 1.4749e-14, 3.1936e-14, 7.0042e-13, 4.4997e-11, 1.8321e-10,\n 4.1819e-11, 9.9099e-12, 1.0960e-11, 1.9476e-11, 1.0670e-11, 2.9111e-13,\n 5.6703e-12, 2.3676e-11, 2.6269e-11, 3.9955e-12, 4.9595e-12, 6.9205e-12,\n 7.2297e-12, 9.8903e-11, 3.6554e-12, 6.2277e-12, 1.5326e-11, 4.2122e-12,\n 3.3620e-12, 1.8826e-11, 6.6846e-12, 1.9113e-11, 9.8514e-11, 6.6932e-12,\n 4.5396e-11, 8.3662e-12, 1.5270e-13, 9.0478e-12, 2.0685e-11, 5.0089e-12,\n 3.8412e-11, 1.2330e-11, 3.5258e-12, 7.6270e-12, 3.1362e-12, 1.8324e-11,\n 1.3342e-11, 3.8250e-12, 5.2101e-12, 1.0373e-11, 1.3472e-10, 9.9532e-15,\n 7.8698e-12, 2.7098e-11, 4.5912e-12, 1.2424e-10, 1.7942e-11, 9.0964e-12,\n 9.2508e-12, 4.0216e-12, 6.7478e-15, 3.4320e-10, 2.7920e-12, 4.6587e-13,\n 1.2341e-11, 4.1783e-11, 8.0916e-13, 1.4878e-10, 8.4440e-12, 7.4997e-12,\n 1.3880e-11, 3.0270e-11, 4.4786e-11, 8.4800e-13, 2.3207e-14, 1.9939e-11,\n 3.4819e-12, 1.7109e-11, 1.1537e-12, 4.4752e-11, 1.6319e-13, 7.3044e-11,\n 8.6934e-11, 1.4988e-14, 1.4351e-12, 6.4310e-12, 1.6047e-12, 6.2543e-11,\n 7.5057e-12, 7.4372e-11, 1.1226e-11, 7.7261e-14, 1.7699e-14, 1.7735e-12,\n 8.4685e-13, 1.8159e-11, 6.5684e-13, 3.8980e-13, 1.7606e-11, 1.1617e-11,\n 6.6657e-11, 6.0551e-13, 2.5955e-12, 3.2245e-11, 1.0979e-11, 6.2260e-12,\n 1.9164e-11, 9.8857e-15, 9.0175e-12, 2.1268e-11, 7.3676e-12, 2.1421e-16,\n 8.3170e-14, 3.7367e-11, 1.7784e-12, 1.1841e-11, 6.9613e-11, 6.4526e-13,\n 3.8576e-11, 3.5449e-12, 1.0689e-10, 4.5275e-12], device='cuda:0')" }, "34": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.1566e-10, 2.8212e-12, 2.4293e-10, 5.9258e-12, 1.6155e-11, 3.7153e-12,\n 4.9620e-11, 4.7397e-10, 1.7145e-10, 1.5791e-11, 1.4669e-09, 3.9229e-11,\n 1.1098e-12, 2.0715e-13, 2.2620e-11, 2.2317e-11, 3.7589e-12, 1.8144e-10,\n 2.0522e-10, 1.3879e-10, 1.9598e-10, 5.0059e-12, 9.2479e-12, 6.8151e-10,\n 1.5921e-11, 6.4076e-11, 2.0472e-10, 8.4098e-13, 6.2242e-13, 9.0866e-11,\n 1.9371e-12, 2.7960e-10, 7.9157e-11, 1.2826e-10, 1.1477e-12, 2.6588e-10,\n 1.1554e-11, 7.7878e-12, 1.8311e-10, 1.2032e-12, 1.2748e-11, 5.5886e-11,\n 2.0293e-10, 2.8652e-11, 2.4839e-12, 2.7791e-12, 5.1275e-11, 1.6703e-13,\n 1.7536e-10, 1.3936e-10, 2.7866e-11, 1.4962e-10, 5.2446e-12, 4.1062e-11,\n 3.4156e-11, 1.7560e-10, 1.6468e-10, 6.2014e-10, 2.7506e-11, 1.1162e-10,\n 2.1607e-09, 2.0198e-12, 1.4404e-12, 1.8983e-10, 3.2871e-12, 4.4843e-10,\n 4.6255e-14, 5.9091e-12, 8.9571e-13, 9.1273e-11, 1.2710e-10, 1.3948e-10,\n 1.4309e-10, 8.7281e-11, 5.7073e-12, 1.0934e-13, 2.1060e-11, 7.1013e-12,\n 1.6902e-10, 3.9655e-11, 1.2756e-13, 6.2004e-10, 6.1283e-10, 1.1562e-12,\n 3.9931e-11, 2.7767e-11, 2.1733e-10, 8.3401e-11, 8.0003e-10, 1.6333e-12,\n 2.5872e-10, 1.5579e-10, 4.0413e-12, 4.2102e-12, 3.5445e-11, 5.0956e-10,\n 1.2279e-10, 4.3031e-12, 4.1771e-10, 5.7419e-11, 2.3959e-10, 4.8653e-13,\n 3.5288e-11, 4.5053e-11, 3.5007e-12, 1.4409e-10, 2.4390e-11, 5.6915e-13,\n 4.3083e-11, 2.9558e-13, 1.2726e-10, 2.8932e-14, 7.5001e-12, 1.0134e-12,\n 3.2236e-10, 9.2639e-11, 1.5362e-10, 2.8778e-10, 7.9579e-12, 7.1382e-10,\n 1.7992e-11, 2.2729e-11, 9.7846e-11, 1.3266e-12, 2.2260e-11, 1.4590e-13,\n 7.0792e-13, 1.5890e-11, 2.6197e-12, 1.2544e-12, 2.8176e-12, 1.9563e-11,\n 1.6467e-12, 1.4248e-12, 2.1271e-12, 7.4441e-12, 1.3897e-12, 1.0740e-11,\n 4.9326e-11, 6.9738e-10, 1.5928e-10, 4.2583e-12, 8.3384e-10, 4.4006e-09,\n 2.6299e-10, 1.1527e-11, 1.3776e-11, 1.8955e-10, 2.0874e-13, 1.7893e-11,\n 7.2091e-11, 3.3727e-12, 2.2458e-12, 1.7885e-10, 1.5827e-10, 6.9356e-11,\n 6.1370e-12, 3.9681e-10, 6.5842e-12, 2.2490e-11, 1.4302e-13, 3.4415e-13,\n 7.5886e-11, 1.7059e-10, 1.9379e-13, 1.1163e-10, 1.5185e-10, 6.0452e-12,\n 3.6279e-10, 2.5302e-13, 5.1594e-10, 1.3987e-10, 1.8785e-10, 3.1753e-13,\n 9.4874e-12, 1.7932e-11, 4.3516e-11, 2.4004e-10, 3.3678e-10, 8.6916e-13,\n 9.5185e-10, 1.0451e-13, 8.0676e-10, 1.8003e-11, 7.3789e-12, 6.2054e-11,\n 1.0379e-11, 2.2401e-12, 1.2009e-10, 1.6724e-11, 2.7033e-11, 8.5844e-12,\n 4.4014e-10, 1.7819e-10, 1.2440e-11, 3.1381e-09, 2.8966e-13, 1.3077e-12,\n 7.8178e-12, 3.7958e-12, 4.0608e-10, 1.4411e-11, 7.0129e-11, 1.2615e-11,\n 3.0637e-12, 1.0508e-10, 9.8164e-12, 3.5079e-10, 6.7813e-12, 1.1614e-09,\n 6.3732e-14, 9.3752e-11, 8.4683e-13, 3.2628e-10, 1.8365e-11, 2.5060e-12,\n 3.1677e-13, 1.5200e-11, 2.0950e-09, 2.1909e-10, 5.7341e-11, 8.4057e-12,\n 1.4602e-12, 3.6646e-11, 3.8245e-11, 2.6581e-11, 5.5188e-10, 3.4752e-11,\n 9.5658e-12, 9.6695e-13, 3.9124e-12, 2.1950e-11, 1.1600e-13, 1.3351e-10,\n 8.8109e-12, 1.4408e-10, 3.6425e-11, 1.2949e-12, 1.7876e-10, 1.3750e-10,\n 3.2166e-10, 3.2710e-12, 1.4673e-10, 1.4186e-10, 6.8110e-12, 2.7083e-12,\n 2.0617e-10, 6.4867e-11, 3.0121e-11, 1.3738e-11, 1.6194e-10, 3.8547e-14,\n 1.0944e-10, 1.8292e-09, 1.4473e-10, 2.8571e-12], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6849e-13, 9.6948e-13, 5.9143e-13, ..., 1.3386e-13, 7.3290e-13,\n 4.4041e-13],\n [1.9109e-14, 2.8588e-15, 3.1159e-14, ..., 8.3152e-14, 9.6665e-14,\n 3.4452e-15],\n [9.3147e-14, 4.0966e-13, 1.2917e-13, ..., 6.6098e-14, 3.9804e-13,\n 7.6162e-14],\n ...,\n [2.3308e-12, 5.5287e-12, 5.7296e-12, ..., 1.0181e-12, 8.5963e-12,\n 1.2441e-11],\n [1.5564e-11, 2.4617e-11, 3.7995e-11, ..., 8.4373e-12, 3.7176e-11,\n 6.8257e-11],\n [2.1354e-10, 4.7365e-10, 5.6600e-10, ..., 1.2049e-10, 7.1290e-10,\n 1.1185e-09]], device='cuda:0')" }, "35": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.7814e-11, 2.1030e-11, 1.0970e-14, ..., 1.5926e-12, 1.8348e-12,\n 7.8504e-13],\n [9.1286e-13, 5.2039e-13, 5.3265e-14, ..., 2.2477e-14, 2.6228e-13,\n 1.2107e-13],\n [6.3202e-10, 7.2591e-10, 8.9526e-13, ..., 5.1283e-11, 8.7455e-11,\n 2.2736e-11],\n ...,\n [2.1155e-09, 2.1772e-09, 2.3828e-12, ..., 1.4333e-10, 2.5639e-10,\n 7.0505e-11],\n [5.8605e-14, 2.0223e-15, 2.2159e-14, ..., 1.1119e-14, 1.2913e-14,\n 3.7693e-15],\n [1.1854e-09, 1.3500e-09, 8.6191e-13, ..., 1.0102e-10, 1.6207e-10,\n 4.2182e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.4132e-12, 1.7397e-13, 1.5531e-12, 1.0220e-13, 6.8432e-13, 1.0117e-12,\n 1.4939e-13, 1.2923e-13, 6.2956e-15, 1.0547e-12, 8.5550e-13, 1.4796e-12,\n 3.6750e-14, 3.1111e-14, 3.7853e-13, 2.4827e-13, 8.1897e-14, 3.2414e-12,\n 1.1062e-12, 1.4168e-14, 5.5779e-14, 4.5263e-13, 1.0451e-12, 4.0314e-14,\n 1.9525e-12, 2.2899e-13, 3.2423e-13, 3.3060e-14, 7.5353e-14, 1.5414e-13,\n 1.5384e-13, 2.5588e-12, 3.7081e-13, 1.9849e-14, 7.4876e-16, 7.6500e-13,\n 2.7223e-13, 1.0967e-13, 3.2793e-15, 1.4622e-16, 4.4427e-13, 5.5595e-14,\n 2.7147e-13, 1.2818e-13, 2.1928e-14, 2.4963e-13, 3.1609e-13, 3.2070e-13,\n 1.6933e-13, 7.1953e-13, 2.7923e-13, 7.9419e-14, 1.0363e-13, 6.1026e-14,\n 2.2892e-15, 3.1126e-15, 7.1239e-14, 5.7180e-15, 2.2921e-13, 9.7482e-15,\n 8.0954e-13, 2.6999e-13, 6.3713e-16, 4.9117e-13, 9.4990e-13, 3.2353e-13,\n 4.8509e-13, 9.6050e-14, 1.2301e-14, 7.5826e-15, 4.9701e-13, 5.2501e-14,\n 4.1775e-15, 2.0525e-13, 1.1959e-13, 3.2236e-13, 2.3710e-13, 6.1403e-14,\n 1.0169e-18, 2.2267e-13, 1.6958e-13, 1.4926e-12, 2.2841e-14, 2.1102e-13,\n 2.8707e-14, 1.4484e-13, 8.4433e-14, 2.3364e-16, 1.4367e-13, 1.8646e-14,\n 6.6693e-16, 1.0821e-13, 9.6760e-14, 2.7575e-12, 9.0854e-13, 6.3181e-13,\n 6.8834e-14, 3.2481e-12, 2.2436e-13, 4.1608e-13, 3.8672e-14, 4.4486e-14,\n 2.1910e-14, 2.3796e-13, 2.8925e-15, 8.2985e-13, 1.3627e-12, 1.5147e-13,\n 1.7199e-13, 2.5857e-14, 2.6156e-13, 2.9702e-12, 2.2469e-14, 3.4028e-12,\n 4.7200e-13, 2.1027e-12, 2.0634e-19, 1.9288e-12, 1.4050e-12, 6.9537e-13,\n 2.6612e-12, 1.1906e-14, 1.3893e-12, 5.2388e-15, 7.5825e-13, 1.2630e-12,\n 1.1446e-12, 8.2902e-13, 1.5303e-12, 2.3157e-12, 2.6632e-13, 5.1942e-13,\n 1.5643e-14, 8.9259e-13, 2.5644e-13, 2.3456e-13, 5.4234e-13, 7.3434e-13,\n 6.1700e-16, 1.4327e-13, 1.8013e-12, 6.6288e-15, 3.7886e-13, 1.3978e-13,\n 8.7503e-13, 3.6400e-12, 7.2377e-13, 9.2955e-15, 2.2956e-12, 3.4813e-14,\n 7.4039e-14, 6.0147e-13, 2.4974e-12, 2.0527e-13, 8.8194e-13, 1.3231e-12,\n 9.7432e-15, 2.4570e-15, 9.0065e-14, 4.4279e-13, 4.2243e-13, 1.1716e-13,\n 1.7636e-12, 9.1224e-13, 1.0158e-13, 9.8018e-16, 1.9574e-14, 2.7731e-15,\n 5.5374e-14, 2.4936e-13, 5.0422e-14, 3.0588e-14, 2.1117e-13, 2.5581e-13,\n 2.3285e-14, 6.5329e-15, 2.4479e-13, 5.6516e-14, 3.5099e-14, 5.0023e-15,\n 4.0785e-13, 1.1863e-15, 8.8212e-14, 3.8081e-13, 1.0841e-13, 5.0552e-14,\n 3.7777e-13, 1.2682e-13, 1.2726e-12, 4.1548e-14, 3.5486e-13, 2.9250e-14,\n 3.2142e-13, 3.1917e-12, 4.5048e-13, 9.5994e-13, 4.9102e-13, 1.4353e-12,\n 1.6416e-16, 1.7430e-14, 8.3488e-14, 1.2396e-13, 2.1370e-13, 1.7604e-12,\n 4.4196e-14, 1.3233e-13, 9.8956e-15, 1.4904e-13, 1.7168e-14, 3.9867e-13,\n 6.8222e-17, 2.6808e-13, 1.8113e-14, 1.9746e-13, 1.8168e-12, 9.2099e-13,\n 1.3530e-12, 3.6590e-13, 4.6303e-13, 1.4405e-12, 3.6581e-13, 6.9853e-15,\n 1.8520e-13, 1.6474e-12, 1.0538e-14, 2.1749e-13, 1.3170e-12, 4.6824e-13,\n 4.1306e-13, 3.9962e-13, 3.4587e-13, 1.0298e-14, 1.5190e-13, 8.0241e-14,\n 3.7225e-13, 1.0217e-13, 2.5727e-15, 2.3601e-14, 9.5305e-14, 1.0037e-14,\n 9.2326e-13, 2.5465e-13, 3.8985e-14, 3.0861e-13, 9.4522e-15, 3.5294e-13,\n 9.4798e-13, 1.2164e-13, 4.0304e-13, 1.2564e-13, 9.5848e-14, 9.8571e-13,\n 3.8284e-13, 1.0907e-12, 1.0203e-12, 2.2117e-13, 3.5089e-29, 1.5511e-30,\n 6.3283e-28, 4.4494e-29, 7.0852e-28, 2.7846e-29, 1.8266e-29, 3.8694e-28,\n 2.5357e-28, 3.6908e-28, 1.1142e-29, 1.3801e-29, 3.8705e-28, 3.4785e-28,\n 1.9176e-29, 4.2256e-29, 1.5567e-28, 9.9175e-32, 9.8407e-28, 1.5576e-28,\n 5.1256e-29, 3.5011e-28, 1.5571e-28, 1.5985e-28, 1.1688e-28, 3.7331e-29,\n 9.1718e-29, 1.0497e-29, 7.9312e-28, 1.6301e-28, 1.4160e-28, 1.2612e-28,\n 6.8264e-28, 2.0679e-28, 2.2306e-29, 7.9042e-29, 1.2561e-28, 1.7923e-30,\n 1.7253e-29, 5.4119e-30, 3.7259e-31, 1.6652e-29, 8.2462e-29, 1.1838e-30,\n 1.3524e-28, 3.9761e-29, 2.1011e-28, 1.0856e-30, 1.7184e-28, 1.4803e-30,\n 4.5301e-29, 5.1744e-33, 6.3617e-29, 3.0284e-28, 8.7874e-29, 1.6866e-29,\n 6.5793e-28, 8.4642e-29, 1.2226e-27, 9.8374e-29, 2.8262e-30, 1.5720e-29,\n 1.4271e-28, 8.4614e-29, 2.1801e-29, 1.1362e-28, 4.1582e-28, 5.4487e-28,\n 2.9017e-28, 1.1924e-29, 1.0152e-27, 7.4004e-28, 7.7771e-28, 1.2980e-27,\n 2.6854e-27, 8.4441e-29, 1.6735e-27, 3.5590e-29, 1.2326e-28, 2.4580e-29,\n 3.6223e-28, 1.4348e-27, 8.5728e-28, 5.5858e-28, 1.4862e-28, 2.6880e-28,\n 1.3081e-27, 2.9420e-27, 1.6476e-28, 4.5126e-28, 2.5976e-28, 1.3602e-27,\n 1.4272e-28, 8.3397e-30, 9.4642e-29, 9.0357e-29, 8.4031e-28, 2.8568e-30,\n 1.7133e-28, 1.2389e-30, 9.7410e-29, 4.5995e-28, 6.7622e-28, 5.7953e-29,\n 1.5288e-29, 6.6781e-29, 1.1555e-28, 6.6269e-30, 4.0705e-30, 1.9027e-29,\n 2.1995e-29, 3.7612e-30, 8.7054e-29, 1.0881e-28, 1.5392e-28, 1.3960e-28,\n 9.9277e-29, 1.3110e-30, 4.4281e-32, 2.5511e-30, 1.8069e-30, 1.8770e-28,\n 2.6024e-28, 1.9048e-28, 8.9937e-29, 2.4402e-28, 1.2095e-30, 4.0893e-28,\n 4.3537e-30, 5.0007e-29, 2.0369e-29, 6.4908e-28, 2.5944e-28, 4.0565e-28,\n 3.3748e-28, 7.1481e-30, 6.3580e-30, 7.5053e-29, 1.5026e-28, 4.6507e-28,\n 4.9160e-29, 2.3322e-29, 1.3382e-29, 2.0269e-30, 7.6294e-28, 7.1760e-29,\n 2.1567e-28, 1.6250e-29, 2.5551e-29, 1.6527e-27, 2.8094e-28, 3.1944e-30,\n 2.2801e-28, 2.7066e-28, 7.8557e-28, 4.6698e-28, 5.9814e-28, 2.8614e-28,\n 4.3182e-30, 3.3673e-27, 7.6292e-28, 9.3566e-30, 9.2283e-28, 2.3444e-28,\n 2.0404e-28, 1.2015e-29, 1.3852e-28, 6.6030e-29, 6.8780e-29, 2.7034e-28,\n 2.1621e-28, 2.5729e-32, 2.0060e-28, 2.0276e-29, 3.1217e-30, 4.7710e-29,\n 4.8852e-28, 3.4571e-28, 1.7875e-30, 2.7229e-28, 7.9044e-29, 5.6772e-29,\n 9.5136e-28, 9.5287e-28, 1.0085e-28, 5.1830e-30, 9.7366e-28, 9.0213e-28,\n 4.2066e-28, 5.3503e-29, 5.4597e-29, 2.1683e-28, 2.5428e-28, 2.8477e-28,\n 1.4013e-27, 9.8635e-29, 3.0897e-29, 4.8783e-28, 9.4163e-29, 4.7062e-28,\n 7.4272e-29, 8.0507e-29, 7.6351e-29, 3.6369e-28, 7.0961e-28, 5.1172e-28,\n 3.4087e-30, 1.2925e-29, 1.6609e-28, 6.8624e-32, 5.0372e-29, 3.1605e-28,\n 3.3733e-29, 9.7882e-29, 1.4441e-28, 2.8824e-28, 9.0057e-29, 1.4329e-28,\n 2.9657e-28, 1.6230e-28, 1.4014e-29, 3.2494e-29, 1.0351e-29, 1.8812e-29,\n 1.2786e-28, 8.4791e-29, 2.3916e-28, 8.2138e-28, 5.1789e-28, 3.5540e-28,\n 2.5436e-28, 3.0378e-28, 1.0972e-29, 6.2930e-28, 1.9045e-29, 2.8699e-29,\n 1.3538e-28, 7.8502e-29, 1.4303e-28, 2.8894e-29, 2.2167e-29, 4.4495e-33,\n 1.3428e-29, 1.1111e-29, 2.0669e-28, 3.6744e-28, 2.8585e-29, 2.4696e-28,\n 1.0866e-28, 3.2475e-28, 2.6835e-30, 9.1194e-29, 2.5991e-29, 3.8474e-28,\n 5.9666e-29, 3.4405e-29, 7.1399e-10, 1.1177e-08, 1.9675e-09, 1.8763e-09,\n 1.4845e-08, 1.3190e-08, 1.7993e-10, 9.5904e-11, 2.4043e-10, 2.4064e-09,\n 3.1874e-12, 6.4155e-10, 1.1646e-08, 3.9173e-10, 6.4458e-10, 2.9514e-09,\n 4.3329e-09, 8.9002e-10, 7.9333e-09, 2.9932e-09, 3.3198e-12, 9.3232e-12,\n 1.6101e-09, 1.0607e-08, 8.9361e-09, 3.1132e-11, 4.0251e-09, 2.1649e-10,\n 1.1326e-09, 2.2977e-10, 5.0089e-09, 4.1825e-09, 1.6211e-08, 1.7401e-10,\n 3.3165e-09, 4.7609e-09, 2.8085e-09, 2.8425e-09, 2.5069e-09, 9.9086e-11,\n 1.5353e-08, 5.0276e-11, 2.4836e-10, 4.3728e-10, 7.4349e-09, 6.3579e-10,\n 1.3904e-09, 5.8019e-10, 1.1160e-10, 1.0626e-09, 2.1301e-09, 3.8048e-10,\n 6.2130e-09, 5.8638e-09, 6.3195e-10, 4.4286e-10, 6.8526e-10, 4.9667e-11,\n 4.0125e-09, 1.5221e-09, 3.2444e-09, 3.2109e-09, 3.3807e-10, 8.7544e-11,\n 1.8074e-13, 8.8117e-12, 2.7217e-11, 8.1398e-11, 1.1739e-09, 8.3657e-11,\n 1.0454e-09, 2.7261e-09, 3.9234e-09, 6.2538e-10, 1.4172e-09, 1.7894e-11,\n 7.9923e-10, 2.3667e-08, 1.2418e-10, 1.1931e-09, 1.6018e-09, 1.0955e-08,\n 4.0205e-12, 3.1071e-09, 8.9526e-10, 4.3239e-09, 2.7606e-09, 3.2057e-09,\n 2.6194e-13, 1.0225e-08, 6.8735e-09, 6.2574e-12, 3.0784e-10, 7.4047e-09,\n 3.4392e-10, 1.4365e-09, 2.0302e-11, 5.1021e-09, 9.8236e-10, 4.7747e-09,\n 2.4886e-09, 8.6299e-09, 1.0869e-10, 2.0867e-09, 4.2783e-09, 1.5590e-08,\n 6.8941e-09, 1.2168e-11, 2.6139e-10, 2.6176e-09, 1.6361e-09, 3.0214e-10,\n 4.6096e-10, 5.4494e-10, 1.6161e-09, 2.4036e-10, 2.0364e-09, 4.0415e-11,\n 6.2549e-09, 7.9055e-11, 6.1285e-09, 7.0209e-09, 2.1632e-09, 6.3750e-11,\n 7.3932e-10, 8.6549e-11, 2.0239e-09, 4.7969e-09, 4.7898e-10, 1.2426e-10,\n 4.9874e-09, 2.4747e-10, 2.9128e-09, 1.6413e-09, 8.4555e-10, 5.7589e-12,\n 1.1750e-10, 1.2445e-08, 1.6037e-10, 8.6340e-09, 1.4908e-12, 4.2736e-09,\n 1.6193e-09, 1.6175e-09, 4.7741e-10, 5.8166e-09, 2.1881e-09, 8.4980e-10,\n 3.5274e-09, 1.1009e-11, 8.0170e-09, 5.0497e-10, 6.2172e-11, 8.3364e-10,\n 1.6482e-09, 6.4645e-09, 1.2458e-09, 2.9611e-09, 4.5930e-11, 7.4478e-09,\n 2.1775e-09, 2.2254e-09, 1.7463e-09, 1.3658e-08, 1.2439e-11, 3.8641e-10,\n 1.1851e-08, 9.2497e-10, 1.0927e-09, 5.6846e-11, 1.8935e-09, 4.8989e-09,\n 4.2568e-09, 6.4737e-09, 1.5709e-10, 1.9267e-09, 1.1244e-09, 1.0062e-09,\n 1.0283e-09, 6.4778e-10, 1.6391e-09, 4.1121e-09, 3.1583e-10, 7.0122e-09,\n 7.8309e-09, 1.9028e-09, 1.8688e-09, 1.7787e-09, 1.2865e-11, 3.8062e-10,\n 3.8764e-10, 8.3601e-10, 3.7819e-09, 4.7902e-10, 2.0843e-10, 2.8830e-09,\n 7.9559e-13, 7.0089e-09, 5.9267e-11, 7.6795e-09, 3.2672e-09, 9.1839e-09,\n 2.3706e-10, 1.5709e-09, 6.1686e-09, 2.3211e-09, 2.2535e-10, 5.9539e-09,\n 4.5362e-10, 2.8859e-09, 1.4660e-11, 4.4464e-10, 1.2987e-08, 2.8333e-09,\n 1.3035e-08, 1.4666e-09, 2.0940e-09, 5.1867e-12, 7.2429e-10, 1.8265e-09,\n 1.6795e-11, 1.2295e-10, 1.5778e-09, 5.0519e-10, 7.2586e-10, 4.1345e-10,\n 4.6845e-09, 4.6924e-10, 1.1850e-10, 3.9115e-11, 1.2952e-11, 2.2557e-11,\n 2.8264e-10, 3.8740e-09, 2.7027e-10, 5.5263e-10, 6.9069e-11, 6.5955e-09,\n 3.2343e-09, 6.2715e-09, 1.4270e-08, 2.9555e-10, 7.7238e-09, 3.1521e-09,\n 6.9997e-10, 2.6217e-08, 5.6577e-10, 2.6979e-09, 9.0457e-10, 6.4911e-10,\n 3.3768e-10, 2.9007e-09, 1.5626e-09, 7.5473e-11, 3.9088e-10, 6.7166e-09],\n device='cuda:0')" }, "36": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0302e-08, 3.3750e-11, 3.7092e-07, 1.6737e-06, 4.0887e-08, 8.7252e-10,\n 2.4468e-07, 1.4391e-07, 1.0242e-07, 2.3632e-10, 1.3892e-08, 7.5826e-11,\n 7.8771e-08, 4.7002e-09, 2.4413e-09, 3.1388e-08, 4.2148e-09, 3.9611e-08,\n 1.5058e-09, 3.3873e-08, 8.6142e-09, 1.9350e-10, 2.5646e-07, 5.1933e-07,\n 4.1168e-08, 6.2099e-09, 6.2734e-08, 5.5516e-08, 2.3219e-09, 1.3286e-08,\n 2.0114e-08, 3.1027e-08, 6.2282e-08, 5.0042e-08, 5.5554e-10, 4.2458e-08,\n 1.4733e-07, 2.0174e-08, 1.1088e-07, 8.4537e-10, 2.2594e-07, 1.3151e-09,\n 1.4304e-07, 1.7240e-07, 3.5233e-09, 4.4410e-10, 1.4193e-08, 5.6881e-09,\n 1.7590e-09, 2.1931e-07, 4.5039e-08, 2.1417e-07, 6.7893e-11, 2.7237e-08,\n 8.3768e-09, 2.5008e-09, 5.9538e-07, 8.8946e-09, 5.3725e-08, 9.1800e-11,\n 2.8094e-10, 6.4151e-10, 8.1664e-08, 4.0998e-08, 3.1874e-08, 1.9312e-07,\n 3.9088e-08, 1.4142e-09, 4.6141e-08, 1.8484e-07, 3.7573e-08, 2.2651e-08,\n 1.5144e-08, 2.2692e-10, 4.0700e-11, 1.9509e-08, 3.4592e-08, 4.0240e-09,\n 3.3996e-07, 3.4428e-11, 4.7450e-10, 3.7419e-08, 5.1085e-07, 1.0833e-10,\n 4.1392e-08, 1.2444e-09, 4.4067e-08, 5.2452e-09, 6.5984e-08, 4.6019e-07,\n 6.7153e-08, 1.6355e-08, 2.7615e-09, 4.6462e-08, 3.0721e-07, 1.6210e-06,\n 6.2840e-10, 8.5248e-10, 1.0167e-08, 2.9468e-08, 5.7269e-08, 1.1027e-08,\n 1.4825e-08, 5.1406e-08, 3.1462e-07, 7.5232e-10, 6.5401e-11, 1.8642e-08,\n 2.2831e-08, 1.7658e-07, 1.7623e-07, 2.3359e-10, 1.4379e-10, 1.8488e-09,\n 2.5129e-09, 5.7805e-08, 5.5761e-08, 1.5034e-08, 6.5726e-08, 2.9831e-09,\n 5.1192e-07, 1.8173e-07, 2.2215e-10, 1.6629e-08, 5.0108e-08, 1.4544e-09,\n 5.0795e-10, 1.2510e-08, 4.2908e-10, 2.9512e-08, 4.1423e-09, 1.7424e-08,\n 1.5027e-07, 2.2929e-08, 1.5845e-08, 1.4233e-08, 6.2495e-08, 6.6001e-09,\n 1.2478e-08, 1.5121e-10, 2.3202e-09, 3.3489e-07, 9.6527e-08, 1.0477e-08,\n 3.3950e-07, 1.8364e-10, 6.1542e-11, 7.6382e-08, 1.0579e-08, 3.4128e-09,\n 2.1139e-09, 2.3357e-08, 1.2571e-08, 5.2124e-10, 2.2591e-09, 3.3824e-09,\n 2.1242e-09, 5.7275e-07, 1.7011e-07, 5.8219e-07, 1.2997e-08, 1.7895e-08,\n 6.5430e-07, 1.8770e-11, 1.2428e-08, 1.8292e-06, 2.1003e-07, 3.9088e-08,\n 4.9314e-07, 6.8053e-10, 1.2098e-08, 1.0730e-07, 2.1776e-08, 4.6697e-12,\n 1.4447e-08, 5.6106e-07, 3.2333e-09, 7.0391e-10, 8.3487e-07, 2.4714e-07,\n 2.9469e-10, 2.5755e-09, 3.1713e-10, 1.5187e-08, 6.0498e-08, 3.4226e-11,\n 2.1398e-09, 4.8670e-07, 2.5721e-09, 7.0428e-11, 5.4187e-10, 9.7526e-08,\n 1.1428e-07, 9.5257e-08, 1.2478e-07, 1.4003e-08, 1.5658e-10, 3.9547e-07,\n 5.2096e-08, 2.1427e-08, 2.9569e-08, 1.3638e-08, 1.1697e-08, 2.3817e-09,\n 1.4845e-09, 2.1313e-09, 1.3356e-09, 8.0343e-09, 5.0864e-10, 1.2054e-07,\n 4.2705e-08, 3.3833e-09, 3.8782e-09, 1.7466e-08, 1.0719e-06, 1.4194e-09,\n 6.4398e-09, 6.5840e-10, 3.0889e-07, 4.5888e-07, 5.1783e-11, 2.3944e-08,\n 1.7743e-08, 6.2225e-08, 1.0675e-10, 1.4920e-09, 2.0553e-07, 2.3366e-07,\n 6.5279e-11, 1.4355e-10, 3.3749e-11, 3.5910e-09, 3.3809e-08, 1.0087e-09,\n 7.7837e-09, 9.0071e-08, 1.5305e-08, 2.1640e-11, 1.2472e-07, 1.5581e-09,\n 1.9833e-07, 1.5326e-07, 4.2078e-07, 3.1313e-07, 7.4669e-09, 6.5079e-10,\n 1.0102e-09, 1.0976e-07, 2.5002e-07, 3.2018e-09, 3.1772e-07, 8.0870e-10,\n 5.2799e-08, 1.0841e-06, 1.2156e-11, 6.9428e-07], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3916e-10, 1.3355e-13, 1.3355e-09, ..., 3.4586e-12, 3.4818e-11,\n 9.6246e-11],\n [3.2417e-10, 3.3785e-14, 1.8943e-09, ..., 5.4215e-12, 5.1450e-11,\n 1.3588e-10],\n [1.8306e-10, 2.3285e-14, 1.0706e-09, ..., 2.8288e-12, 3.0470e-11,\n 6.9231e-11],\n ...,\n [1.3852e-10, 1.9771e-14, 8.4007e-10, ..., 1.2993e-12, 2.0959e-11,\n 5.5722e-11],\n [1.8197e-11, 4.8669e-14, 1.0024e-10, ..., 3.4863e-13, 1.9703e-12,\n 7.5544e-12],\n [9.6284e-11, 1.1602e-13, 5.6499e-10, ..., 1.6443e-12, 1.3158e-11,\n 3.8664e-11]], device='cuda:0')" }, "37": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.9106e-11, 4.6640e-12, 8.7353e-10, 3.1080e-09, 7.3713e-12, 9.1688e-12,\n 1.1341e-10, 4.7751e-11, 1.4666e-11, 4.9601e-14, 3.7744e-11, 1.7212e-12,\n 1.4387e-10, 8.3409e-12, 1.9431e-11, 6.8902e-13, 2.8577e-11, 1.6188e-11,\n 1.1421e-12, 2.6088e-11, 3.5513e-12, 1.3969e-12, 8.7674e-11, 2.5687e-09,\n 1.0106e-10, 1.7759e-12, 5.1805e-11, 2.0812e-12, 6.3685e-12, 1.5455e-12,\n 1.8367e-11, 6.2765e-11, 6.9375e-12, 6.6555e-11, 1.3575e-13, 2.9621e-10,\n 3.7547e-10, 8.7622e-12, 1.8750e-10, 3.2538e-12, 6.5894e-10, 5.0011e-13,\n 1.1749e-10, 5.9941e-10, 1.8909e-12, 7.7731e-12, 1.0720e-12, 1.7619e-13,\n 5.2404e-13, 1.7149e-10, 4.3894e-11, 5.9725e-11, 2.5800e-13, 2.0358e-12,\n 1.0549e-12, 3.8819e-13, 1.5499e-10, 3.7518e-11, 4.5412e-12, 1.3577e-12,\n 8.4191e-12, 7.4626e-13, 4.8212e-11, 1.2120e-12, 5.4669e-12, 5.1885e-11,\n 4.6278e-11, 2.5893e-13, 2.7365e-12, 4.0911e-10, 1.6522e-11, 1.0810e-12,\n 4.1381e-12, 1.2014e-12, 1.8911e-13, 1.2490e-12, 9.1295e-12, 1.1160e-12,\n 4.1552e-10, 3.4074e-14, 4.5563e-13, 3.2311e-12, 5.3593e-10, 3.0603e-12,\n 6.9695e-11, 1.4843e-13, 1.4470e-11, 9.8901e-13, 2.5732e-11, 2.7215e-10,\n 6.1113e-11, 2.8795e-12, 2.3419e-12, 2.0572e-12, 4.8826e-10, 6.8503e-09,\n 3.5276e-12, 1.9781e-12, 3.6546e-12, 7.7271e-11, 8.3056e-12, 7.3702e-13,\n 4.0490e-11, 1.6591e-11, 2.4150e-10, 8.5028e-14, 1.8871e-13, 6.5281e-11,\n 4.6597e-12, 2.2811e-10, 6.9129e-10, 2.2461e-13, 5.6502e-14, 1.8538e-13,\n 3.3908e-12, 2.1689e-11, 4.3060e-11, 1.9534e-12, 2.3395e-11, 1.4098e-11,\n 3.7999e-10, 3.9399e-10, 5.3722e-14, 1.8996e-11, 3.7024e-10, 1.4581e-13,\n 1.3017e-12, 1.6078e-12, 2.4166e-13, 6.3214e-12, 3.4444e-13, 8.0737e-12,\n 8.3655e-11, 4.5418e-12, 1.8590e-12, 7.0969e-13, 6.6725e-11, 3.5662e-13,\n 2.3683e-11, 5.1470e-12, 2.7509e-13, 2.1612e-10, 2.3167e-11, 6.2384e-11,\n 9.5425e-10, 2.4695e-13, 2.6147e-13, 7.2213e-11, 2.1563e-11, 6.1932e-13,\n 8.4223e-13, 2.3259e-11, 5.3116e-13, 2.4902e-13, 1.2273e-13, 1.1559e-10,\n 3.5264e-13, 2.1465e-09, 7.6639e-11, 1.7384e-10, 1.6691e-12, 1.6857e-11,\n 2.7592e-09, 1.1082e-12, 8.5470e-13, 3.3876e-09, 5.1703e-10, 8.9571e-11,\n 5.4601e-10, 3.9197e-12, 1.4684e-14, 4.9661e-11, 1.6387e-11, 3.6926e-12,\n 4.3680e-13, 5.9702e-10, 2.6808e-12, 1.6819e-12, 9.4095e-10, 4.9446e-10,\n 4.4897e-12, 5.6050e-12, 1.4502e-11, 1.1895e-11, 1.1919e-11, 3.1173e-13,\n 5.1240e-13, 7.3655e-10, 7.9120e-13, 3.9973e-13, 3.8397e-12, 1.4691e-11,\n 3.8053e-10, 1.6168e-10, 4.6370e-11, 6.0786e-11, 1.1048e-14, 2.1334e-09,\n 5.9988e-11, 7.1531e-13, 3.0458e-12, 4.4214e-13, 1.3454e-12, 2.7744e-11,\n 2.0195e-13, 3.8845e-13, 1.7189e-11, 4.9349e-14, 5.5858e-12, 4.4931e-11,\n 2.9484e-11, 2.8571e-13, 1.4979e-13, 5.0414e-12, 1.4385e-09, 4.5008e-14,\n 1.1457e-13, 4.8052e-13, 3.2598e-11, 1.0470e-09, 6.4398e-13, 7.1056e-12,\n 1.3477e-11, 1.1744e-10, 2.3261e-12, 2.4310e-12, 1.3126e-11, 8.9170e-11,\n 1.4115e-12, 8.1451e-13, 5.4231e-12, 8.3207e-13, 7.2478e-11, 6.6668e-13,\n 2.5290e-13, 1.4534e-10, 1.6281e-11, 9.8175e-14, 4.3817e-10, 3.7537e-13,\n 3.4856e-11, 2.0738e-10, 5.0475e-10, 9.5968e-10, 2.7823e-12, 3.2181e-12,\n 6.0374e-12, 7.4388e-11, 8.5566e-10, 1.9588e-12, 7.5794e-10, 5.8365e-14,\n 2.8668e-11, 7.8858e-10, 3.8265e-13, 4.5981e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.6521e-09, 1.2037e-08, 6.4371e-09, 3.4176e-08, 1.0372e-10, 9.9126e-10,\n 1.8153e-08, 3.4086e-08, 2.7570e-08, 1.3729e-08, 5.9507e-10, 2.8667e-08,\n 1.4162e-09, 7.0624e-11, 3.7278e-09, 3.5617e-11, 1.5570e-08, 2.9486e-08,\n 8.2463e-09, 1.7617e-09, 3.2107e-09, 2.3631e-09, 1.1249e-08, 1.4420e-10,\n 3.7484e-10, 3.1807e-09, 3.2796e-10, 4.7950e-09, 2.2723e-09, 7.1341e-09,\n 1.5600e-11, 3.8282e-09, 3.8737e-09, 1.3491e-09, 1.7369e-10, 7.5948e-09,\n 2.6005e-10, 4.3670e-08, 7.3232e-09, 5.2118e-10, 1.6452e-08, 1.0717e-09,\n 3.1490e-09, 2.8475e-10, 5.7460e-09, 9.8548e-09, 1.2374e-10, 1.2873e-11,\n 8.0839e-09, 7.3448e-09, 1.0492e-08, 4.3729e-10, 4.5600e-09, 1.7092e-08,\n 2.1834e-10, 1.9098e-09, 1.7276e-08, 6.8540e-11, 7.9279e-10, 1.4233e-09,\n 5.9747e-09, 4.6765e-09, 9.6207e-10, 9.1411e-09, 5.7467e-11, 1.4409e-08,\n 1.9625e-09, 3.7520e-09, 4.2249e-09, 1.5470e-09, 7.2679e-10, 1.4239e-08,\n 1.7446e-08, 3.6551e-08, 2.3587e-10, 2.8537e-11, 1.4393e-09, 1.2289e-08,\n 1.8622e-10, 3.5342e-11, 2.8888e-09, 1.8317e-10, 7.3241e-09, 1.2581e-09,\n 1.3844e-09, 7.1560e-10, 1.3983e-09, 2.0104e-09, 2.0724e-09, 2.4178e-08,\n 2.1071e-11, 1.4375e-08, 5.9609e-09, 1.6016e-08, 3.9033e-08, 2.4879e-08,\n 2.5264e-09, 3.4061e-08, 1.2244e-09, 4.3048e-10, 1.5161e-08, 5.2118e-12,\n 6.6090e-09, 3.9928e-09, 3.7640e-10, 1.0564e-08, 6.3490e-12, 6.2212e-08,\n 1.2889e-08, 1.3371e-08, 4.7254e-11, 3.8677e-10, 1.1978e-09, 1.1949e-08,\n 3.7665e-09, 1.0041e-08, 2.1233e-11, 1.2410e-08, 1.5605e-08, 6.2562e-09,\n 2.0864e-08, 8.2424e-09, 6.4423e-09, 9.2326e-10, 4.3649e-08, 2.2889e-08,\n 5.8177e-09, 2.4043e-09, 4.9843e-09, 1.7741e-11, 5.1904e-08, 5.0444e-09,\n 6.5528e-11, 1.9351e-11, 3.3714e-08, 1.3005e-08, 1.9582e-09, 8.6047e-10,\n 7.9572e-11, 2.6055e-11, 2.1493e-08, 4.4705e-11, 3.2250e-08, 6.2753e-10,\n 7.2805e-10, 1.5434e-09, 1.2842e-08, 1.0772e-08, 3.1283e-09, 3.7672e-11,\n 6.2025e-10, 1.9792e-10, 2.3486e-11, 2.1235e-08, 1.8789e-09, 2.7858e-08,\n 6.6839e-10, 1.5616e-08, 2.1129e-09, 2.1163e-08, 5.5321e-09, 9.0048e-10,\n 9.5002e-09, 8.2650e-09, 1.1467e-08, 2.5272e-08, 5.3098e-08, 2.5087e-11,\n 9.9628e-11, 4.8316e-09, 2.7939e-09, 4.2537e-11, 1.7134e-08, 2.2054e-09,\n 2.4771e-08, 3.0890e-11, 1.1576e-08, 1.2917e-09, 5.5733e-09, 5.5932e-09,\n 5.9631e-08, 5.9374e-09, 3.8339e-09, 5.6999e-12, 1.1171e-09, 1.7315e-08,\n 7.0726e-11, 6.3258e-09, 1.6116e-08, 1.4359e-09, 1.6466e-08, 1.4318e-09,\n 4.1509e-09, 3.7994e-09, 2.5106e-08, 1.3435e-08, 1.5807e-08, 1.1604e-10,\n 5.0356e-09, 3.2251e-08, 3.4481e-11, 2.9914e-09, 1.5420e-09, 2.2612e-08,\n 2.2092e-08, 3.1598e-09, 1.0452e-08, 2.1560e-09, 1.5444e-10, 5.8911e-11,\n 6.9963e-11, 3.5183e-09, 1.8128e-08, 1.6135e-08, 5.8224e-09, 2.0254e-08,\n 2.4601e-09, 1.2169e-08, 8.8650e-11, 3.1932e-10, 6.0976e-12, 1.7187e-09,\n 1.9065e-08, 1.4207e-09, 4.7914e-10, 9.8124e-09, 2.7622e-09, 8.6252e-09,\n 6.1337e-10, 1.2167e-08, 1.7027e-10, 6.6576e-09, 2.0694e-08, 8.7289e-11,\n 1.7231e-08, 3.8666e-10, 4.0916e-09, 1.5358e-11, 3.2489e-08, 6.5481e-10,\n 2.9780e-09, 6.7227e-10, 7.8653e-10, 4.6069e-09, 6.7799e-09, 4.6983e-09,\n 1.6984e-08, 6.4585e-09, 2.7494e-08, 2.6418e-09, 1.4300e-08, 3.0999e-13,\n 1.1271e-09, 5.3104e-09, 6.8644e-10, 3.6539e-09], device='cuda:0')" }, "38": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0666e-11, 1.4355e-14, 5.8703e-10, 2.0940e-09, 6.4499e-11, 2.1052e-12,\n 3.5251e-10, 2.2652e-10, 1.5330e-10, 1.7120e-13, 1.0500e-11, 6.3984e-13,\n 1.6181e-10, 3.1219e-11, 2.1619e-11, 3.0226e-11, 2.4829e-11, 6.5524e-11,\n 1.3275e-12, 8.3768e-11, 2.8438e-11, 7.9818e-13, 3.1956e-10, 7.8548e-10,\n 1.2591e-10, 7.5734e-12, 1.1080e-10, 5.6894e-11, 2.4173e-12, 2.2665e-11,\n 3.1131e-11, 8.0741e-11, 9.4031e-11, 1.5011e-10, 1.2632e-12, 1.4158e-10,\n 2.5931e-10, 8.1747e-12, 1.8514e-10, 4.8349e-12, 3.1721e-10, 2.7200e-12,\n 2.6607e-10, 2.4070e-10, 1.8087e-11, 1.2980e-11, 1.0007e-11, 6.1326e-13,\n 1.7515e-13, 2.8544e-10, 7.0214e-11, 3.1083e-10, 2.3524e-13, 2.0312e-11,\n 4.3612e-12, 1.7642e-12, 8.2980e-10, 1.4559e-11, 6.5866e-11, 1.2010e-12,\n 2.7159e-15, 5.9942e-12, 8.0132e-11, 5.4007e-11, 4.9091e-11, 2.8479e-10,\n 5.2911e-11, 2.8711e-12, 6.8497e-11, 2.9228e-10, 5.8947e-11, 1.9381e-11,\n 3.8098e-11, 3.9327e-13, 1.2041e-13, 1.5444e-11, 5.6387e-11, 9.9002e-13,\n 4.6753e-10, 1.4267e-13, 1.0355e-12, 4.6801e-11, 7.3245e-10, 7.3901e-13,\n 5.0630e-11, 3.4051e-12, 3.9877e-11, 9.6111e-12, 1.0994e-10, 5.9342e-10,\n 1.1381e-10, 2.9169e-11, 8.4922e-13, 6.9577e-11, 4.5385e-10, 2.1069e-09,\n 1.6920e-12, 5.3344e-12, 2.9784e-11, 8.4042e-11, 8.7315e-11, 1.8043e-11,\n 6.9571e-11, 5.7272e-11, 3.7771e-10, 6.1058e-12, 2.2870e-13, 7.2082e-11,\n 3.9863e-11, 2.6457e-10, 2.9438e-10, 4.3625e-12, 1.3810e-12, 2.9335e-12,\n 1.1143e-12, 9.2805e-11, 9.2073e-11, 2.5932e-11, 1.0007e-10, 4.9762e-12,\n 6.0250e-10, 2.1781e-10, 5.1303e-13, 3.5052e-11, 1.6323e-10, 3.2324e-12,\n 1.2444e-14, 1.0214e-12, 7.3955e-16, 1.9375e-11, 7.4361e-12, 2.8335e-11,\n 1.6505e-10, 3.8769e-11, 2.5593e-11, 2.2225e-11, 1.2144e-10, 2.7870e-12,\n 2.4527e-11, 2.8528e-13, 3.8984e-12, 3.8762e-10, 1.5848e-10, 1.4740e-11,\n 3.4509e-10, 4.3909e-12, 3.9923e-14, 1.2178e-10, 2.7355e-11, 2.0345e-12,\n 7.8911e-12, 7.2075e-11, 1.0937e-11, 8.6083e-13, 3.8281e-12, 3.5734e-11,\n 1.0423e-13, 8.3601e-10, 2.4994e-10, 7.3667e-10, 4.3012e-12, 3.2161e-11,\n 9.8154e-10, 1.2645e-14, 2.0657e-11, 2.5527e-09, 3.3457e-10, 8.0458e-11,\n 6.2412e-10, 5.7430e-12, 9.6237e-12, 1.0934e-10, 7.0662e-11, 8.2141e-12,\n 2.3465e-11, 7.1586e-10, 9.9600e-13, 3.5319e-13, 1.0096e-09, 3.7299e-10,\n 3.8569e-13, 1.9116e-12, 4.6294e-13, 2.1899e-11, 9.1983e-11, 1.5348e-13,\n 3.5261e-12, 5.6798e-10, 1.2876e-11, 1.5590e-13, 1.9038e-13, 1.1020e-10,\n 2.1655e-10, 1.4504e-10, 1.3244e-10, 2.1918e-11, 5.7416e-14, 6.1521e-10,\n 9.7123e-11, 1.8223e-11, 2.2094e-11, 2.2107e-11, 9.1852e-12, 3.0863e-11,\n 5.8494e-14, 3.7140e-12, 8.8617e-12, 1.3552e-11, 1.0087e-12, 1.9903e-10,\n 4.1532e-11, 4.5461e-12, 7.1550e-12, 4.1019e-11, 1.5023e-09, 3.6736e-13,\n 1.0467e-11, 1.1749e-12, 4.3426e-10, 6.9269e-10, 4.2022e-14, 4.3376e-11,\n 1.1108e-11, 9.7729e-11, 6.5192e-13, 8.8132e-13, 2.8648e-10, 3.2154e-10,\n 1.6335e-13, 3.4386e-13, 4.7869e-14, 6.1721e-12, 5.4281e-11, 1.0630e-11,\n 2.9958e-12, 1.7688e-10, 4.8657e-11, 3.7968e-15, 2.4053e-10, 1.4479e-13,\n 2.8443e-10, 2.4587e-10, 5.0842e-10, 4.3863e-10, 1.5371e-11, 1.8488e-12,\n 2.3880e-12, 1.6830e-10, 4.2134e-10, 8.3794e-12, 5.1705e-10, 7.3644e-13,\n 5.9975e-11, 1.5068e-09, 7.6909e-14, 1.1161e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.0536e-08, 1.6024e-08, 2.2937e-08, ..., 2.9215e-09, 1.7832e-08,\n 3.2929e-08],\n [6.2506e-09, 1.6247e-09, 2.4324e-09, ..., 3.2392e-10, 1.8005e-09,\n 3.2645e-09],\n [6.8091e-09, 1.7555e-09, 2.5344e-09, ..., 3.1189e-10, 2.0211e-09,\n 3.7756e-09],\n [7.1340e-09, 1.9696e-09, 2.6820e-09, ..., 3.3829e-10, 2.1298e-09,\n 3.9542e-09]], device='cuda:0')" }, "39": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.9731e-13, 1.4911e-11, 3.0222e-12, ..., 1.1798e-14, 1.4404e-12,\n 4.6471e-11],\n [5.8929e-12, 8.3959e-13, 2.3957e-13, ..., 5.8394e-12, 7.9410e-15,\n 6.6600e-11],\n [9.5389e-14, 2.4071e-11, 6.0747e-13, ..., 4.0387e-13, 1.5031e-12,\n 3.7412e-11],\n ...,\n [1.1491e-10, 1.2390e-11, 2.0024e-10, ..., 8.0737e-12, 2.5410e-11,\n 2.0414e-09],\n [3.5168e-11, 3.3809e-11, 3.8462e-10, ..., 5.5748e-12, 7.4866e-12,\n 6.2463e-10],\n [1.0998e-10, 2.6366e-09, 3.3278e-08, ..., 7.6446e-10, 2.3730e-11,\n 1.8487e-08]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.5180e-06, 1.5641e-07, 1.6925e-07, 1.8079e-07], device='cuda:0')" }, "40": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8445e-11, 4.5690e-11, 1.2304e-11, 4.6142e-12, 3.7957e-11, 1.0584e-11,\n 4.2366e-12, 1.3825e-11, 3.6597e-12, 9.9173e-12, 3.3563e-11, 8.3257e-12,\n 3.0827e-13, 3.9027e-12, 1.2434e-12, 6.3205e-13, 5.0960e-11, 1.7224e-11,\n 8.6111e-12, 4.6243e-11, 4.0339e-12, 7.6690e-11, 1.0562e-11, 5.7889e-12,\n 1.8365e-11, 3.2598e-11, 2.2816e-13, 8.8165e-13, 2.3344e-11, 3.9777e-11,\n 2.8732e-11, 3.1565e-11, 1.7349e-11, 3.9295e-11, 6.2067e-11, 1.8776e-10,\n 5.2068e-11, 7.9412e-12, 5.9564e-11, 3.9233e-11, 1.5926e-11, 2.4561e-11,\n 3.7635e-11, 1.9990e-11, 3.8885e-11, 1.6207e-11, 2.9656e-11, 1.2639e-11,\n 8.3663e-13, 1.0896e-10, 1.4269e-11, 2.2488e-11, 1.7157e-12, 1.0409e-10,\n 2.9201e-12, 1.8096e-11, 4.4285e-11, 9.2939e-13, 5.4480e-11, 1.1204e-11,\n 1.8193e-10, 1.0971e-10, 3.7951e-12, 1.2687e-11, 8.2389e-13, 2.9959e-12,\n 1.1393e-11, 5.9114e-12, 1.5945e-11, 6.8923e-11, 2.8043e-11, 1.4368e-11,\n 7.5271e-12, 2.0952e-11, 7.5111e-11, 1.1125e-10, 2.5540e-11, 6.2674e-12,\n 4.1729e-11, 4.2376e-12, 8.3120e-11, 8.9673e-12, 2.2142e-12, 1.5238e-12,\n 1.8192e-12, 6.7500e-12, 1.6074e-10, 1.9652e-11, 7.0175e-13, 4.0152e-11,\n 5.1074e-13, 2.3992e-11, 3.2051e-12, 2.1324e-10, 3.6773e-13, 1.5809e-11,\n 3.4424e-11, 2.5380e-13, 7.5467e-13, 2.0472e-11, 5.9851e-11, 1.6935e-13,\n 8.9484e-11, 5.4597e-12, 1.0151e-11, 9.6093e-12, 1.3509e-11, 8.2607e-11,\n 5.5215e-11, 3.0599e-11, 7.5763e-12, 2.1283e-11, 1.3403e-11, 1.6192e-11,\n 9.0111e-12, 2.0618e-11, 1.7264e-11, 3.0943e-11, 5.0123e-11, 2.0711e-10,\n 1.4591e-12, 1.9376e-11, 3.5321e-11, 1.0260e-12, 6.1593e-13, 7.6893e-13,\n 2.7610e-11, 1.1961e-10, 1.1672e-12, 2.0288e-12, 1.0752e-11, 9.4745e-11,\n 1.1476e-13, 6.1082e-11, 1.4483e-11, 2.4694e-11, 5.2862e-11, 1.7193e-12,\n 1.5672e-11, 3.1480e-13, 1.7583e-11, 2.7726e-12, 2.9965e-12, 4.4435e-12,\n 7.2108e-13, 3.4710e-14, 1.0001e-11, 6.2904e-12, 2.2632e-12, 3.2061e-12,\n 5.9500e-13, 2.6488e-12, 1.9204e-11, 3.5873e-11, 2.5969e-12, 2.5407e-12,\n 7.7188e-12, 2.7560e-12, 1.3047e-11, 2.0333e-12, 5.5319e-11, 3.9469e-13,\n 5.2322e-13, 7.6869e-11, 2.7191e-11, 2.2094e-10, 2.2890e-10, 3.3340e-11,\n 4.5599e-11, 2.5701e-10, 7.3678e-12, 9.0680e-12, 1.4532e-11, 8.9184e-11,\n 7.2181e-12, 6.9636e-11, 1.6346e-12, 3.3576e-11, 3.0603e-11, 3.8131e-10,\n 8.8528e-11, 1.4555e-10, 1.0190e-11, 2.2210e-11, 2.7582e-10, 5.5226e-12,\n 7.1383e-14, 2.5845e-13, 9.2961e-11, 2.3152e-11, 8.8791e-11, 4.1457e-11,\n 2.2268e-11, 3.6546e-12, 1.4031e-10, 1.3777e-10, 7.1926e-11, 2.1619e-11,\n 7.7640e-11, 5.1436e-13, 2.1532e-11, 5.1043e-11, 3.1440e-13, 6.8617e-13,\n 1.2377e-11, 1.0343e-10, 7.2206e-11, 1.1224e-11, 2.1024e-12, 9.9098e-14,\n 1.2698e-10, 6.3218e-11, 1.7086e-12, 1.1088e-12, 3.5421e-11, 1.1392e-11,\n 1.5401e-11, 6.4886e-11, 5.8014e-11, 1.4118e-10, 2.1144e-12, 2.4044e-11,\n 5.5175e-13, 1.6360e-10, 9.8629e-12, 5.6166e-12, 2.8602e-12, 1.9982e-12,\n 1.4575e-11, 2.3340e-12, 3.0435e-11, 3.3835e-12, 1.0824e-12, 1.7689e-11,\n 9.3472e-12, 1.5034e-12, 1.5460e-12, 8.5509e-12, 7.1027e-12, 3.0553e-12,\n 2.5339e-12, 3.4054e-12, 4.0199e-12, 5.5102e-12, 1.8063e-11, 4.1839e-12,\n 4.4189e-12, 2.6021e-12, 1.5577e-12, 7.2740e-12, 1.9725e-11, 2.6071e-11,\n 2.1799e-12, 1.7113e-11, 2.9612e-12, 2.2504e-11, 4.4298e-26, 3.6876e-27,\n 1.0330e-26, 2.2834e-27, 2.2301e-28, 6.2035e-27, 3.1224e-27, 9.4983e-27,\n 7.1831e-29, 6.8845e-27, 1.4416e-27, 2.9607e-27, 1.3270e-26, 5.1083e-28,\n 3.0093e-28, 7.8842e-28, 1.9288e-27, 6.4853e-28, 2.4041e-26, 4.7192e-27,\n 4.5278e-28, 4.1584e-29, 1.0800e-27, 7.3017e-27, 6.9928e-28, 1.6272e-28,\n 2.5336e-28, 1.7909e-27, 3.8330e-29, 1.2949e-27, 5.7817e-27, 5.4600e-27,\n 3.3597e-27, 1.8749e-28, 1.0995e-27, 1.2765e-27, 3.8979e-27, 8.6191e-28,\n 2.7429e-27, 6.0789e-27, 3.8574e-27, 2.3749e-27, 8.2301e-28, 6.8656e-27,\n 6.7877e-27, 1.1373e-27, 5.2892e-28, 1.0651e-27, 6.9428e-27, 2.0923e-27,\n 7.8981e-28, 1.8473e-28, 2.5733e-27, 2.8689e-27, 6.7642e-27, 7.1338e-27,\n 5.0876e-27, 2.9600e-26, 3.8613e-27, 2.9765e-27, 1.0562e-26, 5.4635e-27,\n 6.2141e-27, 6.0324e-27, 1.4984e-27, 1.9194e-27, 3.1045e-28, 1.0328e-27,\n 3.7754e-27, 1.0208e-27, 5.1697e-27, 3.3857e-28, 8.6840e-29, 3.2288e-27,\n 8.4259e-28, 2.7945e-27, 1.7856e-27, 1.6234e-27, 4.9682e-27, 2.0886e-27,\n 1.9925e-27, 1.5727e-26, 9.0760e-28, 1.2913e-27, 2.7147e-27, 1.7083e-28,\n 1.5948e-27, 4.5352e-29, 1.5364e-27, 1.2889e-28, 6.2966e-28, 3.8620e-27,\n 2.0735e-28, 1.6941e-27, 5.3781e-28, 1.4396e-27, 2.2743e-27, 1.1027e-27,\n 8.8123e-28, 1.8916e-27, 1.8181e-27, 3.9940e-28, 6.4652e-27, 7.1489e-27,\n 1.0229e-27, 2.5973e-27, 1.7774e-27, 7.6970e-27, 3.0670e-27, 1.0302e-27,\n 3.0115e-28, 1.6470e-27, 4.0125e-27, 3.9188e-28, 1.7458e-27, 1.2055e-28,\n 6.0684e-28, 1.7020e-27, 2.8030e-28, 8.9023e-28, 1.1109e-27, 1.3828e-27,\n 5.0247e-27, 2.0112e-27, 8.0456e-27, 1.1369e-26, 5.8366e-27, 1.6162e-26,\n 3.1443e-28, 3.1283e-28, 3.1391e-27, 1.1555e-27, 1.2138e-28, 3.0439e-29,\n 1.1407e-27, 2.9132e-28, 1.3458e-27, 8.4937e-28, 4.4286e-27, 5.7711e-28,\n 1.0751e-27, 1.1764e-26, 1.1551e-27, 2.0180e-28, 5.1382e-28, 1.5839e-28,\n 1.5603e-28, 1.9186e-27, 3.2880e-27, 3.6439e-27, 2.0451e-28, 5.2065e-27,\n 5.3871e-28, 5.0227e-28, 1.2566e-26, 2.5033e-27, 4.0543e-28, 1.6966e-28,\n 1.1083e-27, 4.8145e-28, 2.4861e-27, 7.8620e-29, 1.7235e-27, 3.9536e-28,\n 6.4949e-27, 8.0718e-28, 1.5081e-27, 5.6844e-27, 2.2838e-27, 1.1237e-27,\n 4.7290e-27, 1.8078e-27, 3.0546e-27, 2.9478e-28, 2.8470e-26, 3.9359e-28,\n 6.0268e-27, 9.4530e-28, 1.2598e-27, 3.2577e-27, 1.1672e-28, 7.4367e-28,\n 3.1512e-27, 2.1438e-27, 1.9209e-27, 1.7235e-27, 3.3175e-27, 6.2730e-28,\n 2.4048e-27, 2.5941e-27, 2.6347e-27, 2.4640e-27, 5.4071e-27, 9.1867e-27,\n 1.4708e-27, 4.0538e-28, 4.6063e-27, 1.2791e-27, 1.1049e-28, 2.9881e-28,\n 3.4959e-27, 7.9634e-27, 3.2116e-27, 1.4052e-27, 3.8736e-28, 6.7746e-29,\n 2.6998e-27, 4.5755e-27, 6.8478e-28, 2.6634e-28, 7.5642e-28, 1.0366e-27,\n 1.3033e-27, 7.5141e-28, 2.7042e-27, 3.7953e-27, 7.7750e-27, 7.7454e-28,\n 8.1356e-28, 2.1841e-27, 2.7625e-27, 1.7453e-27, 1.6939e-28, 5.2093e-27,\n 1.6677e-28, 7.6178e-28, 2.7748e-27, 2.6589e-28, 3.3530e-27, 5.1311e-27,\n 7.9975e-28, 5.1788e-27, 5.0979e-27, 2.9929e-27, 2.4584e-27, 3.2418e-27,\n 7.6405e-29, 2.2410e-27, 6.9318e-28, 1.8942e-27, 2.8340e-28, 9.4624e-28,\n 1.8141e-28, 1.1661e-26, 1.8724e-27, 3.0065e-27, 3.6100e-28, 1.0794e-27,\n 3.1505e-27, 1.2102e-26, 2.8867e-27, 5.1599e-27, 6.2198e-27, 4.8290e-27,\n 7.7503e-28, 1.0293e-27, 1.8257e-07, 6.7960e-09, 4.0021e-08, 2.6300e-09,\n 1.0349e-08, 3.4457e-09, 7.0517e-10, 1.4468e-08, 7.9884e-09, 3.2888e-09,\n 1.1420e-07, 2.2272e-09, 6.6522e-09, 8.8840e-08, 1.3069e-08, 1.7734e-08,\n 2.9491e-09, 1.0938e-10, 7.4890e-09, 1.0524e-08, 2.2153e-08, 1.6035e-08,\n 9.6620e-09, 2.3879e-09, 2.3480e-10, 1.9238e-09, 5.5088e-10, 1.3830e-08,\n 1.2517e-07, 3.5807e-09, 4.0011e-08, 9.4479e-09, 8.9704e-09, 2.2684e-08,\n 6.6525e-11, 5.4755e-08, 2.8035e-08, 8.8254e-09, 5.7324e-08, 1.3771e-09,\n 2.2493e-09, 4.3437e-09, 4.0795e-09, 1.1077e-08, 6.3049e-09, 1.8943e-10,\n 1.3475e-09, 1.4397e-08, 2.3793e-09, 2.7565e-08, 9.4513e-08, 1.9906e-08,\n 2.0923e-09, 4.5627e-08, 5.8397e-09, 3.1789e-08, 2.9983e-08, 4.8630e-09,\n 4.3121e-08, 3.6204e-09, 1.5586e-09, 2.7788e-11, 4.5083e-10, 7.4469e-10,\n 1.8532e-08, 1.1148e-08, 8.5084e-09, 3.5472e-09, 1.1310e-08, 1.6788e-09,\n 2.3210e-08, 5.0127e-08, 3.0577e-08, 7.6957e-10, 1.3265e-09, 4.0145e-08,\n 2.8897e-08, 7.7497e-08, 3.4835e-08, 2.5727e-09, 1.4448e-09, 2.9820e-09,\n 1.4239e-08, 5.4341e-09, 1.3474e-08, 2.4830e-09, 1.6364e-08, 1.8954e-09,\n 4.8276e-09, 5.2488e-08, 7.2980e-08, 5.9436e-09, 2.5806e-08, 1.7687e-08,\n 1.0449e-08, 2.6058e-08, 2.5626e-08, 1.7218e-09, 1.0784e-08, 8.7391e-09,\n 7.2676e-09, 6.9728e-08, 9.5269e-09, 5.7947e-08, 2.9987e-08, 5.5564e-09,\n 1.1562e-08, 1.3561e-09, 2.8653e-09, 2.7808e-10, 3.1064e-08, 3.2316e-09,\n 3.0336e-08, 4.2380e-09, 7.3550e-08, 1.8649e-08, 1.2019e-08, 6.3883e-08,\n 1.0273e-08, 6.3109e-09, 7.4716e-10, 1.0011e-08, 1.5020e-08, 3.6520e-09,\n 3.6917e-09, 1.5398e-08, 3.0610e-09, 1.6016e-09, 3.7103e-08, 1.5058e-09,\n 2.1000e-09, 2.3955e-09, 2.6065e-09, 1.0403e-08, 1.4476e-08, 6.5160e-09,\n 6.4377e-09, 4.2389e-10, 1.3592e-08, 9.4861e-09, 1.6439e-09, 8.3458e-09,\n 2.9254e-09, 1.8297e-09, 1.8917e-08, 4.3743e-09, 4.6635e-11, 1.2601e-08,\n 9.4681e-09, 2.9250e-09, 2.6130e-08, 1.4818e-08, 4.9048e-09, 2.0157e-09,\n 4.9450e-08, 1.7606e-09, 1.7930e-09, 2.0122e-08, 7.4958e-09, 1.8178e-08,\n 1.7439e-08, 6.2312e-09, 2.6831e-08, 1.6110e-08, 1.3396e-09, 2.5756e-09,\n 3.4276e-11, 2.9881e-09, 1.8941e-08, 2.7437e-11, 9.7655e-08, 4.9982e-11,\n 5.8998e-09, 2.3515e-09, 5.3464e-08, 2.0641e-08, 9.1801e-09, 1.9344e-08,\n 4.9638e-08, 7.5586e-09, 8.5443e-10, 3.5393e-09, 5.8825e-09, 3.1606e-09,\n 1.0374e-09, 2.2689e-09, 4.0608e-09, 9.2402e-10, 4.3191e-08, 2.5969e-10,\n 9.4036e-09, 1.9460e-09, 6.8855e-08, 3.3709e-08, 1.4092e-09, 4.7627e-09,\n 2.6444e-08, 1.3187e-09, 1.3232e-09, 3.1236e-08, 7.4074e-09, 1.5284e-08,\n 4.5129e-09, 2.0788e-08, 2.3204e-09, 5.2703e-09, 2.4803e-08, 1.1642e-09,\n 2.0275e-08, 9.5437e-09, 3.3648e-09, 1.5209e-08, 7.3286e-08, 1.4908e-09,\n 6.8126e-09, 1.2676e-08, 5.8018e-11, 1.0556e-08, 9.6674e-09, 2.2873e-08,\n 9.1972e-10, 1.5755e-08, 3.3299e-09, 5.2459e-09, 1.4843e-08, 5.8991e-11,\n 2.3675e-08, 3.9754e-09, 3.2948e-08, 1.1154e-08, 1.5365e-08, 2.7786e-09,\n 7.2416e-09, 2.6159e-09, 4.5356e-08, 9.0610e-09, 6.1695e-09, 4.3162e-08,\n 3.8935e-08, 3.7232e-08, 7.8856e-09, 3.7742e-08, 1.3708e-09, 1.6102e-10,\n 3.6067e-09, 1.7015e-09, 1.0473e-09, 2.3627e-09, 2.9678e-08, 9.7925e-10,\n 2.5705e-09, 1.3454e-08, 4.9677e-09, 1.8411e-09, 7.7007e-10, 3.4654e-08],\n device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.0536e-08, 1.6024e-08, 2.2937e-08, ..., 2.9215e-09, 1.7832e-08,\n 3.2929e-08],\n [6.2506e-09, 1.6247e-09, 2.4324e-09, ..., 3.2392e-10, 1.8005e-09,\n 3.2645e-09],\n [6.8091e-09, 1.7555e-09, 2.5344e-09, ..., 3.1189e-10, 2.0211e-09,\n 3.7756e-09],\n [7.1340e-09, 1.9696e-09, 2.6820e-09, ..., 3.3829e-10, 2.1298e-09,\n 3.9542e-09]], device='cuda:0')" }, "41": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.6805e-09, 2.6935e-09, 8.6822e-10, ..., 1.3889e-08, 4.7952e-10,\n 5.4115e-09],\n [1.7360e-09, 7.1055e-10, 4.0918e-10, ..., 3.2650e-09, 1.5510e-10,\n 2.8352e-09],\n [4.0306e-10, 2.3823e-11, 9.9820e-11, ..., 1.2808e-10, 3.2007e-11,\n 1.3410e-10],\n ...,\n [1.4730e-09, 1.0638e-10, 3.9696e-10, ..., 6.5487e-10, 9.1545e-11,\n 3.3389e-10],\n [7.6147e-10, 1.2654e-10, 1.8910e-10, ..., 6.4245e-10, 4.7878e-11,\n 1.5506e-10],\n [1.8481e-09, 5.5388e-10, 4.2802e-10, ..., 2.3956e-09, 1.6200e-10,\n 1.7236e-09]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.5180e-06, 1.5641e-07, 1.6925e-07, 1.8079e-07], device='cuda:0')" }, "42": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.6356e-07, 4.1745e-08, 3.6905e-10, 1.6084e-07, 4.9675e-08, 4.2794e-09,\n 5.8247e-09, 8.9685e-09, 1.0155e-09, 2.2171e-08, 2.8505e-08, 3.2584e-08,\n 4.8535e-08, 9.7040e-09, 5.3580e-08, 2.4110e-08, 1.0241e-07, 1.5493e-08,\n 1.5761e-09, 1.2498e-07, 1.5878e-08, 1.0906e-08, 3.2746e-08, 1.3021e-07,\n 7.1160e-09, 1.7760e-07, 3.1790e-08, 3.3070e-08, 1.7294e-09, 1.0288e-08,\n 1.4806e-07, 3.7451e-09, 8.6772e-08, 2.8317e-09, 5.7345e-08, 1.1773e-09,\n 2.2102e-08, 4.6871e-08, 2.8809e-08, 2.7215e-08, 1.8149e-09, 3.8130e-08,\n 1.3120e-07, 3.5282e-09, 6.1680e-08, 4.5836e-09, 3.5190e-09, 1.3165e-09,\n 3.9941e-08, 1.2474e-08, 1.5211e-08, 8.3938e-09, 6.4426e-09, 1.7333e-08,\n 2.6548e-07, 2.7868e-09, 7.7244e-09, 3.8978e-09, 8.3892e-10, 6.6708e-08,\n 2.0755e-08, 7.5354e-10, 4.3558e-08, 1.7932e-07, 7.3456e-09, 1.1150e-08,\n 8.0295e-08, 1.2563e-09, 2.5087e-07, 2.5676e-09, 3.6016e-08, 2.5313e-08,\n 1.7133e-08, 8.9540e-10, 6.0379e-09, 7.8135e-09, 4.2351e-08, 1.4453e-08,\n 2.3105e-08, 4.4590e-08, 4.8358e-09, 4.6214e-09, 4.1514e-07, 1.3923e-08,\n 9.2541e-10, 1.7322e-09, 3.5439e-09, 1.8610e-09, 3.6306e-08, 2.8596e-09,\n 9.6575e-10, 7.2436e-08, 4.9007e-09, 1.3839e-07, 3.2844e-08, 4.3750e-09,\n 1.0148e-07, 3.6671e-08, 1.6765e-07, 1.2538e-09, 1.9067e-08, 1.6993e-07,\n 2.2314e-07, 3.7309e-07, 4.6335e-09, 1.6249e-08, 9.7254e-10, 5.9373e-08,\n 2.9039e-08, 1.4688e-09, 2.6046e-09, 2.8234e-08, 3.1853e-09, 4.8978e-09,\n 4.3044e-09, 2.3170e-08, 2.4138e-09, 1.8787e-08, 2.1158e-09, 1.6908e-08,\n 1.3670e-07, 7.9502e-10, 3.3666e-07, 1.2665e-08, 2.4471e-07, 2.2477e-09,\n 8.0142e-08, 9.8402e-08, 1.1382e-07, 9.4490e-09, 1.2081e-07, 9.1894e-08,\n 1.4720e-08, 1.1077e-09, 1.4721e-09, 1.3013e-07, 1.1369e-07, 7.9147e-08,\n 1.4023e-08, 2.1946e-09, 2.3842e-08, 1.5705e-07, 4.0764e-08, 6.0965e-08,\n 3.1880e-08, 7.1102e-10, 4.0028e-08, 2.8771e-08, 2.7132e-09, 1.1627e-09,\n 6.3122e-10, 1.0528e-07, 4.0575e-09, 1.2721e-08, 8.2799e-08, 1.7402e-07,\n 5.5701e-08, 1.2698e-07, 1.4575e-08, 1.0752e-07, 1.1452e-07, 9.7641e-08,\n 4.4686e-08, 1.5430e-07, 7.8346e-10, 1.9440e-07, 2.1345e-08, 2.4608e-08,\n 1.4057e-08, 1.9585e-09, 3.9984e-09, 9.2030e-10, 2.0521e-08, 5.9726e-09,\n 2.8954e-08, 9.1192e-08, 4.8898e-08, 1.6063e-09, 4.6251e-08, 1.0308e-09,\n 2.1357e-09, 6.1592e-08, 1.0956e-09, 2.5455e-08, 1.5811e-09, 3.9624e-08,\n 6.5704e-08, 1.8195e-08, 2.9499e-08, 6.3464e-08, 1.1494e-07, 7.0772e-08,\n 6.9460e-08, 1.2684e-09, 9.6670e-09, 1.4533e-09, 2.0054e-09, 1.6402e-08,\n 7.4209e-10, 2.1710e-08, 6.6311e-08, 7.5062e-08, 9.7206e-09, 2.8757e-08,\n 9.1605e-09, 1.5101e-07, 1.5863e-09, 1.0966e-07, 4.4548e-09, 1.0195e-08,\n 3.1293e-09, 2.6443e-08, 2.8720e-09, 1.5363e-08, 1.1495e-07, 1.8310e-08,\n 9.3906e-09, 1.3074e-09, 3.0670e-09, 8.8801e-09, 5.2095e-08, 2.3117e-09,\n 1.0612e-09, 2.3933e-08, 6.0671e-08, 3.2904e-07, 3.6327e-08, 7.6687e-09,\n 6.3760e-09, 1.8363e-07, 6.4194e-08, 1.2242e-08, 2.0392e-07, 6.0020e-09,\n 9.1140e-09, 2.2145e-08, 9.4260e-09, 5.4933e-08, 9.6007e-09, 1.6675e-10,\n 8.7680e-08, 5.6320e-09, 7.9842e-08, 1.0164e-08, 1.5690e-09, 2.9341e-09,\n 9.8147e-08, 3.7451e-09, 1.4204e-07, 1.6550e-07, 9.2561e-08, 1.6822e-08,\n 3.5002e-08, 2.8969e-09, 4.2748e-09, 2.9824e-08], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.0536e-08, 1.6024e-08, 2.2937e-08, ..., 2.9215e-09, 1.7832e-08,\n 3.2929e-08],\n [6.2506e-09, 1.6247e-09, 2.4324e-09, ..., 3.2392e-10, 1.8005e-09,\n 3.2645e-09],\n [6.8091e-09, 1.7555e-09, 2.5344e-09, ..., 3.1189e-10, 2.0211e-09,\n 3.7756e-09],\n [7.1340e-09, 1.9696e-09, 2.6820e-09, ..., 3.3829e-10, 2.1298e-09,\n 3.9542e-09]], device='cuda:0')" }, "43": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.2470e-08, 2.0408e-07, 2.5502e-08, ..., 5.2823e-07, 4.9773e-07,\n 2.3015e-07],\n [2.1664e-09, 1.3939e-08, 1.6482e-09, ..., 3.6723e-08, 3.5791e-08,\n 1.5418e-08],\n [1.5119e-09, 8.7714e-09, 1.1845e-09, ..., 2.2845e-08, 2.0315e-08,\n 1.0640e-08],\n [2.3710e-09, 1.5885e-08, 1.8863e-09, ..., 4.0679e-08, 3.9441e-08,\n 1.6849e-08],\n [2.3389e-09, 1.3381e-08, 1.8587e-09, ..., 3.4721e-08, 3.0689e-08,\n 1.6453e-08]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.5180e-06, 1.5641e-07, 1.6925e-07, 1.8079e-07], device='cuda:0')" }, - "44": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.1607e-05, 8.1958e-07, 4.8098e-07, 9.2031e-07, 7.2403e-07],\n device='cuda:0')" + "8": { + "step": "tensor(3756.)", + "exp_avg": "tensor([[-4.8903e-07, -1.4045e-07, 0.0000e+00, ..., -7.3340e-07,\n 1.4998e-05, 2.1285e-07],\n [-1.9784e-08, 1.2568e-06, 0.0000e+00, ..., -1.4949e-07,\n -3.2219e-06, -4.5879e-07],\n [ 1.6308e-07, 5.0047e-07, 5.6052e-45, ..., -1.2668e-06,\n -2.1327e-06, -3.3276e-07],\n ...,\n [ 3.7087e-07, 2.1190e-06, 0.0000e+00, ..., 2.7206e-06,\n 3.8326e-06, -1.6441e-08],\n [-1.9052e-07, 3.0003e-07, 4.2088e-11, ..., 5.3748e-07,\n 9.0616e-06, 2.4403e-07],\n [ 1.0096e-07, 3.6192e-08, 0.0000e+00, ..., -4.0276e-08,\n -8.6625e-08, 8.4811e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.7180e-11, 9.6704e-12, 0.0000e+00, ..., 6.1756e-11, 8.3537e-11,\n 7.5953e-11],\n [3.4743e-11, 5.2152e-11, 0.0000e+00, ..., 3.9450e-11, 6.6994e-10,\n 2.2686e-11],\n [1.5295e-10, 1.7517e-11, 9.5494e-17, ..., 3.3178e-11, 2.2306e-10,\n 4.8251e-11],\n ...,\n [6.5790e-11, 6.3602e-11, 0.0000e+00, ..., 2.4873e-11, 2.1711e-10,\n 9.2870e-12],\n [7.1215e-11, 7.2712e-11, 3.3815e-18, ..., 1.1008e-10, 1.3688e-10,\n 1.2221e-11],\n [8.9506e-11, 3.5121e-11, 0.0000e+00, ..., 3.3409e-11, 1.1314e-11,\n 1.1286e-11]], device='cuda:0')" }, - "45": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.2524e-08, 2.0418e-07, 2.5541e-08, ..., 5.2858e-07, 4.9773e-07,\n 2.3050e-07],\n [2.1674e-09, 1.3940e-08, 1.6489e-09, ..., 3.6730e-08, 3.5791e-08,\n 1.5425e-08],\n [1.5170e-09, 8.7802e-09, 1.1882e-09, ..., 2.2879e-08, 2.0315e-08,\n 1.0673e-08],\n [2.3722e-09, 1.5887e-08, 1.8872e-09, ..., 4.0687e-08, 3.9441e-08,\n 1.6857e-08],\n [2.3477e-09, 1.3396e-08, 1.8652e-09, ..., 3.4778e-08, 3.0689e-08,\n 1.6511e-08]], device='cuda:0')" + "9": { + "step": "tensor(3756.)", + "exp_avg": "tensor([ 2.4792e-05, 1.3956e-05, -4.1845e-06, ..., 4.0973e-05,\n -1.0948e-05, 3.6630e-08], device='cuda:0')", + "exp_avg_sq": "tensor([6.9141e-09, 1.2114e-08, 8.5247e-09, ..., 9.6235e-09, 8.3769e-09,\n 5.0240e-09], device='cuda:0')" }, - "46": { + "10": { + "step": "tensor(3756.)", + "exp_avg": "tensor([[-7.2394e-07, 9.9176e-07, 3.3189e-07, ..., 1.7271e-07,\n -1.6048e-07, 1.9659e-07],\n [ 2.8910e-07, 2.1317e-06, 3.0884e-07, ..., -6.8876e-07,\n -9.0864e-08, 3.8183e-07],\n [ 5.6754e-07, -1.7926e-06, 1.3411e-06, ..., -3.3674e-07,\n 2.2415e-07, -2.7177e-07],\n ...,\n [-8.5051e-07, 3.0009e-06, -1.7829e-07, ..., -7.6412e-07,\n 5.3012e-07, -1.4112e-07],\n [ 5.1514e-07, 4.0788e-07, -6.8600e-07, ..., -5.6059e-07,\n -1.2135e-07, -6.5150e-08],\n [-1.0949e-06, 3.6378e-07, -7.0776e-08, ..., 6.4191e-08,\n 5.4157e-07, -3.2797e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.4067e-12, 1.2408e-11, 1.0855e-11, ..., 5.0959e-12, 8.8449e-12,\n 1.4629e-11],\n [6.8501e-12, 2.5491e-11, 1.0946e-11, ..., 1.3110e-11, 9.4138e-12,\n 8.2610e-12],\n [1.0422e-11, 2.5428e-11, 1.3102e-11, ..., 1.3734e-11, 1.3396e-11,\n 1.5140e-11],\n ...,\n [1.7972e-11, 3.0262e-11, 1.4742e-11, ..., 1.4682e-11, 1.1497e-11,\n 3.0129e-11],\n [1.2878e-11, 2.3289e-11, 1.1450e-11, ..., 1.6605e-11, 9.6001e-12,\n 9.2498e-12],\n [8.9619e-12, 2.5655e-11, 1.4756e-11, ..., 2.5977e-11, 1.3260e-11,\n 9.4459e-12]], device='cuda:0')" + }, + "11": { "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.1607e-05, 8.1960e-07, 4.8107e-07, 9.2033e-07, 7.2418e-07],\n device='cuda:0')" + "exp_avg": "tensor([[ 1.2778e-06, 1.0016e-06, 1.4296e-09, ..., 1.3046e-06,\n 3.0553e-06, 5.1079e-06],\n [ 8.0536e-07, -5.7379e-08, 6.2426e-23, ..., 1.0864e-07,\n -5.8014e-07, 2.4104e-06],\n [ 4.7670e-07, 6.8543e-09, 0.0000e+00, ..., 2.0272e-07,\n -4.9184e-06, 3.7464e-08],\n ...,\n [ 3.7641e-07, 3.5784e-09, 5.6052e-45, ..., 3.5863e-09,\n 2.7128e-08, 3.5433e-09],\n [ 2.3868e-07, 7.5190e-08, 0.0000e+00, ..., 2.8456e-06,\n 7.6243e-06, -1.0378e-07],\n [-4.4136e-07, 2.0859e-07, 5.1894e-12, ..., -6.9046e-07,\n -4.3875e-07, 1.5809e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.1974e-11, 1.0273e-10, 3.3333e-15, ..., 7.2439e-10, 2.1659e-10,\n 6.2048e-11],\n [1.9480e-10, 1.6404e-11, 3.0942e-17, ..., 3.5723e-11, 1.8729e-10,\n 1.4771e-10],\n [1.1885e-10, 3.8970e-11, 0.0000e+00, ..., 1.9061e-11, 2.5919e-10,\n 1.5255e-11],\n ...,\n [1.1889e-10, 1.8331e-11, 1.4347e-17, ..., 6.5394e-12, 3.5567e-11,\n 8.2972e-12],\n [7.3297e-11, 3.0906e-11, 0.0000e+00, ..., 2.4757e-10, 1.0390e-09,\n 8.8339e-12],\n [3.3567e-11, 9.7932e-11, 2.4873e-15, ..., 6.1352e-10, 8.3351e-11,\n 1.6817e-11]], device='cuda:0')" }, - "47": { + "12": { "step": "tensor(2504.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.2470e-08, 2.0408e-07, 2.5502e-08, ..., 5.2823e-07, 4.9773e-07,\n 2.3015e-07],\n [2.1664e-09, 1.3939e-08, 1.6482e-09, ..., 3.6723e-08, 3.5791e-08,\n 1.5418e-08],\n [1.5119e-09, 8.7714e-09, 1.1845e-09, ..., 2.2845e-08, 2.0315e-08,\n 1.0640e-08],\n [2.3710e-09, 1.5885e-08, 1.8863e-09, ..., 4.0679e-08, 3.9441e-08,\n 1.6849e-08],\n [2.3389e-09, 1.3381e-08, 1.8587e-09, ..., 3.4721e-08, 3.0689e-08,\n 1.6453e-08]], device='cuda:0')" + "exp_avg": "tensor([ 2.4118e-05, 4.5042e-05, -3.2372e-06, ..., -8.2233e-07,\n 4.6033e-05, -2.9744e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.1918e-08, 1.0005e-08, 9.7556e-09, ..., 6.0090e-09, 1.3704e-08,\n 1.0873e-08], device='cuda:0')" }, - "48": { + "13": { "step": "tensor(2504.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.1607e-05, 8.1958e-07, 4.8098e-07, 9.2031e-07, 7.2403e-07],\n device='cuda:0')" + "exp_avg": "tensor([[-8.3479e-07, 5.4764e-07, -1.0775e-06, ..., 1.1893e-07,\n 4.6882e-07, -8.5413e-07],\n [-3.7459e-08, -3.5614e-07, 7.4919e-07, ..., -8.6935e-07,\n 3.3207e-07, -7.6068e-07],\n [-5.8790e-07, 1.2587e-07, -1.2202e-06, ..., -8.2444e-07,\n -2.7528e-07, -3.6729e-07],\n ...,\n [-1.3363e-06, -1.0929e-06, -4.5763e-07, ..., 4.2793e-07,\n -7.8387e-07, 6.9590e-08],\n [-4.0714e-07, -1.3150e-06, -1.2504e-07, ..., -4.0317e-07,\n 4.5231e-07, -2.0854e-07],\n [ 1.0503e-06, -5.4846e-07, -1.1344e-07, ..., -1.7724e-07,\n -2.3885e-07, -6.0194e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5478e-11, 7.0544e-12, 3.1112e-11, ..., 5.3519e-12, 5.3167e-12,\n 8.8194e-12],\n [1.5035e-11, 9.8603e-12, 4.0443e-11, ..., 1.0026e-11, 6.5719e-12,\n 1.8348e-11],\n [1.5861e-11, 1.0703e-11, 4.2777e-11, ..., 2.0316e-11, 1.0770e-11,\n 1.3061e-11],\n ...,\n [2.0773e-11, 2.2542e-11, 2.2820e-11, ..., 7.9171e-12, 1.1264e-11,\n 1.9694e-11],\n [2.3086e-11, 2.1862e-11, 1.4965e-11, ..., 1.5300e-11, 8.7356e-12,\n 1.6244e-11],\n [1.7134e-11, 1.4242e-11, 9.0803e-12, ..., 8.5805e-12, 7.6396e-12,\n 2.8204e-11]], device='cuda:0')" } }, "param_groups": [ { "lr": 0.005000500000000001, - "name": "scale_256", + "name": "shared", "betas": [ 0.9, 0.999 @@ -218,13 +238,12 @@ "initial_lr": 0.01, "params": [ 0, - 1, - 2 + 1 ] }, { "lr": 0.005000500000000001, - "name": "scale_512", + "name": "scale_384", "betas": [ 0.9, 0.999 @@ -240,9 +259,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 2, 3, - 4, - 5 + 4 ] }, { @@ -263,9 +282,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 5, 6, - 7, - 8 + 7 ] }, { @@ -286,9 +305,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 8, 9, - 10, - 11 + 10 ] }, { @@ -309,9 +328,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 11, 12, - 13, - 14 + 13 ] }, { @@ -332,6 +351,7 @@ "decoupled_weight_decay": true, "initial_lr": 0.005, "params": [ + 14, 15, 16, 17, @@ -360,12 +380,7 @@ 40, 41, 42, - 43, - 44, - 45, - 46, - 47, - 48 + 43 ] } ] @@ -398,20 +413,26 @@ ] }, "metrics": { - "val_acc": 74.24 + "val_acc": 82.08 }, "train_config": { "name": "david_training", - "run_id": "20251012_032356", + "run_id": "20251012_041353", "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", - "model_variant": "clip_vit_b16", + "model_variant": "clip_vit_l14", "num_classes": 1000, - "preset": "high_accuracy", + "preset": "clip_vit_l14", "custom_config_path": null, "num_classes_override": null, "use_belly_override": null, "belly_expand_override": null, "progressive_training_override": true, + "scale_warmup_epochs_override": { + "384": 0, + "768": 1, + "1024": 2, + "1280": 3 + }, "num_epochs": 20, "batch_size": 1024, "learning_rate": 0.01, @@ -428,8 +449,8 @@ "gradient_clip": 5.0, "scheduler_type": "cosine_restarts", "min_lr": 1e-06, - "freeze_strategy": "performance", - "freeze_threshold": 70.0, + "freeze_strategy": "never", + "freeze_threshold": 90.0, "unfreeze_on_plateau": true, "patience": 10, "track_gradients": true,